In [None]:
from fastai.tabular.all import *
from fastai.collab import *

In [None]:
# Resolve the local directory that holds the MovieLens 100k files.
path = untar_data(URLs.ML_100k)

# u.data is read as a tab-separated file without a header row, so the column
# names are supplied explicitly.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    header=None,
    names=['user', 'movie', 'rating', 'timestamp'],
)
ratings.head()

In [None]:
# User x movie matrix of ratings (rows are users, columns are movie ids).
pd.crosstab(
    index=ratings['user'],
    columns=ratings['movie'],
    values=ratings['rating'],
    aggfunc='sum',
)

In [None]:
# u.item is read as a pipe-separated, latin-1 encoded file without a header;
# only its first two columns (movie id and title) are kept.
movies = pd.read_csv(
    path/'u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=(0, 1),
    names=('movie', 'title'),
)

# Attach a title to every rating by joining on the columns both frames share.
ratings = ratings.merge(movies)
ratings.head()

In [None]:
# Collaborative-filtering batches: the title column is used as the item, and the
# user / rating columns are named explicitly rather than left to their defaults.
dls = CollabDataLoaders.from_df(
    ratings,
    user_name='user',
    item_name='title',
    rating_name='rating',
    bs=64,
)
dls.show_batch()

In [None]:
# Collaborative-filtering learner with 50 latent factors; y_range bounds the
# predicted ratings to (0, 5.5).
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))

# Train for 5 epochs with a one-cycle schedule, peak learning rate 5e-3 and
# weight decay 0.1.
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.1)

In [None]:
# Save the trained learner to disk so it can be reloaded without retraining.
learn.export(fname='movie-recommender.pkl')

In [None]:
# Restore the learner from the exported file.
# NOTE(review): load_learner presumably unpickles the file, so only load
# exports that come from a trusted source - confirm.
learn = load_learner(fname='movie-recommender.pkl')

# Show the restored model's layers.
learn.model

In [None]:
# (movie id, rating) pairs describing the new user's tastes.
# NOTE(review): ML-100k item ids are believed to run from 1 to 1682, so 1721 and
# 1685 may not match any title in this dataset (the ids look like they come from
# a different MovieLens release) - confirm the intended movies.
user_ratings = [
    (318, 1),
    (50, 1),
    (260, 1),
    (527, 1),
    (1721, 1),
    (1685, 5),
]

In [None]:
# One row per (movie id, rating) pair, all attributed to a synthetic user with
# id 10000.
user_ratings_dicts = [
    {"user": 10000, "movie": movie_id, "rating": rating}
    for movie_id, rating in user_ratings
]

# Append the synthetic user's rows after the existing ratings and renumber the
# index; columns the new rows do not provide are left missing.
new_user_rows = pd.DataFrame(user_ratings_dicts)
new_ratings = pd.concat([ratings, new_user_rows], ignore_index=True)
new_ratings.tail()

In [None]:
# User x movie rating matrix that now includes the synthetic user; user/movie
# pairs without a rating are filled with 0.
crosstab = pd.crosstab(
    index=new_ratings['user'],
    columns=new_ratings['movie'],
    values=new_ratings['rating'],
    aggfunc='sum',
).fillna(0)
crosstab.tail()

In [None]:
# The last crosstab row is taken as the new user; every row above it is treated
# as an existing user.
rating_matrix = crosstab.values
other_users = rating_matrix[:-1]
new_user = rating_matrix[-1:]  # slicing keeps the 2-D (1, n_movies) shape

# Cosine similarity between the new user's row and each existing user's row.
similarities = torch.nn.functional.cosine_similarity(tensor(other_users), tensor(new_user))
similarities[:5]

In [None]:
# The five largest similarities together with their row positions in `similarities`.
top5 = torch.topk(similarities, k=5)
top5

In [None]:
# Translate the crosstab row positions of the nearest neighbours back into user
# ids, then into embedding rows through the learner's own user vocabulary.
# A fixed "+1" offset is only right when user ids are contiguous from 1 and every
# user made it into the vocabulary; the explicit lookup does not rely on that.
# Ids unknown to the vocabulary fall back to row 0 (fastai's '#na#' placeholder).
neighbour_ids = crosstab.index[:-1][top5.indices.tolist()]
user_vocab = learn.classes['user']
neighbour_rows = [user_vocab.o2i.get(user_id, 0) for user_id in neighbour_ids]
user_vectors = learn.u_weight.weight[neighbour_rows, :]

# The new user's latent vector is the mean of the neighbours' vectors, kept 2-D
# (one row) so it can be matrix-multiplied with the item embeddings.
new_user_vector = user_vectors.mean(dim=0, keepdim=True)
new_user_vector

In [None]:
# Look up the nearest neighbours' bias rows through the learner's user
# vocabulary instead of assuming "row position + 1 == embedding row", which only
# holds when user ids are contiguous from 1 and all present in the vocabulary.
# Ids unknown to the vocabulary fall back to row 0 (fastai's '#na#' placeholder).
neighbour_ids = crosstab.index[:-1][top5.indices.tolist()]
user_vocab = learn.classes['user']
neighbour_rows = [user_vocab.o2i.get(user_id, 0) for user_id in neighbour_ids]
user_biases = learn.u_bias.weight[neighbour_rows, :]

# The new user's bias is the mean of the neighbours' biases (a scalar tensor).
new_user_bias = user_biases.mean()
new_user_bias

In [None]:
# Score every item for the new user: dot product of the user vector with each
# item vector, plus the item bias and the user bias. No gradients are needed for
# inference, so the computation runs under no_grad.
with torch.no_grad():
    raw_scores = (
        torch.matmul(new_user_vector, learn.i_weight.weight.T)
        + learn.i_bias.weight.T
        + new_user_bias
    )

    # The learner was created with a y_range, and fastai's dot-product model
    # squashes its output into that range with a scaled sigmoid. Apply the same
    # squashing so the values are on the rating scale rather than raw scores.
    # The transform is monotonic, so the ranking of items is unchanged.
    y_range = getattr(learn.model, 'y_range', None)
    if y_range is None:
        pred_ratings = raw_scores
    else:
        low, high = y_range
        pred_ratings = torch.sigmoid(raw_scores) * (high - low) + low

pred_ratings

In [None]:
# Title vocabulary; position i is the title scored in column i of pred_ratings.
title_classes = learn.classes['title']

# Columns that must never be recommended: row 0 (fastai's '#na#' placeholder for
# unknown items) and every title the new user has already rated.
rated_movie_ids = [movie_id for movie_id, _ in user_ratings]
rated_titles = ratings.loc[ratings['movie'].isin(rated_movie_ids), 'title'].unique()
excluded_cols = sorted({0} | {title_classes.o2i.get(title, 0) for title in rated_titles})

# Mask the excluded columns on a copy so pred_ratings itself is left untouched.
candidate_scores = pred_ratings.detach().clone()
candidate_scores[0, excluded_cols] = float('-inf')

top5_ratings = candidate_scores.topk(5)
recommendations = title_classes[top5_ratings.indices.tolist()[0]]

for rank, movie in enumerate(recommendations, start=1):
    print(f'{rank}. {movie}')

1. Schindler's List (1993)
2. Shawshank Redemption, The (1994)
3. Good Will Hunting (1997)
4. Star Wars (1977)
5. Wallace & Gromit: The Best of Aardman Animation (1996)
