In [29]:
from fastai.tabular.all import *
from fastai.collab import *

In [30]:
movies_path = '/content/movies.csv'
ratings_path = '/content/ratings.csv'

ratings = pd.read_csv(ratings_path, delimiter=',', skiprows=1, header=None,
                      names=['user','movie','rating','timestamp'])


ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [31]:
pd.crosstab(ratings.user, ratings.movie, values=ratings.rating, aggfunc='sum')

movie,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


In [33]:
movies = pd.read_csv(movies_path, usecols=(0,1), names=('movie','title'), header=None)

movies['movie'] = movies['movie'].astype(str)
ratings['movie'] = ratings['movie'].astype(str)

ratings = ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,1,1,4.0,964982703,Toy Story (1995)
1,5,1,4.0,847434962,Toy Story (1995)
2,7,1,4.5,1106635946,Toy Story (1995)
3,15,1,2.5,1510577970,Toy Story (1995)
4,17,1,4.5,1305696483,Toy Story (1995)


In [34]:
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()

Unnamed: 0,user,title,rating
0,596,The Amazing Spider-Man 2 (2014),2.5
1,586,Pirates of the Caribbean: On Stranger Tides (2011),5.0
2,232,Flags of Our Fathers (2006),3.5
3,358,Atonement (2007),5.0
4,184,Her (2013),4.5
5,239,"Family Man, The (2000)",4.0
6,382,Lucy (2014),3.5
7,249,Gangster Squad (2013),4.0
8,313,Galaxy Quest (1999),4.0
9,188,My Cousin Vinny (1992),5.0


In [48]:
title_to_movieid = dict(zip(movies['title'], movies['movie']))

In [35]:
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.768426,0.8364,00:21
1,0.485517,0.775776,00:21
2,0.365195,0.766665,00:21
3,0.29389,0.751408,00:23
4,0.270617,0.749384,00:20


In [37]:
learn.export('movie-recommender.pkl')

In [38]:
learn = load_learner('movie-recommender.pkl')

learn.model

EmbeddingDotBias(
  (u_weight): Embedding(611, 50)
  (i_weight): Embedding(9720, 50)
  (u_bias): Embedding(611, 1)
  (i_bias): Embedding(9720, 1)
)

In [39]:
user_ratings = [(318, 1), (50, 1), (260, 1), (527, 1), (1721, 1), (1685, 5)]

In [40]:
user_ratings_dicts = []
for (movie_id, rating) in user_ratings:
    user_ratings_dicts.append({"user": 10000, "movie": movie_id, "rating": rating})

new_ratings = pd.concat([ratings, pd.DataFrame(user_ratings_dicts)], ignore_index=True)
new_ratings.tail()

Unnamed: 0,user,movie,rating,timestamp,title
100837,10000,50,1.0,,
100838,10000,260,1.0,,
100839,10000,527,1.0,,
100840,10000,1721,1.0,,
100841,10000,1685,5.0,,


In [41]:
crosstab = pd.crosstab(new_ratings['user'], new_ratings['movie'],
                       values=new_ratings['rating'], aggfunc='sum').fillna(0)
crosstab.tail()

movie,50,260,318,527,1685,1721,1,10,100,100044,...,99750,99764,998,99813,99846,99853,999,99910,99917,99992
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
607,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.0,0.0,0.0,0.0,0.0,0.0,2.5,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
610,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0
10000,1.0,1.0,1.0,1.0,5.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
other_users = crosstab.values[:-1]
new_user = crosstab.values[-1].reshape(1, -1)

similarities = nn.CosineSimilarity()(tensor(other_users), tensor(new_user))
similarities[:5]

tensor([0., 0., 0., 0., 0.])

In [43]:
top5 = similarities.topk(5)
top5

torch.return_types.topk(
values=tensor([0., 0., 0., 0., 0.]),
indices=tensor([3, 4, 1, 0, 2]))

In [44]:
user_vectors = learn.u_weight.weight[1+top5.indices,:]

new_user_vector = user_vectors.mean(dim=0, keepdim=True)
new_user_vector

tensor([[-0.0388, -0.1180,  0.0705,  0.1780,  0.0150, -0.0490, -0.1450,  0.1001,
         -0.1862, -0.0080,  0.2080,  0.0170,  0.1754, -0.0235, -0.0753, -0.1150,
          0.0494, -0.1672, -0.0534,  0.1726,  0.0346,  0.0014,  0.2196,  0.1641,
         -0.1639,  0.1358, -0.0693, -0.0455,  0.0644, -0.0335,  0.0610, -0.0449,
          0.0695, -0.0562,  0.0512, -0.0498,  0.0303, -0.1296,  0.1170, -0.0690,
         -0.0657,  0.0179, -0.1097, -0.0230,  0.0340,  0.2340,  0.0428,  0.1686,
         -0.0322, -0.0562]], grad_fn=<MeanBackward1>)

In [45]:
user_biases = learn.u_bias.weight[1+top5.indices,:]
new_user_bias = user_biases.mean()
new_user_bias

tensor(0.1884, grad_fn=<MeanBackward0>)

In [46]:
pred_ratings = torch.matmul(new_user_vector, learn.i_weight.weight.T) + learn.i_bias.weight.T + new_user_bias
pred_ratings

tensor([[ 0.1913,  0.3141,  0.1874,  ..., -0.0356,  0.4245,  0.1468]],
       grad_fn=<AddBackward0>)

In [49]:
top5_ratings = pred_ratings.topk(5)
recommendations = learn.classes['title'][top5_ratings.indices.tolist()[0]]

# Print the top 5 rated movies
print("Top 5 Rated Movies:")
for i, title in enumerate(recommendations):
    movie_id = title_to_movieid.get(title, "Unknown")
    print(f"{i+1}. Movie ID: {movie_id}, Title: {title}")


Top 5 Rated Movies:
1. Movie ID: 912, Title: Casablanca (1942)
2. Movie ID: 3508, Title: Outlaw Josey Wales, The (1976)
3. Movie ID: 1193, Title: One Flew Over the Cuckoo's Nest (1975)
4. Movie ID: 1276, Title: Cool Hand Luke (1967)
5. Movie ID: 260, Title: Star Wars: Episode IV - A New Hope (1977)
