# Collaborative Filtering Recommender Systems (Movie Recommendation)


In [11]:
import torch
from utils import load_Movie_List_pd, load_ratings_small, normalizeRatings

## Prepare Data


In [12]:
# Y holds the ratings with one row per movie and one column per user.
# R has the same shape and is used further down as a mask selecting the
# entries of Y that were actually rated.
Y, R = load_ratings_small()

num_movies, num_users = Y.shape
num_features = 10

print(f"Y {Y.shape} R {R.shape}")
print(f"num_features {num_features}")
print(f"num_movies {num_movies}")
print(f"num_users {num_users}")

Y torch.Size([4778, 443]) R torch.Size([4778, 443])
num_features 10
num_movies 4778
num_users 443


In [13]:
# Example statistic: the average rating of the first movie (row 0), taken
# only over the users whose entry in R is non-zero for that movie.
tsmean = Y[0][R[0].bool()].mean()
print(f"Average rating for movie 1 : {tsmean:0.3f} / 5")

Average rating for movie 1 : 3.400 / 5


### New user ratings


In [14]:
movieList, movieList_df = load_Movie_List_pd()

# Ratings given by the new user, keyed by movie id. Check the file
# small_movie_list.csv for the id of each movie in the dataset.
# Movies that are not listed keep a rating of 0.
new_user_ratings = {
    2700: 5,  # Toy Story 3 (2010)
    2609: 2,  # Persuasion (2007)
    929: 5,  # Lord of the Rings: The Return of the King, The
    246: 5,  # Shrek (2001)
    2716: 3,  # Inception
    1150: 5,  # Incredibles, The (2004)
    382: 2,  # Amelie (Fabuleux destin d'Amélie Poulain, Le)
    366: 5,  # Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
    622: 5,  # Harry Potter and the Chamber of Secrets (2002)
    988: 3,  # Eternal Sunshine of the Spotless Mind (2004)
    2925: 1,  # Louis Theroux: Law & Disorder (2008)
    2937: 1,  # Nothing to Declare (Rien à déclarer)
    793: 5,  # Pirates of the Caribbean: The Curse of the Black Pearl (2003)
}

my_ratings = torch.zeros(num_movies)  # one slot per movie, 0 everywhere to start
for movie_id, rating in new_user_ratings.items():
    my_ratings[movie_id] = rating

# Ids of the movies with a positive rating, in ascending order.
my_rated = torch.nonzero(my_ratings > 0).flatten().tolist()

print("\nNew user ratings:\n")
for movie_id in my_rated:
    print(f'Rated {my_ratings[movie_id]} for  {movieList_df.loc[movie_id,"title"]}')


New user ratings:

Rated 5.0 for  Shrek (2001)
Rated 5.0 for  Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Rated 2.0 for  Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
Rated 5.0 for  Harry Potter and the Chamber of Secrets (2002)
Rated 5.0 for  Pirates of the Caribbean: The Curse of the Black Pearl (2003)
Rated 5.0 for  Lord of the Rings: The Return of the King, The (2003)
Rated 3.0 for  Eternal Sunshine of the Spotless Mind (2004)
Rated 5.0 for  Incredibles, The (2004)
Rated 2.0 for  Persuasion (2007)
Rated 5.0 for  Toy Story 3 (2010)
Rated 3.0 for  Inception (2010)
Rated 1.0 for  Louis Theroux: Law & Disorder (2008)
Rated 1.0 for  Nothing to Declare (Rien à déclarer) (2010)


### Add the new user's ratings to the dataset


In [15]:
# Prepend the new user's ratings as column 0 of Y, and the matching
# rated / not-rated flags (rating != 0) as column 0 of R.
# NOTE: Y and R are rebound in place, so running this cell a second time
# prepends another copy of the same column.
Y = torch.hstack((my_ratings[:, None], Y))
R = torch.hstack(((my_ratings != 0).int()[:, None], R))

print(Y.shape)

torch.Size([4778, 444])


### Normalize Ratings


In [16]:
# Normalize the ratings before training: Ynorm is the training target, and
# Ymean is added back to the model output when predictions are made.
# NOTE(review): presumably Ymean is the per-movie mean over rated entries and
# Ynorm is Y with that mean subtracted -- confirm against utils.normalizeRatings.
Ynorm, Ymean = normalizeRatings(Y, R)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], dtype=torch.float64)


## Define the model


### Cost Function


In [17]:
def cost_func(X, W, b, Y, R, lambda_):
    """Regularized squared-error cost for collaborative filtering.

    The prediction matrix is ``X @ W.T + b``. Its difference from ``Y`` is
    multiplied element-wise by ``R``, so entries where ``R`` is 0 contribute
    nothing to the error term.

    Args:
        X: feature matrix whose rows are matched against the rows of ``W``
            (in this notebook: one row of features per movie).
        W: weight matrix (in this notebook: one row of weights per user).
        b: bias added to every row of ``X @ W.T`` (in this notebook: shape
            ``(1, num_users)``).
        Y: target ratings, same shape as ``X @ W.T``.
        R: mask with the same shape as ``Y``; non-zero where a rating exists.
        lambda_: regularization strength applied to ``X`` and ``W`` (``b`` is
            not regularized).

    Returns:
        Scalar tensor: half the sum of squared masked errors plus
        ``lambda_ / 2`` times the sum of squared entries of ``X`` and ``W``.
    """
    masked_error = (X @ W.T + b - Y) * R
    fit_term = 0.5 * masked_error.pow(2).sum()
    reg_term = (lambda_ / 2) * (X.pow(2).sum() + W.pow(2).sum())
    return fit_term + reg_term

### Parameters


In [18]:
# Fix the RNG seed so the random initial values drawn below are reproducible.
torch.manual_seed(9)
# Re-read the dimensions from Y, which gained a column when the new user's
# ratings were prepended.
num_users = Y.shape[1]
num_movies = Y.shape[0]
# NOTE: this replaces the value 10 assigned to num_features in the
# data-preparation cell; 100 is the size actually used for training.
num_features = 100

# Trainable parameters. The values obtained under the fixed seed depend on
# the order and shapes of these three draws.
# W: one row of weights per user, drawn uniformly from [0, 1).
W = torch.nn.Parameter(torch.rand(num_users, num_features))
# X: one row of features per movie, drawn from a standard normal distribution.
X = torch.nn.Parameter(torch.randn(num_movies, num_features))
# b: one bias per user, drawn uniformly from [0, 1).
b = torch.nn.Parameter(torch.rand(1, num_users))

# Adam updates all three parameter tensors with a learning rate of 0.1.
optimizer = torch.optim.Adam(params=[X, W, b], lr=0.1)

### Training loop


In [19]:
iterations = 200
lambda_ = 1

# Full-batch training: each step evaluates the cost on the whole normalized
# rating matrix, back-propagates, and lets the optimizer update X, W and b.
# The loss printed for a step is the value computed before that step's update.
for step in range(1, iterations + 1):
    optimizer.zero_grad()  # clear gradients accumulated by the previous step
    loss = cost_func(X, W, b, Ynorm, R, lambda_)
    loss.backward()
    optimizer.step()
    print(f"iteration {step}, loss {loss.item()}")

iteration 1, loss 907413.8224812628
iteration 2, loss 472059.04349389113
iteration 3, loss 393646.1438932706
iteration 4, loss 371618.7096401892
iteration 5, loss 335881.6126616173


iteration 6, loss 288678.05787695263
iteration 7, loss 246713.86581223033
iteration 8, loss 216629.77411275686
iteration 9, loss 196190.0255360391
iteration 10, loss 181063.21661795728
iteration 11, loss 168276.58348031927
iteration 12, loss 156558.83810750395
iteration 13, loss 145628.71112862014
iteration 14, loss 135545.85907648862
iteration 15, loss 126368.81787886874
iteration 16, loss 118054.32230331092
iteration 17, loss 110485.52060283281
iteration 18, loss 103536.18552830801
iteration 19, loss 97114.46109027092
iteration 20, loss 91171.1069433453
iteration 21, loss 85685.37277891516
iteration 22, loss 80645.14614747724
iteration 23, loss 76031.53024988665
iteration 24, loss 71811.81335169122
iteration 25, loss 67941.4039534928
iteration 26, loss 64371.83558020815
iteration 27, loss 61059.70256260285
iteration 28, loss 57972.30458878337
iteration 29, loss 55088.48680227787
iteration 30, loss 52396.050906194025
iteration 31, loss 49887.64096793784
iteration 32, loss 47556.560888

## Make Recommendations


In [20]:
# p holds the model output for every (movie, user) pair; no gradients are
# needed for inference.
with torch.no_grad():
    p = X @ W.T + b
# Add Ymean back to undo the normalization applied before training.
pm = p + Ymean

# The new user's ratings were inserted as column 0.
my_predictions = pm[:, 0]

# Movie indices ordered from the highest to the lowest predicted rating.
ix = my_predictions.argsort(descending=True)

# Scan the 17 highest predictions and report those the user has not rated.
# Rated movies are skipped, so fewer than 17 lines may be printed.
for rank in range(17):
    j = ix[rank].item()
    if j in my_rated:
        continue
    print(f"Predicting rating {my_predictions[j]:0.2f} for movie {movieList[j]}")

# Compare the model output with the ratings the new user actually gave.
print("\n\nOriginal vs Predicted ratings:\n")
for i in my_rated:
    print(
        f"Original {my_ratings[i]}, Predicted {my_predictions[i]:0.2f} for {movieList[i]}"
    )

Predicting rating 4.59 for movie Dark Knight, The (2008)
Predicting rating 4.29 for movie Little Miss Sunshine (2006)
Predicting rating 4.05 for movie Colourful (Karafuru) (2010)
Predicting rating 4.04 for movie Shaun of the Dead (2004)
Predicting rating 4.04 for movie Deathgasm (2015)
Predicting rating 4.04 for movie 'Salem's Lot (2004)
Predicting rating 4.03 for movie Odd Life of Timothy Green, The (2012)
Predicting rating 4.03 for movie Particle Fever (2013)
Predicting rating 4.02 for movie I'm the One That I Want (2000)
Predicting rating 4.02 for movie Nine Lives of Tomas Katz, The (2000)


Original vs Predicted ratings:

Original 5.0, Predicted 4.93 for Shrek (2001)
Original 5.0, Predicted 4.94 for Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Original 2.0, Predicted 2.10 for Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
Original 5.0, Predicted 4.89 for Harry Potter and the Chamber of Secrets (2002)
Original 5.0, Predicted 4.