In [1]:
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds

%config Completer.use_jedi = False

In [2]:
raw_ratings = pd.read_csv('./ratings.csv')

In [3]:
raw_ratings.head(5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


We need to pivot the data so that we get m x n matrix (rows = users, columns = movies)

In [4]:
ratings = raw_ratings.pivot_table(index='userId', columns='movieId', values='rating')
ratings

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


Perfect. Let's do mean-centering now. We'll use the row-wise mean value.

In [5]:
users_mean_ratings = ratings.mean(axis=1)
users_mean_ratings.head(3)

userId
1    4.366379
2    3.948276
3    2.435897
dtype: float64

In [6]:
users_mean_ratings

userId
1      4.366379
2      3.948276
3      2.435897
4      3.555556
5      3.636364
         ...   
606    3.657399
607    3.786096
608    3.134176
609    3.270270
610    3.688556
Length: 610, dtype: float64

Perfect. Let's normalize our ratings now.

In [50]:
normalized_ratings = ratings.apply(lambda x: x - users_mean_ratings[x.name], axis=1)
normalized_ratings[:4]

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.366379,,-0.366379,,,-0.366379,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,


Finally, let's fill in the NaN values with zeros.

In [52]:
normalized_ratings = normalized_ratings.fillna(0)
normalized_ratings.iloc[:4]

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.366379,0.0,-0.366379,0.0,0.0,-0.366379,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Perfect. We're ready to decompose and recompose our matrix now ;-)

In [9]:
U, sigma, Vt = svds(normalized_ratings.values, k = 2)

In [10]:
sigma = np.diag(sigma)

In [11]:
np.dot(np.dot(U, sigma), Vt)[0, 0:6] + users_mean_ratings[1]

array([4.44371748, 4.3414349 , 4.35635076, 4.3602882 , 4.3101328 ,
       4.41549084])

In [12]:
U, sigma, Vt = svds(normalized_ratings.values, k = 6)

In [13]:
sigma = np.diag(sigma)

In [14]:
np.dot(np.dot(U, sigma), Vt)[0, 0:6] + users_mean_ratings[1]

array([4.46231366, 4.34918476, 4.38913168, 4.35582835, 4.3048888 ,
       4.3799541 ])

In [53]:
U, sigma, Vt = svds(normalized_ratings.values, k = 50)

In [54]:
sigma = np.diag(sigma)

In [58]:
U[:3, :3]

array([[ 2.97402022e-02, -5.88769126e-02,  2.15642139e-02],
       [-2.86112252e-03, -6.64369792e-05,  9.38051346e-04],
       [-1.02969292e-02, -1.74163031e-02,  9.68613144e-04]])

In [59]:
sigma[:3, :3]

array([[20.52281041,  0.        ,  0.        ],
       [ 0.        , 20.66212229,  0.        ],
       [ 0.        ,  0.        , 20.91516737]])

In [60]:
Vt[:3, :3]

array([[-0.03090522,  0.00349103,  0.01632729],
       [ 0.03298506, -0.00216567, -0.04372385],
       [ 0.05095091, -0.0211382 , -0.01175884]])

In [61]:
predicted = np.dot(np.dot(U, sigma), Vt) + users_mean_ratings.values.reshape(-1, 1)
predicted.round(2)

array([[4.31, 4.3 , 4.41, ..., 4.37, 4.37, 4.37],
       [3.95, 3.93, 3.94, ..., 3.95, 3.95, 3.95],
       [2.43, 2.47, 2.4 , ..., 2.44, 2.44, 2.44],
       ...,
       [2.41, 1.93, 1.9 , ..., 3.13, 3.13, 3.12],
       [3.32, 3.27, 3.28, ..., 3.27, 3.27, 3.27],
       [5.04, 3.69, 3.64, ..., 3.69, 3.69, 3.7 ]])

In [18]:
predicted[0][20:30]

array([4.41941476, 4.29271834, 4.32466399, 4.28243484, 4.39922809,
       4.35227507, 4.36769508, 4.33414686, 4.44916955, 4.35175148])

In [19]:
# Let's try to evaluate all this ... First, split into train and test sets. Then train on TRAIN set, and then 
# use that to validate against test set

In [20]:
from sklearn.model_selection import train_test_split

In [21]:
raw_train_data, raw_test_data = train_test_split(raw_ratings, test_size=0.2)

In [22]:
raw_train_data.shape

(80668, 4)

In [23]:
raw_test_data.shape

(20168, 4)

Rebuild a matrix out of train data.

In [24]:
train_data = raw_train_data.pivot_table(index='userId', columns='movieId', values='rating')
train_data

movieId,1,2,3,4,5,6,7,8,9,10,...,191005,193565,193567,193571,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,,2.0,,,,,,,,,...,,,,,,,,,,
609,,,,,,,,,,4.0,...,,,,,,,,,,


In [25]:
users_mean_ratings = train_data.mean(axis=1)

In [26]:
normalized_train_data = train_data.apply(lambda x: x - users_mean_ratings[x.name], axis=1).fillna(0)
normalized_train_data

movieId,1,2,3,4,5,6,7,8,9,10,...,191005,193565,193567,193571,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.342105,0.000000,-0.342105,0.0,0.0,-0.342105,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.275000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,-1.159066,0.000000,0.000000,0.0,0.0,0.000000,-1.159066,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,0.270270,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.000000,-1.136029,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.677419,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
U, sigma, Vt = svds(normalized_train_data.values, k = 50)
sigma = np.diag(sigma)
predicted = np.dot(np.dot(U, sigma), Vt) + users_mean_ratings.values.reshape(-1, 1)

In [28]:
train_data_predicted = pd.DataFrame(index=train_data.index, columns=train_data.columns, data=predicted)
train_data_predicted

movieId,1,2,3,4,5,6,7,8,9,10,...,191005,193565,193567,193571,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.410727,4.345313,4.402501,4.305192,4.318092,4.148316,4.191991,4.347755,4.358760,4.177399,...,4.341722,4.342184,4.342414,4.341953,4.342184,4.341953,4.342184,4.342184,4.342184,4.339908
2,4.022523,4.013502,4.016066,4.021147,4.031499,4.018593,4.022639,4.023485,4.021355,4.036409,...,4.021962,4.021693,4.021559,4.021828,4.021693,4.021828,4.021693,4.021693,4.021693,4.021915
3,2.130764,2.210005,2.189340,2.188266,2.208069,2.139449,2.181463,2.203075,2.183673,2.198490,...,2.189506,2.189686,2.189776,2.189596,2.189686,2.189596,2.189686,2.189686,2.189686,2.190258
4,3.689834,3.449239,3.661482,3.546685,3.671706,3.522065,3.417272,3.485580,3.569415,3.553803,...,3.568452,3.573032,3.575322,3.570742,3.573032,3.570742,3.573032,3.573032,3.573032,3.565302
5,3.841357,3.704942,3.691918,3.730871,3.702952,3.849667,3.721585,3.717701,3.709047,3.730524,...,3.725250,3.724949,3.724798,3.725099,3.724949,3.725099,3.724949,3.724949,3.724949,3.726161
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.008559,3.311410,3.528014,3.685726,3.662437,3.574083,2.636622,3.743594,3.694575,3.809236,...,3.656720,3.659545,3.660958,3.658133,3.659545,3.658133,3.659545,3.659545,3.659545,3.662659
607,3.655580,3.749001,3.930045,3.773304,3.798414,3.764450,3.680760,3.712295,3.706167,3.666946,...,3.730252,3.729623,3.729309,3.729937,3.729623,3.729937,3.729623,3.729623,3.729623,3.729321
608,2.981917,1.971070,3.018113,3.143401,3.138173,3.288238,3.146395,3.143393,3.116502,3.458781,...,3.134497,3.136343,3.137266,3.135420,3.136343,3.135420,3.136343,3.136343,3.136343,3.126694
609,3.376036,3.312668,3.321776,3.313954,3.321762,3.366356,3.344640,3.322053,3.312425,3.353693,...,3.322926,3.322510,3.322302,3.322718,3.322510,3.322718,3.322510,3.322510,3.322510,3.323477


In [29]:
train_data.head(3)

movieId,1,2,3,4,5,6,7,8,9,10,...,191005,193565,193567,193571,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,


In [30]:
train_data_predicted.head(3)

movieId,1,2,3,4,5,6,7,8,9,10,...,191005,193565,193567,193571,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.410727,4.345313,4.402501,4.305192,4.318092,4.148316,4.191991,4.347755,4.35876,4.177399,...,4.341722,4.342184,4.342414,4.341953,4.342184,4.341953,4.342184,4.342184,4.342184,4.339908
2,4.022523,4.013502,4.016066,4.021147,4.031499,4.018593,4.022639,4.023485,4.021355,4.036409,...,4.021962,4.021693,4.021559,4.021828,4.021693,4.021828,4.021693,4.021693,4.021693,4.021915
3,2.130764,2.210005,2.18934,2.188266,2.208069,2.139449,2.181463,2.203075,2.183673,2.19849,...,2.189506,2.189686,2.189776,2.189596,2.189686,2.189596,2.189686,2.189686,2.189686,2.190258


So, let's try evaluating the model now. Do note that I will be skipping movies that were not in train data. I'm aware this introduces a certain bias in my results but I was more interested in seeing how this works.

In [31]:
# These two will hold predicted vs actual data

actual_values = []
predicted_values = []

In [32]:
raw_test_data.head(3)

Unnamed: 0,userId,movieId,rating,timestamp
69161,448,3175,4.0,1019125340
38923,268,898,5.0,940181021
26974,182,6285,4.0,1063274649


In [33]:
from math import sqrt
from sklearn.metrics import mean_squared_error

In [34]:
i = 0

for key, row in raw_test_data.iterrows():
    userId = row['userId']
    movieId = row['movieId']
    rating = row['rating']
    
    # First, let's see if this movie exists in train data
    
    if movieId not in train_data.columns:
        continue
        
    predicted = train_data_predicted.loc[userId][movieId]
    
    #print("User ID: {}, Movie ID: {}, Predicted: {}, Actual: {}".format(userId, movieId, predicted, rating))
    
    predicted_values.append(predicted)
    actual_values.append(rating)

In [35]:
sqrt(mean_squared_error(actual_values, predicted_values))

0.9197438500627968

Well, that's the best MSE I've ever got so far ... Pretty nice!

Finally, let's build a predicter system, just for the fun of doing it. I'll use the full data set for the sake of "wht the hell not" :)

In [36]:
users_mean_ratings = ratings.mean(axis=1)
normalized_ratings = ratings.apply(lambda x: x - users_mean_ratings[x.name], axis=1).fillna(0)
normalized_ratings.head(3)

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.366379,0.0,-0.366379,0.0,0.0,-0.366379,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
U, sigma, Vt = svds(normalized_ratings.values, k = 2)
sigma = np.diag(sigma)
predicted = np.dot(np.dot(U, sigma), Vt) + users_mean_ratings.values.reshape(-1, 1)

predicted = pd.DataFrame(index=ratings.index, columns=ratings.columns, data=predicted)
predicted.head(3)

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.443717,4.341435,4.356351,4.360288,4.310133,4.415491,4.339169,4.357135,4.354477,4.357889,...,4.366355,4.366297,4.366414,4.366414,4.366355,4.366414,4.366355,4.366355,4.366355,4.366679
2,3.950691,3.950173,3.948489,3.948707,3.953105,3.948302,3.950168,3.948705,3.948743,3.954024,...,3.948268,3.94825,3.948287,3.948287,3.948268,3.948287,3.948268,3.948268,3.948268,3.948285
3,2.433803,2.442137,2.437264,2.437355,2.451117,2.431433,2.442337,2.43764,2.437965,2.448633,...,2.435884,2.435851,2.435917,2.435917,2.435884,2.435917,2.435884,2.435884,2.435884,2.435889


In [38]:
movies = pd.read_csv('./movies.csv')

In [39]:
def get_movie_title(id):
    return movies.loc[id]['title']

def get_movie_genre(id):
    return movies.loc[id]['genres']

In [44]:
def get_movies_to_watch(userId, k=10):
    # Get a list of movies that this user DID NOT watch
    unrated_movies = ratings.loc[userId][pd.isna(ratings.loc[1])].index.values
    
    # Get predicted ratings for unrated movies, sort and return the top-k
    sorted_unrated_movies = predicted.loc[userId][unrated_movies].sort_values(ascending=False)
    
    return sorted_unrated_movies[:k]

In [41]:
target_user = 1 # user for whom we want to predict movies to watch

Let's first list some of the movies that he liked the most 

In [42]:
for movieId in ratings.loc[target_user].dropna().sort_values(ascending=False).index[0:20]:
    print("{} ({})".format(get_movie_title(movieId), get_movie_genre(movieId)))

Dark Days (2000) (Documentary)
Artemisia (1997) (Drama)
NeverEnding Story, The (1984) (Adventure|Children|Fantasy)
Snake Eyes (1998) (Action|Crime|Mystery|Thriller)
Man with Two Brains, The (1983) (Comedy)
Great Mouse Detective, The (1986) (Action|Animation|Children|Crime)
Big Lebowski, The (1998) (Comedy|Crime)
Titanic (1997) (Drama|Romance)
Flubber (1997) (Children|Comedy|Fantasy)
Alien: Resurrection (1997) (Action|Horror|Sci-Fi)
Tales from the Darkside: The Movie (1990) (Fantasy|Horror|Thriller)
I Love You, I Love You Not (1996) (Drama|Romance)
Deceiver (1997) (Crime|Drama|Thriller)
Fast, Cheap & Out of Control (1997) (Documentary)
U Turn (1997) (Crime|Drama|Mystery)
Game, The (1997) (Drama|Mystery|Thriller)
Wishmaster (1997) (Horror)
Kiss the Girls (1997) (Crime|Drama|Mystery|Thriller)
Wrongfully Accused (1998) (Action|Comedy)
Rambo III (1988) (Action|Adventure|Thriller|War)


Judging by looking at movie list, looks like he's a fan of Actio and Comedy I guess? Let's see what the predicted stuff is

In [45]:
for movieId in get_movies_to_watch(target_user, 20).index:
    print("{} ({})".format(get_movie_title(movieId), get_movie_genre(movieId)))

I Love Trouble (1994) (Action|Comedy)
Escape from New York (1981) (Action|Adventure|Sci-Fi|Thriller)
Zapped! (1982) (Comedy|Sci-Fi)
Soul Food (1997) (Drama)
Animal Crackers (1930) (Comedy|Musical)
Last Dance (1996) (Drama)
Jack and Sarah (1995) (Romance)
Pompatus of Love, The (1996) (Comedy|Drama)
Babe (1995) (Children|Drama)
Oklahoma! (1955) (Musical|Romance|Western)
Grand Day Out with Wallace and Gromit, A (1989) (Adventure|Animation|Children|Comedy|Sci-Fi)
TiMER (2009) (Comedy|Drama|Fantasy|Romance)
Georgy Girl (1966) (Comedy)
Surrogates (2009) (Action|Sci-Fi|Thriller)
Desperate Measures (1998) (Crime|Drama|Thriller)
Pride and Glory (2008) (Crime|Drama)
Underneath (1995) (Mystery|Thriller)
Wings of Desire (Himmel über Berlin, Der) (1987) (Drama|Fantasy|Romance)
King Solomon's Mines (1985) (Adventure|Comedy)
Withnail & I (1987) (Comedy)


Well, I hope that he'd like these :-)