In [14]:
import numpy as np
import scipy.sparse.linalg as svds
import pandas as pd
import matplotlib.pyplot as plt

file = 'ratings.csv'
df = pd.read_csv(file)
ratings_matrix = df.pivot(index='userId', columns='movieId', values='rating')
ratings_matrix = ratings_matrix.dropna(thresh=20, axis=0)
ratings_matrix = ratings_matrix.dropna(thresh=30, axis=1)
print(ratings_matrix)

movieId  1       2       3       5       6       7       10      11      \
userId                                                                    
1           4.0     NaN     4.0     NaN     4.0     NaN     NaN     NaN   
2           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
3           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
4           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
5           4.0     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
...         ...     ...     ...     ...     ...     ...     ...     ...   
606         2.5     NaN     NaN     NaN     NaN     2.5     NaN     2.5   
607         4.0     NaN     NaN     NaN     NaN     NaN     NaN     3.0   
608         2.5     2.0     2.0     NaN     NaN     NaN     4.0     NaN   
609         3.0     NaN     NaN     NaN     NaN     NaN     4.0     NaN   
610         5.0     NaN     NaN     NaN     5.0     NaN     NaN     NaN   

movieId  16      17     

In [15]:
ratings_matrix_filled = ratings_matrix.fillna(2.5)
print(ratings_matrix_filled)

movieId  1       2       3       5       6       7       10      11      \
userId                                                                    
1           4.0     2.5     4.0     2.5     4.0     2.5     2.5     2.5   
2           2.5     2.5     2.5     2.5     2.5     2.5     2.5     2.5   
3           2.5     2.5     2.5     2.5     2.5     2.5     2.5     2.5   
4           2.5     2.5     2.5     2.5     2.5     2.5     2.5     2.5   
5           4.0     2.5     2.5     2.5     2.5     2.5     2.5     2.5   
...         ...     ...     ...     ...     ...     ...     ...     ...   
606         2.5     2.5     2.5     2.5     2.5     2.5     2.5     2.5   
607         4.0     2.5     2.5     2.5     2.5     2.5     2.5     3.0   
608         2.5     2.0     2.0     2.5     2.5     2.5     4.0     2.5   
609         3.0     2.5     2.5     2.5     2.5     2.5     4.0     2.5   
610         5.0     2.5     2.5     2.5     5.0     2.5     2.5     2.5   

movieId  16      17     

In [11]:
R = ratings_matrix_filled.values
user_ratings_mean = np.mean(R, axis=1)
R_demeaned = R - user_ratings_mean.reshape(-1, 1)
U, sigma, Vt = svds.svds(R_demeaned, k=3)
sigma = np.diag(sigma)
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)
preds_df = pd.DataFrame(all_user_predicted_ratings, columns=ratings_matrix.columns, index=ratings_matrix.index)
print(preds_df)

movieId    1         2         3         5         6         7         10      \
userId                                                                          
1        3.707850  2.827957  2.777893  2.631670  3.291676  2.692022  3.025521   
2        2.534508  2.527637  2.462568  2.479260  2.509726  2.471650  2.514610   
3        2.461544  2.484399  2.480570  2.485734  2.470234  2.483580  2.478184   
4        2.785833  2.468483  2.589668  2.510408  2.783238  2.536038  2.553456   
5        2.984992  2.719260  2.610321  2.578045  2.708899  2.600495  2.778583   
...           ...       ...       ...       ...       ...       ...       ...   
606      3.360156  2.570422  2.615463  2.490401  3.280641  2.520478  2.721940   
607      3.557016  2.909906  2.743169  2.652715  3.048024  2.702233  3.053794   
608      3.438024  2.857826  2.590427  2.565172  3.141281  2.568227  2.925581   
609      2.819440  2.650936  2.559732  2.543978  2.625182  2.557031  2.685631   
610      3.524923  2.862275 

In [16]:
predict_data = preds_df.mask(~ratings_matrix.isna())
print(predict_data)

movieId    1         2         3         5         6         7         10      \
userId                                                                          
1             NaN  2.827957       NaN  2.631670       NaN  2.692022  3.025521   
2        2.534508  2.527637  2.462568  2.479260  2.509726  2.471650  2.514610   
3        2.461544  2.484399  2.480570  2.485734  2.470234  2.483580  2.478184   
4        2.785833  2.468483  2.589668  2.510408  2.783238  2.536038  2.553456   
5             NaN  2.719260  2.610321  2.578045  2.708899  2.600495  2.778583   
...           ...       ...       ...       ...       ...       ...       ...   
606           NaN  2.570422  2.615463  2.490401  3.280641       NaN  2.721940   
607           NaN  2.909906  2.743169  2.652715  3.048024  2.702233  3.053794   
608           NaN       NaN       NaN  2.565172  3.141281  2.568227       NaN   
609           NaN  2.650936  2.559732  2.543978  2.625182  2.557031       NaN   
610           NaN  2.862275 

In [30]:
def recommend_movies(user_id, n_recommend=10):
    user_predictions = predict_data.loc[user_id].sort_values(ascending=False)
    top_recommendations_indexes = user_predictions.head(n_recommend).index
    movies_df = pd.read_csv('movies.csv')
    recommended_movies = movies_df[movies_df['movieId'].isin(top_recommendations_indexes)]
    recommended_movies = recommended_movies.reset_index(drop=True)
    return recommended_movies[['title', 'genres']]
user_id = 52
recommendations = recommend_movies(user_id)
print(f"Recommended movies for user {user_id}:\n", recommendations)


Recommended movies for user 52:
                                                title  \
0                                   Gladiator (2000)   
1                                     Memento (2000)   
2                                       Shrek (2001)   
3  Lord of the Rings: The Fellowship of the Ring,...   
4      Lord of the Rings: The Two Towers, The (2002)   
5                         Catch Me If You Can (2002)   
6                                Finding Nemo (2003)   
7  Pirates of the Caribbean: The Curse of the Bla...   
8                               Batman Begins (2005)   
9                                    Iron Man (2008)   

                                              genres  
0                             Action|Adventure|Drama  
1                                   Mystery|Thriller  
2  Adventure|Animation|Children|Comedy|Fantasy|Ro...  
3                                  Adventure|Fantasy  
4                                  Adventure|Fantasy  
5                   