In [1]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds


In [2]:
# Read the movie catalogue and the user ratings from CSV files located in the
# current working directory.
movies_df, ratings_df = (
    pd.read_csv(csv_name) for csv_name in ('movies.csv', 'ratings.csv')
)

# Preview the first ten rows of the catalogue.
movies_df.head(n=10)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


In [3]:
# Reshape the long ratings table into a dense user x movie matrix:
# one row per userId, one column per movieId, missing ratings stored as 0.
Rating_df = (
    ratings_df
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

# (number of users, number of movies)
Rating_df.shape

(668, 10325)

In [4]:
# Work on the raw ndarray behind the pivoted frame.
Rate = Rating_df.to_numpy()

# Per-user (row-wise) mean; the zeros filled in for unrated movies are part of it.
ratings_mean = Rate.mean(axis=1)

# Subtract each user's mean from that user's row (column vector broadcasts over movies).
R_demean = Rate - ratings_mean[:, np.newaxis]

In [5]:
# Truncated SVD of the de-meaned rating matrix, keeping k = 50 singular triplets:
# R_demean ~= U_matrix @ diag(sigma_values) @ Vtrans_matrix.
# NOTE(review): no start vector / seed is passed, so the factors are presumably only
# reproducible up to solver tolerance between runs - confirm if exact repeatability matters.
U_matrix, sigma_values, Vtrans_matrix = svds(R_demean, k = 50)
# Display the retained singular values.
sigma_values

array([ 67.75583954,  68.10174582,  68.41554944,  69.25742958,
        69.71428633,  70.03822422,  70.63880278,  71.3519959 ,
        72.01046605,  72.49217043,  72.71366588,  73.71441659,
        74.03995873,  75.07874467,  75.63194166,  76.5179679 ,
        78.3284683 ,  79.09658441,  80.14436134,  80.34678755,
        81.33475947,  81.69045271,  83.31280447,  84.07340268,
        85.46896657,  87.97453293,  88.13453246,  89.03316544,
        89.69145949,  92.26863942,  93.32591813,  94.38335623,
        95.68363396,  97.43909002, 100.31028998, 103.8285294 ,
       106.91484618, 110.24997517, 113.42325271, 117.97764847,
       120.30374869, 122.89039344, 130.06109202, 134.86435077,
       143.52740449, 155.79132806, 163.28017959, 188.46765204,
       229.09814764, 480.87123202])

In [6]:
# svds returned the singular values as a 1-D array; expand them into a square
# diagonal matrix so they can be used in a matrix product.
sigma_matrix = np.diag(sigma_values)

# Sanity check: expected to be (k, k).
sigma_matrix.shape

(50, 50)

In [7]:
# Rebuild the low-rank approximation of the de-meaned matrix and add each user's
# mean back to obtain predicted ratings for every (user, movie) pair.
user_pred_ratings = (U_matrix @ sigma_matrix @ Vtrans_matrix) + ratings_mean[:, np.newaxis]

# Columns carry the movieId labels of Rating_df; no index is passed, so rows keep the
# default positional index (row i is the i-th row of Rating_df).
predicted_df = pd.DataFrame(data=user_pred_ratings, columns=Rating_df.columns)

In [8]:
def recommend_movies(predictions_df, userId, movies_df, ratings_df, no_of_recommendations=5):
    """Return the movies a user has rated and the top predicted unrated movies.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Predicted ratings, one row per user and one column per movieId.
        If its index is named ``'userId'`` the user's row is looked up by label.
        Otherwise the rows are taken to be in ascending order of the distinct
        user ids found in ``ratings_df`` (the order ``DataFrame.pivot`` produces),
        and the row is looked up by that position.
    userId : int
        Id of the user to recommend for.
    movies_df : pandas.DataFrame
        Movie catalogue; must contain a ``movieId`` column.
    ratings_df : pandas.DataFrame
        Long-format ratings with ``userId``, ``movieId`` and ``rating`` columns.
    no_of_recommendations : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    (user_rated_movies, recommend) : tuple of pandas.DataFrame
        ``user_rated_movies`` is the user's ratings joined with the catalogue,
        sorted by rating (highest first). ``recommend`` holds the catalogue rows
        of the not-yet-rated movies with the highest predicted rating, best
        first; the prediction column itself is dropped.

    Raises
    ------
    KeyError
        If ``userId`` cannot be matched to a row of ``predictions_df``.
    """
    # Locate the user's prediction row from the ids themselves. ``userId - 1`` is only
    # correct when the ids are exactly 1..N without gaps; otherwise it silently selects
    # another user's row (and userId 0 would wrap around to the last row).
    if predictions_df.index.name == 'userId':
        predictions_values = predictions_df.loc[userId]
    else:
        known_user_ids = np.sort(ratings_df['userId'].unique())
        row_position = int(np.searchsorted(known_user_ids, userId))
        if row_position >= len(known_user_ids) or known_user_ids[row_position] != userId:
            raise KeyError(f'userId {userId} not found in ratings_df')
        predictions_values = predictions_df.iloc[row_position]

    user_ratings = ratings_df[ratings_df.userId == userId]
    user_rated_movies = (
        user_ratings
        .merge(movies_df, how='left', on='movieId')
        .sort_values(['rating'], ascending=False)
    )

    print ('User',userId,' has already rated',user_rated_movies.shape[0],' movies.' )
    print ('Recommending the highest',no_of_recommendations,' predicted ratings movies not already rated.')

    # Two-column frame (movieId, Predictions). The column is named explicitly so it
    # does not depend on whatever label the selected row happens to carry.
    predicted = (
        predictions_values
        .rename('Predictions')
        .rename_axis('movieId')
        .reset_index()
    )

    # Recommend the highest predicted rating movies that the user hasn't seen yet.
    unseen_movies = movies_df[~movies_df['movieId'].isin(user_rated_movies['movieId'])]
    recommend = (
        unseen_movies
        .merge(predicted, how='left', on='movieId')
        .sort_values('Predictions', ascending=False)
        # Keep the top N rows and drop the trailing 'Predictions' column.
        .iloc[:no_of_recommendations, :-1]
    )
    return user_rated_movies, recommend

# Ten recommendations for user 500, plus the movies that user has already rated.
previously_rated, recommendations = recommend_movies(
    predicted_df,
    userId=500,
    movies_df=movies_df,
    ratings_df=ratings_df,
    no_of_recommendations=10,
)

User 500  has already rated 25  movies.
Recommending the highest 10  predicted ratings movies not already rated.


In [9]:
# Movies the user has already rated, as returned by recommend_movies (highest rating first).
previously_rated

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
12,500,2694,5.0,1318398248,Big Daddy (1999),Comedy
16,500,3489,5.0,1318398384,Hook (1991),Adventure|Comedy|Fantasy
3,500,296,5.0,1318398894,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
4,500,431,5.0,1318398270,Carlito's Way (1993),Crime|Drama
6,500,1347,5.0,1318398462,"Nightmare on Elm Street, A (1984)",Horror|Thriller
7,500,1367,5.0,1318398997,101 Dalmatians (1996),Adventure|Children|Comedy
21,500,50851,5.0,1318399576,Cocaine Cowboys (2006),Documentary
20,500,33794,5.0,1318399375,Batman Begins (2005),Action|Crime|IMAX
19,500,33166,5.0,1318399618,Crash (2004),Crime|Drama
11,500,2605,5.0,1318398281,Entrapment (1999),Crime|Thriller


In [10]:
# Unrated movies with the highest predicted rating for the user, best first.
recommendations

Unnamed: 0,movieId,title,genres
2349,2959,Fight Club (1999),Action|Crime|Drama|Thriller
45,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
275,318,"Shawshank Redemption, The (1994)",Crime|Drama
520,593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller
532,608,Fargo (1996),Comedy|Crime|Drama|Thriller
42,47,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
2266,2858,American Beauty (1999),Drama|Romance
2045,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller
255,293,Léon: The Professional (a.k.a. The Professiona...,Action|Crime|Drama|Thriller
7139,48516,"Departed, The (2006)",Crime|Drama|Thriller
