In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
# Build the relative path to the movies CSV (one directory up, under data/).
file_path = os.path.join("..","data", "movies.csv")
# Load the movie metadata into a DataFrame and preview its first rows.
movies = pd.read_csv(file_path)
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
# Build the relative path to the ratings CSV (one directory up, under data/).
file_path = os.path.join("..","data", "ratings.csv")
# Load the individual ratings into a DataFrame and preview its first rows.
ratings = pd.read_csv(file_path)
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [7]:
# Reshape the long ratings table into a wide matrix: one row per userId, one
# column per movieId, each cell holding that user's rating for that movie.
# (user, movie) pairs with no rating are filled with 0.
R_df = ratings.pivot(index = 'userId', columns ='movieId', values = 'rating').fillna(0)
R_df.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Plain NumPy view of the user x movie ratings matrix.
R = R_df.to_numpy()
# Mean of each row of R, i.e. one value per user taken over all movie columns.
user_ratings_mean = R.mean(axis=1)
# Subtract every user's own mean from that user's row; the means are turned
# into a column vector so they broadcast across the movie columns.
R_demeaned = R - user_ratings_mean[:, np.newaxis]


In [10]:
# Truncated SVD of the demeaned ratings matrix, requesting k = 50 singular
# values together with their left (U) and right (Vt) singular vectors.
from scipy.sparse.linalg import svds
U, sigma, Vt = svds(R_demeaned, k = 50)

In [11]:
# svds hands back the singular values as a 1-D vector; expand them into a
# diagonal matrix so they can be used in the U . sigma . Vt product.
# The ndim guard keeps this cell idempotent: np.diag applied to an array that
# is already 2-D extracts its diagonal, so re-running the unguarded cell would
# silently turn sigma back into a vector and change the downstream product.
sigma = np.diag(sigma) if np.ndim(sigma) == 1 else sigma

In [12]:
# Rebuild the low-rank approximation (U . sigma . Vt) and add each user's mean
# back onto that user's row to undo the earlier centring.
all_user_predicted_ratings = U.dot(sigma).dot(Vt) + user_ratings_mean[:, np.newaxis]
# Wrap the predictions in a DataFrame that reuses the movieId column labels of
# the original ratings matrix; rows keep a default positional index.
preds_df = pd.DataFrame(data=all_user_predicted_ratings, columns=R_df.columns)

In [44]:
def recommend_movies(predictions_df, userID, movies, original_ratings_df, num_recommendations=5):
    """Recommend the highest-predicted movies that a user has not rated yet.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Predicted ratings with one row per user and one column per movieId.
        Rows are addressed by position: row ``userID - 1`` is taken to be the
        row of ``userID``.
    userID : int
        1-based user id; must satisfy ``1 <= userID <= len(predictions_df)``.
    movies : pandas.DataFrame
        Movie metadata; must contain a ``movieId`` column.
    original_ratings_df : pandas.DataFrame
        Ratings with at least ``userId``, ``movieId`` and ``rating`` columns.
    num_recommendations : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    user_full : pandas.DataFrame
        The user's existing ratings joined with the movie metadata, sorted by
        rating in descending order.
    recommendations : pandas.DataFrame
        Rows of ``movies`` the user has not rated, ordered by predicted rating
        (highest first) and truncated to ``num_recommendations``. The
        prediction column itself is not included.

    Raises
    ------
    IndexError
        If ``userID`` does not map to a row of ``predictions_df``.
    """
    # UserID starts at 1, not 0.
    user_row_number = userID - 1
    # Reject out-of-range ids explicitly: a zero or negative id would otherwise
    # wrap around in iloc and silently return another user's predictions.
    if not 0 <= user_row_number < len(predictions_df):
        raise IndexError(
            'userID {0} is out of range; expected a value between 1 and {1}.'.format(
                userID, len(predictions_df)))

    # Name both the values and the axis explicitly so that the merge below does
    # not depend on how predictions_df happens to label its rows or columns.
    user_predictions = (predictions_df.iloc[user_row_number]
                        .rename('Predictions')
                        .rename_axis('movieId')
                        .reset_index())

    # Get the user's data and merge in the movie information.
    user_data = original_ratings_df[original_ratings_df.userId == userID]
    user_full = (user_data.merge(movies, how='left', on='movieId')
                 .sort_values(['rating'], ascending=False))

    print ('User {0} has already rated {1} movies.'.format(userID, user_full.shape[0]))
    print ('Recommending the highest {0} predicted ratings movies not already rated.'.format(num_recommendations))

    # Recommend the highest predicted rating movies that the user hasn't seen yet.
    # The left merge keeps movies that have no prediction column; their NaN
    # predictions are sorted to the end.
    unseen_movies = movies[~movies['movieId'].isin(user_full['movieId'])]
    recommendations = (unseen_movies.merge(user_predictions, how='left', on='movieId')
                       .sort_values('Predictions', ascending=False)
                       .iloc[:num_recommendations]
                       .drop(columns='Predictions'))

    return user_full, recommendations



In [45]:
# Ask for the top ten recommendations for user 414, keeping both the user's
# rating history and the recommended titles.
already_rated, predictions = recommend_movies(
    preds_df,
    userID=414,
    movies=movies,
    original_ratings_df=ratings,
    num_recommendations=10,
)

User 414 has already rated 2698 movies.
Recommending the highest 10 predicted ratings movies not already rated.


In [46]:
# Preview the first ten rows of the user's rating history returned above.
already_rated.head(10)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
466,414,1262,5.0,961512341,"Great Escape, The (1963)",Action|Adventure|Drama|War
303,414,750,5.0,961512341,Dr. Strangelove or: How I Learned to Stop Worr...,Comedy|War
1396,414,4380,5.0,1016462828,"Princess and the Warrior, The (Krieger und die...",Drama|Romance
351,414,953,5.0,1010675893,It's a Wonderful Life (1946),Children|Drama|Fantasy|Romance
347,414,924,5.0,961514698,2001: A Space Odyssey (1968),Adventure|Drama|Sci-Fi
346,414,923,5.0,961516613,Citizen Kane (1941),Drama|Mystery
344,414,916,5.0,961513654,Roman Holiday (1953),Comedy|Drama|Romance
708,414,2087,5.0,961519005,Peter Pan (1953),Animation|Children|Fantasy|Musical
341,414,912,5.0,961512311,Casablanca (1942),Drama|Romance
340,414,910,5.0,961595311,Some Like It Hot (1959),Comedy|Crime


In [47]:
# Display the recommended movies returned for this user.
predictions

Unnamed: 0,movieId,title,genres
5444,97938,Life of Pi (2012),Adventure|Drama|IMAX
410,1031,Bedknobs and Broomsticks (1971),Adventure|Children|Musical
465,1183,"English Patient, The (1996)",Drama|Romance|War
4221,55280,Lars and the Real Girl (2007),Comedy|Drama
5241,92259,Intouchables (2011),Comedy|Drama
468,1186,"Sex, Lies, and Videotape (1989)",Drama
5593,102903,Now You See Me (2013),Crime|Mystery|Thriller
6482,146656,Creed (2015),Drama
3311,8957,Saw (2004),Horror|Mystery|Thriller
6064,122892,Avengers: Age of Ultron (2015),Action|Adventure|Sci-Fi


NameError: name 'original_ratings_df' is not defined