In [10]:
import pandas as pd
import numpy as np

In [11]:
# Load the ratings (first 10000 rows only) and the full movie catalogue.
ratings_df = pd.read_csv('/content/drive/MyDrive/ratings.csv', nrows=10000)
movies_df = pd.read_csv('/content/drive/MyDrive/movies.csv')

# Coerce the join key to a numeric dtype with one vectorised call instead of
# invoking pd.to_numeric once per element through Series.apply.
movies_df['movieId'] = pd.to_numeric(movies_df['movieId'])

In [12]:
# Preview the first rows of the movie table (head() defaults to five rows).
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [13]:
# Preview the first rows of the ratings table (head() defaults to five rows).
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510


In [14]:
# Reshape the long ratings table into a wide user x movie matrix:
# one row per userId, one column per movieId, cell value = rating.
ratings_wide = ratings_df.pivot(index='userId', columns='movieId', values='rating')

# User/movie pairs with no rating come out of the pivot as NaN; store them as 0.
R_df = ratings_wide.fillna(0)
R_df.head()

movieId,1,2,3,5,6,7,9,10,11,14,...,182715,182823,187541,187593,189333,195159,200818,200838,203375,203519
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.0,3.5,4.5,0.0,0.0,0.0,0.0,0.0,0.0
4,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.5,0.0,3.5,3.5,4.0,5.0,5.0,3.0,4.5,2.5
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# Dense ndarray view of the user x movie rating matrix.
R = R_df.to_numpy()

# Per-user (row-wise) mean over every column of R. The zero-filled unrated
# cells are part of that average.
user_ratings_mean = R.mean(axis=1)

# Subtract each user's mean from that user's row (column vector broadcasts across movies).
R_demeaned = R - user_ratings_mean[:, np.newaxis]

In [16]:
from scipy.sparse.linalg import svds
# Truncated SVD of the demeaned rating matrix, keeping k = 50 singular
# values/vectors. U is (users x k), sigma is a 1-D array of k singular values,
# Vt is (k x movies).
# NOTE(review): svds requires k to be smaller than min(R_demeaned.shape);
# confirm this still holds if the number of loaded rating rows is changed.
U, sigma, Vt = svds(R_demeaned, k = 50)

In [17]:
# Expand the 1-D vector of singular values into a diagonal matrix so it can be
# used in the matrix products that rebuild the ratings.
# np.diag is its own inverse on shape (1-D -> 2-D matrix, 2-D -> 1-D diagonal),
# so re-running this cell unguarded would collapse sigma back to a vector.
# The ndim check makes the cell safe to execute more than once.
if sigma.ndim == 1:
    sigma = np.diag(sigma)

In [18]:
# Rebuild the low-rank approximation of the demeaned matrix: (U . sigma) . Vt.
low_rank_demeaned = U.dot(sigma).dot(Vt)

# Add each user's mean back onto that user's row to return to the rating scale.
all_user_predicted_ratings = low_rank_demeaned + user_ratings_mean.reshape(-1, 1)

# Label the columns with the movieIds of the pivoted matrix; rows keep a
# default 0..n-1 positional index.
preds_df = pd.DataFrame(all_user_predicted_ratings, columns=R_df.columns)

In [19]:
def recommend_movies(predictions_df, userID, movies_df, original_ratings_df, num_recommendations=5):
    """Return a user's rated movies and the best-predicted movies they have not rated.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Predicted ratings, one row per user and one column per movieId.
        Rows are addressed by position and must follow the ascending userId
        order of ``original_ratings_df`` (the row order ``DataFrame.pivot``
        produces when pivoting on userId).
    userID : int
        Id of the user to recommend for, as found in ``original_ratings_df.userId``.
    movies_df : pandas.DataFrame
        Movie catalogue; must contain a ``movieId`` column.
    original_ratings_df : pandas.DataFrame
        The ratings the predictions were built from; must contain ``userId``,
        ``movieId`` and ``rating`` columns.
    num_recommendations : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    (user_full, recommendations) : tuple of pandas.DataFrame
        ``user_full`` holds the user's ratings joined with the movie
        information, highest rating first. ``recommendations`` holds the rows
        of ``movies_df`` the user has not rated, ordered by predicted rating
        (descending) and truncated to ``num_recommendations`` rows; the
        prediction column itself is not included.

    Raises
    ------
    ValueError
        If ``userID`` has no ratings in ``original_ratings_df``, or if the
        number of distinct users there differs from the number of rows in
        ``predictions_df`` (the two inputs are not aligned).
    """
    # Map the user id to its row position. User ids are not guaranteed to be
    # contiguous or to start at 1, so "userID - 1" is not a safe row number.
    known_user_ids = pd.Index(original_ratings_df['userId'].unique()).sort_values()
    if len(known_user_ids) != len(predictions_df):
        raise ValueError(
            'predictions_df has {0} rows but original_ratings_df contains {1} distinct users.'
            .format(len(predictions_df), len(known_user_ids))
        )
    user_row_number = known_user_ids.get_indexer([userID])[0]
    if user_row_number < 0:
        raise ValueError('User {0} has no ratings in original_ratings_df.'.format(userID))

    # The user's predicted ratings as a two-column frame (movieId, Predictions).
    # Naming the column here avoids depending on the row label of predictions_df.
    user_predictions = (
        predictions_df.iloc[user_row_number]
        .rename('Predictions')
        .rename_axis('movieId')
        .reset_index()
    )

    # Get the user's data and merge in the movie information.
    user_data = original_ratings_df[original_ratings_df.userId == userID]
    user_full = (
        user_data
        .merge(movies_df, how='left', on='movieId')
        .sort_values(['rating'], ascending=False)
    )

    print('User {0} has already rated {1} movies.'.format(userID, user_full.shape[0]))
    print('Recommending the highest {0} predicted ratings movies not already rated.'.format(num_recommendations))

    # Recommend the highest predicted rating movies that the user hasn't seen yet.
    unseen_movies = movies_df[~movies_df['movieId'].isin(user_full['movieId'])]
    recommendations = (
        unseen_movies
        .merge(user_predictions, how='left', on='movieId')
        .sort_values('Predictions', ascending=False)
        # Keep the top rows and drop the trailing 'Predictions' column.
        .iloc[:num_recommendations, :-1]
    )

    return user_full, recommendations

# Top-10 recommendations for user 75, plus the movies that user already rated.
already_rated, predictions = recommend_movies(
    predictions_df=preds_df,
    userID=75,
    movies_df=movies_df,
    original_ratings_df=ratings_df,
    num_recommendations=10,
)

User 75 has already rated 61 movies.
Recommending the highest 10 predicted ratings movies not already rated.


In [20]:
# First ten rows of the user's rated movies; recommend_movies returns this
# frame sorted by rating, highest first.
already_rated.head(10)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,75,1,5.0,1537207651,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
51,75,595,5.0,1537207824,Beauty and the Beast (1991),Animation|Children|Fantasy|Musical|Romance|IMAX
48,75,589,5.0,1537207677,Terminator 2: Judgment Day (1991),Action|Sci-Fi
45,75,541,5.0,1537207913,Blade Runner (1982),Action|Sci-Fi|Thriller
20,75,318,5.0,1537207452,"Shawshank Redemption, The (1994)",Crime|Drama
3,75,17,4.5,1537208282,Sense and Sensibility (1995),Drama|Romance
16,75,261,4.0,1537348973,Little Women (1994),Drama
56,75,736,4.0,1537207939,Twister (1996),Action|Adventure|Romance|Thriller
52,75,597,4.0,1537207951,Pretty Woman (1990),Comedy|Romance
49,75,590,4.0,1537348520,Dances with Wolves (1990),Adventure|Drama|Western


In [21]:
# Recommended movies for the user, best predicted rating first (the predicted
# rating column itself is dropped inside recommend_movies).
predictions

Unnamed: 0,movieId,title,genres
532,588,Aladdin (1992),Adventure|Animation|Children|Comedy|Musical
148,161,Crimson Tide (1995),Drama|Thriller|War
138,150,Apollo 13 (1995),Adventure|Drama|IMAX
534,592,Batman (1989),Action|Crime|Thriller
20,25,Leaving Las Vegas (1995),Drama|Romance
31,36,Dead Man Walking (1995),Crime|Drama
486,535,Short Cuts (1993),Drama
282,306,Three Colors: Red (Trois couleurs: Rouge) (1994),Drama
316,344,Ace Ventura: Pet Detective (1994),Comedy
43,52,Mighty Aphrodite (1995),Comedy|Drama|Romance
