In [16]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.metrics import mean_squared_error
from scipy.sparse import csr_matrix
from math import sqrt

In [17]:
ratings_df = pd.read_csv('ratings.csv')
movies_df = pd.read_csv('movies.csv')

In [18]:
movies_df

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [19]:
ratings_df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [20]:
df = pd.merge(ratings_df, movies_df, on='movieId')

In [21]:
pivot_df = pd.pivot_table(df, values='rating', index='userId', columns='title')

In [22]:
pivot_df

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,4.0,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,,,,,,,,,,,...,,,,,,,,,,
607,,,,,,,,,,,...,,,,,,,,,,
608,,,,,,,,,,,...,,,,,,4.5,3.5,,,
609,,,,,,,,,,,...,,,,,,,,,,


In [23]:
pivot_df.fillna(0, inplace=True)

In [24]:
pivot_matrix = pivot_df.values

In [25]:
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

def recommend_movies(user_id, ratings_df, movies_df, num_recommendations=10, personalization=0.5):
    
    # Create a pivot table to get the ratings of each user for each movie
    ratings_pivot = ratings_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    
    # Convert the pivot table to a sparse matrix
    ratings_matrix = csr_matrix(ratings_pivot.values)
    
    # Normalize the ratings matrix by subtracting the mean rating for each user
    ratings_mean = np.mean(ratings_matrix, axis=1)
    ratings_normalized = ratings_matrix - ratings_mean.reshape(-1, 1)
    
    # Perform matrix factorization using SVD
    U, sigma, Vt = svds(ratings_normalized, k=50)
    sigma = np.diag(sigma)
    predicted_ratings = np.dot(np.dot(U, sigma), Vt) + ratings_mean.reshape(-1, 1)
    
    # Convert the predicted ratings to a dataframe
    pred_ratings_df = pd.DataFrame(predicted_ratings, columns=ratings_pivot.columns)
    
    # Get the movies the user has already rated
    user_ratings = ratings_df[ratings_df['userId'] == user_id]
    
    # Merge the predicted ratings with the movies dataframe
    user_pred_ratings = pred_ratings_df.loc[user_id]
    user_pred_ratings = user_pred_ratings.reset_index()
    user_pred_ratings.columns = ['movieId', 'predicted_rating']
    recommended_movies = pd.merge(user_pred_ratings, movies_df, on='movieId')
    
    # Remove the movies the user has already rated
    recommended_movies = recommended_movies[~recommended_movies['movieId'].isin(user_ratings['movieId'])]
    
    # Sort the recommended movies by predicted rating
    recommended_movies = recommended_movies.sort_values('predicted_rating', ascending=False)
    
    # Personalize the recommendations based on the user's past ratings
    if personalization > 0:
        user_mean_rating = user_ratings['rating'].mean()
        recommended_movies['predicted_rating'] = recommended_movies['predicted_rating'] * personalization + user_mean_rating * (1 - personalization)
        recommended_movies = recommended_movies.sort_values('predicted_rating', ascending=False)
    
    # Return the top N recommended movies
    return recommended_movies.head(num_recommendations)


In [26]:
def create_user_movie_matrix(ratings_df):
    """
    Create a user-movie matrix from the ratings dataframe.
    """
    user_movie_matrix = ratings_df.pivot(
        index='userId', columns='movieId', values='rating'
    ).fillna(0)
    user_movie_matrix = csr_matrix(user_movie_matrix.values)
    return user_movie_matrix


In [28]:
recommendations = recommend_movies(3, ratings_df, movies_df, num_recommendations=5, personalization=0.5)
print(recommendations)


      movieId  predicted_rating                                      title  \
862      1136          3.126888     Monty Python and the Holy Grail (1975)   
898      1197          3.037738                 Princess Bride, The (1987)   
224       260          2.909718  Star Wars: Episode IV - A New Hope (1977)   
2144     2858          2.848287                     American Beauty (1999)   
694       912          2.758562                          Casablanca (1942)   

                                       genres  
862                  Adventure|Comedy|Fantasy  
898   Action|Adventure|Comedy|Fantasy|Romance  
224                   Action|Adventure|Sci-Fi  
2144                            Drama|Romance  
694                             Drama|Romance  
