In [None]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds

# Read the MovieLens movie catalogue and the user ratings from CSV
movies = pd.read_csv('/content/movies.csv')
ratings = pd.read_csv('/content/ratings.csv')

# Join every rating row with its movie record on the shared movieId key
# (inner join, the pandas default), then preview the first rows.
data = ratings.merge(movies, on='movieId')
data.head()


Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,16,4.0,1217897793,Casino (1995),Crime|Drama
1,9,16,4.0,842686699,Casino (1995),Crime|Drama
2,12,16,1.5,1144396284,Casino (1995),Crime|Drama
3,24,16,4.0,963468757,Casino (1995),Crime|Drama
4,29,16,3.0,836820223,Casino (1995),Crime|Drama


In [None]:
# Create a pivot table with users as rows and movies as columns.
# Unrated (user, movie) pairs are left as NaN at this stage so they can be
# distinguished from genuine ratings when computing the per-user mean.
ratings_pivot = data.pivot_table(index='userId', columns='title', values='rating')

# Mean rating of each user over the movies they actually rated (NaN is skipped).
# Averaging a zero-filled matrix instead would divide by the total number of
# movies and drag every user's mean towards 0.
user_mean_series = ratings_pivot.mean(axis=1)
user_ratings_mean = user_mean_series.values

# Zero-filled numpy matrix of the raw ratings (same name/shape as before)
user_movie_matrix = ratings_pivot.fillna(0).values

# Normalize the matrix by subtracting the mean rating of each user from the
# observed ratings; unrated entries become 0, i.e. "no deviation from the
# user's mean", rather than a large negative value.
user_movie_matrix_norm = ratings_pivot.sub(user_mean_series, axis=0).fillna(0).values


In [None]:
# Truncated SVD of the normalized user x movie matrix, keeping k=50 singular triplets
U, sigma, Vt = svds(user_movie_matrix_norm, k=50)

# svds hands back the singular values as a 1-D array; expand them into a
# square diagonal matrix so they can take part in the matrix product below
sigma = np.diag(sigma)

# Rebuild the low-rank approximation (U * sigma * Vt) and add each user's
# mean back onto that user's row
predicted_ratings = U.dot(sigma).dot(Vt) + user_ratings_mean[:, np.newaxis]

# Wrap the predictions in a DataFrame whose columns are the movie titles.
# The pivot is rebuilt here only to recover those column labels.
title_columns = data.pivot_table(index='userId', columns='title', values='rating').columns
predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=title_columns)


In [None]:
def recommend_movies(predictions_df, user_id, movies_df, original_ratings_df, num_recommendations=10):
    """Return a user's rating history and their best-predicted unrated movies.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Predicted ratings, one row per user and one column per movie title.
        The column index must be named ``'title'`` because it is joined
        against ``movies_df['title']``. Rows are assumed to be in ascending
        ``userId`` order.
    user_id : int
        ID of the user to produce recommendations for.
    movies_df : pandas.DataFrame
        Movie catalogue with at least ``movieId`` and ``title`` columns.
    original_ratings_df : pandas.DataFrame
        Raw ratings with at least ``userId``, ``movieId`` and ``rating`` columns.
    num_recommendations : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``user_full``: the user's ratings joined with the movie catalogue,
        highest rating first.
        ``recommendations``: movies the user has not rated, with a
        ``PredictedRating`` column, highest prediction first, truncated to
        ``num_recommendations`` rows.

    Raises
    ------
    IndexError
        If ``user_id`` cannot be mapped to a row of ``predictions_df``.
    """
    # Map the user ID to its row position. User IDs are not guaranteed to be
    # the contiguous range 1..N, so "user_id - 1" can silently select another
    # user's predictions; use the rank of the ID among the known users instead.
    known_user_ids = pd.Index(original_ratings_df['userId'].unique()).sort_values()
    if len(known_user_ids) == len(predictions_df):
        # get_indexer yields -1 for an unknown ID, which the bounds check rejects
        user_row_number = int(known_user_ids.get_indexer([user_id])[0])
    else:
        # The ratings cannot be aligned with the prediction rows; keep the
        # legacy convention that user IDs start at 1 and are contiguous.
        user_row_number = user_id - 1
    if not 0 <= user_row_number < len(predictions_df):
        raise IndexError('User {0} has no row in the predictions table.'.format(user_id))

    # Get and sort the user's predicted ratings; name the Series directly so
    # the resulting column does not depend on the row label of predictions_df
    sorted_user_predictions = (
        predictions_df.iloc[user_row_number]
        .sort_values(ascending=False)
        .rename('PredictedRating')
    )

    # Get the user's data and merge with the movies dataset
    user_data = original_ratings_df[original_ratings_df.userId == user_id]
    user_full = user_data.merge(movies_df, how='left', on='movieId').sort_values(['rating'], ascending=False)

    print('User {0} has already rated {1} movies.'.format(user_id, user_full.shape[0]))
    print('Recommending the highest {0} predicted ratings movies not already rated.'.format(num_recommendations))

    # Recommend the highest predicted rating movies that the user hasn't rated yet.
    # Movies without a prediction column get NaN and are sorted to the end.
    unrated_movies = movies_df[~movies_df['movieId'].isin(user_full['movieId'])]
    recommendations = unrated_movies.merge(sorted_user_predictions.reset_index(), how='left', on='title')
    recommendations = recommendations.sort_values('PredictedRating', ascending=False)

    return user_full, recommendations.head(num_recommendations)




In [None]:
# Example usage: request ten recommendations for a single user
user_id = 1  # ID of the user to generate recommendations for
user_full, recommendations = recommend_movies(
    predicted_ratings_df, user_id, movies, ratings, num_recommendations=10
)

# Show the movies this user has already rated
print("User's rated movies:")
print(user_full)



User 1 has already rated 113 movies.
Recommending the highest 10 predicted ratings movies not already rated.
User's rated movies:
     userId  movieId  rating   timestamp  \
36        1      912     5.0  1217897623   
40        1     1136     5.0  1217897630   
86        1     4262     5.0  1217897697   
46        1     1221     5.0  1217897613   
78        1     2959     5.0  1217896334   
..      ...      ...     ...         ...   
31        1      719     0.5  1217895799   
11        1      256     0.5  1217895764   
106       1    45950     0.5  1217897813   
14        1      277     0.5  1217895772   
112       1    57949     0.5  1217896004   

                                      title                       genres  
36                        Casablanca (1942)                Drama|Romance  
40   Monty Python and the Holy Grail (1975)     Adventure|Comedy|Fantasy  
86                          Scarface (1983)           Action|Crime|Drama  
46           Godfather: Part II, The (197

In [None]:
# Print the heading and the recommendations table, one per line
print("\nTop 10 movie recommendations:", recommendations, sep="\n")


Top 10 movie recommendations:
      movieId                                              title  \
3236     4226                                     Memento (2000)   
0           1                                   Toy Story (1995)   
7556    58559                            Dark Knight, The (2008)   
995      1291          Indiana Jones and the Last Crusade (1989)   
460       541                                Blade Runner (1982)   
243       293  Léon: The Professional (a.k.a. The Professiona...   
4705     6539  Pirates of the Caribbean: The Curse of the Bla...   
3067     3996  Crouching Tiger, Hidden Dragon (Wo hu cang lon...   
915      1193             One Flew Over the Cuckoo's Nest (1975)   
1778     2329                          American History X (1998)   

                                           genres  PredictedRating  
3236                             Mystery|Thriller         2.723471  
0     Adventure|Animation|Children|Comedy|Fantasy         2.339103  
7556         