In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [7]:
df = pd.read_csv('movie_ratings.csv', encoding = 'unicode_escape')

In [8]:
df.shape

(5877, 4)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5877 entries, 0 to 5876
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   user_id   5877 non-null   int64 
 1   movie_id  5877 non-null   int64 
 2   rating    5877 non-null   int64 
 3   genre     5877 non-null   object
dtypes: int64(3), object(1)
memory usage: 183.8+ KB


In [10]:
# Create the user-item matrix
user_movie_matrix = df.pivot_table(index='user_id', columns='movie_id', values='rating').fillna(0)

In [11]:
# Convert the pandas DataFrame to a numpy array for SVD
rating_matrix = user_movie_matrix.values

In [12]:
# Perform Singular Value Decomposition
user_features, movie_features_strength, movie_features = np.linalg.svd(rating_matrix, full_matrices=False)

In [13]:
# Reconstruct the sigma matrix into a diagonal matrix
# This matrix gives us the "importance" of each latent feature
latent_feature_weights = np.diag(movie_features_strength)

In [14]:
# Display the shapes of the decomposed matrices
print("Shape of User Features Matrix:", user_features.shape)
print("Shape of Latent Feature Weights Matrix:", latent_feature_weights.shape)
print("Shape of Movie Features Matrix:", movie_features.shape)

Shape of User Features Matrix: (994, 100)
Shape of Latent Feature Weights Matrix: (100, 100)
Shape of Movie Features Matrix: (100, 100)


In [15]:
# Choose a user to make recommendations for (e.g., User with ID 1)
user_id = 1

# Get the user's index in the matrix
user_index = user_movie_matrix.index.get_loc(user_id)

In [16]:
# Predict the ratings for all movies for this user
predicted_ratings = user_features[user_index, :] @ latent_feature_weights @ movie_features

In [17]:
# Get the movie IDs corresponding to the matrix columns
movie_ids = user_movie_matrix.columns

In [24]:
# Create a DataFrame for the predicted ratings
predicted_ratings_df = pd.DataFrame(predicted_ratings, index=movie_ids, columns=['predicted_rating'])
predicted_ratings_df.head(10)

Unnamed: 0_level_0,predicted_rating
movie_id,Unnamed: 1_level_1
1,-3.747003e-16
2,6.52256e-16
3,-4.857226e-16
4,1.526557e-15
5,8.326673e-16
6,2.567391e-15
7,1.221245e-15
8,4.0
9,5.620504e-15
10,-2.88658e-15


In [23]:
# Get movies already rated by the user
rated_movies = df[df['user_id'] == user_id]['movie_id']
rated_movies

Unnamed: 0,movie_id
1438,43
1740,12
3631,8
5583,85


In [27]:
# Filter out the movies already rated by the user
unrated_movies = predicted_ratings_df[~predicted_ratings_df.index.isin(rated_movies)]
unrated_movies.head(10)

Unnamed: 0_level_0,predicted_rating
movie_id,Unnamed: 1_level_1
1,-3.747003e-16
2,6.52256e-16
3,-4.857226e-16
4,1.526557e-15
5,8.326673e-16
6,2.567391e-15
7,1.221245e-15
9,5.620504e-15
10,-2.88658e-15
11,-4.82947e-15


In [28]:
# Sort the unrated movies by their predicted rating in descending order
recommendations = unrated_movies.sort_values(by='predicted_rating', ascending=False)
print(f"Top 5 movie recommendations for User {user_id}:")
print(recommendations.head())

Top 5 movie recommendations for User 1:
          predicted_rating
movie_id                  
20            7.410739e-15
38            5.981327e-15
9             5.620504e-15
21            4.052314e-15
39            4.024558e-15
