# Movie Recommendation System using Matrix Factorization with Singular Value Decomposition

With Matrix Factorization, we would be able to look at a user's historical rating to see what movies to recommend them.

In [10]:
import pandas as pd 
import gdown
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import train_test_split
from surprise import Reader
from scipy.sparse import csr_matrix


file_id = "1TGyRuaqxVSOtyRk2CMJ0TROdPyuHEEZ2"
gdown.download(f"https://drive.google.com/uc?id={file_id}", "ratings.csv", quiet=False)

movies = pd.read_csv("../BigMovieData/ml-32m/movies.csv")
ratings = pd.read_csv("ratings.csv")


  from pandas.core import (


In [None]:
ratings.head()

In [None]:
user_codes = ratings["userId"].astype("category").cat.codes
movie_codes = ratings["movieId"].astype("category").cat.codes

sparse_matrix = csr_matrix((ratings["rating"], (user_codes, movie_codes)))


In [None]:
final_dataset = pd.DataFrame.sparse.from_spmatrix(sparse_matrix)

# Assign userId and movieId if needed
final_dataset.index = ratings["userId"].astype("category").cat.categories
final_dataset.columns = ratings["movieId"].astype("category").cat.categories


In [None]:
# final_dataset = ratings.pivot(index="userId", columns="movieId", values="rating")
# final_dataset.fillna(0,inplace = True)
final_dataset.head()

In [None]:
reader = Reader(rating_scale=(0.5,5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2)

In [7]:
model = SVD()
model.fit(trainset)
from surprise import accuracy
predictions = model.test(testset)
print("RMSE:", accuracy.rmse(predictions))

RMSE: 0.8741
RMSE: 0.874079876653131


In [11]:
# Get all unique movie IDs
all_movies = ratings['movieId'].unique()

# Function to get top recommended movies for a user
def recommend_movies(user_id, n_recommendations=10):
    # Get movies the user has already rated
    watched_movies = ratings[ratings['userId'] == user_id]['movieId'].tolist()
    
    # Predict ratings for movies the user hasn't watched
    movie_predictions = [
        (movie, model.predict(user_id, movie).est) for movie in all_movies if movie not in watched_movies
    ]
    
    # Sort by predicted rating
    movie_predictions.sort(key=lambda x: x[1], reverse=True)
    
    # Get top N recommendations
    top_movies = movie_predictions[:n_recommendations]
    
    # Convert movie IDs to titles (assuming a movies.csv file exists)
    movies_df = pd.read_csv('../BigMovieData/ml-32m/movies.csv')  # Contains columns: movieId, title
    recommended_movies = [(movies_df[movies_df['movieId'] == movie_id]['title'].values[0], rating) 
                          for movie_id, rating in top_movies]
    
    return recommended_movies

# Example: Recommend movies for user ID 1
user_id = 1
recommendations = recommend_movies(user_id)
for movie in recommendations:
    print(movie)


('Mad Max: Fury Road (2015)', 4.776484860701145)
('Red Shoes, The (1948)', 4.775585909889986)
('Before Sunset (2004)', 4.769854627453665)
('Come and See (Idi i smotri) (1985)', 4.766503163298407)
('Alien (1979)', 4.762540655189273)
('Late Spring (Banshun) (1949)', 4.749646292999602)
('Repulsion (1965)', 4.71481543914906)
('Night of the Hunter, The (1955)', 4.697467682371876)
('Vertigo (1958)', 4.697272789675524)
('Fanny and Alexander (Fanny och Alexander) (1982)', 4.673463780460623)
