# Movie Recommendation System using Matrix Factorization with Singular Value Decomposition

With matrix factorization, we can use a user's historical ratings to predict which movies to recommend to them.

In [None]:
import pandas as pd 
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import train_test_split
from surprise import Reader
import os
import boto3
from dotenv import load_dotenv
import pickle

In [None]:
# Populate the process environment via python-dotenv so the os.getenv calls
# below can pick up the AWS settings.
load_dotenv()

bucket_name = os.getenv("AWS_BUCKET_NAME")
ratings_file = os.getenv("AWS_RATINGS_FILE")
models_file = os.getenv("AWS_MODEL_FILE")

s3 = boto3.client(
    's3',
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY"),
    aws_secret_access_key=os.getenv("AWS_SECRET"),
    region_name=os.getenv("AWS_REGION")
)

# Fetch the ratings table and the model file from the bucket.
s3.download_file(bucket_name, ratings_file, "ratings.csv")
# NOTE(review): the model is saved locally under the same name as its S3 key,
# while a later cell opens the hard-coded "recommender_model.pkl" -- confirm
# that AWS_MODEL_FILE resolves to that name.
s3.download_file(bucket_name, models_file, models_file)

# Parse each CSV exactly once; the ratings file was previously read twice.
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("../BigMovieData/ml-32m/movies.csv")

In [None]:
# Preview the first rows of the ratings table as a quick check of the load.
ratings.head()

In [None]:
# Wrap the ratings table in a Surprise dataset and hold out 20% of it for
# evaluation. The reader is told that ratings range from 0.5 to 5.0.
rating_columns = ['userId', 'movieId', 'rating']
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[rating_columns], reader)
trainset, testset = train_test_split(data, test_size=0.2)

In [7]:
from surprise import accuracy

# Fit an SVD model on the training split, then score it on the held-out split.
model = SVD()
model.fit(trainset)
predictions = model.test(testset)

# accuracy.rmse prints its own "RMSE: ..." line by default, which duplicated
# the output of this print; verbose=False keeps a single, full-precision line.
print("RMSE:", accuracy.rmse(predictions, verbose=False))

RMSE: 0.8741
RMSE: 0.874079876653131


In [5]:
# Restore a previously saved recommender from its pickle file.
# NOTE(review): unpickling can execute arbitrary code -- only load model files
# that come from a trusted source.
with open("recommender_model.pkl", mode="rb") as pickle_fh:
    loaded_model = pickle.load(pickle_fh)

In [None]:
# Distinct movieId values found in the ratings table; recommend_movies scans
# this array as its pool of candidate movies.
all_movies = ratings['movieId'].unique()
def recommend_movies(user_id, n_recommendations=10):
    """Return the top predicted movies that a user has not rated yet.

    Every movieId in ``all_movies`` that does not appear in the user's rows of
    ``ratings`` is scored with ``loaded_model.predict``; the highest estimates
    are then mapped to titles read from the MovieLens ``movies.csv`` file.

    Args:
        user_id: Value matched against the ``userId`` column of ``ratings``.
        n_recommendations: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, estimated_rating)`` tuples ordered from the
        highest to the lowest estimate.

    Raises:
        IndexError: If a recommended movieId has no row in ``movies.csv``.
    """
    # A set makes each "already rated?" check O(1); the previous list made the
    # filter below cost O(len(all_movies) * len(watched_movies)).
    watched_movies = set(
        ratings.loc[ratings['userId'] == user_id, 'movieId'].tolist()
    )
    movie_predictions = [
        (movie, loaded_model.predict(user_id, movie).est)
        for movie in all_movies
        if movie not in watched_movies
    ]
    movie_predictions.sort(key=lambda x: x[1], reverse=True)
    top_movies = movie_predictions[:n_recommendations]

    # Translate the winning movieIds into human-readable titles.
    movies_df = pd.read_csv('../BigMovieData/ml-32m/movies.csv')
    recommended_movies = [
        (movies_df.loc[movies_df['movieId'] == movie_id, 'title'].values[0], rating)
        for movie_id, rating in top_movies
    ]

    return recommended_movies

# Demo: print each (title, estimated rating) pair recommended for user 1.
user_id = 1
recommendations = recommend_movies(user_id)
for recommendation in recommendations:
    print(recommendation)


('Alien (1979)', 4.717917225345996)
('Nights of Cabiria (Notti di Cabiria, Le) (1957)', 4.710287942408444)
('Children of Paradise (Les enfants du paradis) (1945)', 4.615927607967822)
('Aguirre: The Wrath of God (Aguirre, der Zorn Gottes) (1972)', 4.614071627007398)
('Century of the Self, The (2002)', 4.5984657370733615)
('Sunset Blvd. (a.k.a. Sunset Boulevard) (1950)', 4.577376370657517)
('Seven Samurai (Shichinin no samurai) (1954)', 4.573015197668927)
('Night of the Hunter, The (1955)', 4.554318101883387)
('35 Up (1991)', 4.535665998224487)
('Dawn of the Dead (1978)', 4.5351399424992875)
