# Movie Recommendation System using Matrix Factorization with Singular Value Decomposition

With Matrix Factorization, we would be able to look at a user's historical rating to see what movies to recommend them.

In [2]:
pip install scikit-surprise

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4.tar.gz (154 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml): started
  Building wheel for scikit-surprise (pyproject.toml): finished with status 'done'
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-win_amd64.whl size=1298019 sha256=e3d657586513f63d856e5908e56cab50da28e40bdfca4ea500a8dfe4e756206e
  Stored in directory: c:\users\larry\appdata\local\pip\cache\wheels\2a\8f\6e\7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surpris

In [None]:
import pandas as pd 
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import train_test_split
from surprise import Reader
from scipy.sparse import csr_matrix

movies = pd.read_csv("../BigMovieData/ml-32m/movies.csv")
ratings = pd.read_csv("../BigMovieData/ml-32m/ratings.csv")


In [13]:
user_counts = ratings["userId"].value_counts()
active_users = user_counts[user_counts >= 20].index
ratings = ratings[ratings["userId"].isin(active_users)]


In [14]:
movie_counts = ratings["movieId"].value_counts()
popular_movies = movie_counts[movie_counts >= 20].index
ratings = ratings[ratings["movieId"].isin(popular_movies)]


In [15]:
num_users = ratings["userId"].nunique()
num_movies = ratings["movieId"].nunique()
total_cells = num_users * num_movies
print(f"Users: {num_users}, Movies: {num_movies}, Total Cells: {total_cells}")


Users: 200846, Movies: 23344, Total Cells: 4688549024


In [16]:
final_dataset = ratings.pivot(index="userId", columns="movieId", values="rating")
final_dataset.fillna(0,inplace = True)
final_dataset.head()

  num_cells = num_rows * num_columns


IndexError: index 393581729 is out of bounds for axis 0 with size 393581728

In [5]:
reader = Reader(rating_scale=(0.5,5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2)

In [7]:
model = SVD()
model.fit(trainset)
from surprise import accuracy
predictions = model.test(testset)
print("RMSE:", accuracy.rmse(predictions))

RMSE: 0.8741
RMSE: 0.874079876653131


In [None]:
# Get all unique movie IDs
all_movies = ratings['movieId'].unique()

# Function to get top recommended movies for a user
def recommend_movies(user_id, n_recommendations=10):
    # Get movies the user has already rated
    watched_movies = ratings[ratings['userId'] == user_id]['movieId'].tolist()
    
    # Predict ratings for movies the user hasn't watched
    movie_predictions = [
        (movie, model.predict(user_id, movie).est) for movie in all_movies if movie not in watched_movies
    ]
    
    # Sort by predicted rating
    movie_predictions.sort(key=lambda x: x[1], reverse=True)
    
    # Get top N recommendations
    top_movies = movie_predictions[:n_recommendations]
    
    # Convert movie IDs to titles (assuming a movies.csv file exists)
    movies_df = pd.read_csv('../ml-32m/movies.csv')  # Contains columns: movieId, title
    recommended_movies = [(movies_df[movies_df['movieId'] == movie_id]['title'].values[0], rating) 
                          for movie_id, rating in top_movies]
    
    return recommended_movies

# Example: Recommend movies for user ID 1
user_id = 1
recommendations = recommend_movies(user_id)
for movie in recommendations:
    print(movie)


('Lawrence of Arabia (1962)', 4.719919316383328)
('Great Escape, The (1963)', 4.624865832959987)
('Philadelphia Story, The (1940)', 4.612152783804999)
('Amadeus (1984)', 4.58227462411902)
('L.A. Confidential (1997)', 4.558613600335368)
('Grand Day Out with Wallace and Gromit, A (1989)', 4.5462286873423245)
("Howl's Moving Castle (Hauru no ugoku shiro) (2004)", 4.541433900871185)
("One Flew Over the Cuckoo's Nest (1975)", 4.527814594544625)
('Serenity (2005)', 4.522529504928613)
('Hoop Dreams (1994)', 4.515881468464204)
