In [8]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse  
import pickle
import pandas

data_path = '../datasets/ml-latest-small/ratings.csv'
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
data = Dataset.load_from_file(data_path, reader=reader)

trainset, testset = train_test_split(data, test_size=0.2)

svd_model = SVD()
svd_model.fit(trainset)

predictions = svd_model.test(testset)
print(f"RMSE: {rmse(predictions)}")


RMSE: 0.8790
RMSE: 0.8790395081623208


In [9]:

from surprise.model_selection import GridSearchCV

param_grid = {
    'n_factors': [50, 100, 150],
    'n_epochs': [10, 20, 30],
    'lr_all': [0.002, 0.005, 0.01],
    'reg_all': [0.02, 0.1, 0.2]
}

grid_search = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=3)
grid_search.fit(data)
print(grid_search.best_params)
print(f"Best RMSE: {grid_search.best_score['rmse']}")


{'rmse': {'n_factors': 100, 'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}}
Best RMSE: 0.8616995439184981


In [10]:
from surprise import KNNBasic

data = Dataset.load_from_file(data_path, reader=reader)
trainset, testset = train_test_split(data, test_size=0.25)

sim_options = {
    'name': 'cosine',
    'user_based': False  
}

knn_model = KNNBasic(sim_options=sim_options)
knn_model.fit(trainset)
predictions = knn_model.test(testset)
print(f"RMSE: {rmse(predictions)}")


Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9745
RMSE: 0.9745352331639119


In [11]:
with open('knn_model.pkl', 'wb') as file:
    pickle.dump(knn_model, file)

In [3]:

links_df = pd.read_csv('../datasets/ml-latest-small/links.csv')

movie_id_mapping = dict(zip(links_df['movieId'], links_df['tmdbId']))

import json
with open('movie_id_mapping.json', 'w') as f:
    json.dump(movie_id_mapping, f)
