In [None]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, KNNBasic, SVD, NMF
from surprise.model_selection import cross_validate

# Read the movie catalogue and the per-user ratings from the working directory
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")

# Attach the movie columns to every rating row, joining on the shared movieId key
merged_df = ratings.merge(movies, on="movieId")

# Build the user x title rating matrix; user/title pairs without a rating stay NaN.
# pivot_table aggregates with the mean, so ratings that share a user and title
# collapse into a single averaged cell.
user_item_matrix = pd.pivot_table(
    merged_df,
    values='rating',
    index='userId',
    columns='title',
)

# User-Based Collaborative Filtering
# Fill NaNs with user's mean rating so every pair of users is compared over
# the full set of titles.
user_item_filled = user_item_matrix.apply(lambda row: row.fillna(row.mean()), axis=1)
# DataFrame.corr correlates columns, so transpose to put one user per column.
user_correlation = user_item_filled.T.corr(method='pearson')

# Find correlation of User 1 with all users, strongest first.
# User 1's own entry is still part of this series.
user_1_corr = user_correlation[1].dropna().sort_values(ascending=False)

# Take the 50 most similar *other* users. User 1 is removed by label instead of
# by slicing off position 0: if the self-correlation is NaN (already dropped
# above) or does not sort first on a tie, a positional slice would discard a
# genuine neighbour or keep User 1 in its own neighbourhood.
top_50_users = user_1_corr.drop(labels=1, errors='ignore').head(50)

# Predict rating for movieId 32
# The user-item matrix is keyed by title, so translate the movieId first.
# Raises IndexError if movieId 32 does not appear in merged_df.
movie_32_title = merged_df.loc[merged_df['movieId'] == 32, 'title'].iloc[0]
movie_32_ratings = user_item_matrix.loc[top_50_users.index, movie_32_title]

# Only neighbours who actually rated the movie may contribute. Treating a
# missing rating as 0 while keeping that neighbour's weight in the denominator
# would pull the prediction toward 0 for every neighbour who never saw the movie.
rated_mask = movie_32_ratings.notna()
neighbor_weights = top_50_users[rated_mask]
weight_total = neighbor_weights.sum()

if weight_total > 0:
    # Similarity-weighted average of the ratings the neighbours really gave
    predicted_rating = (neighbor_weights * movie_32_ratings[rated_mask]).sum() / weight_total
else:
    # No neighbour rated the movie (or the weights cancel out): there is no
    # evidence to average, so report NaN instead of dividing by zero.
    predicted_rating = float('nan')
print(f'Predicted rating for User 1 on movieId 32: {predicted_rating:.2f}')

# Item-Based Collaborative Filtering
# Fill NaNs with movie's mean rating. DataFrame.mean() works column-wise by
# default and fillna() with a Series fills each column from the entry with the
# matching label, so this replaces the slower per-column apply/lambda.
item_item_filled = user_item_matrix.fillna(user_item_matrix.mean())
item_correlation = item_item_filled.corr(method='pearson')

# Find similar movies to Jurassic Park (1993)
reference_title = "Jurassic Park (1993)"
jurassic_park_corr = (
    item_correlation[reference_title]
    # Remove the movie itself by label; slicing off position 0 after sorting
    # only works when the self-correlation is present and sorts strictly first.
    .drop(labels=reference_title)
    .dropna()
    .sort_values(ascending=False)
    .head(10)
)
print("Top 10 similar movies to Jurassic Park (1993):")
print(jurassic_park_corr)

# Model-Based Collaborative Filtering
# The rating scale handed to Surprise is the observed min/max of the ratings.
rating_values = ratings['rating']
reader = Reader(rating_scale=(rating_values.min(), rating_values.max()))
surprise_columns = ['userId', 'movieId', 'rating']
data = Dataset.load_from_df(merged_df[surprise_columns], reader)

# All three models are evaluated identically: RMSE over 5 folds, verbose output on
cv_settings = {'measures': ['RMSE'], 'cv': 5, 'verbose': True}

# KNNBasic Model: user-based neighbours, MSD similarity, k=20
knn = KNNBasic(k=20, sim_options=dict(name='msd', user_based=True))
cross_validate(knn, data, **cv_settings)

# SVD Model (library defaults)
svd = SVD()
cross_validate(svd, data, **cv_settings)

# NMF Model (library defaults)
nmf = NMF()
# Left as a bare trailing expression so the cell still displays its result
cross_validate(nmf, data, **cv_settings)


In [None]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import KNNBasic, SVD, NMF
from surprise.model_selection import cross_validate

# Load the ratings dataset
ratings = pd.read_csv('ratings.csv')  # Adjust the path as necessary

# Derive the rating scale from the data instead of hard-coding (1, 5). This
# matches how the Reader in the first cell is built and stays correct when the
# file contains ratings outside 1-5 (e.g. half-star ratings such as 0.5);
# Surprise treats this scale as the valid range for ratings.
rating_bounds = (ratings['rating'].min(), ratings['rating'].max())
reader = Reader(rating_scale=rating_bounds)
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Every model is scored the same way: RMSE over 5 folds, verbose output on
cv_kwargs = {'measures': ['RMSE'], 'cv': 5, 'verbose': True}

# KNNBasic Model (user-based neighbours, MSD similarity)
knn_algo = KNNBasic(sim_options=dict(name='msd', user_based=True))
knn_cv_results = cross_validate(knn_algo, data, **cv_kwargs)
knn_fold_rmse = knn_cv_results['test_rmse']
# Despite the name, this is the mean RMSE across folds, not the best fold
knn_best_score = knn_fold_rmse.mean()

# SVD Model (library defaults)
svd_algo = SVD()
svd_cv_results = cross_validate(svd_algo, data, **cv_kwargs)
svd_fold_rmse = svd_cv_results['test_rmse']
svd_best_score = svd_fold_rmse.mean()

# NMF Model (library defaults)
nmf_algo = NMF()
nmf_cv_results = cross_validate(nmf_algo, data, **cv_kwargs)
nmf_fold_rmse = nmf_cv_results['test_rmse']
nmf_best_score = nmf_fold_rmse.mean()

# Report the mean cross-validated RMSE of each model, one line per model
summary_lines = (
    f"KNNBasic Mean RMSE: {knn_best_score:.4f}",
    f"SVD Mean RMSE: {svd_best_score:.4f}",
    f"NMF Mean RMSE: {nmf_best_score:.4f}",
)
for summary_line in summary_lines:
    print(summary_line)