## Collaborative Filtering with Cold Start
### Model inspired by: Ibtesam Ahmed
#### Link: https://www.kaggle.com/code/ibtesama/getting-started-with-a-movie-recommendation-system/notebook


##### Using SVD learning model and including user cold start scenario
##### Change cold start user to 2

In [1]:
import pandas as pd
from surprise import Reader, Dataset, SVD, accuracy

In [2]:
# Surprise reader configured for ratings on a 0.5 to 5.0 scale
reader = Reader(rating_scale=(0.5, 5.0))
# Read the full ratings table from the CSV file into a DataFrame
ratings = pd.read_csv('../tmdb/ratings_small.csv')

In [3]:
# User whose rating history is truncated to simulate a cold start
user_id = 2
user_ratings = ratings.loc[ratings['userId'] == user_id]

# Randomly retain 5 of this user's ratings (fixed seed for repeatability);
# every other rating by this user is held out.
keep_ratings = user_ratings.sample(n=5, random_state=42)
remove_ratings = user_ratings.loc[~user_ratings.index.isin(keep_ratings.index)]

# Training data: every row of `ratings` except the held-out ones
train_ratings = ratings.loc[~ratings.index.isin(remove_ratings.index)]

In [None]:
# Number of rating rows the selected user has in the full dataset
print(user_ratings.shape[0])

76


In [11]:
# Cold start information: the 5 sampled ratings kept for the cold-start user
# (the rest of this user's ratings are dropped from the training data).
keep_ratings

Unnamed: 0,userId,movieId,rating,timestamp
24,2,50,4.0,835355586
55,2,349,4.0,835355441
30,2,153,4.0,835355441
20,2,10,4.0,835355493
65,2,382,3.0,835356165


In [4]:
# Wrap the (user, item, rating) columns of the training frame in a Surprise
# dataset, then build a trainset from all of it (no held-back split here).
train_data = Dataset.load_from_df(
    train_ratings.loc[:, ['userId', 'movieId', 'rating']],
    reader,
)
trainset = train_data.build_full_trainset()

In [5]:
# Train the SVD model.
# The sampling step above is seeded (random_state=42) but SVD() was not, so the
# fitted model and the metrics derived from it changed on every run. Pinning
# random_state makes the results reproducible under Restart & Run All.
svd = SVD(random_state=42)
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x775f2297a360>

In [6]:
# Predict the removed ratings for the user.
# Each held-out row in `remove_ratings` already carries its true rating, so
# read movieId and rating together instead of re-filtering the entire
# `ratings` frame once per movie.
predictions = []
for movie_id, true_rating in zip(remove_ratings['movieId'], remove_ratings['rating']):
    # r_ui attaches the true rating to the prediction for later evaluation
    pred = svd.predict(user_id, movie_id, r_ui=true_rating)
    predictions.append(pred)
    print(pred)

user: 2          item: 17         r_ui = 5.00   est = 3.75   {'was_impossible': False}
user: 2          item: 39         r_ui = 5.00   est = 3.64   {'was_impossible': False}
user: 2          item: 47         r_ui = 4.00   est = 4.01   {'was_impossible': False}
user: 2          item: 52         r_ui = 3.00   est = 3.89   {'was_impossible': False}
user: 2          item: 62         r_ui = 3.00   est = 3.79   {'was_impossible': False}
user: 2          item: 110        r_ui = 4.00   est = 3.80   {'was_impossible': False}
user: 2          item: 144        r_ui = 3.00   est = 3.37   {'was_impossible': False}
user: 2          item: 150        r_ui = 5.00   est = 3.98   {'was_impossible': False}
user: 2          item: 161        r_ui = 3.00   est = 4.04   {'was_impossible': False}
user: 2          item: 165        r_ui = 3.00   est = 3.34   {'was_impossible': False}
user: 2          item: 168        r_ui = 3.00   est = 3.24   {'was_impossible': False}
user: 2          item: 185        r_ui = 3.

In [7]:
# Score the held-out predictions; each accuracy helper prints its metric
# and returns the value, which is kept for later use.
rmse = accuracy.rmse(predictions, verbose=True)
mae = accuracy.mae(predictions, verbose=True)

RMSE: 0.9553
MAE:  0.7595
