## Collaborative Filtering with Cold Start
### Model inspired by: Ibtesam Ahmed
#### Link: https://www.kaggle.com/code/ibtesama/getting-started-with-a-movie-recommendation-system/notebook


##### Using SVD learning model and including user cold start scenario
##### Change cold start user to 2
##### n_factors = 20, n_epochs = 30, lr_all = .005, reg_all = .1

In [1]:
import pandas as pd
from surprise import Reader, Dataset, SVD, accuracy

In [2]:
# Read the ratings CSV and declare the rating scale Surprise should expect
ratings_path = '../tmdb/ratings_small.csv'
ratings = pd.read_csv(ratings_path)

rating_bounds = (0.5, 5.0)  # (lowest, highest) rating value passed to Reader
reader = Reader(rating_scale=rating_bounds)

In [7]:
# Pick the user whose history is truncated to simulate a cold start
user_id = 2
is_target_user = ratings['userId'] == user_id
user_ratings = ratings[is_target_user]

# Randomly retain 5 of this user's ratings (fixed seed); the remainder is held out
keep_ratings = user_ratings.sample(n=5, random_state=42)
is_held_out = ~user_ratings.index.isin(keep_ratings.index)
remove_ratings = user_ratings[is_held_out]

# Training data: every rating except the held-out rows of the chosen user
in_training = ~ratings.index.isin(remove_ratings.index)
train_ratings = ratings[in_training]

In [8]:
# Total number of ratings the chosen user has in the full dataset
print(user_ratings.shape[0])

76


In [9]:
# Cold-start information: the 5 sampled ratings retained for the chosen user.
# These are the only rows for this user left in train_ratings.
keep_ratings

Unnamed: 0,userId,movieId,rating,timestamp
24,2,50,4.0,835355586
55,2,349,4.0,835355441
30,2,153,4.0,835355441
20,2,10,4.0,835355493
65,2,382,3.0,835356165


In [10]:
# Hand the (user, item, rating) columns of the training frame to Surprise
# and build a trainset from all of them (no internal hold-out split)
rating_columns = ['userId', 'movieId', 'rating']
train_data = Dataset.load_from_df(train_ratings[rating_columns], reader)
trainset = train_data.build_full_trainset()

In [11]:
# Train the SVD model.
# random_state seeds the RNG Surprise uses to initialise the latent factors,
# so the fitted model (and the RMSE/MAE computed from it) is reproducible
# after a kernel restart instead of changing on every run.
svd = SVD(n_factors=20, n_epochs=30, lr_all=0.005, reg_all=0.1, random_state=42)
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x730800dde330>

In [12]:
# Predict the held-out ratings for the cold-start user.
# Each row of remove_ratings already carries the true rating, so it is read
# directly from that row instead of re-filtering the full ratings frame
# once per movie.
predictions = []
for movie_id, true_rating in zip(remove_ratings['movieId'], remove_ratings['rating']):
    # r_ui stores the ground truth alongside the estimate for later scoring
    pred = svd.predict(user_id, movie_id, r_ui=true_rating)
    predictions.append(pred)
    print(pred)

user: 2          item: 17         r_ui = 5.00   est = 4.02   {'was_impossible': False}
user: 2          item: 39         r_ui = 5.00   est = 3.73   {'was_impossible': False}
user: 2          item: 47         r_ui = 4.00   est = 4.10   {'was_impossible': False}
user: 2          item: 52         r_ui = 3.00   est = 3.73   {'was_impossible': False}
user: 2          item: 62         r_ui = 3.00   est = 3.81   {'was_impossible': False}
user: 2          item: 110        r_ui = 4.00   est = 4.03   {'was_impossible': False}
user: 2          item: 144        r_ui = 3.00   est = 3.54   {'was_impossible': False}
user: 2          item: 150        r_ui = 5.00   est = 3.95   {'was_impossible': False}
user: 2          item: 161        r_ui = 3.00   est = 3.93   {'was_impossible': False}
user: 2          item: 165        r_ui = 3.00   est = 3.63   {'was_impossible': False}
user: 2          item: 168        r_ui = 3.00   est = 3.36   {'was_impossible': False}
user: 2          item: 185        r_ui = 3.

In [13]:
# Score the held-out predictions; verbose=True makes each call print its metric
rmse = accuracy.rmse(predictions, verbose=True)
mae = accuracy.mae(predictions, verbose=True)

RMSE: 0.9417
MAE:  0.7352
