## Collaborative Filtering with Cold Start
### Model inspired by: Ibtesam Ahmed
#### Link: https://www.kaggle.com/code/ibtesama/getting-started-with-a-movie-recommendation-system/notebook


##### Using SVD learning model and including user cold start scenario
##### Cold start user changed to userId 6
##### SVD hyperparameters: n_factors = 20, n_epochs = 30, lr_all = 0.005, reg_all = 0.1

In [1]:
import pandas as pd
from surprise import Reader, Dataset, SVD, accuracy

In [2]:
# Surprise reader describing the rating scale used below (0.5 to 5.0)
reader = Reader(rating_scale=(0.5, 5.0))

# Read the ratings table; later cells use its userId, movieId and rating columns
ratings = pd.read_csv(filepath_or_buffer='../tmdb/ratings_small.csv')

In [14]:
# Simulate a user cold start: keep only a handful of one user's ratings in
# the training data and hold the rest out for evaluation.
user_id = 6
n_keep = 5  # number of this user's ratings left in the training data
user_ratings = ratings[ratings['userId'] == user_id]

# The user needs strictly more than n_keep ratings: fewer makes sample() fail,
# and exactly n_keep leaves nothing held out to evaluate against.
if len(user_ratings) <= n_keep:
    raise ValueError(
        f"user {user_id} has {len(user_ratings)} ratings; "
        f"need more than {n_keep} to simulate a cold start"
    )

# Fixed random_state so the same n_keep rows are picked on every run
keep_ratings = user_ratings.sample(n=n_keep, random_state=42)   # the ratings we keep
remove_ratings = user_ratings.drop(keep_ratings.index)          # the rest we remove

# user_ratings retains the row labels of `ratings`, so the held-out rows can be
# dropped from the full table by index to form the training data.
train_ratings = ratings.drop(remove_ratings.index)

In [15]:
# Total number of ratings by the cold-start user (kept + held out)
print(user_ratings.shape[0])

44


In [16]:
# Cold start information: the sampled ratings of the chosen user that remain
# in the training data (all of the user's other ratings are held out)
keep_ratings

Unnamed: 0,userId,movieId,rating,timestamp
488,6,7090,3.0,1108134534
475,6,2571,1.0,1109258202
476,6,2657,2.0,1108134271
487,6,5952,5.0,1108134311
485,6,4641,1.5,1109258217


In [17]:
# Surprise expects exactly three columns, in this order: user, item, rating
train_triples = train_ratings.loc[:, ['userId', 'movieId', 'rating']]

# Wrap the frame as a Surprise dataset and use all of it for training
# (no internal split: the held-out ratings were already removed above)
train_data = Dataset.load_from_df(train_triples, reader)
trainset = train_data.build_full_trainset()

In [18]:
# Train the SVD model.
# random_state is fixed so that the factor initialisation, and therefore the
# predictions and RMSE/MAE reported below, are reproducible across runs.
svd = SVD(n_factors=20, n_epochs=30, lr_all=0.005, reg_all=0.1, random_state=42)
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7583604b0b60>

In [19]:
# Predict the removed (held-out) ratings for the user.
# remove_ratings already holds the true rating for each held-out movie, so
# iterate over its movieId/rating pairs directly rather than re-filtering the
# full ratings table once per movie.
predictions = []
for movie_id, true_rating in zip(remove_ratings['movieId'], remove_ratings['rating']):
    # r_ui attaches the true rating to the prediction so accuracy metrics can use it
    pred = svd.predict(user_id, movie_id, r_ui=true_rating)
    predictions.append(pred)
    print(pred)

user: 6          item: 111        r_ui = 4.00   est = 3.46   {'was_impossible': False}
user: 6          item: 158        r_ui = 2.00   est = 2.35   {'was_impossible': False}
user: 6          item: 173        r_ui = 2.00   est = 1.98   {'was_impossible': False}
user: 6          item: 293        r_ui = 5.00   est = 3.34   {'was_impossible': False}
user: 6          item: 596        r_ui = 4.00   est = 2.93   {'was_impossible': False}
user: 6          item: 903        r_ui = 4.00   est = 3.52   {'was_impossible': False}
user: 6          item: 1204       r_ui = 5.00   est = 3.52   {'was_impossible': False}
user: 6          item: 1250       r_ui = 4.50   est = 3.44   {'was_impossible': False}
user: 6          item: 1259       r_ui = 4.50   est = 3.42   {'was_impossible': False}
user: 6          item: 1276       r_ui = 4.50   est = 3.55   {'was_impossible': False}
user: 6          item: 1285       r_ui = 4.50   est = 3.16   {'was_impossible': False}
user: 6          item: 1358       r_ui = 2.

In [20]:
# Score the held-out predictions; each call prints its metric and returns the value
rmse = accuracy.rmse(predictions, verbose=True)   # root mean squared error
mae = accuracy.mae(predictions, verbose=True)     # mean absolute error

RMSE: 1.0063
MAE:  0.8684
