### Collaborative Filtering avec Surprise

In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD

In [2]:
# Recommendation function
def generate_recommendations(
    ratings_updated: pd.DataFrame,
    favorite_movies,
    random_state=None,
) -> pd.DataFrame:
    """Recommend unseen movies to a brand-new user via SVD collaborative filtering.

    A synthetic user is appended to the ratings with a 5.0 rating for each
    favorite movie, an SVD model is fitted on the full augmented data set, and
    a rating is predicted for every movie that this user has not rated.

    Parameters
    ----------
    ratings_updated : pandas.DataFrame
        Existing ratings. Must contain the columns 'userId', 'tmdb_id' and
        'rating'. The caller's frame is not modified (a new frame is built).
    favorite_movies : iterable
        'tmdb_id' values the new user likes. Duplicates are ignored so that a
        movie is never rated twice by the same user. May be empty.
    random_state : optional
        Forwarded to ``SVD(random_state=...)``. Pass an int to make the
        factorization (and therefore the recommendations) reproducible.
        The default ``None`` keeps the library's default behavior.

    Returns
    -------
    pandas.DataFrame
        Columns 'predicted_rating' and 'tmdb_id', one row per movie the new
        user has not rated, sorted by 'predicted_rating' in descending order.
        The index keeps each row's position before sorting.
    """
    ### COLLABORATIVE FILTERING ###
    # Add the new user to the ratings in a single concat (no per-row growth).
    new_user_id = ratings_updated['userId'].max() + 1
    favorites = list(dict.fromkeys(favorite_movies))  # de-duplicate, keep order
    if favorites:
        new_rows = pd.DataFrame(
            {'userId': new_user_id, 'tmdb_id': favorites, 'rating': 5.0}
        )
        ratings_updated = pd.concat([ratings_updated, new_rows], ignore_index=True)

    # Train the model on the augmented data.
    reader = Reader(rating_scale=(0.5, 5))
    data = Dataset.load_from_df(ratings_updated[['userId', 'tmdb_id', 'rating']], reader)
    svd = SVD(random_state=random_state)
    svd.fit(data.build_full_trainset())

    # Isolate the movies the new user never rated (set => O(1) membership tests).
    is_new_user = ratings_updated['userId'] == new_user_id
    already_seen = set(ratings_updated.loc[is_new_user, 'tmdb_id'].tolist())
    never_seen = [
        movie
        for movie in ratings_updated['tmdb_id'].unique().tolist()
        if movie not in already_seen
    ]

    # Predict a rating for each unseen movie.
    predictions = [svd.predict(new_user_id, movie) for movie in never_seen]

    # Collaborative-filtering results, best predictions first.
    result_collaborative = pd.DataFrame(
        {
            'predicted_rating': [pred.est for pred in predictions],
            'tmdb_id': [pred.iid for pred in predictions],
        }
    )
    # A stable sort keeps tied predictions in a deterministic order.
    return result_collaborative.sort_values(
        by='predicted_rating', ascending=False, kind='stable'
    )

In [3]:
# Load the ratings file
ratings_path = 'src/Movielens_ratings_updated.csv'
ratings_updated = pd.read_csv(ratings_path)

# Favorite movies for the demo user (example tmdb_id values)
favorite_movies = [13448, 402, 204922]

# Try out the recommendation function; the returned frame is the cell's output
generate_recommendations(
    ratings_updated=ratings_updated,
    favorite_movies=favorite_movies,
)

Unnamed: 0,predicted_rating,tmdb_id
7,5.000000,278
291,4.884021,745
114,4.881138,346
120,4.877691,975
78,4.852871,389
...,...,...
1865,2.888362,1370
2280,2.703560,11186
2347,2.697328,9349
2420,2.595323,10157
