## IMPORT AND READ CSV

In [1]:
import pandas as pd

# Movies given as input (TMDB ids):
input_movies = [671, 259316, 411]

# Recommendations from collaborative filtering:
result_cf = pd.read_csv('./results/recommendations_from_cf.csv')
# Mean predicted rating, used below as the fallback for movies without a CF prediction.
average_predicted_rating = result_cf['predicted_rating'].mean()

# Recommendations from content-based filtering:
result_cb = pd.read_csv('./results/recommendations_from_cb.csv')
average_similarity_score = result_cb['score'].mean()

# Add an occurrence column: number of rows each movie has in the content-based results.
result_cb['occurence'] = result_cb.groupby('tmdb_id')['tmdb_id'].transform('count')

# Our combined list: keep every content-based row and attach the CF columns when they exist.
combined_list = pd.merge(result_cb, result_cf, on='tmdb_id', how='left')
# Movies missing from the CF results come out of the left merge with a NaN rating;
# replace those with the average predicted rating. fillna handles every missing
# marker, whereas an exact `type(x) == float` test skips values such as None.
combined_list['predicted_rating'] = combined_list['predicted_rating'].fillna(average_predicted_rating)

## COMBINED_SCORE FORMULA

*Variables*  

*occurence* = nombre de fois qu'un film apparait dans les listes combinées  
*scf* = note prédite par le collaborative filtering  
*scb* = score calculé par le content-based filtering

*Constantes*  

*wcf* = le poids du modèle collaborative filtering  
*wcb* = le poids du modèle content-based filtering  
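*alpha* = le poids donné au nombre d'occurrences

*Formule*

$$\text{combined\_score} = w_{cf} \cdot \frac{s_{cf}}{5} + w_{cb} \cdot \left( \frac{\sum s_{cb}}{\text{occurence}} + \alpha \cdot \text{occurence} \right)$$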

In [2]:
def calculate_combined_score(tmdb_id, combined_list, wcf=.6, wcb=.4, alpha=0.1, max_rating=5):
    """Blend the collaborative and content-based signals of one movie.

    The score is computed as::

        wcf * (predicted_rating / max_rating)
        + wcb * (sum(score) / occurence + alpha * occurence)

    Parameters
    ----------
    tmdb_id
        Identifier looked up in the ``tmdb_id`` column of ``combined_list``.
    combined_list : pandas.DataFrame
        Frame with the columns ``tmdb_id``, ``predicted_rating``, ``score``
        and ``occurence``. A movie may span several rows.
    wcf : float
        Weight of the collaborative-filtering term.
    wcb : float
        Weight of the content-based term.
    alpha : float
        Weight of the occurrence bonus inside the content-based term.
    max_rating : float
        Upper bound of the rating scale, used to normalise the predicted
        rating. Defaults to 5.

    Returns
    -------
    float or None
        The combined score, or ``None`` when ``tmdb_id`` has no row in
        ``combined_list``.

    Raises
    ------
    ValueError
        If ``max_rating`` is not strictly positive.
    """
    if max_rating <= 0:
        raise ValueError("max_rating must be strictly positive")

    tmdb_rows = combined_list[combined_list['tmdb_id'] == tmdb_id]
    if tmdb_rows.empty:
        return None

    # The predicted rating and the occurrence count are read from the first
    # matching row only; the content-based scores are summed over all rows.
    scf = tmdb_rows['predicted_rating'].values[0] / max_rating
    scb_sum = tmdb_rows['score'].sum()
    occurence = tmdb_rows['occurence'].values[0]

    score = wcf * scf + wcb * (scb_sum / occurence + alpha * occurence)

    return score

In [3]:
# Score every row against the full (still duplicated) list, then keep one row
# per movie and discard the movies that were given as input.
combined_scores = combined_list['tmdb_id'].apply(
    lambda movie_id: calculate_combined_score(movie_id, combined_list)
)
combined_list['combined_score'] = combined_scores
combined_list = (
    combined_list
    .drop_duplicates(subset=['tmdb_id'])
    .loc[lambda frame: ~frame['tmdb_id'].isin(input_movies)]
)

In [4]:
# Attach titles and genres from the TMDB content file, hide the columns that
# are not needed for display, and show the 13 best combined scores.
movies_title = pd.read_csv('./src/TMDB_content.csv')
columns_to_hide = ['poster_path', 'year', 'keywords', 'cast', 'director', 'watch_providers']
combined_list_enriched = (
    combined_list
    .merge(movies_title, how='left', left_on='tmdb_id', right_on='tmdb_id')
    .drop(columns=columns_to_hide)
)
combined_list_enriched.sort_values(by='combined_score', ascending=False).head(13)

Unnamed: 0.1,tmdb_id,score,occurence,Unnamed: 0,predicted_rating,combined_score,title,genres
7,675,0.4,2,98.0,4.38648,0.781307,Harry Potter and the Order of the Phoenix,"Adventure, Fantasy"
20,2454,0.185164,2,,3.932914,0.752379,The Chronicles of Narnia: Prince Caspian,"Adventure, Family, Fantasy"
0,672,0.7,2,91.0,4.110082,0.747113,Harry Potter and the Chamber of Secrets,"Adventure, Fantasy"
5,12445,0.414039,3,,3.932914,0.746259,Harry Potter and the Deathly Hallows: Part 2,"Fantasy, Adventure"
14,338952,0.20702,2,,3.932914,0.740745,Fantastic Beasts: The Crimes of Grindelwald,"Fantasy, Adventure, Family"
3,767,0.466667,2,,3.932914,0.740212,Harry Potter and the Half-Blood Prince,"Adventure, Fantasy"
6,12444,0.407046,2,,3.932914,0.735268,Harry Potter and the Deathly Hallows: Part 1,"Adventure, Fantasy"
13,338953,0.210819,3,,3.932914,0.726723,Fantastic Beasts: The Secrets of Dumbledore,"Fantasy, Adventure, Family"
4,673,0.456435,2,13.0,4.310946,0.72574,Harry Potter and the Prisoner of Azkaban,"Adventure, Fantasy"
2,674,0.476731,2,94.0,4.314765,0.725443,Harry Potter and the Goblet of Fire,"Adventure, Fantasy"
