In [14]:
import pandas as pd
import numpy as np


In [15]:
# Load the uploaded dataset

# Charger les fichiers CSV
ratings = pd.read_csv('../data/ratings.csv')
ratings = ratings.drop('timestamp', axis=1)
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [16]:
df = pd.DataFrame(ratings)

# Utiliser pivot pour réorganiser les données
rating_matrix = df.pivot(index='userId', columns='movieId', values='rating')
rating_matrix.fillna(0, inplace=True)
# Afficher le DataFrame résultant
rating_matrix.head()

# Sauvegarder le DataFrame en CSV sous le nom "ratings_matrix.csv"
#rating_matrix.to_csv('ratings_matrix.csv', index=False)

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
from tqdm import tqdm

def matrix_factorization(R, K, num_iterations, alpha, beta):
    """
    Implémente la Matrix Factorization avec Gradient Descent.
    
    Arguments:
        R : np.array, matrice d'évaluations (users x items), valeurs manquantes doivent être 0
        K : int, nombre de facteurs latents
        steps : int, nombre d'itérations
        alpha : float, taux d'apprentissage
        beta : float, coefficient de régularisation

    Retourne:
        P : np.array, matrice des utilisateurs (users x K)
        Q : np.array, matrice des items (items x K)
    """
    # Dimensions de la matrice
    num_users, num_items = R.shape

    # Initialisation aléatoire des matrices P (users x K) et Q (items x K)
    P = np.random.rand(num_users, K)
    Q = np.random.rand(num_items, K)
    
    for iteration in tqdm(range(num_iterations), desc="Matrix Factorization Progress"):
       
        for i in range(num_users):
            for j in range(num_items):
                if R[i, j] > 0:
                    eij = R[i, j] - np.dot(P[i, :], Q[j, :].T)
                    for k in range(K):
                        P[i, k] += alpha * (2 * eij * Q[j, k] - beta * P[i, k])     

        for j in range(num_items):
            for i in range(num_users):
                if R[i, j] > 0:
                    eij = R[i, j] - np.dot(P[i, :], Q[j, :].T)
                    for k in range(K):
                        Q[j, k] += alpha * (2 * eij * P[i, k] - beta * Q[j, k])
    return P, Q

R = rating_matrix.to_numpy()
K = 30
num_iterations = 10
alpha = 0.01
beta = 0.01
P, Q = matrix_factorization(R, K, num_iterations, alpha, beta)
R_approx = np.dot(P, Q.T)

print(R_approx[:5, :5])

Matrix Factorization Progress:   0%|          | 0/10 [00:00<?, ?it/s]

Matrix Factorization Progress: 100%|██████████| 10/10 [00:54<00:00,  5.49s/it]

[[4.56555033 4.16671145 3.72908575 3.50253554 3.29919792]
 [3.62030263 3.85387899 1.95918414 2.05003291 2.92926645]
 [2.31198128 2.25892401 3.49980599 2.66919476 4.26250057]
 [4.87317834 4.34541226 2.18448757 2.70349676 3.0993092 ]
 [3.44094486 4.35022248 2.76241447 2.65320513 2.82416264]]





In [18]:
print(R_approx)

[[4.56555033 4.16671145 3.72908575 ... 4.61453277 4.24802835 6.20292134]
 [3.62030263 3.85387899 1.95918414 ... 3.50423924 2.58719628 4.25606994]
 [2.31198128 2.25892401 3.49980599 ... 4.41088591 2.9239126  5.30580992]
 ...
 [3.67011257 3.64989291 3.09728591 ... 3.65535115 2.83324079 4.29658836]
 [4.01738599 4.15394373 2.49099809 ... 3.68259602 2.26044877 3.98936089]
 [3.86169066 4.11765231 2.83962884 ... 4.06030125 3.55006036 4.96706785]]


In [19]:
# Function to recommend top N items for each user
def recommend_top_n(R_approx, num_recommendations=5):
    recommendations = {}

    for user_index in range(R_approx.shape[0]):
        user_ratings = R_approx[user_index]
        unseen_items = np.where(R[user_index] == 0.)[0]
        sorted_unseen_items = unseen_items[np.argsort(user_ratings[unseen_items])[::-1]]
        recommendations[user_index] = sorted_unseen_items[:num_recommendations]
    return recommendations

In [20]:
# Charger les fichiers CSV
movies = pd.read_csv('../data/movies.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [21]:
# Recommend top 5 movies for each user
top_n_recommendations = recommend_top_n(R_approx, num_recommendations=5)

for user_index, recommended_items in top_n_recommendations.items():
    print(f"User {user_index+1} recommended items: {[movies[movies['movieId'] == rating_matrix.columns[item]]['title'].values[0] for item in recommended_items]}")
    

User 1 recommended items: ['Alien Contamination (1980)', 'Grand Hotel (1932)', 'Pride and Prejudice and Zombies (2016)', 'The Natural Love (1996)', 'Samurai Assassin (Samurai) (1965)']
User 2 recommended items: ['Neon Genesis Evangelion: The End of Evangelion (Shin seiki Evangelion Gekijô-ban: Air/Magokoro wo, kimi ni) (1997)', 'Girls About Town (1931)', 'Shadow of a Doubt (1943)', 'Dark Blue World (Tmavomodrý svet) (2001)', 'Little Caesar (1931)']
User 3 recommended items: ['Legend (2015)', 'Watch on the Rhine (1943)', 'Old Boy (2003)', 'Nasu: Summer in Andalusia (2003)', 'Big Night (1996)']
User 4 recommended items: ['We Could Be King (2014)', 'Water Horse: Legend of the Deep, The (2007)', "Midsummer Night's Sex Comedy, A (1982)", 'Salo, or The 120 Days of Sodom (Salò o le 120 giornate di Sodoma) (1976)', 'Toy Story 2 (1999)']
User 5 recommended items: ['Sex Ed (2014)', 'Graduate, The (1967)', 'Harry Brown (2009)', 'Sophie Scholl: The Final Days (Sophie Scholl - Die letzten Tage) (20

In [22]:
# Recommend top 5 movies for each user

for user_index, recommended_items in top_n_recommendations.items():
    print(f"User {user_index+1} recommended items: {[movies[movies['movieId'] == rating_matrix.columns[item]]['genres'].values[0] for item in recommended_items]}")

User 1 recommended items: ['Action|Horror|Sci-Fi', 'Drama|Romance', 'Comedy|Horror|Romance|Thriller', 'Documentary', 'Action|Adventure|Drama']
User 2 recommended items: ['Action|Animation|Drama|Fantasy|Sci-Fi', 'Comedy', 'Crime|Drama|Thriller', 'Drama|War', 'Crime|Drama']
User 3 recommended items: ['Crime|Thriller', 'Drama', 'Mystery|Thriller', 'Animation', 'Comedy|Drama']
User 4 recommended items: ['Documentary', 'Adventure|Children|Fantasy', 'Comedy|Romance', 'Drama', 'Adventure|Animation|Children|Comedy|Fantasy']
User 5 recommended items: ['Comedy|Romance', 'Comedy|Drama|Romance', 'Crime|Drama|Thriller', 'Drama|War', 'Crime|Drama']
User 6 recommended items: ['Adventure|Comedy|Drama', 'Comedy', 'Action|Drama', 'Drama', 'Comedy|Romance']
User 7 recommended items: ['Comedy', 'Comedy|Drama', 'Crime|Drama|Mystery', 'Drama|Sci-Fi', 'Adventure|Animation|Children|Crime|Drama']
User 8 recommended items: ['Musical|Romance', 'Documentary', 'Comedy|Drama', 'Drama', 'Drama']
User 9 recommended i

In [23]:
#Meilleur avis d'un user
ratings_user1_above_4 = ratings[(ratings['userId'] == 1) & (ratings['rating'] > 4)]['movieId'].tolist()
print(len(ratings_user1_above_4))
for i in ratings_user1_above_4:
    print(movies[movies['movieId'] == i]['title'].values[0],movies[movies['movieId'] == i]['genres'].values[0])

124
Seven (a.k.a. Se7en) (1995) Mystery|Thriller
Usual Suspects, The (1995) Crime|Mystery|Thriller
Bottle Rocket (1996) Adventure|Comedy|Crime|Romance
Rob Roy (1995) Action|Drama|Romance|War
Canadian Bacon (1995) Comedy|War
Desperado (1995) Action|Romance|Western
Billy Madison (1995) Comedy
Dumb & Dumber (Dumb and Dumber) (1994) Adventure|Comedy
Star Wars: Episode IV - A New Hope (1977) Action|Adventure|Sci-Fi
Tommy Boy (1995) Comedy
Jungle Book, The (1994) Adventure|Children|Romance
Fugitive, The (1993) Thriller
Schindler's List (1993) Drama|War
Tombstone (1993) Action|Drama|Western
Pinocchio (1940) Animation|Children|Fantasy|Musical
Fargo (1996) Comedy|Crime|Drama|Thriller
James and the Giant Peach (1996) Adventure|Animation|Children|Fantasy|Musical
Wizard of Oz, The (1939) Adventure|Children|Fantasy|Musical
Citizen Kane (1941) Drama|Mystery
Adventures of Robin Hood, The (1938) Action|Adventure|Romance
Mr. Smith Goes to Washington (1939) Drama
Winnie the Pooh and the Blustery Day (19