In [103]:
import pandas as pd
import numpy as np


In [104]:
# Load the uploaded dataset

# Charger les fichiers CSV
ratings = pd.read_csv('../data/ratings.csv')
ratings = ratings.drop('timestamp', axis=1)
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [105]:
df = pd.DataFrame(ratings)

# Utiliser pivot pour réorganiser les données
rating_matrix = df.pivot(index='userId', columns='movieId', values='rating')
rating_matrix.fillna(0, inplace=True)
# Afficher le DataFrame résultant
rating_matrix.head()

# Sauvegarder le DataFrame en CSV sous le nom "ratings_matrix.csv"
#rating_matrix.to_csv('ratings_matrix.csv', index=False)

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [106]:
from tqdm import tqdm

def matrix_factorization(R, K, num_iterations, alpha, beta):
    """
    Implémente la Matrix Factorization avec Gradient Descent.
    
    Arguments:
        R : np.array, matrice d'évaluations (users x items), valeurs manquantes doivent être 0
        K : int, nombre de facteurs latents
        steps : int, nombre d'itérations
        alpha : float, taux d'apprentissage
        beta : float, coefficient de régularisation

    Retourne:
        P : np.array, matrice des utilisateurs (users x K)
        Q : np.array, matrice des items (items x K)
    """
    # Dimensions de la matrice
    num_users, num_items = R.shape

    # Initialisation aléatoire des matrices P (users x K) et Q (items x K)
    P = np.random.rand(num_users, K)
    Q = np.random.rand(num_items, K)
    
    for iteration in tqdm(range(num_iterations), desc="Matrix Factorization Progress"):
       
        for i in range(num_users):
            for j in range(num_items):
                if R[i, j] > 0:
                    eij = R[i, j] - np.dot(P[i, :], Q[j, :].T)
                    for k in range(K):
                        P[i, k] += alpha * (2 * eij * Q[j, k] - beta * P[i, k])     

        for j in range(num_items):
            for i in range(num_users):
                if R[i, j] > 0:
                    eij = R[i, j] - np.dot(P[i, :], Q[j, :].T)
                    for k in range(K):
                        Q[j, k] += alpha * (2 * eij * P[i, k] - beta * Q[j, k])
    return P, Q

R = rating_matrix.to_numpy()
K = 30
num_iterations = 10
alpha = 0.01
beta = 0.01
P, Q = matrix_factorization(R, K, num_iterations, alpha, beta)
R_approx = np.dot(P, Q.T)

print(R_approx[:5, :5])

Matrix Factorization Progress:   0%|          | 0/10 [00:00<?, ?it/s]

Matrix Factorization Progress: 100%|██████████| 10/10 [01:04<00:00,  6.48s/it]

[[3.79183131 4.4446826  3.6842954  3.91743397 3.99362813]
 [2.83505649 3.63005797 2.56384606 3.10494065 2.22041419]
 [1.59418716 2.89024856 2.02750374 3.24684832 2.85436825]
 [4.4869823  4.62125059 3.63828255 1.70832954 3.40521595]
 [3.47925024 3.88183094 2.42851683 3.33823786 3.18622625]]





In [107]:
print(R_approx)

[[3.79183131 4.4446826  3.6842954  ... 5.61981773 5.74294286 4.12422214]
 [2.83505649 3.63005797 2.56384606 ... 4.23558281 3.37183998 3.58553665]
 [1.59418716 2.89024856 2.02750374 ... 4.05887058 2.44801116 3.36115743]
 ...
 [2.66590446 3.87144297 2.51064    ... 4.9949117  3.86404581 4.21476153]
 [3.24110999 3.06932417 3.00295044 ... 4.23841602 4.59007547 3.12946484]
 [4.80060156 4.06735555 3.62944556 ... 4.87814947 3.78135713 3.49268302]]


In [108]:
# Function to recommend top N items for each user
def recommend_top_n(R_approx, num_recommendations=5):
    recommendations = {}

    for user_index in range(R_approx.shape[0]):
        user_ratings = R_approx[user_index]
        unseen_items = np.where(R[user_index] == 0.)[0]
        sorted_unseen_items = unseen_items[np.argsort(user_ratings[unseen_items])[::-1]]
        recommendations[user_index] = sorted_unseen_items[:num_recommendations]
    return recommendations

In [109]:
# Charger les fichiers CSV
movies = pd.read_csv('../data/movies.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [110]:
# Recommend top 5 movies for each user
top_n_recommendations = recommend_top_n(R_approx, num_recommendations=5)

for user_index, recommended_items in top_n_recommendations.items():
    print(f"User {user_index+1} recommended items: {[movies[movies['movieId'] == rating_matrix.columns[item]]['title'].values[0] for item in recommended_items]}")
    

User 1 recommended items: ['Louis Theroux: Law & Disorder (2008)', 'Venus in Fur (La Vénus à la fourrure) (2013)', 'Come and See (Idi i smotri) (1985)', 'Bad Boy Bubby (1993)', 'Calamity Jane (1953)']
User 2 recommended items: ['You Will Meet a Tall Dark Stranger (2010)', 'Rudderless (2014)', 'Wild Tales (2014)', 'Johnny Stecchino (1991)', 'Tales from the Darkside: The Movie (1990)']
User 3 recommended items: ['Kill List (2011)', 'Dragonslayer (1981)', 'The Angry Birds Movie (2016)', 'Red Riding Hood (2011)', 'Dust (2001)']
User 4 recommended items: ['China Syndrome, The (1979)', 'Return of the Pink Panther, The (1975)', 'Superman/Batman: Public Enemies (2009)', 'Deathgasm (2015)', "Where's Marlowe? (1998)"]
User 5 recommended items: ['Young Frankenstein (1974)', 'Big Lebowski, The (1998)', 'Sessions, The (Surrogate, The) (2012)', 'Jim & Andy: The Great Beyond (2017)', 'George Harrison: Living in the Material World (2011)']
User 6 recommended items: ['Man with the Iron Fists, The (2012

In [111]:
# Recommend top 5 movies for each user

for user_index, recommended_items in top_n_recommendations.items():
    print(f"User {user_index+1} recommended items: {[movies[movies['movieId'] == rating_matrix.columns[item]]['genres'].values[0] for item in recommended_items]}")

User 1 recommended items: ['Documentary', 'Drama', 'Drama|War', 'Drama', 'Musical|Western']
User 2 recommended items: ['Comedy|Romance', 'Comedy|Drama', 'Comedy|Drama|Thriller', 'Comedy', 'Fantasy|Horror|Thriller']
User 3 recommended items: ['Horror|Mystery|Thriller', 'Action|Adventure|Fantasy', 'Animation|Comedy', 'Fantasy|Horror|Mystery|Thriller', 'Drama|Western']
User 4 recommended items: ['Drama|Thriller', 'Comedy|Crime', 'Action|Animation|Fantasy', 'Comedy|Horror', 'Comedy']
User 5 recommended items: ['Comedy|Fantasy', 'Comedy|Crime', 'Drama', 'Documentary', 'Documentary']
User 6 recommended items: ['Action|Adventure|Crime', 'Comedy|Horror|Sci-Fi', 'Drama|Romance|Western', 'Comedy|Romance', 'Adventure|Animation|Children|Comedy']
User 7 recommended items: ['Drama|Romance', 'Documentary', 'Adventure|Comedy|Romance', 'Adventure|Drama|Sci-Fi', 'Drama|Mystery']
User 8 recommended items: ['Comedy', 'Action|Drama|War', 'Adventure|Drama|Romance', 'Drama|Mystery|Romance|Thriller', 'Comedy|

In [113]:
#Meilleur avis d'un user
ratings_user1_above_4 = ratings[(ratings['userId'] == 1) & (ratings['rating'] > 4)]['movieId'].tolist()
print(len(ratings_user1_above_4))
for i in ratings_user1_above_4:
    print(movies[movies['movieId'] == i]['title'].values[0],movies[movies['movieId'] == i]['genres'].values[0])

124
Seven (a.k.a. Se7en) (1995) Mystery|Thriller
Usual Suspects, The (1995) Crime|Mystery|Thriller
Bottle Rocket (1996) Adventure|Comedy|Crime|Romance
Rob Roy (1995) Action|Drama|Romance|War
Canadian Bacon (1995) Comedy|War
Desperado (1995) Action|Romance|Western
Billy Madison (1995) Comedy
Dumb & Dumber (Dumb and Dumber) (1994) Adventure|Comedy
Star Wars: Episode IV - A New Hope (1977) Action|Adventure|Sci-Fi
Tommy Boy (1995) Comedy
Jungle Book, The (1994) Adventure|Children|Romance
Fugitive, The (1993) Thriller
Schindler's List (1993) Drama|War
Tombstone (1993) Action|Drama|Western
Pinocchio (1940) Animation|Children|Fantasy|Musical
Fargo (1996) Comedy|Crime|Drama|Thriller
James and the Giant Peach (1996) Adventure|Animation|Children|Fantasy|Musical
Wizard of Oz, The (1939) Adventure|Children|Fantasy|Musical
Citizen Kane (1941) Drama|Mystery
Adventures of Robin Hood, The (1938) Action|Adventure|Romance
Mr. Smith Goes to Washington (1939) Drama
Winnie the Pooh and the Blustery Day (19