In [1]:
import pandas as pd

# Load the two CSV inputs: `books` (used below for its book_id / original_title
# columns) and `ratings` (used below for its user_id / book_id / rating columns).
books = pd.read_csv("data/books.csv")
ratings = pd.read_csv("data/ratings.csv")

## Model-based Collaborative Filtering (SVD)

In [2]:
import numpy as np
from scipy.sparse.linalg import svds
from scipy.linalg import sqrtm

# Build the user x book table from the ratings already given.
# Missing (user, book) pairs are filled with 0, so 0 means "not rated".
R_df = ratings.pivot(index="user_id", columns="book_id", values="rating").fillna(0)

# Only the raw values are needed for svds.
R = R_df.values

# Per-user row mean, removed before the factorization and added back at the end.
# NOTE(review): the mean is taken over every column, including the 0-filled
# unrated books, so it is not the mean of the ratings the user actually gave.
# Left unchanged because altering it would change every prediction; confirm intent.
user_ratings_mean = np.mean(R, axis=1)
user_ratings_mean = user_ratings_mean.astype(np.float16)  # stored as float16
R_demeaned = R - user_ratings_mean.reshape(-1, 1)

# After a study of this value (from 90 to 110) looking at the mean and standard
# deviation of the error (prediction - actual rating), 90 was chosen as the best
# error / computation-time trade-off.
latent_dimension = 90

U, sigma, Vt = svds(R_demeaned, k=latent_dimension)

# svds returns the singular values as a 1-D array. The square root of the
# corresponding diagonal matrix is just the element-wise square root, so no
# general matrix square root is needed.
sigma_sqrt = np.sqrt(sigma)

# Diagonal-matrix forms, kept under their original names.
sigma = np.diag(sigma)
s_root = np.diag(sigma_sqrt)

# Multiplying by a diagonal matrix is a column (resp. row) scaling, done here by
# broadcasting. Both factors are then stored as float16.
Usk = (U * sigma_sqrt).astype(np.float16)
skV = (sigma_sqrt[:, np.newaxis] * Vt).astype(np.float16)

# Predicted rating of every user for every book.
predicted_rating = np.dot(Usk, skV)

# Add the per-user mean back.
predicted_rating = predicted_rating + user_ratings_mean.reshape(-1, 1)

preds_df = pd.DataFrame(predicted_rating, columns=R_df.columns, index=R_df.index)

[0.042  0.0287 0.0158 0.0505 0.0404 0.0389 0.0592 0.035  0.0455 0.048 ]
[0.042  0.0287 0.0158 0.0505 0.0404 0.0389 0.0592 0.035  0.0455 0.048 ]


In [7]:
def get_book_name(book_id, books_df=None):
    """Return the original title of the book identified by ``book_id``.

    Parameters
    ----------
    book_id : scalar
        Value looked up in the ``book_id`` column.
    books_df : pandas.DataFrame, optional
        Table with ``book_id`` and ``original_title`` columns. When omitted,
        the notebook-level ``books`` table is used.

    Returns
    -------
    The ``original_title`` entry of the single matching row.

    Raises
    ------
    ValueError
        If zero rows or more than one row match ``book_id``.
    """
    if books_df is None:
        # Resolved at call time so a reloaded `books` table is picked up.
        books_df = books

    matches = books_df.loc[books_df["book_id"] == book_id, "original_title"]
    if len(matches) != 1:
        # Same exception type a bare `.item()` raises, but naming the offending id.
        raise ValueError(
            f"expected exactly one book with book_id={book_id!r}, found {len(matches)}"
        )
    return matches.item()

def recommandation(user, preds_df=preds_df, top_n=10):
    """Return the favourite and the recommended book titles for one user.

    Parameters
    ----------
    user : label
        Row label of the user in ``R_df`` and ``preds_df``.
    preds_df : pandas.DataFrame
        Predicted ratings, indexed like ``R_df``.
    top_n : int, default 10
        Maximum number of titles returned in each list.

    Returns
    -------
    (list_prefered, list_recommandations) : tuple of two lists of titles
        ``list_prefered``: books the user has rated (non-zero entry in
        ``R_df``), ordered by decreasing *predicted* rating. Note that the
        order comes from the predictions, not from the ratings actually given.
        ``list_recommandations``: books the user has not rated (zero entry in
        ``R_df``), ordered by decreasing predicted rating.
    """
    # Rows of this user in the original ratings and in the predicted ratings.
    original_ratings = R_df.loc[user]
    pred_ratings = preds_df.loc[user]

    already_rated = original_ratings != 0

    # Rank both groups by predicted score, best first.
    ranked_rated = pred_ratings[already_rated].sort_values(ascending=False)
    ranked_unrated = pred_ratings[~already_rated].sort_values(ascending=False)

    # The index holds book ids; .iloc makes the "first top_n rows" explicit even
    # though the labels are integers.
    prefered_ids = list(ranked_rated.iloc[:top_n].index)
    recommended_ids = list(ranked_unrated.iloc[:top_n].index)

    # Translate ids into titles.
    list_prefered = [get_book_name(book_id) for book_id in prefered_ids]
    list_recommandations = [get_book_name(book_id) for book_id in recommended_ids]

    return list_prefered, list_recommandations

# Print the favourite and the recommended titles for users 1 to 9.
for i in range(1, 10):
    list_prefered, list_recommandations = recommandation(user=i)

    print("User n°", i)
    # Header and list on consecutive lines, as two separate prints would give.
    print("Livres préférés :", list_prefered, sep="\n")
    print("\nLivres recommandés :", list_recommandations, sep="\n")
    print("_______________\n")


1
Livres préférés :
['To Kill a Mockingbird', 'Pride and Prejudice', 'The Kite Runner ', 'Life of Pi', 'Memoirs of a Geisha', 'The Help', 'Män som hatar kvinnor', 'O Alquimista', 'A Thousand Splendid Suns', 'The Giver']

Livres recommandés :
['Анна Каренина', 'Cutting for Stone', 'The Guernsey Literary and Potato Peel Pie Society', 'A Tree Grows In Brooklyn ', 'Le Petit Prince', 'Wuthering Heights', 'The Joy Luck Club', 'Brave New World', 'A Tale of Two Cities', 'Tuesdays with Morrie']
_______________

2
Livres préférés :
['Harry Potter and the Prisoner of Azkaban', 'The Great Gatsby', "Harry Potter and the Philosopher's Stone", 'Harry Potter and the Goblet of Fire', 'Harry Potter and the Chamber of Secrets', 'Harry Potter and the Half-Blood Prince', 'Harry Potter and the Deathly Hallows', 'Harry Potter and the Order of the Phoenix', 'Het Achterhuis: Dagboekbrieven 14 juni 1942 - 1 augustus 1944', 'The Tipping Point: How Little Things Can Make a Big Difference']

Livres recommandés :
[

In [4]:
# Display the predicted-rating table (rows: user_id, columns: book_id).
preds_df

book_id,1,2,3,4,5,6,7,8,9,10,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.243652,0.394531,0.032990,5.101562,0.219727,-0.221436,0.444092,0.032410,0.101807,4.804688,...,0.049500,-0.006897,-0.018097,-0.010223,-0.006439,0.010010,-0.010406,-0.017578,0.007507,-0.017761
2,-0.338379,5.148438,-0.010025,-0.271240,5.234375,-0.191528,0.436768,3.632812,0.831055,3.222656,...,-0.001633,-0.007553,0.008713,-0.010025,-0.012222,0.018219,0.047974,0.007187,0.037170,0.031097
3,0.018753,0.186157,-0.088928,2.826172,0.086121,0.092041,0.129883,0.034851,-0.050598,0.473145,...,0.023895,0.002213,0.008408,0.007156,0.021759,0.003883,0.003082,0.005463,0.016663,0.007797
4,-0.185669,5.359375,-0.264893,3.792969,4.285156,-0.216309,4.261719,4.273438,0.791992,4.039062,...,-0.017181,-0.003052,-0.029633,0.000946,0.006256,0.013000,-0.029999,0.006378,-0.009766,-0.020660
5,-0.230103,0.165527,0.065063,0.099243,-0.050232,2.140625,0.025482,0.027954,-0.088379,0.077637,...,0.018005,0.014191,0.032166,0.014862,0.022675,0.095459,0.025696,0.028610,0.029770,0.024216
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53420,4.636719,4.652344,4.730469,-0.105164,0.989258,0.070068,0.202881,1.127930,3.398438,2.482422,...,-0.020325,-0.026550,0.005997,0.008163,-0.003845,0.012665,-0.015411,-0.002319,0.005905,-0.007019
53421,5.078125,3.910156,0.507812,4.835938,3.007812,1.063477,4.128906,1.019531,4.652344,0.668945,...,0.002502,0.054871,0.033936,-0.007324,0.073914,-0.004852,0.031982,0.077026,-0.017242,0.070312
53422,3.632812,5.125000,0.058960,0.115845,-0.086182,-0.167358,5.421875,0.143921,0.380859,3.427734,...,-0.027344,-0.058838,-0.006531,0.060272,-0.003662,0.017181,0.000031,0.010071,0.003998,0.010803
53423,2.507812,4.902344,0.581055,5.125000,0.575195,0.903320,4.457031,2.824219,-0.213257,0.492188,...,0.000092,0.024078,0.007660,0.058228,-0.004669,-0.009644,0.008133,0.010040,0.009293,0.012863


In [5]:
# Display the original user x book rating table (0 = no rating given).
R_df

book_id,1,2,3,4,5,6,7,8,9,10,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,5.0,0.0,0.0,5.0,0.0,0.0,4.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,5.0,0.0,4.0,4.0,0.0,4.0,4.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53420,4.0,5.0,3.0,0.0,2.0,0.0,0.0,0.0,4.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53421,4.0,5.0,0.0,5.0,4.0,0.0,4.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53422,4.0,5.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53423,4.0,5.0,0.0,5.0,0.0,0.0,5.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
