# Système de recommandation

In [1]:
import math
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.spatial.distance import pdist, squareform
from sklearn.metrics import mean_squared_error
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import NearestNeighbors

## 1. Chargement des données

In [2]:
# Ratings table: only the customer / productId / rating columns are read,
# with compact numeric dtypes.
df_ratings = pd.read_csv(
    'Reviews.csv',
    dtype={'customer': 'int32', 'productId': 'int32', 'rating': 'float32'},
    usecols=['customer', 'productId', 'rating'],
)

# Product table: only the productId / title columns are read.
df_products = pd.read_csv(
    'Products.csv',
    dtype={'productId': 'int32', 'title': 'str'},
    usecols=['productId', 'title'],
)

## 2. Action sur les données 'Products'

In [3]:
# Dimensions of the product table as (rows, columns)
df_products.shape

(549, 2)

In [4]:
# Preview the first rows of the product table
df_products.head()

Unnamed: 0,productId,title
0,620,Toddler Boys Camo Print Hoodie & Sweatpants
1,619,SHEIN Girls Solid Cami Top & Belted Pants Set
2,618,SHEIN Girls Colorblock Slogan Graphic Pullover
3,617,SHEIN Girls Drop Shoulder Plaid Sweatshirt
4,616,SHEIN Girls Drop Shoulder Letter Hoodie & Camo...


In [5]:
# Look up a product title from its productId
def productTitle(productId):
    """Return the title of the first row of ``df_products`` matching ``productId``.

    Parameters
    ----------
    productId : value compared against the ``productId`` column of ``df_products``.

    Returns
    -------
    The ``title`` value of the first matching row, or ``None`` when no row matches.
    """
    # Select by column name so the lookup does not depend on column order.
    titles = df_products.loc[df_products['productId'] == productId, 'title']
    if titles.empty:
        return None
    return titles.iloc[0]

In [6]:
# Example: title of the product whose productId is 620
productTitle(620)

'Toddler Boys Camo Print Hoodie & Sweatpants'

## 3. Action sur les données 'Ratings'

In [7]:
# Dimensions of the ratings table as (rows, columns)
df_ratings.shape

(1781, 3)

In [8]:
# Preview the first rows of the ratings table
df_ratings.head()

Unnamed: 0,customer,productId,rating
0,1,541,4.0
1,1,540,4.0
2,1,539,5.0
3,1,538,3.0
4,1,537,3.0


In [9]:
# Product x customer rating matrix: rows are productId, columns are customer.
# Missing (product, customer) pairs are filled with 0.
product_user_mat = (
    df_ratings
    .pivot(index='productId', columns='customer', values='rating')
    .fillna(0)
)
# Preview of the transposed matrix (one row per customer)
product_user_mat.transpose().head()

productId,48,49,50,51,52,53,54,55,56,57,...,611,612,613,614,615,616,617,618,619,620
customer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,2.0,2.0,4.0,1.0,4.0,3.0,2.0,4.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,4.0,1.0,4.0,4.0,4.0,1.0,4.0,1.0,5.0
3,4.0,2.0,4.0,3.0,5.0,4.0,2.0,3.0,3.0,4.0,...,4.0,2.0,2.0,3.0,5.0,5.0,5.0,1.0,5.0,2.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# For each customer, record the products they have rated (non-zero entry):
#   rate[customer]      -> list of productId labels
#   ligne_idx[customer] -> positions of those products along the product axis
rate = {}
ligne_idx = {}
for idUser, ligne in product_user_mat.T.iterrows():
    # Single pass over (position, productId, rating); no per-customer index list needed.
    rated = [
        (prdId, idx)
        for idx, (prdId, rat) in enumerate(zip(ligne.index, ligne.values))
        if rat != 0
    ]
    rate[idUser] = [prdId for prdId, _ in rated]
    ligne_idx[idUser] = [idx for _, idx in rated]

In [11]:
# Print the titles of the first 10 products rated by customer i
i=1
for prdId in rate[i][:10]:
    print(productTitle(prdId))

Sweater Dress Winter
Beach Dress Woman
Long Party Dresses
new women's hollow slim dress
Midi Dresses Spring Women
Long beaut Dress Elegant
white lace dress women
Autumn New Dresses Classic
Sweater Long Dresses
Cute Large Dress Big Robe


In [12]:
# For each customer, record the products they have NOT rated (entry equal to 0):
#   norate[customer]     -> list of productId labels
#   ligne_idx2[customer] -> positions of those products along the product axis
norate = {}
ligne_idx2 = {}
for idUser, ligne in product_user_mat.T.iterrows():
    # Single pass over (position, productId, rating); no per-customer index list needed.
    unrated = [
        (prdId, idx)
        for idx, (prdId, rat) in enumerate(zip(ligne.index, ligne.values))
        if rat == 0
    ]
    norate[idUser] = [prdId for prdId, _ in unrated]
    ligne_idx2[idUser] = [idx for _, idx in unrated]

In [13]:
# Print the titles of the first 10 products not rated by customer i
i=1
for prdId in norate[i][:10]:
    print(productTitle(prdId))

Men Halloween Pumpkin Design Stud Earrings
Men Halloween Skull Charm Necklace
Men Bat Decor Stud Earrings
Men Spider Design Earring Jackets
Men Rhinestone Decor Skull Earring Jackets
2pcs Men Eye Decor Ring
Men Halloween Pumpkin Design Stud Earrings
Men Rhinestone Decor Skull Design Ring
2pcs Men Eye Design Ring
Men Skull Decor Mittens Bracelet


In [14]:
# Preview the product x customer matrix (one row per productId)
product_user_mat.head()

customer,1,2,3,4,6,8
productId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
48,1.0,0.0,4.0,0.0,0.0,1.0
49,2.0,0.0,2.0,0.0,0.0,3.0
50,2.0,0.0,4.0,0.0,0.0,5.0
51,4.0,0.0,3.0,0.0,0.0,3.0
52,1.0,0.0,5.0,0.0,0.0,2.0


In [15]:
# Get the list of ratings of a product from its productId
def productRat(productId):
    """Return the ratings stored for one product in ``product_user_mat``.

    Parameters
    ----------
    productId : row label of ``product_user_mat``.

    Returns
    -------
    list
        The values of that row, in the column order of ``product_user_mat``.

    Raises
    ------
    KeyError
        If ``productId`` is not a row label of ``product_user_mat``.
    """
    # Label-based lookup of the single row; no need to walk the whole matrix.
    return list(product_user_mat.loc[productId].values)

In [16]:
# Example: ratings stored for the product whose productId is 184
productRat(184)

[1.0, 0.0, 1.0, 0.0, 0.0, 1.0]

# Application de l'algorithme

In [17]:
def cosine_sim(X, Y):
    """Cosine similarity between two equal-length numeric sequences.

    Computes ``sum(X[i] * Y[i]) / sqrt(sum(X[i]**2) * sum(Y[i]**2))``.

    Parameters
    ----------
    X, Y : sequences of numbers supporting ``len`` and iteration.

    Returns
    -------
    float
        The cosine similarity, or ``0.0`` when either vector has zero norm
        (the ratio is undefined in that case).

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` do not have the same length.
    """
    if len(X) != len(Y):
        # Fail loudly instead of silently returning None.
        raise ValueError(
            f"cosine_sim expects vectors of equal length, got {len(X)} and {len(Y)}"
        )

    dot = sum(x * y for x, y in zip(X, Y))
    norm_sq = sum(x ** 2 for x in X) * sum(y ** 2 for y in Y)

    # A zero-norm vector (or empty input) would otherwise divide by zero.
    if norm_sq == 0:
        return 0.0
    return dot / (norm_sq ** 0.5)

In [18]:
# Product-to-product similarity matrix.
# Note: despite its name, `dists` holds pairwise cosine *similarities*
# (pdist is only used to evaluate cosine_sim on every pair of rows).
dists = pdist(product_user_mat, cosine_sim)
# squareform leaves the diagonal at 0; adding the identity sets each product's
# similarity with itself to 1 without touching the off-diagonal values.
sim_Mtx = pd.DataFrame(
    squareform(dists) + np.eye(len(product_user_mat.index)),
    columns=product_user_mat.index,
    index=product_user_mat.index,
)

In [19]:
def make_recommendation(n_prdtRec, userId):
    """
    Print the products with the highest predicted rating for one customer.

    For every product in ``norate[userId]`` the predicted rating is the
    similarity-weighted mean of the customer's existing ratings:
    ``sum(sim * rating) / sum(sim)`` over the products in ``rate[userId]``,
    or 0 when the similarities sum to 0. Similarities are read from
    ``sim_Mtx`` and ratings from ``product_user_mat``.

    Parameters
    ----------
    n_prdtRec  : number of products to display
    userId     : customer to recommend for; must be a key of ``rate`` and
                 ``norate`` and a column of ``product_user_mat``

    Return
    ------
    None -- the recommendations are printed, not returned.
    """
    # The customer's existing ratings do not depend on the candidate product,
    # so read them once instead of once per candidate.
    known_ratings = [
        (ratedPrdtId, product_user_mat.at[ratedPrdtId, userId])
        for ratedPrdtId in rate[userId]
    ]

    # Predicted rating for every product the customer has not rated yet
    R = []
    for noratedPrdtId in norate[userId]:
        S1 = 0
        S2 = 0
        for ratedPrdtId, r in known_ratings:
            sim = sim_Mtx.at[ratedPrdtId, noratedPrdtId]
            S1 += sim * r
            S2 += sim
        ri = S1 / S2 if S2 != 0 else 0
        R.append((noratedPrdtId, productTitle(noratedPrdtId), ri))

    # Highest prediction first. The sort is stable, so products with equal
    # predictions keep the order they have in norate[userId].
    R.sort(key=lambda rec: rec[2], reverse=True)

    # Display the first 'n_prdtRec' products
    print(f"Les {n_prdtRec} produits recommandé  pour l'utilisateur {userId} :")
    for prdId, title, rating in R[:n_prdtRec]:
        print(f'{prdId} : {title} avec un rating = {rating}')

In [20]:
# Print the 10 products with the highest predicted rating for customer 6
make_recommendation(
    n_prdtRec = 10,
    userId = 6
)

Les 10 produits recommandé  pour l'utilisateur 6 :
526 : Men Solid Flap Pocket Cargo Pants avec un rating = 3.0052532702747503
539 : Men Contrast Camo Drawstring Waist Pants avec un rating = 3.0023575904315583
531 : Men Striped Print Symmetrical Pocket Tailored Pants avec un rating = 2.9958023294818226
528 : Men Colorful Splash Ink Print Pants avec un rating = 2.9912310130487354
533 : Men Fire Print Drawstring Sweatpants avec un rating = 2.9891984055802174
530 : Men Contrast Panel Letter Patched Sweatpants avec un rating = 2.9891984055802174
540 : Men Pocket Side Tailored Pants avec un rating = 2.9891984055802174
541 : SHEIN Men Zipper Fly Allover Print Tailored Pants avec un rating = 2.9861473588983767
535 : Men Tribal Print Harem Pants avec un rating = 2.980073257967493
529 : Men Solid Slant Pocket Tailored Pants avec un rating = 2.9747521347762387
