## Importation des librairies et transformation des données

## Création de la table des produits

In [56]:
import pandas as pd
import pickle as pk
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
# Load the raw customer dataframe.
df_customer = pd.read_csv('./csv/shopping_behavior_updated.csv', sep=";")

# Distinct product names found in 'Item Purchased' (an entry may list several items separated by ', ').
u_products = df_customer['Item Purchased'].str.split(', ', expand = True).stack().unique()

# Purchase table: one key per product, each holding one counter per customer row.
data = {}
n_customers = len(df_customer)

# Only the 'Item Purchased' column is needed, so iterate over it directly instead of
# building a full row Series with iterrows(). enumerate() yields the row position, which is
# what the plain Python counter lists below are indexed by.
for row_pos, purchased in enumerate(df_customer['Item Purchased']):
    for item in purchased.split(', '):
        if item not in data:
            data[item] = [0] * n_customers
        data[item][row_pos] += 1
df_product_table = pd.DataFrame(data)
df_product_table.to_csv('./csv/product_table.csv', sep=";", index=False)

print(df_product_table)

# Remove, in a single call, the columns that are not kept in the customer frame.
df_customer = df_customer.drop(columns=[
    'Item Purchased',
    'Customer ID',
    'Category',
    'Purchase Amount (USD)',
    'Previous Purchases',
    'Color',
    'Review Rating',
    'Size',
])

print(df_customer.head())

      Blouse  Sweater  Jeans  Sandals  Sneakers  Shirt  Shorts  Coat  Handbag  \
0          1        0      0        0         0      0       0     0        0   
1          0        1      0        0         0      0       0     0        0   
2          0        0      1        0         0      0       0     0        0   
3          0        0      0        1         0      0       0     0        0   
4          1        0      0        0         0      0       0     0        0   
...      ...      ...    ...      ...       ...    ...     ...   ...      ...   
3895       0        0      0        0         0      0       0     0        0   
3896       0        0      0        0         0      0       0     0        0   
3897       0        0      0        0         0      0       0     0        0   
3898       0        0      0        0         0      0       0     0        0   
3899       0        0      0        0         0      0       0     0        1   

      Shoes  ...  Hoodie  J

## Encodage des données

In [57]:
# Positional indexes of the columns to label-encode (columns 1 through 9).
# NOTE(review): the original note named only Gender, Location, Subscription Status and
# Frequency of Purchases, yet nine indexes are listed — confirm which columns are meant.
cols_cat_info = list(range(1, 10))

# Work on a copy of the source dataframe.
df_grouped_customer = df_customer.copy()

# NumPy array holding the values of the copied frame; encoded codes are written into it.
a_grouped_customer = df_grouped_customer.values

# Fit one LabelEncoder per listed column and overwrite that column with its encoded values.
label_encoders_info = []
for col_idx in cols_cat_info:
    column_encoder = LabelEncoder()
    a_grouped_customer[:, col_idx] = column_encoder.fit_transform(a_grouped_customer[:, col_idx])
    label_encoders_info.append(column_encoder)

# Persist the fitted encoders; the list order follows cols_cat_info.
with open('./pickles/label_encoders_info_test.pkl', 'wb') as encoders_file:
    pk.dump(label_encoders_info, encoders_file)

# Show the first encoded row.
print(a_grouped_customer[0])



[55 1 16 1 3 3 1 1 1 5]


## Initialisation du modèle

In [58]:
from sklearn.multioutput import RegressorChain
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Target matrix taken from the product table.
a_product_table = df_product_table.values
print(a_grouped_customer[0])

# Chain of random-forest regressors (100 trees), with fixed seeds on both the forest and the chain.
base_forest = RandomForestRegressor(n_estimators=100, random_state=1)
chained_model = RegressorChain(base_forest, random_state=1)
chained_model.fit(a_grouped_customer, a_product_table)

# NOTE: predictions are made on the same rows that were used for fitting,
# so the MSE printed here is an in-sample figure.
y_pred = chained_model.predict(a_grouped_customer)
train_mse = mean_squared_error(a_product_table, y_pred)
print("MSE : ", train_mse)
print(y_pred[0])

# Score a single hand-written, already-encoded feature row.
new_customer = [[20, 2, 16, 1, 1, 2, 1, 1, 1, 2]]
y_new_pred = chained_model.predict(new_customer)
print(y_new_pred)

# Persist the fitted model.
with open('./pickles/predictionModelTest.pkl', 'wb') as model_file:
    pk.dump(chained_model, model_file)
    

[55 1 16 1 3 3 1 1 1 5]
MSE :  0.005629907692307692
[0.69 0.01 0.   0.02 0.03 0.   0.01 0.01 0.01 0.   0.02 0.02 0.03 0.02
 0.03 0.01 0.   0.02 0.03 0.   0.   0.   0.03 0.01 0.01]
[[0.01 0.19 0.1  0.02 0.05 0.02 0.04 0.21 0.13 0.02 0.   0.01 0.   0.2
  0.09 0.15 0.07 0.04 0.02 0.12 0.05 0.09 0.35 0.13 0.35]]


In [59]:
# Re-read the raw customer file: an earlier cell drops 'Item Purchased' from df_customer.
df_customer = pd.read_csv('./csv/shopping_behavior_updated.csv', sep=";")

# Collect every distinct product name found in the 'Item Purchased' column
# (an entry may list several items separated by ', ').
u_products = (
    df_customer['Item Purchased']
    .str.split(', ', expand=True)
    .stack()
    .unique()
)

def predictedProduct(uniqueProduct, y_new_pred, top_n=3):
    """Return the names of the highest-scoring products for the first prediction row.

    Parameters
    ----------
    uniqueProduct : sequence
        Product names; entry ``i`` is returned when score ``i`` is selected,
        so it must be ordered like the score columns.
    y_new_pred : array-like, 2-D
        Prediction scores. Only the first row (``y_new_pred[0]``) is ranked.
        Nested Python lists are accepted as well as NumPy arrays.
    top_n : int, default 3
        Number of product names to return.

    Returns
    -------
    list
        Up to ``top_n`` entries of ``uniqueProduct``, highest score first.
    """
    # Convert explicitly so that plain lists can be negated below.
    scores = np.asarray(y_new_pred[0], dtype=float)
    # argsort is ascending; sorting the negated scores yields a descending ranking.
    ranked_ids = np.argsort(-scores)
    return [uniqueProduct[i] for i in ranked_ids[:top_n]]

# Show the recommended products for the sample prediction.
recommended_products = predictedProduct(u_products, y_new_pred)
print(recommended_products)

['Gloves', 'Belt', 'Coat']
