In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
# Load the shopping-behaviour dataset (semicolon-separated CSV).
df_customer = pd.read_csv('./csv/shopping_behavior_updated.csv', sep=";")

# Distinct values found in the 'Item Purchased' column.
u_products = df_customer['Item Purchased'].unique()
print("Unique products", u_products, sep="\n")

# Group the rows by Age, Gender, Location, Subscription Status and
# Frequency of Purchases.
df_customer_info_grouped = df_customer.groupby(
    ['Age', 'Gender', 'Location', 'Subscription Status', 'Frequency of Purchases']
)

def group_values(series):
    """Return the elements of ``series`` as a plain Python list.

    Used as a groupby aggregation function so that every group keeps all of
    its values (in iteration order) instead of being reduced to a scalar.
    """
    return [entry for entry in series]
# For every customer group, gather the purchased items and their review
# ratings into lists (one list per group and per column).
aggregated_product = (
    df_customer
    .groupby(['Age','Gender', 'Location', 'Subscription Status','Frequency of Purchases'])
    .agg(dict.fromkeys(['Item Purchased', 'Review Rating'], group_values))
    .reset_index()
)

# Build the dataframe holding the grouped purchases; the following cells
# work on this frame.
df_grouped_customer_purchase = pd.DataFrame(aggregated_product)

print("Aggregated product", df_grouped_customer_purchase, sep="\n")

# Persist the grouped purchases/reviews as a semicolon-separated CSV.
df_grouped_customer_purchase.to_csv(
    './csv/grouped_customer_purchase_review.csv',
    sep=';',
    index=False,
)

Unique products
['Blouse' 'Sweater' 'Jeans' 'Sandals' 'Sneakers' 'Shirt' 'Shorts' 'Coat'
 'Handbag' 'Shoes' 'Dress' 'Skirt' 'Sunglasses' 'Pants' 'Jacket' 'Hoodie'
 'Jewelry' 'T-shirt' 'Scarf' 'Hat' 'Socks' 'Backpack' 'Belt' 'Boots'
 'Gloves']
Aggregated product
      Age  Gender    Location Subscription Status Frequency of Purchases  \
0      18  Female      Alaska                  No                 Weekly   
1      18  Female    Illinois                  No            Fortnightly   
2      18  Female      Kansas                  No               Annually   
3      18  Female    Kentucky                  No            Fortnightly   
4      18  Female    Maryland                  No               Annually   
...   ...     ...         ...                 ...                    ...   
3744   70    Male       Texas                  No         Every 3 Months   
3745   70    Male       Texas                 Yes                Monthly   
3746   70    Male     Vermont                  No     

## Réorganisation des données

In [12]:
def _join_values(values):
    """Return ``values`` rendered as a single ``', '``-separated string.

    A value that is already a string is returned unchanged. Without this
    guard, running the cell a second time would iterate over the characters
    of the previously joined string and produce ``"S, h, o, r, t, s"``.
    """
    if isinstance(values, str):
        return values
    return ', '.join(map(str, values))


# Compute both converted columns first, so that the frame is only modified
# once the conversion has succeeded for every row.
item_strings = df_grouped_customer_purchase['Item Purchased'].apply(_join_values)
review_strings = df_grouped_customer_purchase['Review Rating'].apply(_join_values)

# Debug listing of the items of every group, one group per line. The
# trailing ", " is kept so the cell prints exactly what it printed before.
for joined_items in item_strings:
    print(joined_items + ", ")

# Replace the list-valued columns by their comma-separated string form.
df_grouped_customer_purchase['Review Rating'] = review_strings
df_grouped_customer_purchase['Item Purchased'] = item_strings

print(df_grouped_customer_purchase)

Shorts, 
Shirt, 
Socks, 
Coat, 
Dress, 
Boots, 
Shirt, 
Handbag, 
Sunglasses, 
Handbag, 
Sunglasses, 
Hat, 
Belt, 
Dress, 
Backpack, 
Scarf, 
Scarf, 
Jacket, 
Shorts, 
Jeans, 
Shirt, 
Shirt, 
Gloves, 
Shorts, 
Belt, 
Dress, 
Jacket, 
Scarf, 
Dress, 
Sneakers, 
Sweater, Jeans, 
Sneakers, 
Jacket, 
Pants, 
Socks, 
T-shirt, 
Gloves, 
Shirt, 
Jewelry, 
Sweater, 
Jacket, 
Coat, 
Sunglasses, 
Skirt, 
Sneakers, 
Coat, 
Shirt, 
Gloves, 
Skirt, 
Blouse, 
Socks, 
Scarf, 
Socks, 
Jacket, 
Boots, 
Coat, 
Sweater, 
Coat, 
Dress, 
Boots, 
Skirt, 
Sandals, 
Sandals, 
Hoodie, 
Socks, 
Gloves, 
Blouse, 
Skirt, 
Scarf, 
Sneakers, 
Shoes, 
Skirt, 
Shirt, 
Sweater, 
Backpack, 
Hat, Jewelry, 
Sunglasses, 
Sandals, 
Coat, 
Blouse, 
Coat, 
Sunglasses, 
Dress, 
Sunglasses, 
Dress, 
Blouse, 
Handbag, 
Hoodie, 
Jacket, 
Sweater, 
Skirt, 
Coat, 
Coat, 
Jewelry, 
Shoes, 
Skirt, 
Sweater, 
Pants, 
Coat, 
Belt, 
Boots, 
Shoes, 
Pants, 
Pants, 
Socks, 
Sneakers, 
Jeans, 
Sweater, 
Gloves, 
Dress, 
Sunglasses, 
Handb

In [15]:
# Collect the distinct product names found across all groups.
u_products = df_grouped_customer_purchase['Item Purchased'].str.split(', ', expand = True).stack().unique()

n_groups = len(df_grouped_customer_purchase)

# One column per product, one slot per customer group:
#   data[item][i]                 -> number of times group i bought `item`
#   dataReview[item + "_review"]  -> rating sum while looping, turned into the
#                                    mean rating once all rows are processed
data = {}
dataReview = {}

# enumerate() provides the list position, so the loop does not depend on the
# frame's index labels being exactly 0..n-1.
for i, (_, row) in enumerate(df_grouped_customer_purchase.iterrows()):
    items = row['Item Purchased'].split(', ')
    reviews = row['Review Rating'].split(', ')
    if len(items) != len(reviews):
        # Items and ratings are paired by position; a mismatch means the
        # row cannot be interpreted.
        raise ValueError(
            "Row {}: {} items but {} review ratings".format(i, len(items), len(reviews))
        )
    for item, review in zip(items, reviews):
        review_column = item + "_review"
        if item not in data:
            data[item] = [0] * n_groups
            dataReview[review_column] = [0.0] * n_groups
        data[item][i] += 1
        dataReview[review_column][i] += float(review)

# Convert the accumulated sums into arithmetic means. The previous pairwise
# "(new + current) / 2" update was only a true mean for at most two ratings.
# Groups that never bought the product keep a rating of 0.0.
for item, counts in data.items():
    review_column = item + "_review"
    dataReview[review_column] = [
        total / count if count else 0.0
        for total, count in zip(dataReview[review_column], counts)
    ]

df_product_table = pd.DataFrame(data)
df_review_table = pd.DataFrame(dataReview)

df_concat = pd.concat([df_grouped_customer_purchase, df_product_table, df_review_table], axis=1)

# Interleave each product column with its "<product>_review" column; both
# dicts were filled in the same order, so zip() pairs them correctly.
# Only these columns are kept in df_concat.
columns = [col for pair in zip(df_product_table.columns, df_review_table.columns) for col in pair]
df_concat = df_concat.reindex(columns=columns)
print(df_concat)

# Persist the purchase-count table as a semicolon-separated CSV.
df_product_table.to_csv('./csv/product_table.csv', sep=';', index=False)

      Shorts  Shorts_review  Shirt  Shirt_review  Socks  Socks_review  Coat  \
0          1            3.5      0           0.0      0           0.0     0   
1          0            0.0      1           3.7      0           0.0     0   
2          0            0.0      0           0.0      1           3.5     0   
3          0            0.0      0           0.0      0           0.0     1   
4          0            0.0      0           0.0      0           0.0     0   
...      ...            ...    ...           ...    ...           ...   ...   
3744       0            0.0      0           0.0      0           0.0     0   
3745       0            0.0      0           0.0      0           0.0     0   
3746       0            0.0      0           0.0      0           0.0     0   
3747       0            0.0      0           0.0      0           0.0     0   
3748       0            0.0      0           0.0      0           0.0     0   

      Coat_review  Dress  Dress_review  ...  Skirt 