# Cluster Products

In [1]:
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
import pickle
import os

In [2]:
# Location of the product catalogue, relative to the notebook directory.
products_csv = "..{}data{}KaDo_Products.csv".format(os.sep, os.sep)

df = pd.read_csv(products_csv, sep=",")
# Discard the first column of the file (presumably a saved row index — confirm).
df = df.drop(columns=df.columns[0])

df.head()

Unnamed: 0,FAMILLE,MAILLE,UNIVERS,LIBELLE,PRIX_NET
0,MAQUILLAGE,MAQ_LEV_BASPRIX,MAQ_LEV BRILLANT,GLOSS TRANSPARENCE LU HV06 4.5ML,3.95
1,MAQUILLAGE,MAQ_LEV_BASPRIX,MAQ_LEV BRILLANT,GLOSS SEXYPULP CORAIL 01 CN3 10ML,9.95
2,SOINS DU VISAGE,VIS_JEUNE_ET_LEVRE,VIS_SOIN LEVRES,BAUME LEVR REPAIR REPACK 2012 4G,2.5
3,SOINS DU CORPS,CORPS_SPA_ET_MINCEUR,CORPS_SOINS HAMMAM,MASQUE VIS TRAD DE HAM T100ml,8.95
4,SOINS DU CORPS,CORPS_HYDR_LAIT_HUILE,CORPS_LAIT HUILE PARFUMS,LAIT ROSE FP FL200ML,4.45


In [3]:
# Number of rows loaded from the products file.
len(df)

1484

### Prepare DF

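Encode the text columns (`LIBELLE`, `FAMILLE`, `MAILLE`, `UNIVERS`) as integer codes with `pd.Categorical`, using one shared list of categories.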

In [4]:
# Shared vocabulary for every text column: the unique values of each column,
# concatenated in the order LIBELLE, FAMILLE, MAILLE, UNIVERS.
# dict.fromkeys removes duplicates while keeping first-seen order, so a value's
# position in this list is the integer code it receives later on.
CATEGORIES = list(
    dict.fromkeys(
        value
        for column in ("LIBELLE", "FAMILLE", "MAILLE", "UNIVERS")
        for value in df[column].unique().tolist()
    )
)

CATEGORIES[:5]

['GLOSS TRANSPARENCE LU HV06 4.5ML',
 'GLOSS SEXYPULP CORAIL 01 CN3 10ML',
 'BAUME LEVR REPAIR REPACK 2012 4G',
 'MASQUE VIS TRAD DE HAM T100ml',
 'LAIT ROSE FP FL200ML']

In [5]:
# Replace each text column by its integer code in the shared CATEGORIES list.
# Values that are absent from CATEGORIES are encoded as -1 by pandas.
for column in ("LIBELLE", "FAMILLE", "MAILLE", "UNIVERS"):
    df[column] = pd.Categorical(df[column], categories=CATEGORIES)
    df[column] = df[column].cat.codes

df.head(10)

Unnamed: 0,FAMILLE,MAILLE,UNIVERS,LIBELLE,PRIX_NET
0,1484,1493,1527,0,3.95
1,1484,1493,1527,1,9.95
2,1485,1494,1528,2,2.5
3,1486,1495,1529,3,8.95
4,1486,1496,1530,4,4.45
5,1487,1497,1531,5,9.9
6,1485,1494,1532,6,5.95
7,1488,1498,1533,7,6.45
8,1484,1499,1534,8,9.0
9,1484,1500,1535,9,3.0


### Train Model

In [6]:
# Fixed seed: KMeans initialisation is random, so without it the cluster
# numbering (and therefore the saved model and every result derived from it)
# changes on each run.
KMEANS_SEED = 42

# NOTE(review): scikit-learn 1.1 renamed algorithm="full" to "lloyd" and the old
# spelling was removed in a later release — switch the value when upgrading.
kmeans = KMeans(
    n_clusters=20,
    n_init=100,
    max_iter=1000,
    algorithm="full",
    random_state=KMEANS_SEED,
)

# Fit on the encoded product table (all remaining columns of df are used as features).
kmeans.fit(df)

kmeans.labels_

array([2, 2, 2, ..., 3, 3, 3])

### Save Model

In [7]:
# Persist the fitted model. The context manager closes the file once the dump
# finishes, instead of leaving an open handle behind.
model_path = "..{}models{}model_products_20.pkl".format(os.sep, os.sep)
with open(model_path, "wb") as model_file:
    pickle.dump(kmeans, model_file)


# Apply Model to Client

In [17]:
# Input files, relative to the notebook directory.
history_csv = "..{}data{}KaDo.csv".format(os.sep, os.sep)
catalogue_csv = "..{}data{}KaDo_Products.csv".format(os.sep, os.sep)

df_history = pd.read_csv(history_csv, sep=",")
df_products = pd.read_csv(catalogue_csv, sep=",")

# Discard the first column of the products file, as done when the model was trained.
df_products = df_products.drop(columns=df_products.columns[0])

# Copy of the complete history, taken before df_history is filtered.
df_all = df_history.copy()

df_history.head()

Unnamed: 0,TICKET_ID,MOIS_VENTE,PRIX_NET,FAMILLE,UNIVERS,MAILLE,LIBELLE,CLI_ID
0,35592159,10,1.67,HYGIENE,HYG_DOUCHE JARDINMONDE,HYG_JDM,GD JDM4 PAMPLEMOUSSE FL 200ML,1490281
1,35592159,10,1.66,HYGIENE,HYG_DOUCHE JARDINMONDE,HYG_JDM,GD JDM4 PAMPLEMOUSSE FL 200ML,1490281
2,35592159,10,7.45,SOINS DU VISAGE,VIS_CJOUR Jeunes Specifique,VIS_JEUNE_ET_LEVRE,CR JR PARF BIO.SPE AC.SENT.50ML,1490281
3,35592159,10,5.95,SOINS DU VISAGE,VIS_DEMAQ AAAR,VIS_AAAR_DEMAQLOTION,EAU MICELLAIRE 3 THES FL200ML,1490281
4,35592159,10,1.67,HYGIENE,HYG_DOUCHE JARDINMONDE,HYG_JDM,GD JDM4 TIARE FL 200ML,1490281


#### Extract Client

In [18]:
# Client whose purchase history is analysed below.
# Another candidate ID noted in the original comment: 13290776.
CLIENT_ID = 984_943_411


In [19]:
# Keep only the purchases of the selected client.
# The explicit .copy() makes df_history an independent frame: its columns are
# reassigned further down, and doing that on a boolean-mask slice of the full
# history triggers pandas' SettingWithCopyWarning.
df_history = df_history[df_history["CLI_ID"] == CLIENT_ID].copy()

df_history.head(20)

Unnamed: 0,TICKET_ID,MOIS_VENTE,PRIX_NET,FAMILLE,UNIVERS,MAILLE,LIBELLE,CLI_ID
3643163,32991432,1,2.95,CAPILLAIRES,CAP_AP SHAMP,CAPILLAIRE_AUTRE,SVC ECLAT COULEUR AP SH 150ML,984943411
3643164,32991432,1,2.95,CAPILLAIRES,CAP_AP SHAMP,CAPILLAIRE_AUTRE,SVC ECLAT COULEUR AP SH 150ML,984943411
3643165,32991432,1,1.95,HYGIENE,HYG_DOUCHE FRAICHEUR VEG,HYG_AUTRES,GEL MOUSS THE VERT FV FL200 REF,984943411
3643166,32991432,1,17.5,PARFUMAGE,PARF_EAUX PARFUMS,PARF_PARFUM,EDT NATURELLE VAPO 75ML,984943411
3643167,32991432,1,1.95,HYGIENE,HYG_DOUCHE FRAICHEUR VEG,HYG_AUTRES,GEL MOUSS FL200 VERVEINE FV,984943411
3643168,33278153,2,2.95,CAPILLAIRES,CAP_AP SHAMP,CAPILLAIRE_AUTRE,SVC NUTRITION AP SH 150ML,984943411
3643169,33278153,2,2.95,CAPILLAIRES,CAP_AP SHAMP,CAPILLAIRE_AUTRE,SVC NUTRITION AP SH 150ML,984943411
3643170,33278153,2,1.95,HYGIENE,HYG_DOUCHE FRAICHEUR VEG,HYG_AUTRES,GEL MOUSS THE VERT FV FL200 REF,984943411
3643171,33278153,2,1.5,HYGIENE,HYG_DOUCHE JARDINMONDE,HYG_JDM,GD JDM4 THE VERT FL200ML,984943411
3643172,33278153,2,1.5,HYGIENE,HYG_DOUCHE JARDINMONDE,HYG_JDM,GD JDM4 THE VERT FL200ML,984943411


#### Prepare DF

In [20]:
# Encode the client's history with the same CATEGORIES list that was used for
# the product table, so identical text values map to identical integer codes.
# Values missing from CATEGORIES become -1.
for text_column in ("LIBELLE", "FAMILLE", "MAILLE", "UNIVERS"):
    df_history[text_column] = pd.Categorical(
        df_history[text_column], categories=CATEGORIES
    )
    df_history[text_column] = df_history[text_column].cat.codes

df_history.head()

Unnamed: 0,TICKET_ID,MOIS_VENTE,PRIX_NET,FAMILLE,UNIVERS,MAILLE,LIBELLE,CLI_ID
3643163,32991432,1,2.95,1490,1603,1515,420,984943411
3643164,32991432,1,2.95,1490,1603,1515,420,984943411
3643165,32991432,1,1.95,1487,1562,1508,988,984943411
3643166,32991432,1,17.5,1489,1547,1509,1328,984943411
3643167,32991432,1,1.95,1487,1562,1508,251,984943411


#### Load Model

In [21]:
# Reload the saved model. The context manager closes the file after reading
# instead of leaving an open handle behind.
# NOTE: unpickling can execute arbitrary code — only load model files you trust.
model_path = "..{}models{}model_products_20.pkl".format(os.sep, os.sep)
with open(model_path, "rb") as model_file:
    model = pickle.load(model_file)

model.labels_

array([2, 2, 2, ..., 3, 3, 3])

#### Assign the Client's History to a Cluster

In [22]:
# Keep only the model's input columns, in the same order as the training table.
df_kmean = df_history.copy()[["FAMILLE", "MAILLE", "UNIVERS", "LIBELLE", "PRIX_NET"]]

# model.transform gives one row per purchase and one column per cluster.
# Iterating over the transpose sums each cluster's column, i.e. the total
# distance between all of the client's purchases and that cluster.
distance = [sum(per_cluster) for per_cluster in model.transform(df_kmean).T]

# The client is assigned to the cluster with the smallest total distance.
min_value = min(distance)
client_cluster = distance.index(min_value)

client_cluster

4

#### Extract products corresponding to Client Cluster

Get the row positions of the products that belong to the client's cluster

In [23]:
# Positions (in training order) of the products labelled with the client's cluster.
cluster = np.flatnonzero(model.labels_ == client_cluster)

cluster

array([710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722,
       723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735,
       736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748,
       749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761,
       762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774,
       775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787,
       789], dtype=int64)

Get products from ids

In [24]:
# `cluster` holds row positions taken from model.labels_, so select by position
# with .iloc. Label-based .loc only matched because df_products happens to
# carry a default 0..n-1 index, and would silently pick wrong rows otherwise.
df_client = df_products.iloc[cluster]

df_client.head(20)

Unnamed: 0,FAMILLE,MAILLE,UNIVERS,LIBELLE,PRIX_NET
710,HYGIENE,HYG_JDM,HYG_DOUCHE JARDINMONDE,GD JDM HOMME VETIVER FL200ML,1.5
711,SOINS DU CORPS,CORPS_HYDR_LAIT_HUILE,CORPS_LAIT HUILE AUTRES,LAIT CORPS F300 FLEURS CRIST,3.45
712,SOINS DU VISAGE,VIS_JEUNE_ET_LEVRE,VIS_CJOUR Jeunes Specifique,PORE MINIMIZING SERUM SEBOV 30ML,18.0
713,MAQUILLAGE,MAQ_ONGLES,MAQ_ONG Vernis LUM,VAO TOPCO FUCHIA NACR ITCOL FEV14 L4 3ML,1.65
714,MAQUILLAGE,MAQ_YEUX_MASCA_EYEL_FARD,MAQ_YEUX Fard,FAP CR METAL BRONZE CN3 5ML,5.0
715,CAPILLAIRES,CAPILLAIRE_SHAMPOING,CAP_SHAMP SPECIFIQUE,SVC REFLETS SH REF ARG FL200ML,5.9
716,MAQUILLAGE,MAQ_YEUX_MASCA_EYEL_FARD,MAQ_YEUX Fard,"FAP MONO 2013 CN3 2,5G ROSE POIVRE",6.95
717,SOINS DU VISAGE,VIS_PUR,VIS_DEMAQ PUR,LINGETTES CALMILLE 2013,2.95
718,SOINS DU VISAGE,VIS_AAAR_HORS_DEMAQLOTION,VIS_TRAIT AAAR,ELIXIR 7.9 Int Jeunesse30ml CS,11.95
719,MAQUILLAGE,MAQ_LEV_RAL_HMG,MAQ_LEV RAL Autres,LAQUE LEVR OCRE/NUDE CN3 09 5,7.45


In [25]:
# Number of products in the client's cluster.
len(df_client)

79