In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import mlxtend
import pickle

# Show every column when a DataFrame is displayed instead of truncating wide frames.
# Uses the public `pd.set_option` entry point rather than the incidental
# `pd.pandas` self-reference.
pd.set_option('display.max_columns', None)
# Apply seaborn's default styling to subsequent plots.
sns.set()

import warnings
# Suppress every warning for the rest of the session. Note that this also hides
# deprecation notices emitted by the libraries used in later cells.
warnings.filterwarnings('ignore')

In [2]:
# Read the customer table (one row per customer, including the 'Clusters' label)
# from the working directory and preview the first five rows.
dataset = pd.read_csv(filepath_or_buffer='cluster_dataset.csv')
dataset.head(n=5)

Unnamed: 0,Education,Marital_Status,Income,Recency,Wines,Fruits,Meat,Fish,Sweets,Gold,HasChild,Children,Age,TotalAmtSpent,Relation,Clusters
0,Grad,Single,58138.0,58,635,88,546,172,88,88,no child,0,64,1617,110.1,Stars
1,Grad,Single,46344.0,38,11,1,6,2,1,6,child,2,67,27,91.766667,Need Attention
2,Grad,Married,71613.0,26,426,49,127,111,21,42,no child,0,56,776,98.4,High Potential
3,Grad,Married,26646.0,26,11,4,20,10,3,5,child,1,37,53,92.633333,Need Attention
4,PGrad,Married,58293.0,94,173,43,118,46,27,15,child,1,40,422,93.366667,Need Attention


In [3]:
# Work on a deep copy so the feature engineering below never modifies `dataset`.
data = dataset.copy(deep=True)

We will bucket the customers into groups by Age, Income and Relation to make them easier to analyze.

In [4]:
# Age brackets, right-closed: (0, 30] Young, (30, 50] Adult, (50, 70] Mature,
# (70, 120] Senior. Ages outside (0, 120] get no label (NaN).
bins_age = [0, 30, 50, 70, 120]
labels_age = ['Young', 'Adult', 'Mature', 'Senior']
data = data.assign(
    Age_Gr=pd.cut(data['Age'], bins=bins_age, labels=labels_age, right=True)
)

In [5]:
# Split customers into four equal-sized income quartiles, labelled lowest to highest.
labels_income = ['Low income', 'Low to Medium', 'Medium to High', 'High income']
income_quartiles = pd.qcut(data['Income'], q=4, labels=labels_income)
data = data.assign(Income_Gr=income_quartiles)

In [6]:
# Split customers into four quartiles of the 'Relation' column, labelled from the
# lowest quartile ('New') to the highest ('Old').
labels_relation = ['New', 'Discovering', 'Experienced', 'Old']
relation_quartiles = pd.qcut(data['Relation'], q=4, labels=labels_relation)
data = data.assign(Relation_Gr=relation_quartiles)

Next, we segment the customers by how heavily they buy each product (Low, Normal or Frequent Buyer).

In [7]:
# Segment customers per product by quantile of the product column:
# bottom 25% -> 'Low Buyer', middle 50% -> 'Normal Buyer', top 25% -> 'Frequent Buyer'.
cut_labels = ['Low Buyer','Normal Buyer','Frequent Buyer']
segment_quantiles = [0,0.25,0.75,1]
product_columns = ['Wines','Fruits','Meat','Fish','Sweets','Gold']
segment_columns = [f'{product}_seg' for product in product_columns]

# One '<product>_seg' column per product, stored as plain strings (object dtype)
# so the missing-value fill below can introduce a label outside `cut_labels`.
for product, segment_column in zip(product_columns, segment_columns):
    data[segment_column] = pd.qcut(
        data[product], q=segment_quantiles, labels=cut_labels
    ).astype("object")

# Fill missing segments only in the segment columns. A frame-wide replace would
# also relabel missing values in unrelated columns (e.g. Education, Age_Gr) as
# "Non Buyer", which would then surface as bogus items in the association rules.
# NOTE(review): qcut runs on the full column, so a segment is missing only when
# the product value itself is missing; customers with a value of 0 fall into
# 'Low Buyer', not 'Non Buyer' — confirm this is intended.
data[segment_columns] = data[segment_columns].fillna("Non Buyer")

# Drop the raw numeric columns that have been replaced by their grouped versions
# and cast everything to object so each remaining value is treated as a category.
# NOTE(review): any numeric columns not dropped here are one-hot encoded value
# by value downstream — confirm that is wanted.
data = data.drop(columns=['Age','Income','Relation', *product_columns]).astype(object)

In [8]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [9]:
# One-hot encode every column into indicator columns named '<column>_<value>'
# (e.g. 'Clusters_Stars'); each indicator column is one "item" for apriori.
# dtype=bool is requested explicitly so the frame is boolean regardless of the
# installed pandas version, which is the input type mlxtend's apriori expects.
association = data.copy()
df = pd.get_dummies(association, dtype=bool)

In [10]:
# Mining thresholds.
min_support = 0.06   # minimum support an itemset needs to be kept
max_length = 10

# NOTE(review): max_len is max_length + 1 — presumably to allow up to
# `max_length` antecedent items plus one consequent; confirm.
frequent_items = apriori(
    df,
    min_support=min_support,
    max_len=max_length + 1,
    use_colnames=True,   # report itemsets by column name instead of column index
)

# Derive association rules from the frequent itemsets, filtered on lift with a
# threshold of 1.
rules = association_rules(
    frequent_items,
    metric='lift',
    min_threshold=1,
)

In [11]:
# Persist the mined rules to disk so they can be reloaded without re-running apriori.
with open('association.pkl', mode='wb') as rules_file:
    pickle.dump(rules, rules_file)

In [16]:
# Reload the rules written by the earlier pickle.dump cell.
# Unpickling can execute arbitrary code, so only load a file produced by this
# notebook or another trusted source.
with open('association.pkl','rb') as f:
    model_association = pickle.load(f)

In [17]:
product = 'Wines'
segment = 'Frequent Buyer'

# The consequent we are looking for: exactly the single item
# '<product>_seg_<segment>', e.g. {'Wines_seg_Frequent Buyer'}.
target = frozenset({'%s_seg_%s' % (product, segment)})

# Compare the consequent sets directly rather than regex-matching their string
# representation: the text match depended on the frozenset repr and on braces
# and other characters in the item name not being interpreted as regex syntax.
# Non-set entries (e.g. missing values) compare unequal and are excluded.
is_target = (
    model_association['consequents']
    .map(lambda consequent: consequent == target)
    .astype(bool)
)

# Rules predicting the target, strongest confidence first.
wine_association = model_association[is_target].sort_values(by='confidence', ascending=False)
wine_association.head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
2394,"(Clusters_Stars, Education_PGrad)",(Wines_seg_Frequent Buyer),0.099681,0.248976,0.076468,0.767123,3.081115,0.05165,3.224986
2466,"(Income_Gr_High income, Education_PGrad)",(Wines_seg_Frequent Buyer),0.093764,0.248976,0.071461,0.762136,3.061083,0.048116,3.157367
2512,"(Meat_seg_Frequent Buyer, Education_PGrad)",(Wines_seg_Frequent Buyer),0.089213,0.248976,0.064634,0.72449,2.909879,0.042422,2.725939
32753,"(Meat_seg_Frequent Buyer, Income_Gr_High incom...",(Wines_seg_Frequent Buyer),0.096495,0.248976,0.069185,0.716981,2.879721,0.04516,2.653619
8332,"(Income_Gr_High income, Clusters_Stars)",(Wines_seg_Frequent Buyer),0.122895,0.248976,0.086026,0.7,2.811517,0.055429,2.503414


In [14]:
# Export the filtered rules to an Excel workbook, leaving out the DataFrame index.
wine_association.to_excel(
    excel_writer='wine_association.xlsx',
    index=False,
)
