In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [4]:
# Load the raw transactions file; fields are separated by semicolons.
df = pd.read_csv('transactions_dataset.csv', sep=";")

In [5]:
# drop_duplicates() returns a new frame and leaves `df` untouched, so the
# result must be assigned back for the de-duplication to take effect.
df = df.drop_duplicates()
# Display the de-duplicated frame.
df

Unnamed: 0,date_order,date_invoice,product_id,client_id,sales_net,quantity,order_channel,branch_id
0,2017-09-25,2017-09-25,2376333,188502,155.44320,3,online,2732
1,2017-09-25,2017-09-25,2520527,835089,16.39440,3,at the store,10279
2,2017-09-25,2017-09-25,2536756,1977896,365.76624,21,online,6184
3,2017-09-25,2017-09-25,3028673,598816,8.59878,201,at the store,4200
4,2017-09-25,2017-09-25,203377,2086861,1093.37400,3,by phone,7051
...,...,...,...,...,...,...,...,...
63319310,2019-06-10,2020-06-12,1596284,245272,-2056.91300,5,by phone,7682
63319311,2018-12-14,2020-07-15,908253,880496,27.62760,1001,by phone,9375
63319312,2019-07-26,2020-10-29,1168247,2063314,-204.60984,101,by phone,3014
63319313,2018-05-23,2020-11-27,2319965,1287654,70.49730,17,by phone,1015


In [6]:
# Remove the invoice date column; the order date column is kept.
df = df.drop(columns=['date_invoice'])

In [6]:
# Preview the first rows of `df` after removing date_invoice.
df.head()

Unnamed: 0,date_order,product_id,client_id,sales_net,quantity,order_channel,branch_id
0,2017-09-25,2376333,188502,155.4432,3,online,2732
1,2017-09-25,2520527,835089,16.3944,3,at the store,10279
2,2017-09-25,2536756,1977896,365.76624,21,online,6184
3,2017-09-25,3028673,598816,8.59878,201,at the store,4200
4,2017-09-25,203377,2086861,1093.374,3,by phone,7051


In [7]:
# These columns will not be used in our analysis, so remove them in one call.
df = df.drop(columns=['sales_net', 'order_channel', 'client_id', 'branch_id'])


In [8]:
# Preview `df` now that only date_order, product_id and quantity remain.
df.head()

Unnamed: 0,date_order,product_id,quantity
0,2017-09-25,2376333,3
1,2017-09-25,2520527,3
2,2017-09-25,2536756,21
3,2017-09-25,3028673,201
4,2017-09-25,203377,3


In [9]:
# Load the scored product-clustering file (comma-separated, the read_csv default).
df1 = pd.read_csv('product_clustering_1_scored.csv')

In [10]:
# Preview the first rows of the product scoring table.
df1.head()

Unnamed: 0,product_id,Recency,Monetary,lifespam,clv,clv_Tile,Rec_Tile,Mone_Tile,lifespam_Tile,score_v1,score,score_level,cluster_labels
0,23,563,613,45,612,4,1,4,3,4143,12,Loyal,Standard Products
1,39,166,197,0,197,2,3,2,1,2321,8,Regular,Churned Products
2,45,296,59,0,59,1,2,1,1,1211,5,Churned,Churned Products
3,48,467,391,0,391,3,1,3,1,3131,8,Regular,Standard Products
4,96,418,191,0,191,2,2,2,1,2221,7,Churned,Standard Products


In [11]:
# Exclude the 'High Value Products' cluster to try and find relationships
# between products that are standard and products that are churned.
df1 = df1.loc[df1['cluster_labels'].ne('High Value Products')]

In [12]:
# Count how many products remain under each cluster label after the filter.
df1['cluster_labels'].value_counts()

Churned Products     137290
Standard Products    131982
Name: cluster_labels, dtype: int64

In [13]:
# Remove the scoring/segmentation columns in a single call, then preview
# the columns that are left.
df1 = df1.drop(
    columns=[
        'Monetary',
        'lifespam',
        'clv',
        'clv_Tile',
        'Rec_Tile',
        'Mone_Tile',
        'lifespam_Tile',
        'score_v1',
        'score_level',
        'cluster_labels',
    ]
)
df1.head()

Unnamed: 0,product_id,Recency,score
0,23,563,12
1,39,166,8
2,45,296,5
3,48,467,8
4,96,418,7


In [14]:
# Inner-join the transactions to the retained products on product_id, so
# only transactions whose product appears in df1 are kept.
df2 = pd.merge(df, df1, on='product_id', how='inner')
df2.head()

Unnamed: 0,date_order,product_id,quantity,Recency,score
0,2017-09-29,1260937,3,193,6
1,2017-10-16,1260937,3,193,6
2,2017-10-07,1260937,3,193,6
3,2017-10-06,1260937,3,193,6
4,2017-10-11,1260937,3,193,6


In [15]:
# Remove the two columns that came from df1 in the merge.
df2 = df2.drop(columns=['Recency', 'score'])
df2.head()

Unnamed: 0,date_order,product_id,quantity
0,2017-09-29,1260937,3
1,2017-10-16,1260937,3
2,2017-10-07,1260937,3
3,2017-10-06,1260937,3
4,2017-10-11,1260937,3


In [16]:
# Build a matrix with one row per order date and one column per product,
# holding the total quantity for that (date, product) pair; pairs with no
# transactions are filled with 0.
basket = (
    df2.groupby(['date_order', 'product_id'])['quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('date_order')
)
basket.head()

product_id,23,39,45,48,96,131,141,146,202,216,...,3238727,3238737,3238759,3238763,3238789,3238792,3238809,3238811,3238819,3238833
date_order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-09-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-09-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-09-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-09-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-09-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
def num(x):
    """Binarize a quantity into a presence flag.

    Parameters
    ----------
    x : number
        A single quantity value.

    Returns
    -------
    int
        1 when ``x`` is strictly positive, otherwise 0. Values between 0 and 1
        count as present; a NaN compares false with ``> 0`` and so yields 0.
    """
    # A single comparison covers every input, so no value can fall through
    # and return None.
    return 1 if x > 0 else 0

# Vectorized presence flag: 1 where the summed quantity is positive, else 0.
# For whole-number quantities this matches applying `num` to every cell, but
# it avoids one Python-level call per cell on a very wide frame.
basket_new = (basket > 0).astype(int)
basket_new.head()

product_id,23,39,45,48,96,131,141,146,202,216,...,3238727,3238737,3238759,3238763,3238789,3238792,3238809,3238811,3238819,3238833
date_order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-09-23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-09-24,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-09-25,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-09-26,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-09-27,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
from mlxtend.frequent_patterns import fpgrowth
# Mine frequent itemsets with FP-Growth, using a minimum support of 0.2 over
# the rows (order dates) of basket_new. use_colnames=True reports the
# product_id column labels in the itemsets instead of column positions.
# Despite the name, the result holds itemsets with their support, not rules.
rule_fp = fpgrowth(basket_new, min_support=0.2, use_colnames=True)
rule_fp

Unnamed: 0,support,itemsets
0,0.254848,(2332548)
1,0.583102,(1281421)
2,0.501385,(2688704)
3,0.486150,(3042748)
4,0.473684,(1964536)
...,...,...
3150,0.202216,"(628618, 937149)"
3151,0.207756,"(1307169, 1589169, 937149)"
3152,0.202216,"(1589169, 1109595, 937149)"
3153,0.200831,"(1307169, 1109595, 937149)"


In [19]:
# Mine frequent itemsets with Apriori, using the same 0.2 minimum support as
# the FP-Growth cell; these itemsets are used to generate the rules.
items = apriori(basket_new, min_support=0.2, use_colnames=True)
items

Unnamed: 0,support,itemsets
0,0.210526,(20109)
1,0.210526,(31844)
2,0.246537,(50399)
3,0.235457,(69468)
4,0.227147,(97152)
...,...,...
3150,0.204986,"(2792898, 630508, 1281421, 1955215, 1651129, 1..."
3151,0.200831,"(2688704, 630508, 1281421, 1964536, 1651129, 3..."
3152,0.200831,"(2688704, 2792898, 630508, 1281421, 1651129, 3..."
3153,0.202216,"(2688704, 690632, 1281421, 1964536, 3042748, 1..."


In [20]:
# Derive association rules from the Apriori itemsets, keeping only rules
# whose lift is at least 1.
rule = association_rules(items, metric="lift", min_threshold=1)
rule

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(31844),(1281421),0.210526,0.583102,0.202216,0.960526,1.647268,0.079458,10.561404
1,(1281421),(31844),0.583102,0.210526,0.202216,0.346793,1.647268,0.079458,1.208612
2,(97152),(1281421),0.227147,0.583102,0.213296,0.939024,1.610393,0.080847,6.837119
3,(1281421),(97152),0.583102,0.227147,0.213296,0.365796,1.610393,0.080847,1.218619
4,(145148),(1281421),0.213296,0.583102,0.204986,0.961039,1.648148,0.080612,10.700369
...,...,...,...,...,...,...,...,...,...
32351,(690632),"(2688704, 1281421, 2258738, 1964536, 3042748)",0.475069,0.249307,0.211911,0.446064,1.789213,0.093473,1.355198
32352,(1281421),"(2688704, 690632, 2258738, 1964536, 3042748)",0.583102,0.214681,0.211911,0.363420,1.692836,0.086730,1.233653
32353,(2258738),"(2688704, 690632, 1281421, 1964536, 3042748)",0.434903,0.265928,0.211911,0.487261,1.832305,0.096258,1.431668
32354,(1964536),"(2688704, 690632, 1281421, 2258738, 3042748)",0.473684,0.256233,0.211911,0.447368,1.745946,0.090538,1.345865


In [21]:
# Display the rules from highest to lowest lift. The sorted frame is only
# shown, not assigned, so `rule` keeps its original order.
rule.sort_values(by = 'lift', ascending = False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
18240,"(1238272, 1307169)","(1589169, 1109595)",0.207756,0.229917,0.203601,0.980000,4.262410,0.155834,38.504155
18245,"(1589169, 1109595)","(1238272, 1307169)",0.229917,0.207756,0.203601,0.885542,4.262410,0.155834,6.921709
18242,"(1238272, 1589169)","(1307169, 1109595)",0.209141,0.229917,0.203601,0.973510,4.234182,0.155516,29.070637
18243,"(1307169, 1109595)","(1238272, 1589169)",0.229917,0.209141,0.203601,0.885542,4.234182,0.155516,6.909608
3942,(628618),"(1589169, 937149)",0.228532,0.211911,0.202216,0.884848,4.175560,0.153788,6.843928
...,...,...,...,...,...,...,...,...,...
383,(630508),(2063171),0.387812,0.427978,0.216066,0.557143,1.301803,0.050092,1.291663
857,(2063171),(1651129),0.427978,0.429363,0.236842,0.553398,1.288882,0.053084,1.277731
856,(1651129),(2063171),0.429363,0.427978,0.236842,0.551613,1.288882,0.053084,1.275733
834,(1651129),(1652077),0.429363,0.365651,0.202216,0.470968,1.288025,0.045219,1.199074
