## Importing necessary libraries

In [36]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

## Loading the Data

In [37]:
# Raw string literal: in a plain '...' literal the sequences \W, \E and \G are
# invalid escapes, which Python warns about. The path value itself is unchanged.
DATA_PATH = r'E:\Work Space\Ecko data\GroceryStoreDataSet.csv'

# `names` is supplied without `header`, so pandas reads the first line as data
# and labels the single column 'Products'.
df = pd.read_csv(DATA_PATH, names=['Products'], sep=',')

In [38]:
# Preview the first rows to check that each basket landed in the single `Products` column.
df.head()

Unnamed: 0,Products
0,"MILK,BREAD,BISCUIT"
1,"BREAD,MILK,BISCUIT,CORNFLAKES"
2,"BREAD,TEA,BOURNVITA"
3,"JAM,MAGGI,BREAD,MILK"
4,"MAGGI,TEA,BISCUIT"


In [39]:
# (rows, columns) of the loaded frame.
df.shape

(20, 1)

In [40]:
# Column dtypes, non-null counts and memory usage of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 1 columns):
Products    20 non-null object
dtypes: object(1)
memory usage: 288.0+ bytes


In [41]:
# Underlying array of the frame: one comma-separated basket string per row.
df.values

array([['MILK,BREAD,BISCUIT'],
       ['BREAD,MILK,BISCUIT,CORNFLAKES'],
       ['BREAD,TEA,BOURNVITA'],
       ['JAM,MAGGI,BREAD,MILK'],
       ['MAGGI,TEA,BISCUIT'],
       ['BREAD,TEA,BOURNVITA'],
       ['MAGGI,TEA,CORNFLAKES'],
       ['MAGGI,BREAD,TEA,BISCUIT'],
       ['JAM,MAGGI,BREAD,TEA'],
       ['BREAD,MILK'],
       ['COFFEE,COCK,BISCUIT,CORNFLAKES'],
       ['COFFEE,COCK,BISCUIT,CORNFLAKES'],
       ['COFFEE,SUGER,BOURNVITA'],
       ['BREAD,COFFEE,COCK'],
       ['BREAD,SUGER,BISCUIT'],
       ['COFFEE,SUGER,CORNFLAKES'],
       ['BREAD,SUGER,BOURNVITA'],
       ['BREAD,COFFEE,SUGER'],
       ['BREAD,COFFEE,SUGER'],
       ['TEA,MILK,COFFEE,CORNFLAKES']], dtype=object)

In [42]:
# Turn every comma-separated basket string into a list of product names.
data = [basket.split(',') for basket in df["Products"]]

In [43]:
# Inspect the parsed baskets: one list of product names per transaction.
data

[['MILK', 'BREAD', 'BISCUIT'],
 ['BREAD', 'MILK', 'BISCUIT', 'CORNFLAKES'],
 ['BREAD', 'TEA', 'BOURNVITA'],
 ['JAM', 'MAGGI', 'BREAD', 'MILK'],
 ['MAGGI', 'TEA', 'BISCUIT'],
 ['BREAD', 'TEA', 'BOURNVITA'],
 ['MAGGI', 'TEA', 'CORNFLAKES'],
 ['MAGGI', 'BREAD', 'TEA', 'BISCUIT'],
 ['JAM', 'MAGGI', 'BREAD', 'TEA'],
 ['BREAD', 'MILK'],
 ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],
 ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],
 ['COFFEE', 'SUGER', 'BOURNVITA'],
 ['BREAD', 'COFFEE', 'COCK'],
 ['BREAD', 'SUGER', 'BISCUIT'],
 ['COFFEE', 'SUGER', 'CORNFLAKES'],
 ['BREAD', 'SUGER', 'BOURNVITA'],
 ['BREAD', 'COFFEE', 'SUGER'],
 ['BREAD', 'COFFEE', 'SUGER'],
 ['TEA', 'MILK', 'COFFEE', 'CORNFLAKES']]

## One-Hot Encoding

### As seen below, the Apriori algorithm requires the data as True/False (or 1/0) values. That is, the list of transactions is converted to a one-hot encoded boolean table with one column per product.

In [44]:
# Learn the set of distinct products, then encode every basket as a boolean row.
myEncoding = TransactionEncoder()
myEncoding.fit(data)
myEncoding_data = myEncoding.transform(data)

# One column per product found by the encoder; True marks a product present in the basket.
df = pd.DataFrame(data=myEncoding_data, columns=myEncoding.columns_)
df.head()

Unnamed: 0,BISCUIT,BOURNVITA,BREAD,COCK,COFFEE,CORNFLAKES,JAM,MAGGI,MILK,SUGER,TEA
0,True,False,True,False,False,False,False,False,True,False,False
1,True,False,True,False,False,True,False,False,True,False,False
2,False,True,True,False,False,False,False,False,False,False,True
3,False,False,True,False,False,False,True,True,True,False,False
4,True,False,False,False,False,False,False,True,False,False,True


In [45]:
# Column-wise sum of the boolean frame: the number of baskets containing each product.
df.sum()

BISCUIT        7
BOURNVITA      4
BREAD         13
COCK           3
COFFEE         8
CORNFLAKES     6
JAM            2
MAGGI          5
MILK           5
SUGER          6
TEA            7
dtype: int64

In [46]:
# Minimum fraction of transactions an itemset must appear in to be kept.
MIN_SUPPORT = 0.1

# Rebuild the one-hot frame from the encoder output instead of reading `df`.
# This cell rebinds `df` to the frequent itemsets, so mining from `df` itself
# would no longer receive the one-hot data if the cell were run a second time.
onehot = pd.DataFrame(myEncoding_data, columns=myEncoding.columns_)

# `df` now holds the frequent itemsets (columns: support, itemsets) used by the cells below.
df = apriori(onehot, min_support=MIN_SUPPORT, use_colnames=True, verbose=1)

Processing 110 combinations | Sampling itemset size 2Processing 234 combinations | Sampling itemset size 3Processing 108 combinations | Sampling itemset size 4


In [47]:
# Display the frequent itemsets returned by apriori (columns: support, itemsets).
df

Unnamed: 0,support,itemsets
0,0.35,(BISCUIT)
1,0.2,(BOURNVITA)
2,0.65,(BREAD)
3,0.15,(COCK)
4,0.4,(COFFEE)
5,0.3,(CORNFLAKES)
6,0.1,(JAM)
7,0.25,(MAGGI)
8,0.25,(MILK)
9,0.3,(SUGER)


In [48]:
# Rank the frequent itemsets from highest to lowest support.
df.sort_values("support", ascending=False)

Unnamed: 0,support,itemsets
2,0.65,(BREAD)
4,0.4,(COFFEE)
0,0.35,(BISCUIT)
10,0.35,(TEA)
5,0.3,(CORNFLAKES)
9,0.3,(SUGER)
7,0.25,(MAGGI)
8,0.25,(MILK)
30,0.2,"(SUGER, COFFEE)"
34,0.2,"(MAGGI, TEA)"


In [49]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose confidence is at least 0.4.
df_rules = association_rules(df, metric="confidence", min_threshold=0.4)
df_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(BISCUIT),(BREAD),0.35,0.65,0.20,0.571429,0.879121,-0.0275,0.816667
1,(COCK),(BISCUIT),0.15,0.35,0.10,0.666667,1.904762,0.0475,1.950000
2,(BISCUIT),(CORNFLAKES),0.35,0.30,0.15,0.428571,1.428571,0.0450,1.225000
3,(CORNFLAKES),(BISCUIT),0.30,0.35,0.15,0.500000,1.428571,0.0450,1.300000
4,(MAGGI),(BISCUIT),0.25,0.35,0.10,0.400000,1.142857,0.0125,1.083333
...,...,...,...,...,...,...,...,...,...
70,"(COFFEE, CORNFLAKES)","(BISCUIT, COCK)",0.20,0.10,0.10,0.500000,5.000000,0.0800,1.800000
71,"(BISCUIT, COCK)","(COFFEE, CORNFLAKES)",0.10,0.20,0.10,1.000000,5.000000,0.0800,inf
72,"(BISCUIT, CORNFLAKES)","(COFFEE, COCK)",0.15,0.15,0.10,0.666667,4.444444,0.0775,2.550000
73,"(COCK, CORNFLAKES)","(COFFEE, BISCUIT)",0.10,0.10,0.10,1.000000,10.000000,0.0900,inf
