In [None]:
# Support: Probability of occurrence of item(s) in all transactions
# Confidence: Conditional probability that the consequent item(s) B appear in a transaction given that the antecedent item(s) A are already in it, i.e. P(B | A) = support(A and B) / support(A)
# Lift: Ratio of the probability of B occurring given that A is present to the probability of B occurring without knowing about A, i.e. confidence(A -> B) / support(B). A lift of 1 means A and B are independent.
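# Leverage: The difference between the observed probability of A and B occurring together and the probability expected if A and B were independent, i.e. support(A and B) - support(A) * support(B). A leverage of 0 means independence.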
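# Conviction: How strongly the consequent depends on the antecedent, i.e. (1 - support(B)) / (1 - confidence(A -> B)). A value of 1 means the rule is no better than chance; larger values mean the rule is wrong less often than chance would predict.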

# 1. Import libraries

In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
# Load the baskets. The first line of the file is treated as data, so the
# single column is named explicitly; each value is one comma-separated
# string of items.
df = pd.read_csv(
    "GroceryStoreDataSet.csv",
    sep=",",
    header=None,
    names=["transaction"],
)

In [2]:
# Show the raw transactions: one row per basket, items as a single string.
df

Unnamed: 0,transaction
0,"MILK,BREAD,BISCUIT"
1,"BREAD,MILK,BISCUIT,CORNFLAKES"
2,"BREAD,TEA,BOURNVITA"
3,"JAM,MAGGI,BREAD,MILK"
4,"MAGGI,TEA,BISCUIT"
5,"BREAD,TEA,BOURNVITA"
6,"MAGGI,TEA,CORNFLAKES"
7,"MAGGI,BREAD,TEA,BISCUIT"
8,"JAM,MAGGI,BREAD,TEA"
9,"BREAD,MILK"


# 2. One-hot encoding
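The Apriori implementation expects a one-hot encoded boolean table (one row per transaction, one column per item), so we first split each comma-separated transaction string into a list of items and then encode those lists before passing them to the algorithm.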

In [3]:
# Turn every comma-separated basket string into a list of item names; the
# resulting list of lists is what gets passed to the encoder.
df = [basket.split(",") for basket in df["transaction"]]

In [4]:
# Learn the item vocabulary from the baskets, then encode every basket as a
# boolean row with one column per distinct item.
one_hot_transformer = TransactionEncoder()
df_transform = one_hot_transformer.fit(df).transform(df)

In [5]:
# Inspect the encoded boolean array produced by the encoder.
df_transform

array([[ True, False,  True, False, False, False, False, False,  True,
        False, False],
       [ True, False,  True, False, False,  True, False, False,  True,
        False, False],
       [False,  True,  True, False, False, False, False, False, False,
        False,  True],
       [False, False,  True, False, False, False,  True,  True,  True,
        False, False],
       [ True, False, False, False, False, False, False,  True, False,
        False,  True],
       [False,  True,  True, False, False, False, False, False, False,
        False,  True],
       [False, False, False, False, False,  True, False,  True, False,
        False,  True],
       [ True, False,  True, False, False, False, False,  True, False,
        False,  True],
       [False, False,  True, False, False, False,  True,  True, False,
        False,  True],
       [False, False,  True, False, False, False, False, False,  True,
        False, False],
       [ True, False, False,  True,  True,  True, False, Fal

In [6]:
# Wrap the boolean matrix in a DataFrame, labelling each column with the
# item name recorded by the encoder.
df = pd.DataFrame(
    data=df_transform,
    columns=one_hot_transformer.columns_,
)

In [7]:
# Show the one-hot table: True where the basket contains the column's item.
df

Unnamed: 0,BISCUIT,BOURNVITA,BREAD,COCK,COFFEE,CORNFLAKES,JAM,MAGGI,MILK,SUGER,TEA
0,True,False,True,False,False,False,False,False,True,False,False
1,True,False,True,False,False,True,False,False,True,False,False
2,False,True,True,False,False,False,False,False,False,False,True
3,False,False,True,False,False,False,True,True,True,False,False
4,True,False,False,False,False,False,False,True,False,False,True
5,False,True,True,False,False,False,False,False,False,False,True
6,False,False,False,False,False,True,False,True,False,False,True
7,True,False,True,False,False,False,False,True,False,False,True
8,False,False,True,False,False,False,True,True,False,False,True
9,False,False,True,False,False,False,False,False,True,False,False


# 3. Find the frequent itemsets using Apriori

In [8]:
# Minimum fraction of transactions an itemset must appear in to be kept.
MIN_SUPPORT = 0.2

# Mine the frequent itemsets from the one-hot table and rank them by support,
# most frequent first. The sort is chained (no inplace=True) so the result is
# built as a single new frame rather than mutated after assignment.
# NOTE(review): `df` is rebound here from the one-hot table to the itemsets
# table, so this cell cannot be re-run without re-running the encoding cells.
df = (
    apriori(df, min_support=MIN_SUPPORT, use_colnames=True)
    .sort_values("support", ascending=False)
)

# 4. Association rule

The `association_rules` function computes the key metrics for every candidate rule derived from the frequent itemsets: support, confidence, lift, leverage, and conviction. Here we keep only the rules whose lift is at least 1.

In [9]:
# Derive association rules from the frequent itemsets, keeping only those
# whose lift is at least 1.
df_ar = association_rules(
    df,
    metric="lift",
    min_threshold=1,
)

In [10]:
# Show the resulting rules together with their metrics.
df_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(BREAD),(MILK),0.65,0.25,0.2,0.307692,1.230769,0.0375,1.083333
1,(MILK),(BREAD),0.25,0.65,0.2,0.8,1.230769,0.0375,1.75
2,(BREAD),(SUGER),0.65,0.3,0.2,0.307692,1.025641,0.005,1.011111
3,(SUGER),(BREAD),0.3,0.65,0.2,0.666667,1.025641,0.005,1.05
4,(CORNFLAKES),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8
5,(COFFEE),(CORNFLAKES),0.4,0.3,0.2,0.5,1.666667,0.08,1.4
6,(COFFEE),(SUGER),0.4,0.3,0.2,0.5,1.666667,0.08,1.4
7,(SUGER),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8
8,(MAGGI),(TEA),0.25,0.35,0.2,0.8,2.285714,0.1125,3.25
9,(TEA),(MAGGI),0.35,0.25,0.2,0.571429,2.285714,0.1125,1.75
