In [None]:
# Import pandas
import pandas as pd

In [None]:
# Load the raw grocery purchase records from the hosted CSV file
GROCERIES_CSV_URL = 'https://raw.githubusercontent.com/analyticsindiamagazine/MocksDatasets/main/Groceries_dataset.csv'
data = pd.read_csv(GROCERIES_CSV_URL)

In [None]:
# Preview the first five records of the raw data
data.head(n=5)

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk


In [None]:
# Inspect the dataset dimensions as (rows, columns); nothing is reshaped here
data.shape

(38765, 3)

# **Prepare the data for modelling**

In [None]:
# Count how many times each member bought each item (one row per member/item pair)
count_per_trans = (
    data
    .groupby(by=['Member_number', 'itemDescription'])['itemDescription']
    .count()
    .reset_index(name='Count')
)
count_per_trans.head(n=5)

Unnamed: 0,Member_number,itemDescription,Count
0,1000,canned beer,1
1,1000,hygiene articles,1
2,1000,misc. beverages,1
3,1000,pastry,1
4,1000,pickled vegetables,1


In [None]:
# Build a member x item matrix of purchase counts; member/item pairs with no
# purchase come out of the pivot as NaN and are replaced by 0
Item_based_matrix = (
    count_per_trans
    .pivot_table(values='Count', index='Member_number', columns='itemDescription', aggfunc='sum')
    .fillna(value=0)
)
Item_based_matrix.head(n=5)

itemDescription,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Member_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0
1001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,2.0,0.0,0.0
1002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0


In [None]:
# Convert a purchase count into a 0/1 "bought at least once" flag
def encode(x):
  """Binarise a single purchase count.

  Args:
    x: Numeric count for one member/item cell.

  Returns:
    1 if ``x`` is strictly positive, otherwise 0. Fractional values in (0, 1)
    count as purchased and NaN counts as not purchased, so every numeric input
    maps to 0 or 1 and never to None.
  """
  # One comparison covers every numeric value; NaN > 0 is False, so NaN maps to 0.
  return int(x > 0)

# Binarise the whole matrix in one vectorised step: any positive count -> 1, else 0.
# For the non-negative counts held here this yields the same 0/1 table as mapping
# `encode` over every cell, without calling `DataFrame.applymap` (deprecated in
# recent pandas releases) and without a per-cell Python function call.
Item_based_matrix = Item_based_matrix.gt(0).astype(int)

In [None]:
# Check the first five members of the 0/1 basket matrix
Item_based_matrix.head(n=5)

itemDescription,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Member_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
1001,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,1,0,1,0,0
1002,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1003,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1004,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


# **Applying the FP-Growth Algorithm**

In [None]:
# FP-Growth works at specific verison 
! pip install mlxtend==0.18.0

Installing collected packages: mlxtend
  Attempting uninstall: mlxtend
    Found existing installation: mlxtend 0.14.0
    Uninstalling mlxtend-0.14.0:
      Successfully uninstalled mlxtend-0.14.0
Successfully installed mlxtend-0.18.0


In [None]:
# import FP-growth and association rule
from mlxtend.frequent_patterns import fpgrowth, association_rules

In [None]:
# Mine the itemsets whose support is at least 0.15 (share of rows, i.e. members,
# containing the itemset), then list the ten with the highest support
frequent_item = fpgrowth(Item_based_matrix, min_support=0.15, use_colnames=True)
frequent_item.sort_values(by='support', ascending=False).head(n=10)

Unnamed: 0,support,itemsets
0,0.458184,(whole milk)
8,0.376603,(other vegetables)
6,0.349666,(rolls/buns)
1,0.313494,(soda)
2,0.282966,(yogurt)
9,0.23371,(tropical fruit)
10,0.230631,(root vegetables)
13,0.213699,(bottled water)
3,0.206003,(sausage)
19,0.19138,"(other vegetables, whole milk)"


# **Generating association rules**

In [None]:
# Generate association rules from the frequent itemsets, filtered on lift
# with a threshold of 0.9.
# NOTE(review): a lift below 1 indicates a negative association, so this
# threshold also admits such rules — confirm that is intended.
ass_rules = association_rules(
    frequent_item,
    min_threshold=0.9,
    metric='lift',
)

In [None]:
# Show the first five generated rules
ass_rules.head(n=5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(soda),(whole milk),0.313494,0.458184,0.151103,0.481997,1.051973,0.007465,1.045971
1,(whole milk),(soda),0.458184,0.313494,0.151103,0.329787,1.051973,0.007465,1.02431
2,(whole milk),(yogurt),0.458184,0.282966,0.15059,0.328667,1.16151,0.02094,1.068076
3,(yogurt),(whole milk),0.282966,0.458184,0.15059,0.532185,1.16151,0.02094,1.158185
4,(whole milk),(rolls/buns),0.458184,0.349666,0.178553,0.389698,1.114484,0.018342,1.065592
