In [1]:
# Import pandas
import pandas as pd

In [2]:
# Load the groceries transactions from the hosted CSV file
csv_url = 'https://raw.githubusercontent.com/analyticsindiamagazine/MocksDatasets/main/New%20groceries%20%20.csv'
data = pd.read_csv(csv_url)

In [3]:
# Preview the first five rows of the raw frame
data.head(n=5)

Unnamed: 0,Item(s),Item 1,Item 2,Item 3,Item 4,Item 5,Item 6,Item 7,Item 8,Item 9,...,Item 23,Item 24,Item 25,Item 26,Item 27,Item 28,Item 29,Item 30,Item 31,Item 32
0,4,citrus fruit,semi-finished bread,margarine,ready soups,,,,,,...,,,,,,,,,,
1,3,tropical fruit,yogurt,coffee,,,,,,,...,,,,,,,,,,
2,1,whole milk,,,,,,,,,...,,,,,,,,,,
3,4,pip fruit,yogurt,cream cheese,meat spreads,,,,,,...,,,,,,,,,,
4,4,other vegetables,whole milk,condensed milk,long life bakery product,,,,,,...,,,,,,,,,,


In [4]:
# Shape of the data as (rows, columns)
data.shape

(9835, 33)

# **Prepare data for modelling**

In [5]:
# Prepare the frame for the ECLAT package: drop the leading column (the
# per-basket item count) and relabel the remaining item columns 0..n-1.
#
# The guard keeps this cell idempotent. Once the columns have been relabelled,
# the first label is already 0 and the count column is gone, so re-running the
# cell no longer strips an additional item column the way an unconditional
# `iloc[:, 1:]` slice would.
if data.columns[0] != 0:
    data = data.iloc[:, 1:]

# A single positional assignment replaces the per-column in-place rename loop.
data.columns = range(data.shape[1])

# **Prepare the algorithm**

In [6]:
# Installing the elcat package
! pip3 install pyECLAT

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyECLAT
  Downloading pyECLAT-1.0.2-py3-none-any.whl (6.3 kB)
Installing collected packages: pyECLAT
Successfully installed pyECLAT-1.0.2


In [7]:
# Import the ECLAT algorithm
from pyECLAT import ECLAT

In [8]:
# Load the prepared transactions frame into the ECLAT algorithm object
EC = ECLAT(data=data)

In [9]:
# Inspect the binary (0/1) form of the data that the ECLAT object exposes as `df_bin`
EC.df_bin

Unnamed: 0,whole milk,dog food,candy,chocolate,kitchen utensil,long life bakery product,ham,sauces,hard cheese,herbs,...,oil,frozen fish,sugar,organic products,cake bar,grapes,finished products,waffles,baby food,salty snack
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9830,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
9831,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9832,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9833,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# **Generate rules**

In [10]:
# Minimum support threshold (5%)
min_support = 0.05

# Smallest and largest item-combination sizes to evaluate:
# start at pairs of products and stop at groups of four
min_combination, max_combination = 2, 4

In [11]:
# Run ECLAT over the configured combination sizes. `rule_supports` maps each
# combination label (item names joined by the separator) to its support.
rule_indices, rule_supports = EC.fit(
    min_support=min_support,
    min_combination=min_combination,
    max_combination=max_combination,
    separator=' & ',
    verbose=True,
)

Combination 2 by 2


378it [00:05, 72.70it/s]


Combination 3 by 3


3276it [00:37, 86.44it/s]


Combination 4 by 4


20475it [03:58, 85.80it/s]


In [12]:
# Retrieve the generated combinations as a table, highest support first
result = pd.DataFrame(
    list(rule_supports.items()),
    columns=['Item', 'Support'],
)
result.sort_values(by='Support', ascending=False)

Unnamed: 0,Item,Support
2,whole milk & other vegetables,0.074835
1,whole milk & rolls/buns,0.056634
0,whole milk & yogurt,0.056024
