In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Location of the transactions CSV; point this at your own copy if needed.
file_path = "GroceryStoreDataSet.csv"

# Read the file with no header row and label its column "products".
df = pd.read_csv(
    file_path,
    header=None,
    names=["products"],
)


In [3]:
# Show the loaded transactions frame as this cell's output.
df

Unnamed: 0,products
0,"MILK,BREAD,BISCUIT"
1,"BREAD,MILK,BISCUIT,CORNFLAKES"
2,"BREAD,TEA,BOURNVITA"
3,"JAM,MAGGI,BREAD,MILK"
4,"MAGGI,TEA,BISCUIT"
5,"BREAD,TEA,BOURNVITA"
6,"MAGGI,TEA,CORNFLAKES"
7,"MAGGI,BREAD,TEA,BISCUIT"
8,"JAM,MAGGI,BREAD,TEA"
9,"BREAD,MILK"


In [4]:
# Turn each comma-separated basket string into a list of item names.
# Tokens are stripped and empty tokens dropped so that stray spaces
# ("BREAD, MILK") or a trailing comma cannot create bogus items such as
# " MILK" or "" that would be counted separately from the real product.
data = [
    [item.strip() for item in basket.split(',') if item.strip()]
    for basket in df["products"]
]

In [5]:
# One-hot encode the transaction lists: one row per basket, one column per
# distinct item, True where the basket contains the item.
te = TransactionEncoder()
te_data = te.fit(data).transform(data)

# Keep the encoder's boolean dtype. mlxtend's apriori expects a bool frame and
# emits a deprecation/performance warning for non-bool (e.g. int) input, so the
# previous `.astype(int)` cast is intentionally dropped; supports are unchanged.
df_encoded = pd.DataFrame(te_data, columns=te.columns_)

In [6]:
# Mine frequent itemsets from the one-hot frame, reporting item names rather
# than column indices.
# NOTE(review): the supports printed below bottom out at 0.05, so a 0.01
# minimum support appears not to filter anything out -- confirm this is intended.
frequent_itemsets = apriori(
    df_encoded,
    use_colnames=True,
    min_support=0.01,
)



In [7]:
# Derive association rules from the frequent itemsets, filtering on confidence.
# NOTE(review): a 0.005 confidence floor is very permissive and presumably keeps
# nearly every candidate rule -- confirm this is intended.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.005,
    metric="confidence",
)



In [8]:
# Heading line followed by the plain-text rendering of the itemsets table.
print("Frequent Itemsets:", frequent_itemsets, sep="\n")

Frequent Itemsets:
    support                             itemsets
0      0.35                            (BISCUIT)
1      0.20                          (BOURNVITA)
2      0.65                              (BREAD)
3      0.15                               (COCK)
4      0.40                             (COFFEE)
..      ...                                  ...
78     0.05         (MAGGI, TEA, BREAD, BISCUIT)
79     0.10  (COFFEE, CORNFLAKES, COCK, BISCUIT)
80     0.05            (JAM, BREAD, MAGGI, MILK)
81     0.05             (JAM, TEA, BREAD, MAGGI)
82     0.05      (COFFEE, CORNFLAKES, TEA, MILK)

[83 rows x 2 columns]


In [9]:
# Blank line, heading, then the plain-text rendering of the rules table.
print("\nAssociation Rules:", rules, sep="\n")


Association Rules:
      antecedents                 consequents  antecedent support  \
0         (BREAD)                   (BISCUIT)                0.65   
1       (BISCUIT)                     (BREAD)                0.35   
2          (COCK)                   (BISCUIT)                0.15   
3       (BISCUIT)                      (COCK)                0.35   
4        (COFFEE)                   (BISCUIT)                0.40   
..            ...                         ...                 ...   
331   (TEA, MILK)        (COFFEE, CORNFLAKES)                0.05   
332      (COFFEE)     (CORNFLAKES, TEA, MILK)                0.40   
333  (CORNFLAKES)         (COFFEE, TEA, MILK)                0.30   
334         (TEA)  (COFFEE, CORNFLAKES, MILK)                0.35   
335        (MILK)   (COFFEE, CORNFLAKES, TEA)                0.25   

     consequent support  support  confidence      lift  leverage  conviction  \
0                  0.35     0.20    0.307692  0.879121   -0.0275    0.9

In [10]:
# Optional export: write both result tables to CSV in the working directory,
# omitting the row index from each file.
frequent_itemsets.to_csv(path_or_buf="frequent_itemsets.csv", index=False)
rules.to_csv(path_or_buf="association_rules.csv", index=False)

In [11]:
# Show both result tables together as a (rules, itemsets) tuple.
(rules, frequent_itemsets)

(      antecedents                 consequents  antecedent support  \
 0         (BREAD)                   (BISCUIT)                0.65   
 1       (BISCUIT)                     (BREAD)                0.35   
 2          (COCK)                   (BISCUIT)                0.15   
 3       (BISCUIT)                      (COCK)                0.35   
 4        (COFFEE)                   (BISCUIT)                0.40   
 ..            ...                         ...                 ...   
 331   (TEA, MILK)        (COFFEE, CORNFLAKES)                0.05   
 332      (COFFEE)     (CORNFLAKES, TEA, MILK)                0.40   
 333  (CORNFLAKES)         (COFFEE, TEA, MILK)                0.30   
 334         (TEA)  (COFFEE, CORNFLAKES, MILK)                0.35   
 335        (MILK)   (COFFEE, CORNFLAKES, TEA)                0.25   
 
      consequent support  support  confidence      lift  leverage  conviction  \
 0                  0.35     0.20    0.307692  0.879121   -0.0275    0.938889