In [1]:
# A simple example demonstrating association-rule mining with mlxtend
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder


In [None]:

# Build a small sample dataset: each inner list is one shopping basket
# (transaction) holding the items that were bought together.
data = [
    ['Milk', 'Bread', 'Eggs', 'Diapers', 'Beer'],
    ['Bread', 'Diapers', 'Beer', 'Cola'],
    ['Milk', 'Bread', 'Diapers', 'Beer'],
    ['Milk', 'Eggs', 'Diapers', 'Cola'],
    ['Bread', 'Eggs', 'Diapers', 'Beer'],
    ['Milk', 'Bread', 'Eggs', 'Diapers', 'Cola'],
    ['Bread', 'Diapers', 'Cola'],
    ['Milk', 'Bread', 'Diapers', 'Beer'],
    ['Milk', 'Eggs', 'Diapers'],
    ['Bread', 'Eggs', 'Beer'],
]

# One-hot encode the baskets: every distinct item becomes its own boolean column,
# True when the item is present in that row's basket and False otherwise.
# fit_transform learns the item vocabulary and encodes the baskets in one call;
# te.columns_ then holds the item names in column order.
te = TransactionEncoder()
te_ary = te.fit_transform(data)
df = pd.DataFrame(te_ary, columns=te.columns_)

print("One-hot encoded DataFrame:")
print(df)
print("\n")

One-hot encoded DataFrame:
    Beer  Bread   Cola  Diapers   Eggs   Milk
0   True   True  False     True   True   True
1   True   True   True     True  False  False
2   True   True  False     True  False   True
3  False  False   True     True   True   True
4   True   True  False     True   True  False
5  False   True   True     True   True   True
6  False   True   True     True  False  False
7   True   True  False     True  False   True
8  False  False  False     True   True   True
9   True   True  False    False   True  False




In [5]:
# Mine frequent itemsets from the one-hot basket table with Apriori.
# An itemset is kept only if its support -- the fraction of baskets that contain
# every item in the set -- is at least 0.6 (i.e. 60% of the transactions).
# use_colnames=True labels the itemsets with item names rather than column indices.
frequent_itemsets = apriori(
    df,
    use_colnames=True,
    min_support=0.6,
)

# Single print call; sep="\n" puts each piece on its own line, and the trailing
# "\n" argument leaves blank lines after the table.
print("Frequent Itemsets:", frequent_itemsets, "\n", sep="\n")

Frequent Itemsets:
   support          itemsets
0      0.6            (Beer)
1      0.8           (Bread)
2      0.9         (Diapers)
3      0.6            (Eggs)
4      0.6            (Milk)
5      0.6     (Bread, Beer)
6      0.7  (Bread, Diapers)
7      0.6   (Milk, Diapers)




In [6]:
# Turn the frequent itemsets into association rules (antecedents -> consequents),
# scoring candidates by confidence and keeping those that reach the 0.7 threshold.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.7,
    metric="confidence",
)

# Report only both sides of each rule plus support, confidence and lift;
# the remaining metric columns are left out to keep the table narrow.
print(
    "Association Rules:",
    rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']],
    sep="\n",
)

Association Rules:
  antecedents consequents  support  confidence      lift
0     (Bread)      (Beer)      0.6    0.750000  1.250000
1      (Beer)     (Bread)      0.6    1.000000  1.250000
2     (Bread)   (Diapers)      0.7    0.875000  0.972222
3   (Diapers)     (Bread)      0.7    0.777778  0.972222
4      (Milk)   (Diapers)      0.6    1.000000  1.111111
