In [1]:
import numpy as np

In [2]:
import pandas as pd

In [4]:
# Toy market-basket data: each inner list is one transaction (the items bought together).
dataset = [
    ["Coffee", "Donut", "Sandwich"],
    ["Coffee", "Donut"],
    ["Coffee", "Sandwich"],
    ["Coffee", "Muffin"],
    ["Donut", "Muffin"],
]


In [6]:
# Raw view of the baskets: one row per transaction, one column per item position
# (baskets shorter than the longest one are padded with missing values).
# NOTE: `df` is rebound to the one-hot encoded frame by the encoding cell further down.
df = pd.DataFrame(data=dataset)
df

Unnamed: 0,0,1,2
0,Coffee,Donut,Sandwich
1,Coffee,Donut,
2,Coffee,Sandwich,
3,Coffee,Muffin,
4,Donut,Muffin,


In [7]:
# Vocabulary: every distinct item across all baskets, in alphabetical order.
items = sorted({product for basket in dataset for product in basket})

# Create a one-hot encoded DataFrame: one row per basket, one 0/1 column per item.
data = [[int(product in basket) for product in items] for basket in dataset]

df = pd.DataFrame(data, columns=items)
print(df)


   Coffee  Donut  Muffin  Sandwich
0       1      1       0         1
1       1      1       0         0
2       1      0       0         1
3       1      0       1         0
4       0      1       1         0


In [18]:

from mlxtend.frequent_patterns import apriori

# Mine every itemset whose support is at least 0.4 (i.e. present in >= 40% of the
# transactions); use_colnames=True reports item names instead of column indices.
frq_itemset = apriori(
    df,
    min_support=0.4,
    use_colnames=True,
)
print(frq_itemset)

   support            itemsets
0      0.8            (Coffee)
1      0.6             (Donut)
2      0.4            (Muffin)
3      0.4          (Sandwich)
4      0.4     (Coffee, Donut)
5      0.4  (Coffee, Sandwich)




In [19]:
from mlxtend.frequent_patterns import association_rules

# Derive rules from the frequent itemsets (frq_itemset is the output from apriori).
# Thresholding on support with a minimum of 0 filters nothing out, so every
# candidate rule is returned.
rules = association_rules(
    frq_itemset,
    metric="support",
    min_threshold=0,
)

# Show all rules with their support, confidence, and lift
print(rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']])

  antecedents consequents  support  confidence      lift
0    (Coffee)     (Donut)      0.4    0.500000  0.833333
1     (Donut)    (Coffee)      0.4    0.666667  0.833333
2    (Coffee)  (Sandwich)      0.4    0.500000  1.250000
3  (Sandwich)    (Coffee)      0.4    1.000000  1.250000


In [21]:
# Regenerate the rules keeping only those with confidence >= 0.6 ...
rules = association_rules(frq_itemset, min_threshold=0.6, metric="confidence")
# ... then additionally require a rule support of at least 0.4.
filtered_rules = rules.loc[rules['support'].ge(0.4)]
print(filtered_rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']])

  antecedents consequents  support  confidence      lift
0     (Donut)    (Coffee)      0.4    0.666667  0.833333
1  (Sandwich)    (Coffee)      0.4    1.000000  1.250000


In [22]:
# Strong rules: the rules currently held in `rules` whose support is at least 0.4.
strong_rules = rules[rules['support'] >= 0.4]

# See all strong rules
print(strong_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

# Interpret one strong rule (the first row). Nothing is printed when no rule
# survived the filter, so `.iloc[0]` is only reached on a non-empty frame.
# NOTE(review): the first row is not necessarily the most meaningful rule; a lift
# below 1 points to a negative association, so consider ranking by lift first.
if not strong_rules.empty:
    row = strong_rules.iloc[0]
    # Antecedents/consequents are set-like, and set iteration order for strings
    # can differ between interpreter runs; sort so the sentence is reproducible.
    antecedents = ', '.join(sorted(row['antecedents']))
    consequents = ', '.join(sorted(row['consequents']))
    print(f"If a customer buys {antecedents}, they are likely to buy {consequents}.")

  antecedents consequents  support  confidence      lift
0     (Donut)    (Coffee)      0.4    0.666667  0.833333
1  (Sandwich)    (Coffee)      0.4    1.000000  1.250000
If a customer buys Donut, they are likely to buy Coffee.


In [23]:
from mlxtend.frequent_patterns import association_rules

# Sensitivity to the minimum support (confidence threshold held at 0.6).
# NOTE: this cell rebinds the notebook-level names `frq_itemset` and `rules`.
for min_sup in (0.2, 0.4, 0.6):
    frq_itemset = apriori(df, use_colnames=True, min_support=min_sup)
    rules = association_rules(frq_itemset, min_threshold=0.6, metric='confidence')
    print(f"Min support = {min_sup}: {len(rules)} rules")

# Sensitivity to the minimum confidence (support threshold held at 0.4).
frq_itemset = apriori(df, use_colnames=True, min_support=0.4)
for min_conf in (0.5, 0.6, 0.8):
    rules = association_rules(frq_itemset, min_threshold=min_conf, metric='confidence')
    print(f"Min confidence = {min_conf}: {len(rules)} rules")

Min support = 0.2: 3 rules
Min support = 0.4: 2 rules
Min support = 0.6: 0 rules
Min confidence = 0.5: 4 rules
Min confidence = 0.6: 2 rules
Min confidence = 0.8: 1 rules




In [None]:
# A lift greater than 1 means the items are bought together more often than would be expected by random chance, so a rule with lift > 1 indicates a genuine positive association between those items.