In [7]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [8]:
# Toy market-basket data: each inner list holds the items of one transaction.
dataset = [
    ["Coffee", "Donut", "Sandwich"],
    ["Coffee", "Donut"],
    ["Coffee", "Sandwich"],
    ["Coffee", "Muffin"],
    ["Donut", "Muffin"],
]


In [9]:
# Echo the raw baskets with 1-based labels (T1, T2, ...) so later output can be
# cross-checked against them.
transaction_lines = [
    f"T{position}: {basket}"
    for position, basket in enumerate(dataset, start=1)
]
print("Transactions:", *transaction_lines, sep="\n")


Transactions:
T1: ['Coffee', 'Donut', 'Sandwich']
T2: ['Coffee', 'Donut']
T3: ['Coffee', 'Sandwich']
T4: ['Coffee', 'Muffin']
T5: ['Donut', 'Muffin']


In [10]:
# One-hot encode the baskets: one column per distinct item, one row per transaction.
te = TransactionEncoder()
te.fit(dataset)                  # learns the item vocabulary, exposed as te.columns_
te_ary = te.transform(dataset)   # encoded array, one row per transaction
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Display as 0/1 integers; astype returns a new frame, so df itself is unchanged.
print("\nOne-hot encoded DataFrame:")
print(df.astype(int))


One-hot encoded DataFrame:
   Coffee  Donut  Muffin  Sandwich
0       1      1       0         1
1       1      1       0         0
2       1      0       0         1
3       1      0       1         0
4       0      1       1         0


In [11]:
# Step 3: mine the frequent itemsets with Apriori.
min_support = 0.4
frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)

# The "itemsets" column holds frozensets, whose "<" is a subset test (a partial
# order). Sorting on that column directly leaves the order of rows with equal
# support undefined, so break ties with explicit, totally ordered keys instead:
# itemset size first, then the alphabetically sorted item labels.
frequent_itemsets = (
    frequent_itemsets
    .assign(
        _size=frequent_itemsets["itemsets"].map(len),
        _label=frequent_itemsets["itemsets"].map(
            lambda items: ", ".join(sorted(map(str, items)))
        ),
    )
    .sort_values(["support", "_size", "_label"], ascending=[False, True, True])
    .drop(columns=["_size", "_label"])
    .reset_index(drop=True)
)
print(f"\nFrequent itemsets (min_support={min_support}):")
print(frequent_itemsets)


Frequent itemsets (min_support=0.4):
   support            itemsets
0      0.8            (Coffee)
1      0.6             (Donut)
2      0.4            (Muffin)
3      0.4          (Sandwich)
4      0.4     (Coffee, Donut)
5      0.4  (Coffee, Sandwich)


In [12]:
# Step 4: derive every association rule from the frequent itemsets.
# A confidence threshold of 0.0 keeps all candidate rules at this stage.
rule_columns = ["antecedents", "consequents", "support", "confidence", "lift"]
rules = (
    association_rules(frequent_itemsets, metric="confidence", min_threshold=0.0)
    .loc[:, rule_columns]
    .sort_values(by=["lift", "confidence", "support"], ascending=False)
    .reset_index(drop=True)
)
print("\nAll association rules (sorted by lift):")
print(rules)


All association rules (sorted by lift):
  antecedents consequents  support  confidence      lift
0  (Sandwich)    (Coffee)      0.4    1.000000  1.250000
1    (Coffee)  (Sandwich)      0.4    0.500000  1.250000
2     (Donut)    (Coffee)      0.4    0.666667  0.833333
3    (Coffee)     (Donut)      0.4    0.500000  0.833333


In [13]:
# Step 5: keep only the rules that clear both the support and confidence thresholds.
min_conf = 0.6
meets_support = rules["support"].ge(min_support)
meets_confidence = rules["confidence"].ge(min_conf)
rules_strong = rules.loc[meets_support & meets_confidence].reset_index(drop=True)
print(f"\nRules meeting support >= {min_support} AND confidence >= {min_conf}:")
print(rules_strong)



Rules meeting support >= 0.4 AND confidence >= 0.6:
  antecedents consequents  support  confidence      lift
0  (Sandwich)    (Coffee)      0.4    1.000000  1.250000
1     (Donut)    (Coffee)      0.4    0.666667  0.833333


In [14]:
# Step 6: turn a rule row into a plain-English sentence.
def rule_to_text(row):
    """Render one association rule as a human-readable sentence.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the keys "antecedents", "consequents",
        "confidence" and "lift" (for example a row of the rules DataFrame).
        The antecedents/consequents values are iterables of item labels.

    Returns
    -------
    str
        A sentence listing the sorted antecedent and consequent items, with
        confidence and lift formatted to two decimals.
    """
    def _labels(items):
        # Sort first, then stringify, so non-string item labels (e.g. integer
        # SKUs) can be joined instead of raising TypeError in str.join.
        return ", ".join(str(item) for item in sorted(items))

    A = _labels(row["antecedents"])
    B = _labels(row["consequents"])
    return (f"If a basket contains [{A}], it also contains [{B}] "
            f"with confidence={row['confidence']:.2f}, lift={row['lift']:.2f}.")

# Show a worked example from the first row of the strong rules, if any exist.
if rules_strong.empty:
    print("\nNo rules met the chosen thresholds.")
else:
    example_rule = rules_strong.iloc[0]
    print("\nExample interpretation:")
    print(rule_to_text(example_rule))


Example interpretation:
If a basket contains [Sandwich], it also contains [Coffee] with confidence=1.00, lift=1.25.


In [16]:
def _order_itemsets(itemsets_df):
    """Sort frequent itemsets by support (descending), then size, then item labels."""
    # frozensets compare by subset relation (a partial order), so sorting on the
    # "itemsets" column itself gives an undefined order for tied supports.
    keyed = itemsets_df.assign(
        _size=itemsets_df["itemsets"].map(len),
        _label=itemsets_df["itemsets"].map(
            lambda items: ", ".join(sorted(map(str, items)))
        ),
    )
    ordered = keyed.sort_values(
        ["support", "_size", "_label"], ascending=[False, True, True]
    )
    return ordered.drop(columns=["_size", "_label"]).reset_index(drop=True)


def run_apriori(min_support: float, min_conf: float, data=None):
    """Return frequent itemsets, all rules, strong rules, and lift>1 rules.

    Parameters
    ----------
    min_support : float
        Minimum support passed to ``apriori`` and reused for the strong-rule filter.
    min_conf : float
        Minimum confidence a rule needs to count as "strong".
    data : pandas.DataFrame, optional
        One-hot encoded transactions. When omitted, the notebook-level ``df``
        is used, which keeps existing two-argument calls working.

    Returns
    -------
    tuple of four pandas.DataFrame
        ``(fi, rules, strong, lift_pos)``: the frequent itemsets, every rule
        (columns antecedents, consequents, support, confidence, lift; sorted by
        lift, confidence, support descending), the rules meeting both
        thresholds, and the rules with lift > 1. All rule frames are empty when
        no itemset reaches ``min_support``.
    """
    transactions = df if data is None else data

    # Frequent itemsets, in a deterministic order
    fi = _order_itemsets(
        apriori(transactions, min_support=min_support, use_colnames=True)
    )

    rule_columns = ["antecedents", "consequents", "support", "confidence", "lift"]
    if fi.empty:
        # association_rules raises ValueError on an empty itemset frame, so
        # build an empty, correctly typed result instead of crashing.
        rules = pd.DataFrame({
            "antecedents": pd.Series(dtype=object),
            "consequents": pd.Series(dtype=object),
            "support": pd.Series(dtype=float),
            "confidence": pd.Series(dtype=float),
            "lift": pd.Series(dtype=float),
        })
    else:
        # min_threshold=0.0 keeps every candidate rule; filtering happens below.
        rules = association_rules(fi, metric="confidence", min_threshold=0.0)
        rules = rules[rule_columns]
    rules = rules.sort_values(
        ["lift", "confidence", "support"], ascending=False
    ).reset_index(drop=True)

    # Strong rules by the given thresholds
    is_strong = (rules["support"] >= min_support) & (rules["confidence"] >= min_conf)
    strong = rules[is_strong].reset_index(drop=True)

    # Rules with lift>1 (positive association)
    lift_pos = rules[rules["lift"] > 1].reset_index(drop=True)

    return fi, rules, strong, lift_pos



# Grid of thresholds to compare; every (support, confidence) pair is evaluated.
supports = [0.2, 0.3, 0.4, 0.5, 0.6]
confidences = [0.4, 0.6, 0.8]

# One summary record per threshold pair. The loop uses sweep_-prefixed names so
# it does not overwrite the notebook-level `rules` frame built in an earlier
# cell, which would silently change results if those cells were re-run.
records = []
for sweep_support in supports:
    for sweep_conf in confidences:
        sweep_fi, sweep_rules, sweep_strong, sweep_lift_pos = run_apriori(
            sweep_support, sweep_conf
        )
        records.append({
            "min_support": sweep_support,
            "min_confidence": sweep_conf,
            "#frequent_itemsets": len(sweep_fi),
            "#all_rules": len(sweep_rules),
            "#strong_rules(>=support & >=confidence)": len(sweep_strong),
            "#rules_with_lift>1": len(sweep_lift_pos)
        })