In [10]:
!pip install mlxtend
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd
# Print the transactions clearly (for the given Apriori dataset)
dataset = [
    ['Coffee', 'Donut', 'Sandwich'],
    ['Coffee', 'Donut'],
    ['Coffee', 'Sandwich'],
    ['Coffee', 'Muffin'],
    ['Donut', 'Muffin']
]


print("Q1 Transactions")
for i, t in enumerate(dataset, 1):
    print(i, set(t))

te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_).astype(int)
print("\nQ2 One-hot encoded DataFrame")
print(df)



Defaulting to user installation because normal site-packages is not writeable
Collecting mlxtend
  Downloading mlxtend-0.23.4-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.4-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ------- -------------------------------- 0.3/1.4 MB ? eta -:--:--
   ---------------------------------------- 1.4/1.4 MB 6.3 MB/s  0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.4
Q1 Transactions
1 {'Coffee', 'Sandwich', 'Donut'}
2 {'Coffee', 'Donut'}
3 {'Coffee', 'Sandwich'}
4 {'Muffin', 'Coffee'}
5 {'Muffin', 'Donut'}

Q2 One-hot encoded DataFrame
   Coffee  Donut  Muffin  Sandwich
0       1      1       0         1
1       1      1       0         0
2       1      0       0         1
3       1      0       1         0
4       0      1       1         0


In [11]:

frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)
print("\nQ3 Frequent itemsets (min_support=0.4)")
print(frequent_itemsets.sort_values(by="support", ascending=False).reset_index(drop=True))

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.0)
rules = rules.sort_values(by=["support","confidence"], ascending=[False, False]).reset_index(drop=True)
print("\nQ4 All association rules (support, confidence, lift)")
print(rules[['antecedents','consequents','support','confidence','lift']])


Q3 Frequent itemsets (min_support=0.4)
   support            itemsets
0      0.8            (Coffee)
1      0.6             (Donut)
2      0.4            (Muffin)
3      0.4          (Sandwich)
4      0.4     (Coffee, Donut)
5      0.4  (Coffee, Sandwich)

Q4 All association rules (support, confidence, lift)
  antecedents consequents  support  confidence      lift
0  (Sandwich)    (Coffee)      0.4    1.000000  1.250000
1     (Donut)    (Coffee)      0.4    0.666667  0.833333
2    (Coffee)     (Donut)      0.4    0.500000  0.833333
3    (Coffee)  (Sandwich)      0.4    0.500000  1.250000




In [15]:
min_confidence = 0.6
min_support=0.4
strong_rules = rules[(rules['support'] >= min_support) & (rules['confidence'] >= min_confidence)].reset_index(drop=True)
print("\nQ5 Rules with support >= 0.4 and confidence >= 0.6")
print(strong_rules[['antecedents','consequents','support','confidence','lift']])

print("\nQ6 Interpretation example")
if not strong_rules.empty:
    a = list(map(lambda s: sorted(list(s)), strong_rules.iloc[0]['antecedents'])) if isinstance(strong_rules.iloc[0]['antecedents'], frozenset) else sorted(list(strong_rules.iloc[0]['antecedents']))
    b = sorted(list(strong_rules.iloc[0]['consequents']))
    print("If a customer buys", a, "they are likely to buy", b, f"(support={strong_rules.iloc[0]['support']}, confidence={strong_rules.iloc[0]['confidence']}, lift={strong_rules.iloc[0]['lift']})")
else:
    print("No strong rule found")



Q5 Rules with support >= 0.4 and confidence >= 0.6
  antecedents consequents  support  confidence      lift
0  (Sandwich)    (Coffee)      0.4    1.000000  1.250000
1     (Donut)    (Coffee)      0.4    0.666667  0.833333

Q6 Interpretation example
If a customer buys [['S', 'a', 'c', 'd', 'h', 'i', 'n', 'w']] they are likely to buy ['Coffee'] (support=0.4, confidence=1.0, lift=1.25)


In [16]:
print("\nQ7 Effect of changing thresholds (experiment)")
for ms in [0.2, 0.3, 0.4, 0.5]:
    fi = apriori(df, min_support=ms, use_colnames=True)
    r = association_rules(fi, metric="confidence", min_threshold=0.0)
    print("min_support=", ms, "frequent_itemsets=", fi.shape[0], "rules=", r.shape[0])
for mc in [0.5, 0.6, 0.7, 0.9]:
    cnt = rules[rules['confidence'] >= mc].shape[0]
    print("min_confidence=", mc, "rules_with_conf >=", mc, cnt)

print("\nQ8 Lift > 1 explanation")
print("Lift > 1 indicates the antecedent increases the likelihood of the consequent compared to independence.")


Q7 Effect of changing thresholds (experiment)
min_support= 0.2 frequent_itemsets= 10 rules= 16
min_support= 0.3 frequent_itemsets= 6 rules= 4
min_support= 0.4 frequent_itemsets= 6 rules= 4
min_support= 0.5 frequent_itemsets= 2 rules= 0
min_confidence= 0.5 rules_with_conf >= 0.5 4
min_confidence= 0.6 rules_with_conf >= 0.6 2
min_confidence= 0.7 rules_with_conf >= 0.7 1
min_confidence= 0.9 rules_with_conf >= 0.9 1

Q8 Lift > 1 explanation
Lift > 1 indicates the antecedent increases the likelihood of the consequent compared to independence.




In [17]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd

dataset = [
    ['Coffee', 'Donut', 'Sandwich'],
    ['Coffee', 'Donut'],
    ['Coffee', 'Sandwich'],
    ['Coffee', 'Muffin'],
    ['Donut', 'Muffin']
]

print("========== Q1. Transactions ==========")
for i, t in enumerate(dataset, 1):
    print(f"Transaction {i}: {t}")

te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_).astype(int)
print("\n========== Q2. One-Hot Encoded DataFrame ==========")
print(df.to_string(index=False))

min_support = 0.4
frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
print("\n========== Q3. Frequent Itemsets (min_support=0.4) ==========")
for _, row in frequent_itemsets.iterrows():
    print(f"Itemset: {set(row['itemsets'])}, Support: {row['support']:.2f}")

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.0)
print("\n========== Q4. All Association Rules ==========")
for _, row in rules.iterrows():
    print(f"Rule: {set(row['antecedents'])} -> {set(row['consequents'])}, "
          f"Support={row['support']:.2f}, Confidence={row['confidence']:.2f}, Lift={row['lift']:.2f}")

min_confidence = 0.6
strong_rules = rules[(rules['support'] >= min_support) & (rules['confidence'] >= min_confidence)]
print("\n========== Q5. Strong Rules (support>=0.4, confidence>=0.6) ==========")
if strong_rules.empty:
    print("No strong rules found")
else:
    for _, row in strong_rules.iterrows():
        print(f"Rule: {set(row['antecedents'])} -> {set(row['consequents'])}, "
              f"Support={row['support']:.2f}, Confidence={row['confidence']:.2f}, Lift={row['lift']:.2f}")

print("\n========== Q6. Interpretation of One Strong Rule ==========")
if not strong_rules.empty:
    row = strong_rules.iloc[0]
    print(f"If a customer buys {set(row['antecedents'])}, they are likely to also buy {set(row['consequents'])}.")
else:
    print("No strong rule to interpret.")

print("\n========== Q7. Effect of Changing Thresholds ==========")
for ms in [0.2, 0.3, 0.4, 0.5]:
    fi = apriori(df, min_support=ms, use_colnames=True)
    r = association_rules(fi, metric="confidence", min_threshold=0.0)
    print(f"min_support={ms}: Frequent Itemsets={fi.shape[0]}, Rules={r.shape[0]}")
for mc in [0.5, 0.6, 0.7, 0.9]:
    cnt = rules[rules['confidence'] >= mc].shape[0]
    print(f"min_confidence={mc}: Rules with confidence>={mc} = {cnt}")

print("\n========== Q8. Why Lift > 1 is Good ==========")
print("Lift > 1 means the occurrence of the antecedent increases the chance of the consequent, "
      "showing a positive association. Lift = 1 means no relation, and Lift < 1 means negative relation.")


Transaction 1: ['Coffee', 'Donut', 'Sandwich']
Transaction 2: ['Coffee', 'Donut']
Transaction 3: ['Coffee', 'Sandwich']
Transaction 4: ['Coffee', 'Muffin']
Transaction 5: ['Donut', 'Muffin']

 Coffee  Donut  Muffin  Sandwich
      1      1       0         1
      1      1       0         0
      1      0       0         1
      1      0       1         0
      0      1       1         0

Itemset: {'Coffee'}, Support: 0.80
Itemset: {'Donut'}, Support: 0.60
Itemset: {'Muffin'}, Support: 0.40
Itemset: {'Sandwich'}, Support: 0.40
Itemset: {'Coffee', 'Donut'}, Support: 0.40
Itemset: {'Coffee', 'Sandwich'}, Support: 0.40

Rule: {'Coffee'} -> {'Donut'}, Support=0.40, Confidence=0.50, Lift=0.83
Rule: {'Donut'} -> {'Coffee'}, Support=0.40, Confidence=0.67, Lift=0.83
Rule: {'Coffee'} -> {'Sandwich'}, Support=0.40, Confidence=0.50, Lift=1.25
Rule: {'Sandwich'} -> {'Coffee'}, Support=0.40, Confidence=1.00, Lift=1.25

Rule: {'Donut'} -> {'Coffee'}, Support=0.40, Confidence=0.67, Lift=0.83
Rule: {'S

