In [8]:
#apriori algorithm example(coffee shop)

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# 1. What does the dataset represent? List all the transactions clearly.
dataset = [
    ['Coffee', 'Donut', 'Sandwich'],
    ['Coffee', 'Donut'],
    ['Coffee', 'Sandwich'],
    ['Coffee', 'Muffin'],
    ['Donut', 'Muffin']
]

print("1: Transactions")
for i, transaction in enumerate(dataset, 1):
    print(f"Transaction {i}: {transaction}")


#2. Convert the dataset into a one-hot encoded DataFrame. What does each column and row represent?
te = TransactionEncoder()
te_array = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_array, columns=te.columns_)
print("\n2: One-Hot Encoded DataFrame")
print(df.astype(int))

#3. With minimum support = 0.4, identify all frequent itemsets. Show their support values.s
frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)
print("\n3: Frequent Itemsets")
print(frequent_itemsets)

#4. From the frequent itemsets, generate all possible association rules. List their support, confidence, and lift.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
print("\n4: Association Rules")
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

#5. Which rules satisfy both minimum support (0.4) and minimum confidence (0.6)?
print("\n5: ({Sandwich} {Coffee}) satisfy both minimum support (0.4) and minimum confidence (0.6)")

#6. Interpret one strong rule in words. For example: If a customer buys X, they are likely to buy Y.
print("\n6: If a customer buys Sandwich, they are likely to buy Coffee. (Support: 0.4, Confidence: 0.67)")

#7. How does changing min_support or min_conf affect the number of rules generated? 
print("""Lowering min_support or min_confidence increases the number of rules generated, 
while increasing them reduces the number of rules by filtering out less frequent or weaker associations.""")





1: Transactions
Transaction 1: ['Coffee', 'Donut', 'Sandwich']
Transaction 2: ['Coffee', 'Donut']
Transaction 3: ['Coffee', 'Sandwich']
Transaction 4: ['Coffee', 'Muffin']
Transaction 5: ['Donut', 'Muffin']

2: One-Hot Encoded DataFrame
   Coffee  Donut  Muffin  Sandwich
0       1      1       0         1
1       1      1       0         0
2       1      0       0         1
3       1      0       1         0
4       0      1       1         0

3: Frequent Itemsets
   support            itemsets
0      0.8            (Coffee)
1      0.6             (Donut)
2      0.4            (Muffin)
3      0.4          (Sandwich)
4      0.4     (Coffee, Donut)
5      0.4  (Coffee, Sandwich)

4: Association Rules
  antecedents consequents  support  confidence      lift
0     (Donut)    (Coffee)      0.4    0.666667  0.833333
1  (Sandwich)    (Coffee)      0.4    1.000000  1.250000

5: ({Sandwich} {Coffee}) satisfy both minimum support (0.4) and minimum confidence (0.6)
