In [None]:
# Association Rule Mining: implementation of the Apriori algorithm (using the class example)

In [3]:
from collections import Counter
from itertools import combinations
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Input data: every transaction is a set of integer item IDs
transactions = [{1, 3, 4}, {2, 3, 5}, {1, 2, 3, 5}, {2, 5}, {1, 3, 5}]

# Encode the transactions as a table with one row per transaction and one
# column per distinct item; this DataFrame is what gets passed to apriori
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Function to generate itemsets of various sizes
def generate_itemsets(transactions, size):
    """Count how many transactions contain each itemset of a given size.

    Args:
        transactions: Iterable of transactions. Each transaction is an
            iterable of hashable, mutually sortable items (for example a
            set or a list of item IDs).
        size: Number of items in each generated itemset.

    Returns:
        A ``Counter`` mapping each ``frozenset`` of ``size`` items to the
        number of transactions that contain it. Itemsets that occur in no
        transaction are absent from the result.

    Raises:
        ValueError: If ``size`` is negative (raised by
            ``itertools.combinations``).
    """
    itemsets = Counter()
    for transaction in transactions:
        # Deduplicate first: a repeated item in a list-style transaction would
        # otherwise make the same itemset count several times for this single
        # transaction and create keys with fewer than `size` distinct items.
        # Sorting keeps the order in which itemsets are first seen stable.
        unique_items = sorted(set(transaction))
        itemsets.update(
            frozenset(combo) for combo in combinations(unique_items, size)
        )
    return itemsets

def _print_frequencies(counts):
    """Print one indented line per itemset in `counts` together with its frequency."""
    for members, count in counts.items():
        print(f"   {set(members)} → Frequency: {count}")


# Printed step 1: frequency of each single item, taken from the column sums of df
print("🔹 Step 1: Frequency of 1-itemsets:")
itemset_1_freq = Counter({frozenset([col]): df[col].sum() for col in df.columns})
_print_frequencies(itemset_1_freq)

# Printed step 2: frequency of every pair that occurs in some transaction
print("\n🔹 Step 2: Frequency of 2-itemsets:")
itemset_2_freq = generate_itemsets(transactions, 2)
_print_frequencies(itemset_2_freq)

# Printed step 3: frequency of every occurring triple
print("\n🔹 Step 3: Frequency of 3-itemsets:")
itemset_3_freq = generate_itemsets(transactions, 3)
_print_frequencies(itemset_3_freq)

# Printed step 4: frequency of every occurring 4-itemset
print("\n🔹 Step 4: Frequency of 4-itemsets:")
itemset_4_freq = generate_itemsets(transactions, 4)
_print_frequencies(itemset_4_freq)

# Printed step 5: run mlxtend's apriori with a minimum support of 0.4,
# reporting itemsets by column name, and show each support as a percentage
frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)

print("\n🔹 Step 5: Frequent Itemsets from Apriori:")
for support, members in zip(frequent_itemsets['support'], frequent_itemsets['itemsets']):
    print(f"   {set(members)} → Support: {support * 100:.2f}%")

# Printed step 6: derive association rules filtered on a confidence threshold
# of 0.7; only the confidence of each rule is shown, as a percentage
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

print("\n🔹 Step 6: Association Rules (Confidence Only):")
for lhs, rhs, confidence in zip(rules['antecedents'], rules['consequents'], rules['confidence']):
    print(f"   {set(lhs)} → {set(rhs)} → Confidence: {confidence * 100:.2f}%")


🔹 Step 1: Frequency of 1-itemsets:
   {1} → Frequency: 3
   {2} → Frequency: 3
   {3} → Frequency: 4
   {4} → Frequency: 1
   {5} → Frequency: 4

🔹 Step 2: Frequency of 2-itemsets:
   {1, 3} → Frequency: 3
   {1, 4} → Frequency: 1
   {3, 4} → Frequency: 1
   {2, 3} → Frequency: 2
   {2, 5} → Frequency: 3
   {3, 5} → Frequency: 3
   {1, 2} → Frequency: 1
   {1, 5} → Frequency: 2

🔹 Step 3: Frequency of 3-itemsets:
   {1, 3, 4} → Frequency: 1
   {2, 3, 5} → Frequency: 2
   {1, 2, 3} → Frequency: 1
   {1, 2, 5} → Frequency: 1
   {1, 3, 5} → Frequency: 2

🔹 Step 4: Frequency of 4-itemsets:
   {1, 2, 3, 5} → Frequency: 1

🔹 Step 5: Frequent Itemsets from Apriori:
   {1} → Support: 60.00%
   {2} → Support: 60.00%
   {3} → Support: 80.00%
   {5} → Support: 80.00%
   {1, 3} → Support: 60.00%
   {1, 5} → Support: 40.00%
   {2, 3} → Support: 40.00%
   {2, 5} → Support: 60.00%
   {3, 5} → Support: 60.00%
   {1, 3, 5} → Support: 40.00%
   {2, 3, 5} → Support: 40.00%

🔹 Step 6: Association Rules (C