In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Sample transactions
data = {
    'TransactionID': [1, 2, 3, 4, 5],
    'Items': [
        ['Bread', 'Milk'],
        ['Bread', 'Diaper', 'Beer', 'Eggs'],
        ['Milk', 'Diaper', 'Beer', 'Coke'],
        ['Bread', 'Milk', 'Diaper', 'Beer'],
        ['Bread', 'Milk', 'Diaper', 'Coke']
    ]
}

# Step 1: Load into DataFrame
df = pd.DataFrame(data)

# Step 2: Convert to one-hot encoded format
encoded_df = df['Items'].apply(lambda x: pd.Series(1, index=x)).fillna(0)
print("One-Hot Encoded Data:\n", encoded_df)

# Step 3: Apply Apriori (support ≥ 0.6)
frequent_itemsets = apriori(encoded_df, min_support=0.6, use_colnames=True)
print("\nFrequent Itemsets:\n", frequent_itemsets)

# Step 4: Generate association rules (confidence ≥ 0.7)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print("\nAssociation Rules:\n", rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


One-Hot Encoded Data:
    Bread  Milk  Diaper  Beer  Eggs  Coke
0    1.0   1.0     0.0   0.0   0.0   0.0
1    1.0   0.0     1.0   1.0   1.0   0.0
2    0.0   1.0     1.0   1.0   0.0   1.0
3    1.0   1.0     1.0   1.0   0.0   0.0
4    1.0   1.0     1.0   0.0   0.0   1.0

Frequent Itemsets:
    support         itemsets
0      0.8          (Bread)
1      0.8           (Milk)
2      0.8         (Diaper)
3      0.6           (Beer)
4      0.6    (Milk, Bread)
5      0.6  (Bread, Diaper)
6      0.6   (Milk, Diaper)
7      0.6   (Diaper, Beer)

Association Rules:
   antecedents consequents  support  confidence    lift
0      (Milk)     (Bread)      0.6        0.75  0.9375
1     (Bread)      (Milk)      0.6        0.75  0.9375
2     (Bread)    (Diaper)      0.6        0.75  0.9375
3    (Diaper)     (Bread)      0.6        0.75  0.9375
4      (Milk)    (Diaper)      0.6        0.75  0.9375
5    (Diaper)      (Milk)      0.6        0.75  0.9375
6    (Diaper)      (Beer)      0.6        0.75  1.25

