# Association Mining

In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [3]:
# Toy market-basket dataset: five transactions, each holding the list of
# items bought together in that transaction.
data = dict(
    TransactionID=list(range(1, 6)),
    Items=[
        ["Bread", "Milk"],
        ["Bread", "Diaper", "Beer", "Eggs"],
        ["Milk", "Diaper", "Beer", "Coke"],
        ["Bread", "Milk", "Diaper", "Beer"],
        ["Bread", "Milk", "Diaper", "Coke"],
    ],
)


In [6]:
# Load the raw transactions into a table; each row keeps its basket as a
# Python list in the 'Items' column.
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,TransactionID,Items
0,1,"[Bread, Milk]"
1,2,"[Bread, Diaper, Beer, Eggs]"
2,3,"[Milk, Diaper, Beer, Coke]"
3,4,"[Bread, Milk, Diaper, Beer]"
4,5,"[Bread, Milk, Diaper, Coke]"


In [7]:
def _basket_to_indicator(basket):
    """Return a Series holding 1 for every item in ``basket``, indexed by item name."""
    return pd.Series(1, index=basket)


# Expand every basket into per-item indicator columns. Items missing from a
# basket come back as NaN once the per-row Series are aligned, so they are
# filled with 0 (which leaves the frame with float values).
df_items = df['Items'].apply(_basket_to_indicator).fillna(0)
df_items

Unnamed: 0,Bread,Milk,Diaper,Beer,Eggs,Coke
0,1.0,1.0,0.0,0.0,0.0,0.0
1,1.0,0.0,1.0,1.0,1.0,0.0
2,0.0,1.0,1.0,1.0,0.0,1.0
3,1.0,1.0,1.0,1.0,0.0,0.0
4,1.0,1.0,1.0,0.0,0.0,1.0


In [8]:
# Mine itemsets that appear in at least 60% of the transactions.
# df_items holds 1.0/0.0 floats; mlxtend expects a boolean one-hot frame and
# warns that non-bool input is deprecated and slower, so cast at the call
# site. The cast maps 1.0 -> True and 0.0 -> False, leaving supports unchanged.
frequent_itemsets = apriori(df_items.astype(bool), min_support=0.6, use_colnames=True)
frequent_itemsets



Unnamed: 0,support,itemsets
0,0.8,(Bread)
1,0.8,(Milk)
2,0.8,(Diaper)
3,0.6,(Beer)
4,0.6,"(Milk, Bread)"
5,0.6,"(Bread, Diaper)"
6,0.6,"(Milk, Diaper)"
7,0.6,"(Beer, Diaper)"


In [9]:
# Derive association rules from the frequent itemsets, keeping only those
# whose confidence is at least 0.7.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Milk),(Bread),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
1,(Bread),(Milk),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
2,(Bread),(Diaper),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
3,(Diaper),(Bread),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
4,(Milk),(Diaper),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
5,(Diaper),(Milk),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
6,(Beer),(Diaper),0.6,0.8,0.6,1.0,1.25,1.0,0.12,inf,0.5,0.75,1.0,0.875
7,(Diaper),(Beer),0.8,0.6,0.6,0.75,1.25,1.0,0.12,1.6,1.0,0.75,0.375,0.875


In [10]:
# Print a short, readable summary (support, confidence, lift) for each rule.
_summary_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
for lhs, rhs, support, confidence, lift in rules[_summary_columns].itertuples(index=False, name=None):
    # One print per rule; the joined lines reproduce the four-line layout,
    # including the leading blank line that separates rules.
    print(
        "\n".join(
            [
                f"\nRule: {set(lhs)} -> {set(rhs)}",
                f"Support: {support:.2f}",
                f"Confidence: {confidence:.2f}",
                f"Lift: {lift:.2f}",
            ]
        )
    )


Rule: {'Milk'} -> {'Bread'}
Support: 0.60
Confidence: 0.75
Lift: 0.94

Rule: {'Bread'} -> {'Milk'}
Support: 0.60
Confidence: 0.75
Lift: 0.94

Rule: {'Bread'} -> {'Diaper'}
Support: 0.60
Confidence: 0.75
Lift: 0.94

Rule: {'Diaper'} -> {'Bread'}
Support: 0.60
Confidence: 0.75
Lift: 0.94

Rule: {'Milk'} -> {'Diaper'}
Support: 0.60
Confidence: 0.75
Lift: 0.94

Rule: {'Diaper'} -> {'Milk'}
Support: 0.60
Confidence: 0.75
Lift: 0.94

Rule: {'Beer'} -> {'Diaper'}
Support: 0.60
Confidence: 1.00
Lift: 1.25

Rule: {'Diaper'} -> {'Beer'}
Support: 0.60
Confidence: 0.75
Lift: 1.25
