In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

import warnings
warnings.filterwarnings("ignore")

In [37]:
# Toy grocery baskets: each inner list is the set of items bought in one transaction.
df = [
    ["Milk", "Onion", "Nutmeg", "Kidney Beans", "Eggs", "Yogurt"],
    ["Dill", "Onion", "Nutmeg", "Kidney Beans", "Eggs", "Yogurt"],
    ["Milk", "Apple", "Kidney Beans", "Yogurt"],
    ["Milk", "Unicorn", "Corn", "Kidney Beans", "Yogurt"],
    ["Corn", "Onion", "Onion", "Kidney Beans", "Ice cream", "Eggs"],
]

In [38]:
# One-hot encode the baskets: one boolean column per distinct item.
te = TransactionEncoder()
nw_df = te.fit_transform(df)
nw_df

array([[False, False, False,  True, False,  True,  True,  True,  True,
        False,  True],
       [False, False,  True,  True, False,  True, False,  True,  True,
        False,  True],
       [ True, False, False, False, False,  True,  True, False, False,
        False,  True],
       [False,  True, False, False, False,  True,  True, False, False,
         True,  True],
       [False,  True, False,  True,  True,  True, False, False,  True,
        False, False]])

In [39]:
# Label the boolean matrix with the item names the encoder learned.
# NOTE(review): this rebinds `df`, which held the raw transaction lists until this cell.
df = pd.DataFrame(data=nw_df, columns=te.columns_)
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,False,False,True,True,False,False,False,True
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [40]:
# Itemsets with support of at least 0.6; without column names they are reported by column index.
apriori(df, min_support=0.6, use_colnames=False)

Unnamed: 0,support,itemsets
0,0.6,(3)
1,1.0,(5)
2,0.6,(6)
3,0.6,(8)
4,0.8,(10)
5,0.6,"(3, 5)"
6,0.6,"(8, 3)"
7,0.6,"(5, 6)"
8,0.6,"(8, 5)"
9,0.8,"(10, 5)"


In [41]:
# Mine itemsets with support >= 0.6, this time labelled with item names.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
# Record how many items each itemset holds; `len` is passed directly rather than
# through a pass-through lambda.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].map(len)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.6,(Eggs),1
1,1.0,(Kidney Beans),1
2,0.6,(Milk),1
3,0.6,(Onion),1
4,0.8,(Yogurt),1
5,0.6,"(Eggs, Kidney Beans)",2
6,0.6,"(Eggs, Onion)",2
7,0.6,"(Milk, Kidney Beans)",2
8,0.6,"(Onion, Kidney Beans)",2
9,0.8,"(Kidney Beans, Yogurt)",2


In [42]:
# Item pairs whose support is at least 0.8.
frequent_itemsets.loc[
    lambda itemsets: (itemsets['length'] == 2) & (itemsets['support'] >= 0.8)
]

Unnamed: 0,support,itemsets,length
9,0.8,"(Kidney Beans, Yogurt)",2


In [43]:
# Build association rules from the frequent itemsets, thresholded on confidence at 0.7.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)

In [44]:
# Number of items on the left-hand side of each rule; `len` is passed directly
# rather than through a pass-through lambda.
rules["antecedent_len"] = rules["antecedents"].map(len)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedent_len
0,(Eggs),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf,0.0,1
1,(Eggs),(Onion),0.6,0.6,0.6,1.0,1.666667,0.24,inf,1.0,1
2,(Onion),(Eggs),0.6,0.6,0.6,1.0,1.666667,0.24,inf,1.0,1
3,(Milk),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf,0.0,1
4,(Onion),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf,0.0,1
5,(Kidney Beans),(Yogurt),1.0,0.8,0.8,0.8,1.0,0.0,1.0,0.0,1
6,(Yogurt),(Kidney Beans),0.8,1.0,0.8,1.0,1.0,0.0,inf,0.0,1
7,(Milk),(Yogurt),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5,1
8,(Yogurt),(Milk),0.8,0.6,0.6,0.75,1.25,0.12,1.6,1.0,1
9,"(Eggs, Onion)",(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf,0.0,2


In [45]:
# Rules with a multi-item antecedent, confidence above 0.75 and lift above 1.2.
rules.loc[
    lambda r: (r['antecedent_len'] >= 2)
    & (r['confidence'] > 0.75)
    & (r['lift'] > 1.2)
]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedent_len
10,"(Eggs, Kidney Beans)",(Onion),0.6,0.6,0.6,1.0,1.666667,0.24,inf,1.0,2
11,"(Onion, Kidney Beans)",(Eggs),0.6,0.6,0.6,1.0,1.666667,0.24,inf,1.0,2
15,"(Milk, Kidney Beans)",(Yogurt),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5,2


In [46]:
# Look up the rules whose antecedent is exactly this pair of items.
rules.loc[rules['antecedents'].eq({'Eggs', 'Kidney Beans'})]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedent_len
10,"(Eggs, Kidney Beans)",(Onion),0.6,0.6,0.6,1.0,1.666667,0.24,inf,1.0,2


In [24]:
# Second toy dataset: each inner list is one transaction of clothing/accessory items.
trans = [
    ["Shirt", "Shoes", "Pants", "Hat", "Sunglasses", "Watch"],
    ["Shirt", "Shoes", "Hat", "Sunglasses", "Watch"],
    ["Shirt", "Pants", "Belt"],
    ["Shoes", "Hat", "Sunglasses", "Watch"],
    ["Shirt", "Shoes", "Pants", "Belt", "Sunglasses"],
]

In [25]:
# Encode the second transaction list with a fresh encoder.
te = TransactionEncoder()
nw_df = te.fit_transform(trans)

# Rebuild `df` as a boolean frame labelled with this dataset's item names.
df = pd.DataFrame(data=nw_df, columns=te.columns_)
df

Unnamed: 0,Belt,Hat,Pants,Shirt,Shoes,Sunglasses,Watch
0,False,True,True,True,True,True,True
1,False,True,False,True,True,True,True
2,True,False,True,True,False,False,False
3,False,True,False,False,True,True,True
4,True,False,True,True,True,True,False


In [43]:
# Mine named itemsets from the current `df` with support >= 0.65.
frequent_itemsets = apriori(df, min_support=0.65, use_colnames=True)
# Record how many items each itemset holds; `len` is passed directly rather than
# through a pass-through lambda.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].map(len)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.8,(Shirt),1
1,0.8,(Shoes),1
2,0.8,(Sunglasses),1
3,0.8,"(Shoes, Sunglasses)",2


In [44]:
# Build association rules from the current frequent itemsets, thresholded on confidence at 0.75.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.75,
)


In [45]:
# Display the rules table currently bound to `rules` as this cell's output.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Shoes),(Sunglasses),0.8,0.8,0.8,1.0,1.25,0.16,inf,1.0
1,(Sunglasses),(Shoes),0.8,0.8,0.8,1.0,1.25,0.16,inf,1.0
