## Association Rule Mining Using the Apriori Algorithm

In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [11]:
# Load the grocery transactions CSV into a DataFrame.
data = pd.read_csv( filepath_or_buffer='/content/groceries.csv' )

In [12]:
# Preview the first five rows of the raw transaction table.
data.head( n=5 )

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,citrus fruit,semi-finished bread,margarine,ready soups,,,,,
1,tropical fruit,yogurt,coffee,,,,,,
2,whole milk,,,,,,,,
3,pip fruit,yogurt,cream cheese,meat spreads,,,,,
4,other vegetables,whole milk,condensed milk,long life bakery product,,,,,


In [13]:
# Build the transaction-by-item indicator matrix used for frequent-itemset mining.
#   1. stack()           -> one long Series of item names keyed by (row, column)
#   2. str.get_dummies() -> one indicator column per distinct item name
#   3. groupby(level=0)  -> collapse the per-cell rows back to one row per transaction
# max() is used instead of sum() so every entry stays 0/1 even when the same
# item is listed more than once in a transaction; sum() would produce counts
# greater than 1 and the result would no longer be a binary matrix.
basket_sets = data.stack().str.get_dummies().groupby( level=0 ).max()

In [14]:
# Preview the first five rows of the one-hot encoded transaction matrix.
basket_sets.head( n=5 )

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [7]:
# Cast the item indicator columns to bool before mining.
basket_sets_bool = basket_sets.astype( dtype=bool )
# use_colnames is left at its default (False), so the itemsets in the result
# are reported as column positions rather than item names.
apriori( df=basket_sets_bool, min_support=0.02, use_colnames=False )

Unnamed: 0,support,itemsets
0,0.028155,(1)
1,0.054369,(8)
2,0.032039,(9)
3,0.027508,(10)
4,0.072492,(11)
...,...,...
105,0.022977,"(154, 163)"
106,0.033981,"(162, 158)"
107,0.055987,"(162, 163)"
108,0.022977,"(121, 162, 101)"


In [8]:
# Mine itemsets with at least 2% support; use_colnames=True makes the result
# show item names instead of column positions.
basket_sets_bool = basket_sets.astype( dtype=bool )
apriori( df=basket_sets_bool, use_colnames=True, min_support=0.02 )

Unnamed: 0,support,itemsets
0,0.028155,(UHT-milk)
1,0.054369,(beef)
2,0.032039,(berries)
3,0.027508,(beverages)
4,0.072492,(bottled beer)
...,...,...
105,0.022977,"(tropical fruit, yogurt)"
106,0.033981,"(whipped/sour cream, whole milk)"
107,0.055987,"(yogurt, whole milk)"
108,0.022977,"(other vegetables, root vegetables, whole milk)"


In [16]:
# Mine itemsets at a much lower support threshold (0.002) and record how many
# items each itemset contains in a 'length' column.
df_combo = basket_sets.astype( dtype=bool )
frequent_itemsets = apriori( df=df_combo, use_colnames=True, min_support=0.002 )
frequent_itemsets[ 'length' ] = frequent_itemsets[ 'itemsets' ].map( len )
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.007120,(Instant food products),1
1,0.028155,(UHT-milk),1
2,0.002913,(abrasive cleaner),1
3,0.003560,(artif. sweetener),1
4,0.014887,(baking powder),1
...,...,...,...
2313,0.002265,"(root vegetables, citrus fruit, whole milk, yo...",5
2314,0.002589,"(whipped/sour cream, citrus fruit, whole milk,...",5
2315,0.002265,"(whipped/sour cream, whole milk, yogurt, other...",5
2316,0.002265,"(tropical fruit, root vegetables, whole milk, ...",5


In [17]:
# Keep only the itemsets made up of three or more items.
frequent_itemsets.loc[ frequent_itemsets[ 'length' ].ge( 3 ) ]

Unnamed: 0,support,itemsets,length
1260,0.002589,"(bottled water, other vegetables, UHT-milk)",3
1261,0.002265,"(bottled water, UHT-milk, soda)",3
1262,0.002589,"(baking powder, other vegetables, whole milk)",3
1263,0.002265,"(whipped/sour cream, baking powder, whole milk)",3
1264,0.002589,"(berries, beef, whole milk)",3
...,...,...,...
2313,0.002265,"(root vegetables, citrus fruit, whole milk, yo...",5
2314,0.002589,"(whipped/sour cream, citrus fruit, whole milk,...",5
2315,0.002265,"(whipped/sour cream, whole milk, yogurt, other...",5
2316,0.002265,"(tropical fruit, root vegetables, whole milk, ...",5


In [18]:
# Derive association rules whose confidence is at least 0.9 and preview them.
rules = association_rules( df=frequent_itemsets, min_threshold=0.9, metric='confidence' )
rules.head( n=5 )

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,"(curd, hamburger meat)",(whole milk),0.003236,0.256634,0.003236,1.0,3.896595,1.0,0.002406,inf,0.745779,0.01261,1.0,0.506305
1,"(frozen fish, root vegetables)",(whole milk),0.002265,0.256634,0.002265,1.0,3.896595,1.0,0.001684,inf,0.745054,0.008827,1.0,0.504414
2,"(sausage, bottled water, soda)",(yogurt),0.00356,0.136893,0.003236,0.909091,6.640877,1.0,0.002749,9.494175,0.852452,0.023585,0.894672,0.466366
3,"(sausage, yogurt, bottled water)",(soda),0.00356,0.159223,0.003236,0.909091,5.709534,1.0,0.002669,9.248544,0.827801,0.020284,0.891875,0.464708
4,"(butter, pip fruit, whole milk)",(other vegetables),0.00356,0.192557,0.003236,0.909091,4.721161,1.0,0.002551,8.881877,0.791004,0.016779,0.887411,0.462949


In [19]:
# Rebuild the rule table using lift as the metric (threshold 1); this rebinds
# `rules`, replacing whatever it held before.
rules = association_rules( df=frequent_itemsets, min_threshold=1, metric='lift' )
rules.head( n=5 )

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(other vegetables),(Instant food products),0.192557,0.00712,0.002265,0.011765,1.652406,1.0,0.000894,1.0047,0.488978,0.011475,0.004678,0.164973
1,(Instant food products),(other vegetables),0.00712,0.192557,0.002265,0.318182,1.652406,1.0,0.000894,1.18425,0.397653,0.011475,0.155584,0.164973
2,(Instant food products),(root vegetables),0.00712,0.110032,0.002265,0.318182,2.891711,1.0,0.001482,1.305286,0.658875,0.019718,0.233884,0.169385
3,(root vegetables),(Instant food products),0.110032,0.00712,0.002265,0.020588,2.891711,1.0,0.001482,1.013752,0.735065,0.019718,0.013565,0.169385
4,(bottled water),(UHT-milk),0.111974,0.028155,0.006472,0.057803,2.05302,1.0,0.00332,1.031467,0.577587,0.048426,0.030507,0.143844


In [20]:
# Strong rules only: lift of at least 5 together with confidence of at least 0.8.
rules.loc[ rules[ 'lift' ].ge( 5 ) & rules[ 'confidence' ].ge( 0.8 ) ]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
7111,"(sausage, bottled water, soda)",(yogurt),0.00356,0.136893,0.003236,0.909091,6.640877,1.0,0.002749,9.494175,0.852452,0.023585,0.894672,0.466366
7113,"(sausage, yogurt, bottled water)",(soda),0.00356,0.159223,0.003236,0.909091,5.709534,1.0,0.002669,9.248544,0.827801,0.020284,0.891875,0.464708
7377,"(sausage, canned beer, rolls/buns)",(shopping bags),0.002589,0.082848,0.002265,0.875,10.561523,1.0,0.002051,7.337217,0.907667,0.027237,0.863709,0.451172
8132,"(whipped/sour cream, dessert, other vegetables)",(yogurt),0.002913,0.136893,0.002589,0.888889,6.493302,1.0,0.00219,7.767961,0.848466,0.018868,0.871266,0.453901
8186,"(frankfurter, other vegetables, pastry)",(rolls/buns),0.002265,0.194498,0.002265,1.0,5.141431,1.0,0.001825,inf,0.807331,0.011647,1.0,0.505824
8314,"(hamburger meat, pork, whole milk)",(other vegetables),0.002589,0.192557,0.002589,1.0,5.193277,1.0,0.00209,inf,0.809539,0.013445,1.0,0.506723
