## Association rule mining with the Apriori algorithm

In [5]:
!pip install mlxtend


Collecting mlxtend
  Obtaining dependency information for mlxtend from https://files.pythonhosted.org/packages/0f/92/5c322336a0991949a1e91f6acd7e04f7e05b0fb6252a3f00fcdc0cb5e97d/mlxtend-0.23.3-py3-none-any.whl.metadata
  Using cached mlxtend-0.23.3-py3-none-any.whl.metadata (7.3 kB)
Using cached mlxtend-0.23.3-py3-none-any.whl (1.4 MB)
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.3


In [6]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Load the grocery transactions from the CSV file in the working directory.
# In the rendered output each row is one basket and the columns hold its items,
# with missing values in the unused trailing slots.
data = pd.read_csv(filepath_or_buffer='groceries.csv')

# Bare last expression: let the notebook render the (truncated) frame.
data

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,citrus fruit,semi-finished bread,margarine,ready soups,,,,,
1,tropical fruit,yogurt,coffee,,,,,,
2,whole milk,,,,,,,,
3,pip fruit,yogurt,cream cheese,meat spreads,,,,,
4,other vegetables,whole milk,condensed milk,long life bakery product,,,,,
...,...,...,...,...,...,...,...,...,...
3085,whipped/sour cream,domestic eggs,bottled water,soda,,,,,
3086,tropical fruit,pip fruit,other vegetables,butter milk,yogurt,whipped/sour cream,UHT-milk,margarine,sugar
3087,UHT-milk,canned beer,,,,,,,
3088,root vegetables,semi-finished bread,specialty bar,,,,,,


In [7]:
# Preview the first five baskets of the raw table.
data.head(n=5)

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,citrus fruit,semi-finished bread,margarine,ready soups,,,,,
1,tropical fruit,yogurt,coffee,,,,,,
2,whole milk,,,,,,,,
3,pip fruit,yogurt,cream cheese,meat spreads,,,,,
4,other vegetables,whole milk,condensed milk,long life bakery product,,,,,


In [8]:
# Turn the wide "one basket per row" table into an item-indicator matrix:
#   1. stack()            -> one long Series of items keyed by (row label, column label)
#   2. str.get_dummies()  -> one indicator column per distinct item string
#                            (note: get_dummies splits each string on '|' by default)
#   3. groupby(level=0)   -> regroup the indicator rows by their original row label
#   4. sum()              -> per-basket count of each item
baskets_sets = (
    data
    .stack()
    .str.get_dummies()
    .groupby(level=0)
    .sum()
)

In [9]:
# Preview the first five rows of the item-indicator matrix.
baskets_sets.head(n=5)

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [10]:
# Cast the per-basket item counts to True/False before handing them to apriori.
baskets_sets_bool = baskets_sets.astype(bool)

# Frequent itemsets that occur in at least 2% of the baskets.
# use_colnames is left at its default here, so the itemsets in the output are
# reported as integer column positions rather than item names.
apriori(
    baskets_sets_bool,
    min_support=0.02,
)

Unnamed: 0,support,itemsets
0,0.028155,(1)
1,0.054369,(8)
2,0.032039,(9)
3,0.027508,(10)
4,0.072492,(11)
...,...,...
105,0.022977,"(154, 163)"
106,0.033981,"(162, 158)"
107,0.055987,"(162, 163)"
108,0.022977,"(121, 162, 101)"


In [11]:
# Recompute the boolean item matrix so this cell can be run on its own.
baskets_sets_bool = baskets_sets.astype(bool)

# Same 2% support threshold as before, but with use_colnames=True the itemsets
# are reported with the item names (the DataFrame column labels).
apriori(
    baskets_sets_bool,
    min_support=0.02,
    use_colnames=True,
)

Unnamed: 0,support,itemsets
0,0.028155,(UHT-milk)
1,0.054369,(beef)
2,0.032039,(berries)
3,0.027508,(beverages)
4,0.072492,(bottled beer)
...,...,...
105,0.022977,"(yogurt, tropical fruit)"
106,0.033981,"(whole milk, whipped/sour cream)"
107,0.055987,"(yogurt, whole milk)"
108,0.022977,"(whole milk, root vegetables, other vegetables)"


In [14]:
# Boolean item matrix used as the input for the mining step below.
df_combo = baskets_sets.astype(bool)

# Mine itemsets with a much lower support threshold (0.2% of the baskets),
# keeping the item names in the result.
frequent_itemsets = apriori(
    df_combo,
    min_support=0.002,
    use_colnames=True,
)

# Record how many items each itemset contains so it can be filtered by size.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].map(len)

# Bare last expression: render the resulting table.
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.007120,(Instant food products),1
1,0.028155,(UHT-milk),1
2,0.002913,(abrasive cleaner),1
3,0.003560,(artif. sweetener),1
4,0.014887,(baking powder),1
...,...,...,...
2313,0.002265,"(yogurt, citrus fruit, whole milk, other veget...",5
2314,0.002589,"(yogurt, citrus fruit, whole milk, other veget...",5
2315,0.002265,"(yogurt, whole milk, pip fruit, other vegetabl...",5
2316,0.002265,"(yogurt, whole milk, other vegetables, tropica...",5


In [28]:
# Keep only the frequent itemsets that contain three or more items.
frequent_itemsets.loc[frequent_itemsets['length'].ge(3)]

Unnamed: 0,support,itemsets,length
1260,0.002589,"(bottled water, UHT-milk, other vegetables)",3
1261,0.002265,"(soda, bottled water, UHT-milk)",3
1262,0.002589,"(baking powder, whole milk, other vegetables)",3
1263,0.002265,"(baking powder, whole milk, whipped/sour cream)",3
1264,0.002589,"(beef, whole milk, berries)",3
...,...,...,...
2313,0.002265,"(yogurt, citrus fruit, whole milk, other veget...",5
2314,0.002589,"(yogurt, citrus fruit, whole milk, other veget...",5
2315,0.002265,"(yogurt, whole milk, pip fruit, other vegetabl...",5
2316,0.002265,"(yogurt, whole milk, other vegetables, tropica...",5


# Derive association rules from the mined itemsets, using lift as the selection
# metric with a minimum threshold of 1.
rules = association_rules(
    frequent_itemsets,
    metric='lift',
    min_threshold=1,
)

# Preview the first five rules.
rules.head(n=5)