# Association rule mining

In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### Mlxtend (machine learning extensions) 

Mlxtend is a Python library of useful tools for day-to-day data science tasks.

http://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/

### Load data

In [2]:
# Load the supermarket dataset from the CSV next to the notebook and preview
# its first ten rows.
csv_path = "./supermarket_short.csv"
data = pd.read_csv(csv_path)
data.head(10)

Unnamed: 0,grocery misc,baby needs,bread and cake,baking needs,coupons,juice-sat-cord-ms,tea,biscuits,canned fish-meat,canned fruit,...,750ml white nz,750ml red nz,750ml white imp,750ml red imp,sparkling nz,sparkling imp,brew kits/accesry,port and sherry,ctrled label wine,non host support
0,0,1,1,1,0,1,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,1,1,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,1,1,0,1,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
7,0,1,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
9,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,1,0,0,0,0,0


### Find frequent itemsets and rules

In [19]:
# Every column must be a 0/1 (or boolean) indicator. Check this explicitly so the
# boolean cast below cannot silently turn stray values (e.g. 2 or NaN) into True.
assert data.isin([0, 1]).all().all(), "expected a one-hot encoded 0/1 basket matrix"

# mlxtend deprecates non-boolean input frames (slower, and it warns about them),
# so pass a boolean copy of the same matrix. Supports are unchanged for 0/1 data.
# Keep itemsets that occur in at least 30% of the rows, labelled by column name.
frequent_itemsets = apriori(data.astype(bool), min_support=0.3, use_colnames=True)

In [20]:
# Derive association rules from the frequent itemsets, using confidence as the
# selection metric with a threshold of 0.6.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.6,
)

### Add antecedent length column

In [21]:
# Number of items on the left-hand side of each rule. `len` is passed directly
# instead of being wrapped in a lambda that only forwards its argument.
rules["antecedent_len"] = rules["antecedents"].apply(len)

### Filter rules

In [22]:
# Let pandas render up to 600 rows so the filtered rule table is not truncated.
pd.set_option("display.max_rows", 600)

In [33]:
# Keep only rules that clear all three thresholds: support above 0.3,
# confidence above 0.8, and lift above 1.
well_supported = rules["support"] > 0.3
high_confidence = rules["confidence"] > 0.8
positive_lift = rules["lift"] > 1
rules[well_supported & high_confidence & positive_lift]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
16,(margarine),(bread and cake),0.494489,0.719689,0.395721,0.800262,1.111956,0.039843,1.403396,1
85,"(biscuits, baking needs)",(bread and cake),0.381241,0.719689,0.314675,0.825397,1.14688,0.0403,1.605419,2
87,"(frozen foods, baking needs)",(bread and cake),0.396585,0.719689,0.320942,0.809264,1.124464,0.035524,1.469632,2
91,"(milk-cream, baking needs)",(bread and cake),0.412146,0.719689,0.341474,0.828526,1.151229,0.044857,1.63472,2
94,"(fruit, baking needs)",(bread and cake),0.410633,0.719689,0.338016,0.823158,1.143769,0.042488,1.585093,2
97,"(baking needs, vegetables)",(bread and cake),0.421223,0.719689,0.342771,0.813751,1.130698,0.039621,1.505032,2
99,"(frozen foods, biscuits)",(bread and cake),0.391182,0.719689,0.326345,0.834254,1.159187,0.044816,1.691211,2
103,"(biscuits, milk-cream)",(bread and cake),0.381889,0.719689,0.320942,0.840407,1.167737,0.046101,1.756418,2
106,"(biscuits, fruit)",(bread and cake),0.397018,0.719689,0.333045,0.838868,1.165598,0.047316,1.739634,2
109,"(biscuits, vegetables)",(bread and cake),0.381241,0.719689,0.321375,0.842971,1.171299,0.047,1.785087,2
