# Association rule mining

In [2]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### Mlxtend (machine learning extensions) 

Mlxtend is a Python library of useful tools for day-to-day data science tasks.

http://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/

### Load data

In [3]:
# Read the basket matrix from disk (path is relative to the notebook's working directory).
data = pd.read_csv(filepath_or_buffer="supermarket_short.csv")

# Preview the first ten rows.
data.head(n=10)

Unnamed: 0,grocery misc,baby needs,bread and cake,baking needs,coupons,juice-sat-cord-ms,tea,biscuits,canned fish-meat,canned fruit,...,750ml white nz,750ml red nz,750ml white imp,750ml red imp,sparkling nz,sparkling imp,brew kits/accesry,port and sherry,ctrled label wine,non host support
0,0,1,1,1,0,1,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,1,1,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,1,1,0,1,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
7,0,1,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
9,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,1,0,0,0,0,0


### Find frequent itemsets and rules

In [35]:
# apriori expects a one-hot encoded frame. Reject anything that is not 0/1
# (or True/False) up front, because the boolean cast below would otherwise
# silently turn such values into True instead of failing.
if not data.isin([0, 1]).all().all():
    raise ValueError("data must contain only 0/1 (or True/False) values")

# Pass a boolean copy: apriori emits a DeprecationWarning and takes a slower
# path for non-bool dtypes. `data` itself is left unmodified.
frequent_itemsets = apriori(
    data.astype(bool),
    min_support=0.15,
    use_colnames=True,  # report itemsets by column name rather than column index
    low_memory=True,
)

In [36]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose confidence reaches the 0.5 threshold.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.5,
)

### Add antecedent and consequent length columns

In [37]:
# Number of items on each side of every rule (each entry is a set-like
# collection of item names, so len() gives the item count).
rules["antecedent_len"] = rules["antecedents"].apply(len)
rules["consequent_len"] = rules["consequents"].apply(len)

### Filter rules

In [38]:
# Show every matching row instead of pandas' truncated view.
pd.set_option("display.max_rows", None)

# Keep rules inside the chosen support / confidence bands that also clear the
# lift and conviction floors, restricted to at most two antecedent items and a
# single consequent item; list the most confident rules first.
rules.loc[
    lambda r: (
        r["support"].gt(0.15) & r["support"].lt(0.7)
        & r["lift"].gt(1.15)
        & r["conviction"].gt(1.3)
        & r["antecedent_len"].lt(3)
        & r["consequent_len"].lt(2)
        & r["confidence"].gt(0.5) & r["confidence"].lt(0.8)
    )
].sort_values(by="confidence", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequent_len
2143,"(pet foods, fruit)",(vegetables),0.28096,0.639939,0.224335,0.798462,1.247714,0.044538,1.78656,2,1
1963,"(sauces-gravy-pkle, fruit)",(vegetables),0.322023,0.639939,0.256538,0.796644,1.244874,0.050463,1.770595,2,1
1753,"(vegetables, biscuits)",(fruit),0.381241,0.640156,0.303436,0.795918,1.24332,0.059383,1.763238,2,1
1758,"(canned fruit, fruit)",(vegetables),0.202291,0.639939,0.160795,0.794872,1.242105,0.031341,1.755295,2,1
1794,"(canned vegetables, vegetables)",(fruit),0.25027,0.640156,0.198401,0.792746,1.238365,0.038189,1.736249,2,1
2265,"(wrapping, fruit)",(vegetables),0.200778,0.639939,0.159066,0.79225,1.238007,0.030581,1.733141,2,1
2291,"(cheese, fruit)",(vegetables),0.283769,0.639939,0.224768,0.792079,1.237741,0.043173,1.73172,2,1
1299,"(baking needs, beef)",(vegetables),0.249622,0.639939,0.19732,0.790476,1.235236,0.037577,1.718471,2,1
2324,"(margarine, fruit)",(vegetables),0.332397,0.639939,0.262589,0.789987,1.234471,0.049875,1.714468,2,1
1093,"(margarine, breakfast food)",(baking needs),0.238167,0.604063,0.188027,0.789474,1.306939,0.044159,1.8807,2,1
