# Apriori Examples

In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

## Numerical example from the mlxtend website
https://rasbt.github.io/mlxtend/user_guide/preprocessing/TransactionEncoder/

In [2]:
# Five toy shopping baskets, one inner list per transaction. Items are plain
# strings, and a basket may list the same item twice ('Onion' in the last one).
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

In [3]:
# One-hot encode the baskets: one row per transaction and one boolean column
# per distinct item. The column labels are taken from the fitted encoder.
model = TransactionEncoder()
encoded_results = model.fit_transform(dataset)
df = pd.DataFrame(data=encoded_results, columns=model.columns_)
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [4]:
# Frequent itemsets whose support is at least 0.6. With column names switched
# off (the default), each itemset lists column positions of `df`, not labels.
apriori(
    df,
    min_support=0.6,
    use_colnames=False,
)

Unnamed: 0,support,itemsets
0,0.8,[3]
1,1.0,[5]
2,0.6,[6]
3,0.6,[8]
4,0.6,[10]
5,0.8,"[3, 5]"
6,0.6,"[3, 8]"
7,0.6,"[5, 6]"
8,0.6,"[5, 8]"
9,0.6,"[5, 10]"


In [5]:
# Frequent itemsets with support of at least 0.6, reported with the item
# labels from `df`'s columns rather than integer column positions.
apriori(
    df,
    use_colnames=True,
    min_support=0.6,
)

Unnamed: 0,support,itemsets
0,0.8,[Eggs]
1,1.0,[Kidney Beans]
2,0.6,[Milk]
3,0.6,[Onion]
4,0.6,[Yogurt]
5,0.8,"[Eggs, Kidney Beans]"
6,0.6,"[Eggs, Onion]"
7,0.6,"[Kidney Beans, Milk]"
8,0.6,"[Kidney Beans, Onion]"
9,0.6,"[Kidney Beans, Yogurt]"


## Online Retail
http://archive.ics.uci.edu/ml/datasets/online+retail

In [6]:
# Load the invoice line items; kept under its own name so the raw rows stay
# available after the basket matrix is built.
retail_raw = pd.read_csv('online_retail.csv')

# Build the invoice x product basket matrix. `aggfunc=len` counts the line
# items per (InvoiceNo, Description) pair, so a product listed several times
# on one invoice gets a count above 1. apriori only accepts one-hot input
# (0/1 or True/False), so the counts are collapsed to "was this product on
# the invoice at all?" with `.gt(0)`.
df = (
    retail_raw
    .pivot_table(
        index='InvoiceNo',
        columns='Description',
        values='Quantity',
        aggfunc=len,
        fill_value=0,
    )
    .gt(0)
)

In [9]:
# Itemsets with support of at least 0.02, most frequent first.
# NOTE(review): the saved output under this cell lists the same grocery items
# as the Market Basket section, and the execution count skips from 6 to 9. It
# looks stale; confirm with Restart Kernel -> Run All.
(
    apriori(df, min_support=0.02, use_colnames=True)
    .sort_values(by='support', ascending=False)
)

Unnamed: 0,support,itemsets
103,0.122704,[ Eggs]
109,0.119030,[ White Bread]
97,0.109478,[ 2pct. Milk]
10,0.097722,[ Potato Chips]
3,0.093314,[ 98pct. Fat Free Hamburger]
86,0.092579,[ Hot Dogs]
181,0.085231,[ Sweet Relish]
5,0.080088,[ Onions]
134,0.079353,[ Toothpaste]
117,0.077884,[ Cola]


## Market Basket Analysis
http://csci.viu.ca/~barskym/teaching/DM2012/labs/LAB7/PartII.html

In [10]:
# Load the market-basket table and rank its frequent itemsets (support of at
# least 0.05) from most to least frequent.
# NOTE(review): the frame goes straight into apriori, so the CSV is presumably
# already one-hot encoded with one column per item; confirm against the file.
df = pd.read_csv('marketbasket.csv')
(
    apriori(df, min_support=0.05, use_colnames=True)
    .sort_values(by='support', ascending=False)
)

Unnamed: 0,support,itemsets
19,0.122704,[ Eggs]
20,0.11903,[ White Bread]
16,0.109478,[ 2pct. Milk]
4,0.097722,[ Potato Chips]
1,0.093314,[ 98pct. Fat Free Hamburger]
11,0.092579,[ Hot Dogs]
36,0.085231,[ Sweet Relish]
3,0.080088,[ Onions]
28,0.079353,[ Toothpaste]
21,0.077884,[ Cola]
