In [1]:
pip install mlxtend

Defaulting to user installation because normal site-packages is not writeable
Collecting mlxtend
  Downloading mlxtend-0.23.1-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   - -------------------------------------- 0.1/1.4 MB 825.8 kB/s eta 0:00:02
   ---- ----------------------------------- 0.2/1.4 MB 1.5 MB/s eta 0:00:01
   --------- ------------------------------ 0.3/1.4 MB 2.3 MB/s eta 0:00:01
   ------------ --------------------------- 0.5/1.4 MB 2.7 MB/s eta 0:00:01
   -------------------- ------------------- 0.7/1.4 MB 3.6 MB/s eta 0:00:01
   ------------------------------ --------- 1.1/1.4 MB 4.3 MB/s eta 0:00:01
   ---------------------------------------  1.4/1.4 MB 5.1 MB/s eta 0:00:01
   ---------------------------------------- 1.4/1.4 MB 5.1 MB/s eta 0:00:00
Installing collected packages: mlxtend

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
# Toy market-basket data: each inner list is the set of items bought in one transaction.
dataset = [
    ["Apple", "Beer", "Rice", "Chicken"],
    ["Apple", "Beer", "Rice"],
    ["Apple", "Beer"],
    ["Apple", "Pear"],
    ["Milk", "Beer", "Rice", "Chicken"],
    ["Milk", "Beer", "Rice"],
    ["Milk", "Beer"],
    ["Milk", "Pear"],
]

In [7]:
from mlxtend.preprocessing import TransactionEncoder

# One-hot encode the transactions: one row per transaction, one boolean
# column per distinct item.
te = TransactionEncoder()
encoded = te.fit(dataset).transform(dataset)
encoded

array([[ True,  True,  True, False, False,  True],
       [ True,  True, False, False, False,  True],
       [ True,  True, False, False, False, False],
       [ True, False, False, False,  True, False],
       [False,  True,  True,  True, False,  True],
       [False,  True, False,  True, False,  True],
       [False,  True, False,  True, False, False],
       [False, False, False,  True,  True, False]])

In [8]:
# Item labels learned by the encoder; used below as the column names for `encoded`.
te.columns_

['Apple', 'Beer', 'Chicken', 'Milk', 'Pear', 'Rice']

In [9]:
# Wrap the boolean matrix in a DataFrame labelled with the item names.
df = pd.DataFrame(data=encoded, columns=te.columns_)
df

Unnamed: 0,Apple,Beer,Chicken,Milk,Pear,Rice
0,True,True,True,False,False,True
1,True,True,False,False,False,True
2,True,True,False,False,False,False
3,True,False,False,False,True,False
4,False,True,True,True,False,True
5,False,True,False,True,False,True
6,False,True,False,True,False,False
7,False,False,False,True,True,False


In [10]:
from mlxtend.frequent_patterns import apriori, association_rules

# Step 1: find the frequent itemsets, i.e. those with support of at least 0.25.
# use_colnames=True reports item names instead of column indices.
freq_itemset = apriori(df, use_colnames=True, min_support=0.25)

In [11]:
# Show the frequent itemsets together with their support.
freq_itemset

Unnamed: 0,support,itemsets
0,0.5,(Apple)
1,0.75,(Beer)
2,0.25,(Chicken)
3,0.5,(Milk)
4,0.25,(Pear)
5,0.5,(Rice)
6,0.375,"(Beer, Apple)"
7,0.25,"(Apple, Rice)"
8,0.25,"(Chicken, Beer)"
9,0.375,"(Beer, Milk)"


In [12]:
# Step 2: derive association rules from the frequent itemsets, thresholding
# on confidence at 0.5; then report how many rules / metric columns came back.
rules = association_rules(
    freq_itemset,
    metric="confidence",
    min_threshold=0.5,
)
rules.shape

(28, 10)

In [14]:
# Show the generated rules with every metric column.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Beer),(Apple),0.75,0.5,0.375,0.5,1.0,0.0,1.0,0.0
1,(Apple),(Beer),0.5,0.75,0.375,0.75,1.0,0.0,1.0,0.0
2,(Apple),(Rice),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
3,(Rice),(Apple),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
4,(Chicken),(Beer),0.25,0.75,0.25,1.0,1.333333,0.0625,inf,0.333333
5,(Beer),(Milk),0.75,0.5,0.375,0.5,1.0,0.0,1.0,0.0
6,(Milk),(Beer),0.5,0.75,0.375,0.75,1.0,0.0,1.0,0.0
7,(Beer),(Rice),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5,1.0
8,(Rice),(Beer),0.5,0.75,0.5,1.0,1.333333,0.125,inf,0.5
9,(Chicken),(Rice),0.25,0.5,0.25,1.0,2.0,0.125,inf,0.666667


In [15]:
# Narrow the rules table to the four columns used for recommendations.
rules = rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence']]

In [16]:
# Show the rules after the column selection.
rules

Unnamed: 0,antecedents,consequents,support,confidence
0,(Beer),(Apple),0.375,0.5
1,(Apple),(Beer),0.375,0.75
2,(Apple),(Rice),0.25,0.5
3,(Rice),(Apple),0.25,0.5
4,(Chicken),(Beer),0.25,1.0
5,(Beer),(Milk),0.375,0.5
6,(Milk),(Beer),0.375,0.75
7,(Beer),(Rice),0.5,0.666667
8,(Rice),(Beer),0.5,1.0
9,(Chicken),(Rice),0.25,1.0


Sort the Rules

In [17]:
# Strongest rules first: highest confidence, ties broken by highest support.
rules.sort_values(['confidence', 'support'], ascending=[False, False])

Unnamed: 0,antecedents,consequents,support,confidence
8,(Rice),(Beer),0.5,1.0
4,(Chicken),(Beer),0.25,1.0
9,(Chicken),(Rice),0.25,1.0
15,"(Apple, Rice)",(Beer),0.25,1.0
18,"(Chicken, Beer)",(Rice),0.25,1.0
19,"(Chicken, Rice)",(Beer),0.25,1.0
21,(Chicken),"(Beer, Rice)",0.25,1.0
25,"(Milk, Rice)",(Beer),0.25,1.0
1,(Apple),(Beer),0.375,0.75
6,(Milk),(Beer),0.375,0.75


Filter the Rules

In [19]:
# Keep only rules whose confidence is strictly greater than 0.5,
# then display them strongest-first.
nrules = rules.loc[rules['confidence'].gt(0.5)]
nrules.sort_values(['confidence', 'support'], ascending=[False, False])

Unnamed: 0,antecedents,consequents,support,confidence
8,(Rice),(Beer),0.5,1.0
4,(Chicken),(Beer),0.25,1.0
9,(Chicken),(Rice),0.25,1.0
15,"(Apple, Rice)",(Beer),0.25,1.0
18,"(Chicken, Beer)",(Rice),0.25,1.0
19,"(Chicken, Rice)",(Beer),0.25,1.0
21,(Chicken),"(Beer, Rice)",0.25,1.0
25,"(Milk, Rice)",(Beer),0.25,1.0
1,(Apple),(Beer),0.375,0.75
6,(Milk),(Beer),0.375,0.75


In [20]:
# Rules whose antecedent is exactly the single item Rice.
nrules.loc[nrules['antecedents'] == {'Rice'}]

Unnamed: 0,antecedents,consequents,support,confidence
8,(Rice),(Beer),0.5,1.0


In [22]:
# Recommendation: the consequents of every rule whose antecedent is exactly {Rice}.
nrules.loc[nrules['antecedents'] == {'Rice'}, 'consequents']

8    (Beer)
Name: consequents, dtype: object

In [26]:
# Recommendation: consequents (with confidence) for a basket of exactly {Apple, Beer}.
nrules.loc[nrules['antecedents'] == {'Apple', 'Beer'}, ['consequents', 'confidence']]

Unnamed: 0,consequents,confidence
13,(Rice),0.666667


Export the Rules

In [27]:
# The antecedent/consequent cells hold frozensets. Written directly they end up
# in the CSV as "frozenset({...})" text, and the element order of multi-item
# sets can differ between runs. Serialize each set as sorted, comma-separated
# item names so the file is stable and readable by other tools.
(
    nrules
    .assign(
        antecedents=nrules['antecedents'].map(lambda items: ', '.join(sorted(items))),
        consequents=nrules['consequents'].map(lambda items: ', '.join(sorted(items))),
    )
    .to_csv('rules.csv', index=False)
)