In [1]:
pip install mlxtend

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.

Collecting mlxtend
  Downloading mlxtend-0.23.1-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   - -------------------------------------- 0.0/1.4 MB 653.6 kB/s eta 0:00:03
   ----- ---------------------------------- 0.2/1.4 MB 2.1 MB/s eta 0:00:01
   ------------------ --------------------- 0.7/1.4 MB 4.6 MB/s eta 0:00:01
   ---------------------------------------  1.4/1.4 MB 7.6 MB/s eta 0:00:01
   ---------------------------------------- 1.4/1.4 MB 7.1 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.1


##### Create the dataset

In [2]:
# Toy market-basket data: each inner list is one transaction (the items bought together).
dataset = [
    ['Apple', 'Beer', 'Rice', 'Chicken'],
    ['Apple', 'Beer', 'Rice'],
    ['Apple', 'Beer'],
    ['Apple', 'Pear'],
    ['Milk', 'Beer', 'Rice', 'Chicken'],
    ['Milk', 'Beer', 'Rice'],
    ['Milk', 'Beer'],
    ['Milk', 'Pear'],
]

In [3]:
# Echo the list of transactions to confirm what was entered.
dataset

[['Apple', 'Beer', 'Rice', 'Chicken'],
 ['Apple', 'Beer', 'Rice'],
 ['Apple', 'Beer'],
 ['Apple', 'Pear'],
 ['Milk', 'Beer', 'Rice', 'Chicken'],
 ['Milk', 'Beer', 'Rice'],
 ['Milk', 'Beer'],
 ['Milk', 'Pear']]

In [5]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

In [6]:
# Fit the encoder on the transactions so it learns the distinct item labels,
# then turn every transaction into a row of booleans (one column per item).
te = TransactionEncoder().fit(dataset)
encoded = te.transform(dataset)

In [7]:
# Inspect the encoded array: one row per transaction, one boolean column per distinct item.
encoded

array([[ True,  True,  True, False, False,  True],
       [ True,  True, False, False, False,  True],
       [ True,  True, False, False, False, False],
       [ True, False, False, False,  True, False],
       [False,  True,  True,  True, False,  True],
       [False,  True, False,  True, False,  True],
       [False,  True, False,  True, False, False],
       [False, False, False,  True,  True, False]])

In [8]:
# Item labels learned by the encoder; these become the column names of the DataFrame built from `encoded`.
te.columns_

['Apple', 'Beer', 'Chicken', 'Milk', 'Pear', 'Rice']

In [12]:
# Wrap the boolean matrix in a DataFrame, labelling each column with its item name.
df = pd.DataFrame(data=encoded, columns=te.columns_)

In [13]:
# Display the encoded transactions as a labelled table.
df

Unnamed: 0,Apple,Beer,Chicken,Milk,Pear,Rice
0,True,True,True,False,False,True
1,True,True,False,False,False,True
2,True,True,False,False,False,False
3,True,False,False,False,True,False
4,False,True,True,True,False,True
5,False,True,False,True,False,True
6,False,True,False,True,False,False
7,False,False,False,True,True,False


In [15]:
from mlxtend.frequent_patterns import apriori, association_rules

In [16]:
# Step 1: find the frequent itemsets, i.e. item combinations whose support
# reaches the minimum threshold; use_colnames=True reports item names
# instead of column indices.
MIN_SUPPORT = 0.25
freq_itemset = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)

In [17]:
# Show each frequent itemset together with its support.
freq_itemset

Unnamed: 0,support,itemsets
0,0.5,(Apple)
1,0.75,(Beer)
2,0.25,(Chicken)
3,0.5,(Milk)
4,0.25,(Pear)
5,0.5,(Rice)
6,0.375,"(Apple, Beer)"
7,0.25,"(Apple, Rice)"
8,0.25,"(Chicken, Beer)"
9,0.375,"(Milk, Beer)"


In [18]:
# Step 2: derive association rules from the frequent itemsets, using
# confidence as the selection metric with a minimum threshold of 0.5.
rules = association_rules(
    freq_itemset,
    metric='confidence',
    min_threshold=0.5,
)

In [19]:
# (number of rules, number of columns) in the table returned by association_rules.
rules.shape

(28, 10)

In [21]:
# Keep only the columns needed for the rest of the analysis.
keep_cols = ['antecedents', 'consequents', 'support', 'confidence']
rules = rules.loc[:, keep_cols]

In [22]:
# Display the rules, now reduced to the four selected columns.
rules

Unnamed: 0,antecedents,consequents,support,confidence
0,(Apple),(Beer),0.375,0.75
1,(Beer),(Apple),0.375,0.5
2,(Apple),(Rice),0.25,0.5
3,(Rice),(Apple),0.25,0.5
4,(Chicken),(Beer),0.25,1.0
5,(Milk),(Beer),0.375,0.75
6,(Beer),(Milk),0.375,0.5
7,(Rice),(Beer),0.5,1.0
8,(Beer),(Rice),0.5,0.666667
9,(Rice),(Chicken),0.25,0.5


##### Sort the rules by confidence and support

In [23]:
# Rank the rules: highest confidence first, ties broken by highest support.
# The sorted view is only displayed; `rules` itself is left unchanged.
rules.sort_values(['confidence', 'support'], ascending=[False, False])

Unnamed: 0,antecedents,consequents,support,confidence
7,(Rice),(Beer),0.5,1.0
4,(Chicken),(Beer),0.25,1.0
10,(Chicken),(Rice),0.25,1.0
13,"(Apple, Rice)",(Beer),0.25,1.0
18,"(Rice, Chicken)",(Beer),0.25,1.0
19,"(Beer, Chicken)",(Rice),0.25,1.0
21,(Chicken),"(Rice, Beer)",0.25,1.0
23,"(Rice, Milk)",(Beer),0.25,1.0
0,(Apple),(Beer),0.375,0.75
5,(Milk),(Beer),0.375,0.75


##### Filter the rules

In [24]:
# Keep only the rules whose confidence is strictly greater than 0.5.
nrules = rules.loc[rules['confidence'].gt(0.5)]

In [25]:
# (rows, columns) of the rule table that remains after the confidence filter.
nrules.shape

(13, 4)

In [26]:
# Look up what the filtered rules predict when the antecedent is exactly {Rice}.
nrules.loc[nrules['antecedents'] == {'Rice'}, 'consequents']

7    (Beer)
Name: consequents, dtype: object

In [27]:
# Look up what the filtered rules predict when the antecedent is exactly {Apple, Beer}.
apple_beer_mask = nrules['antecedents'] == {'Apple','Beer'}
nrules.loc[apple_beer_mask, 'consequents']

14    (Rice)
Name: consequents, dtype: object

##### Export the rules

In [28]:
# Write the filtered rules to rules.csv in the working directory, without the index column.
# NOTE(review): antecedents/consequents appear to hold set-like objects (they are
# compared against sets above), so they are presumably written as their string
# representation - confirm this is what downstream readers of the CSV expect.
nrules.to_csv(path_or_buf='rules.csv', index=False)