### Package
- Tutorial:
    - [apriori](http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/apriori/)
    - [fp growth](http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/fpgrowth/)
    - [association_rules](http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/)

In [184]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Apriori
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import OnehotTransactions
from mlxtend.frequent_patterns import association_rules

# FP-growth
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [230]:
#load preprocessed data from Homework0
dataset = pd.read_csv('201703_Taiwan_preprocessed.csv', index_col=[0])
dataset.head()

Unnamed: 0,device_id,lat,lon,date_time,PM2.5,PM10,PM1,Temperature,Humidity,Date,Time
1248,74DA388FF5F6,25.059,121.499,2017-03-10 00:00:00,40.666667,45.5,30.833333,19.703333,91.0,2017-03-10,00:00:00
1249,74DA388FF5F6,25.059,121.499,2017-03-10 00:10:00,40.333333,45.0,30.666667,19.786667,91.0,2017-03-10,00:10:00
1250,74DA388FF5F6,25.059,121.499,2017-03-10 00:20:00,40.0,44.5,30.5,19.87,91.0,2017-03-10,00:20:00
1251,74DA388FF5F6,25.059,121.499,2017-03-10 00:30:00,39.666667,44.0,30.333333,19.953333,91.0,2017-03-10,00:30:00
1252,74DA388FF5F6,25.059,121.499,2017-03-10 00:40:00,39.333333,43.5,30.166667,20.036667,91.0,2017-03-10,00:40:00


### Task1 - transaction (pm2.5, humidity, temperature)
-  device_id=74DA3895C538

In [186]:
## Select 
task1_df = dataset[(dataset['device_id'] == '74DA3895C538')]
task1_df.reset_index(inplace=True, drop=True)
task1_df = task1_df[['PM2.5','Humidity','Temperature']]
task1_df.head()

Unnamed: 0,PM2.5,Humidity,Temperature
0,89.0,85.0,22.12
1,88.0,85.0,22.12
2,91.0,85.0,22.12
3,89.0,86.0,22.0
4,90.0,86.0,21.87


#### discretization - divided by 10

In [187]:
task1_d10 = task1_df.copy()
task1_d10[['PM2.5', 'Humidity', 'Temperature']] = task1_d10[['PM2.5', 'Humidity', 'Temperature']]/10
task1_d10[['PM2.5', 'Humidity', 'Temperature']] = task1_d10[['PM2.5', 'Humidity', 'Temperature']].astype(int)
task1_d10[['PM2.5', 'Humidity', 'Temperature']] = task1_d10[['PM2.5', 'Humidity', 'Temperature']].astype(str)

task1_d10['PM2.5'] = task1_d10['PM2.5']+'p'
task1_d10['Humidity'] = task1_d10['Humidity']+'h'
task1_d10['Temperature'] = task1_d10['Temperature']+'t'

In [188]:
task1_d10.head()

Unnamed: 0,PM2.5,Humidity,Temperature
0,8p,8h,2t
1,8p,8h,2t
2,9p,8h,2t
3,8p,8h,2t
4,9p,8h,2t


#### discretization - divided by 20

In [189]:
task1_d20 = task1_df.copy()
task1_d20[['PM2.5', 'Humidity', 'Temperature']] = task1_d20[['PM2.5', 'Humidity', 'Temperature']]/20
task1_d20[['PM2.5', 'Humidity', 'Temperature']] = task1_d20[['PM2.5', 'Humidity', 'Temperature']].astype(int)
task1_d20[['PM2.5', 'Humidity', 'Temperature']] = task1_d20[['PM2.5', 'Humidity', 'Temperature']].astype(str)

task1_d20['PM2.5'] = task1_d20['PM2.5']+'p'
task1_d20['Humidity'] = task1_d20['Humidity']+'h'
task1_d20['Temperature'] = task1_d20['Temperature']+'t'

In [190]:
task1_d20.head()

Unnamed: 0,PM2.5,Humidity,Temperature
0,4p,4h,1t
1,4p,4h,1t
2,4p,4h,1t
3,4p,4h,1t
4,4p,4h,1t


#### Turn dataframe to numpy array

In [191]:
x_d10 = task1_d10.iloc[:, 0:3].values # 
print(x_d10[0:5, :]) 

[['8p' '8h' '2t']
 ['8p' '8h' '2t']
 ['9p' '8h' '2t']
 ['8p' '8h' '2t']
 ['9p' '8h' '2t']]


In [192]:
x_d20 = task1_d20.iloc[:, 0:3].values # to dataset
print(x_d20[0:5, :]) 

[['4p' '4h' '1t']
 ['4p' '4h' '1t']
 ['4p' '4h' '1t']
 ['4p' '4h' '1t']
 ['4p' '4h' '1t']]


#### Apriori - divided by 10

In [193]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)
apr1_d10 = pd.DataFrame(te_ary , columns=te.columns_)

frequent_itemsets = apriori(apr1_d10, min_support=sup, use_colnames=True)
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()

Unnamed: 0,support,itemsets
3,1.0,(2t)
29,0.604167,"(2t, 8h)"
10,0.604167,(8h)
12,0.270833,(9h)
31,0.270833,"(2t, 9h)"


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
30,(2t),(8h),1.0,0.604167,0.604167,0.604167,1.0,0.0,1.0
31,(8h),(2t),0.604167,1.0,0.604167,1.0,1.0,0.0,inf
35,(9h),(2t),0.270833,1.0,0.270833,1.0,1.0,0.0,inf
34,(2t),(9h),1.0,0.270833,0.270833,0.270833,1.0,0.0,1.0
37,(9p),(2t),0.215278,1.0,0.215278,1.0,1.0,0.0,inf


#### Apriori - divided by 20

In [194]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%


te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)
apr1_d20 = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(apr1_d20, min_support=sup, use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
1,1.0,(1t)
13,0.875,"(1t, 4h)"
5,0.875,(4h)
6,0.291667,(4p)
14,0.291667,"(1t, 4p)"


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
10,(1t),(4h),1.0,0.875,0.875,0.875,1.0,0.0,1.0
11,(4h),(1t),0.875,1.0,0.875,1.0,1.0,0.0,inf
12,(1t),(4p),1.0,0.291667,0.291667,0.291667,1.0,0.0,1.0
13,(4p),(1t),0.291667,1.0,0.291667,1.0,1.0,0.0,inf
4,(1t),(2p),1.0,0.270833,0.270833,0.270833,1.0,0.0,1.0


#### FP-growth - divided by 10

In [195]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)

fpg_d10 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d10, min_support=sup , use_colnames=True)
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()

Unnamed: 0,support,itemsets
0,1.0,(2t)
1,0.604167,(8h)
14,0.604167,"(2t, 8h)"
5,0.270833,(9h)
30,0.270833,"(2t, 9h)"


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2t),(8h),1.0,0.604167,0.604167,0.604167,1.0,0.0,1.0
1,(8h),(2t),0.604167,1.0,0.604167,1.0,1.0,0.0,inf
57,(9h),(2t),0.270833,1.0,0.270833,1.0,1.0,0.0,inf
56,(2t),(9h),1.0,0.270833,0.270833,0.270833,1.0,0.0,1.0
21,(9p),(2t),0.215278,1.0,0.215278,1.0,1.0,0.0,inf


#### FP-growth - divided by 20

In [196]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)

fpg_d20 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d20, min_support=sup , use_colnames=True)
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()

Unnamed: 0,support,itemsets
0,1.0,(1t)
1,0.875,(4h)
8,0.875,"(1t, 4h)"
2,0.291667,(4p)
9,0.291667,"(1t, 4p)"


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(1t),(4h),1.0,0.875,0.875,0.875,1.0,0.0,1.0
1,(4h),(1t),0.875,1.0,0.875,1.0,1.0,0.0,inf
2,(1t),(4p),1.0,0.291667,0.291667,0.291667,1.0,0.0,1.0
3,(4p),(1t),0.291667,1.0,0.291667,1.0,1.0,0.0,inf
67,(2p),(1t),0.270833,1.0,0.270833,1.0,1.0,0.0,inf


### Task2 - transaction (PM2.5, Humidity , Time)
   - device_id = 74DA3895C538

In [197]:
## Select 
task2_df = dataset[(dataset['device_id'] == '74DA3895C538')]
task2_df.reset_index(inplace=True, drop=True)
task2_df = task2_df.loc[:,['PM2.5','Humidity','Time']]

In [198]:
task2_df.head()

Unnamed: 0,PM2.5,Humidity,Time
0,89.0,85.0,00:00:00
1,88.0,85.0,00:10:00
2,91.0,85.0,00:20:00
3,89.0,86.0,00:30:00
4,90.0,86.0,00:40:00


#### discretization - divided by 10

In [199]:
# discretization 
task2_d10 = task2_df.copy()
task2_d10[['PM2.5', 'Humidity']] = task2_d10[['PM2.5', 'Humidity']]/10
task2_d10[['PM2.5', 'Humidity']]= task2_d10[['PM2.5', 'Humidity']].astype(int)
task2_d10[['PM2.5', 'Humidity', 'Time']] = task2_d10[['PM2.5', 'Humidity', 'Time']].astype(str)

task2_d10['PM2.5'] = task2_d10['PM2.5']+'p'
task2_d10['Humidity'] = task2_d10['Humidity']+'h'
task2_d10['Time'] = task2_d10['Time']+'tm'


In [200]:
task2_d10.head()

Unnamed: 0,PM2.5,Humidity,Time
0,8p,8h,00:00:00tm
1,8p,8h,00:10:00tm
2,9p,8h,00:20:00tm
3,8p,8h,00:30:00tm
4,9p,8h,00:40:00tm


#### discretization - divided by 20

In [201]:
task2_d20 = task2_df.copy()
task2_d20[['PM2.5', 'Humidity']] = task2_d20[['PM2.5', 'Humidity']]/20
task2_d20[['PM2.5', 'Humidity']]= task2_d20[['PM2.5', 'Humidity']].astype(int)
task2_d20[['PM2.5', 'Humidity', 'Time']] = task2_d20[['PM2.5', 'Humidity', 'Time']].astype(str)

task2_d20['PM2.5'] = task2_d20['PM2.5']+'p'
task2_d20['Humidity'] = task2_d20['Humidity']+'h'
task2_d20['Time'] = task2_d20['Time']+'t'

In [202]:
task2_d20.head()

Unnamed: 0,PM2.5,Humidity,Time
0,4p,4h,00:00:00t
1,4p,4h,00:10:00t
2,4p,4h,00:20:00t
3,4p,4h,00:30:00t
4,4p,4h,00:40:00t


#### Turn dataframe to numpy array

In [203]:
x_d10 = task2_d10.iloc[:, 0:3].values # 
print(x_d10[0:5, :]) 

[['8p' '8h' '00:00:00tm']
 ['8p' '8h' '00:10:00tm']
 ['9p' '8h' '00:20:00tm']
 ['8p' '8h' '00:30:00tm']
 ['9p' '8h' '00:40:00tm']]


In [204]:
x_d20 = task2_d20.iloc[:, 0:3].values # 
print(x_d20[0:5, :]) 

[['4p' '4h' '00:00:00t']
 ['4p' '4h' '00:10:00t']
 ['4p' '4h' '00:20:00t']
 ['4p' '4h' '00:30:00t']
 ['4p' '4h' '00:40:00t']]


#### Apriori - divided by 10

In [229]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%


te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)
apr1_d10 = pd.DataFrame(te_ary , columns=te.columns_)


frequent_itemsets = apriori(apr1_d10, min_support=sup, use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
20,0.270833,(6P1)
11,0.229167,(3P1)
30,0.215278,(9P2)
76,0.208333,"(6P1, 9P2)"
23,0.208333,(7P1)


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
91,(9P2),(6P1),0.215278,0.270833,0.208333,0.967742,3.573201,0.150029,22.604167
90,(6P1),(9P2),0.270833,0.215278,0.208333,0.769231,3.573201,0.150029,3.400463
58,(5P2),(3P1),0.180556,0.229167,0.166667,0.923077,4.027972,0.125289,10.020833
59,(3P1),(5P2),0.229167,0.180556,0.166667,0.727273,4.027972,0.125289,3.00463
12,(6P1),(11P10),0.270833,0.145833,0.145833,0.538462,3.692308,0.106337,1.850694


#### Apriori - divided by 20

In [206]:
# divided by 20
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)
apr1_d20 = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(apr1_d20, min_support=sup, use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
4,0.875,(4h)
5,0.291667,(4p)
1,0.270833,(2p)
14,0.263889,"(4h, 4p)"
6,0.243056,(5p)


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
15,(4p),(4h),0.291667,0.875,0.263889,0.904762,1.034014,0.008681,1.3125
14,(4h),(4p),0.875,0.291667,0.263889,0.301587,1.034014,0.008681,1.014205
17,(5p),(4h),0.243056,0.875,0.222222,0.914286,1.044898,0.009549,1.458333
16,(4h),(5p),0.875,0.243056,0.222222,0.253968,1.044898,0.009549,1.014628
4,(4h),(2p),0.875,0.270833,0.222222,0.253968,0.937729,-0.014757,0.977394


#### FP-growth - divided by 10


In [226]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)

fpg_d10 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d10, min_support=sup , use_colnames=True)

print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()


frequent_itemsets:


Unnamed: 0,support,itemsets
0,0.270833,(6P1)
18,0.229167,(3P1)
3,0.215278,(9P2)
6,0.208333,(7P1)
37,0.208333,"(6P1, 9P2)"


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
21,(9P2),(6P1),0.215278,0.270833,0.208333,0.967742,3.573201,0.150029,22.604167
20,(6P1),(9P2),0.270833,0.215278,0.208333,0.769231,3.573201,0.150029,3.400463
147,(5P2),(3P1),0.180556,0.229167,0.166667,0.923077,4.027972,0.125289,10.020833
146,(3P1),(5P2),0.229167,0.180556,0.166667,0.727273,4.027972,0.125289,3.00463
23,(11P10),(6P1),0.145833,0.270833,0.145833,1.0,3.692308,0.106337,inf


#### FP-growth - divided by 20

In [227]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)

fpg_d20 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d20, min_support=sup , use_colnames=True)

print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()


frequent_itemsets:


Unnamed: 0,support,itemsets
0,0.479167,(3P1)
10,0.305556,(1P1)
1,0.291667,(4P2)
21,0.270833,"(6P10, 3P1)"
12,0.270833,(2P2)


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(4P2),(3P1),0.291667,0.479167,0.270833,0.928571,1.937888,0.131076,7.291667
13,(3P1),(6P10),0.479167,0.270833,0.270833,0.565217,2.086957,0.141059,1.677083
1,(3P1),(4P2),0.479167,0.291667,0.270833,0.565217,1.937888,0.131076,1.629167
12,(6P10),(3P1),0.270833,0.479167,0.270833,1.0,2.086957,0.141059,inf
95,(1P1),(2P2),0.305556,0.270833,0.256944,0.840909,3.104895,0.17419,4.583333


### Task3  - transaction (PM2.5, PM10, PM1)
   - device_id = 74DA3895C538 

In [209]:
### Select 
task3_df = dataset[(dataset['device_id'] == '74DA3895C538')]
task3_df.reset_index(inplace=True, drop=True)
task3_df = task3_df.loc[:,['PM2.5','PM10','PM1']]

In [210]:
task3_df.head()

Unnamed: 0,PM2.5,PM10,PM1
0,89.0,106.0,63.0
1,88.0,106.0,62.0
2,91.0,112.0,64.0
3,89.0,109.0,63.0
4,90.0,108.0,64.0


#### discretization - divided by 10

In [211]:

task3_d10 = task3_df.copy()
task3_d10[['PM2.5', 'PM10', 'PM1']] = task3_d10[['PM2.5', 'PM10', 'PM1']]/10
task3_d10[['PM2.5', 'PM10', 'PM1']] = task3_d10[['PM2.5', 'PM10', 'PM1']].astype(int)
task3_d10[['PM2.5', 'PM10', 'PM1']] = task3_d10[['PM2.5', 'PM10', 'PM1']].astype(str)

task3_d10['PM2.5'] = task3_d10['PM2.5']+'P2'
task3_d10['PM10'] = task3_d10['PM10']+'P10'
task3_d10['PM1'] = task3_d10['PM1']+'P1'

In [212]:
task3_d10.head()

Unnamed: 0,PM2.5,PM10,PM1
0,8P2,10P10,6P1
1,8P2,10P10,6P1
2,9P2,11P10,6P1
3,8P2,10P10,6P1
4,9P2,10P10,6P1


In [213]:
task3_d20 = task3_df.copy()
task3_d20[['PM2.5', 'PM10', 'PM1']] = task3_d20[['PM2.5', 'PM10', 'PM1']]/20
task3_d20[['PM2.5', 'PM10', 'PM1']] = task3_d20[['PM2.5', 'PM10', 'PM1']].astype(int)
task3_d20[['PM2.5', 'PM10', 'PM1']] = task3_d20[['PM2.5', 'PM10', 'PM1']].astype(str)

task3_d20['PM2.5'] = task3_d20['PM2.5']+'P2'
task3_d20['PM10'] = task3_d20['PM10']+'P10'
task3_d20['PM1'] = task3_d20['PM1']+'P1'

In [214]:
task3_d20.head()

Unnamed: 0,PM2.5,PM10,PM1
0,4P2,5P10,3P1
1,4P2,5P10,3P1
2,4P2,5P10,3P1
3,4P2,5P10,3P1
4,4P2,5P10,3P1


#### Turn dataframe to numpy array

In [215]:
x_d10 = task3_d10.iloc[:, 0:3].values # 
print(x_d10[0:5, :]) 

[['8P2' '10P10' '6P1']
 ['8P2' '10P10' '6P1']
 ['9P2' '11P10' '6P1']
 ['8P2' '10P10' '6P1']
 ['9P2' '10P10' '6P1']]


In [216]:
x_d20 = task3_d20.iloc[:, 0:3].values # 
print(x_d20[0:5, :]) 

[['4P2' '5P10' '3P1']
 ['4P2' '5P10' '3P1']
 ['4P2' '5P10' '3P1']
 ['4P2' '5P10' '3P1']
 ['4P2' '5P10' '3P1']]


#### Apriori - divided by 10

In [225]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%


te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)
apr1_d10 = pd.DataFrame(te_ary , columns=te.columns_)

frequent_itemsets = apriori(apr1_d10, min_support=sup, use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
20,0.270833,(6P1)
11,0.229167,(3P1)
30,0.215278,(9P2)
76,0.208333,"(6P1, 9P2)"
23,0.208333,(7P1)


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
91,(9P2),(6P1),0.215278,0.270833,0.208333,0.967742,3.573201,0.150029,22.604167
90,(6P1),(9P2),0.270833,0.215278,0.208333,0.769231,3.573201,0.150029,3.400463
58,(5P2),(3P1),0.180556,0.229167,0.166667,0.923077,4.027972,0.125289,10.020833
59,(3P1),(5P2),0.229167,0.180556,0.166667,0.727273,4.027972,0.125289,3.00463
12,(6P1),(11P10),0.270833,0.145833,0.145833,0.538462,3.692308,0.106337,1.850694


#### Apriori - divided by 20

In [218]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)
apr1_d20 = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(apr1_d20, min_support=sup, use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
7,0.479167,(3P1)
1,0.305556,(1P1)
12,0.291667,(4P2)
36,0.270833,"(6P10, 3P1)"
6,0.270833,(2P2)


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
32,(4P2),(3P1),0.291667,0.479167,0.270833,0.928571,1.937888,0.131076,7.291667
33,(3P1),(4P2),0.479167,0.291667,0.270833,0.565217,1.937888,0.131076,1.629167
38,(6P10),(3P1),0.270833,0.479167,0.270833,1.0,2.086957,0.141059,inf
39,(3P1),(6P10),0.479167,0.270833,0.270833,0.565217,2.086957,0.141059,1.677083
8,(2P2),(1P1),0.270833,0.305556,0.256944,0.948718,3.104895,0.17419,13.541667


#### FP-growth - divided by 10

In [222]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)

fpg_d10 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d10, min_support=sup , use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
0,0.270833,(6P1)
18,0.229167,(3P1)
3,0.215278,(9P2)
6,0.208333,(7P1)
37,0.208333,"(6P1, 9P2)"


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
21,(9P2),(6P1),0.215278,0.270833,0.208333,0.967742,3.573201,0.150029,22.604167
20,(6P1),(9P2),0.270833,0.215278,0.208333,0.769231,3.573201,0.150029,3.400463
147,(5P2),(3P1),0.180556,0.229167,0.166667,0.923077,4.027972,0.125289,10.020833
146,(3P1),(5P2),0.229167,0.180556,0.166667,0.727273,4.027972,0.125289,3.00463
23,(11P10),(6P1),0.145833,0.270833,0.145833,1.0,3.692308,0.106337,inf


#### FP-growth - divided by 20

In [223]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)

fpg_d20 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d20, min_support=sup , use_colnames=True)

print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()


frequent_itemsets:


Unnamed: 0,support,itemsets
0,0.479167,(3P1)
10,0.305556,(1P1)
1,0.291667,(4P2)
21,0.270833,"(6P10, 3P1)"
12,0.270833,(2P2)


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(4P2),(3P1),0.291667,0.479167,0.270833,0.928571,1.937888,0.131076,7.291667
13,(3P1),(6P10),0.479167,0.270833,0.270833,0.565217,2.086957,0.141059,1.677083
1,(3P1),(4P2),0.479167,0.291667,0.270833,0.565217,1.937888,0.131076,1.629167
12,(6P10),(3P1),0.270833,0.479167,0.270833,1.0,2.086957,0.141059,inf
95,(1P1),(2P2),0.305556,0.270833,0.256944,0.840909,3.104895,0.17419,4.583333
