### Package
- Tutorial:
    - [apriori](http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/apriori/)
    - [fp growth](http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/fpgrowth/)
    - [association_rules](http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Apriori
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import OnehotTransactions
from mlxtend.frequent_patterns import association_rules

# FP-growth
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
#load preprocessed data from Homework0
dataset = pd.read_csv('201703_Taiwan_preprocessed.csv', index_col=[0])
dataset.head()

  mask |= (ar1 == a)


Unnamed: 0,device_id,lat,lon,date_time,PM2.5,PM10,PM1,Temperature,Humidity,Time,Date
0,74DA388FF5F6,25.059,121.499,2017-03-01 08:00:00,41.0,48.0,28.0,20.0,89.0,08:00:00,2017-03-01
1,74DA388FF5F6,25.059,121.499,2017-03-01 08:10:00,44.5,52.5,31.5,20.06,88.5,08:10:00,2017-03-01
2,74DA388FF5F6,25.059,121.499,2017-03-01 08:20:00,48.0,57.0,35.0,20.12,88.0,08:20:00,2017-03-01
3,74DA388FF5F6,25.059,121.499,2017-03-01 08:30:00,47.5,57.5,34.5,19.995,88.0,08:30:00,2017-03-01
4,74DA388FF5F6,25.059,121.499,2017-03-01 08:40:00,47.0,58.0,34.0,19.87,88.0,08:40:00,2017-03-01


### Task1 - transaction (PM2.5, humidity, temperature)
-  device_id=74DA3895C538

In [3]:
## Select 
task1_df = dataset[(dataset['device_id'] == '74DA3895C538')]
task1_df.reset_index(inplace=True, drop=True)
task1_df = task1_df[['PM2.5','Humidity','Temperature']]
task1_df.head()

Unnamed: 0,PM2.5,Humidity,Temperature
0,44.0,78.0,19.62
1,42.5,77.5,19.995
2,41.0,77.0,20.37
3,41.0,76.333333,20.58
4,41.0,75.666667,20.79


#### discretization - divided by 10

In [4]:
task1_d10 = task1_df.copy()
task1_d10[['PM2.5', 'Humidity', 'Temperature']] = task1_d10[['PM2.5', 'Humidity', 'Temperature']]/10
task1_d10[['PM2.5', 'Humidity', 'Temperature']] = task1_d10[['PM2.5', 'Humidity', 'Temperature']].astype(int)
task1_d10[['PM2.5', 'Humidity', 'Temperature']] = task1_d10[['PM2.5', 'Humidity', 'Temperature']].astype(str)

task1_d10['PM2.5'] = task1_d10['PM2.5']+'p'
task1_d10['Humidity'] = task1_d10['Humidity']+'h'
task1_d10['Temperature'] = task1_d10['Temperature']+'t'

In [5]:
task1_d10.head()

Unnamed: 0,PM2.5,Humidity,Temperature
0,4p,7h,1t
1,4p,7h,1t
2,4p,7h,2t
3,4p,7h,2t
4,4p,7h,2t


#### discretization - divided by 20

In [6]:
task1_d20 = task1_df.copy()
task1_d20[['PM2.5', 'Humidity', 'Temperature']] = task1_d20[['PM2.5', 'Humidity', 'Temperature']]/20
task1_d20[['PM2.5', 'Humidity', 'Temperature']] = task1_d20[['PM2.5', 'Humidity', 'Temperature']].astype(int)
task1_d20[['PM2.5', 'Humidity', 'Temperature']] = task1_d20[['PM2.5', 'Humidity', 'Temperature']].astype(str)

task1_d20['PM2.5'] = task1_d20['PM2.5']+'p'
task1_d20['Humidity'] = task1_d20['Humidity']+'h'
task1_d20['Temperature'] = task1_d20['Temperature']+'t'

In [7]:
task1_d20.head()

Unnamed: 0,PM2.5,Humidity,Temperature
0,2p,3h,0t
1,2p,3h,0t
2,2p,3h,1t
3,2p,3h,1t
4,2p,3h,1t


#### Turn dataframe to numpy array

In [8]:
x_d10 = task1_d10.iloc[:, 0:3].values # 
print(x_d10[0:5, :]) 

[['4p' '7h' '1t']
 ['4p' '7h' '1t']
 ['4p' '7h' '2t']
 ['4p' '7h' '2t']
 ['4p' '7h' '2t']]


In [9]:
x_d20 = task1_d20.iloc[:, 0:3].values # to dataset
print(x_d20[0:5, :]) 

[['2p' '3h' '0t']
 ['2p' '3h' '0t']
 ['2p' '3h' '1t']
 ['2p' '3h' '1t']
 ['2p' '3h' '1t']]


#### Apriori - divided by 10

In [10]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)
apr1_d10 = pd.DataFrame(te_ary , columns=te.columns_)

frequent_itemsets = apriori(apr1_d10, min_support=sup, use_colnames=True)
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()

Unnamed: 0,support,itemsets
5,0.749094,(2t)
12,0.379529,(7h)
14,0.337183,(8h)
41,0.300272,"(2t, 7h)"
7,0.289402,(4p)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
46,(2t),(7h),0.749094,0.379529,0.300272,0.400846,1.056168,0.015969,1.035579
47,(7h),(2t),0.379529,0.749094,0.300272,0.791169,1.056168,0.015969,1.20148
51,(8h),(2t),0.337183,0.749094,0.223279,0.662189,0.883987,-0.029303,0.742741
50,(2t),(8h),0.749094,0.337183,0.223279,0.298065,0.883987,-0.029303,0.944272
36,(2t),(4p),0.749094,0.289402,0.211277,0.282044,0.974573,-0.005512,0.989751


#### Apriori - divided by 20

In [11]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%


te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)
apr1_d20 = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(apr1_d20, min_support=sup, use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
3,0.752264,(1t)
6,0.517437,(3h)
5,0.492074,(2p)
8,0.442708,(4h)
25,0.406476,"(1t, 3h)"


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
28,(1t),(3h),0.752264,0.517437,0.406476,0.540337,1.044258,0.017227,1.04982
29,(3h),(1t),0.517437,0.752264,0.406476,0.785558,1.044258,0.017227,1.155256
26,(1t),(2p),0.752264,0.492074,0.380888,0.506321,1.028953,0.010718,1.028859
27,(2p),(1t),0.492074,0.752264,0.380888,0.774045,1.028953,0.010718,1.096394
33,(1t),(4h),0.752264,0.442708,0.312953,0.416014,0.939703,-0.020081,0.95429


#### FP-growth - divided by 10

In [12]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)

fpg_d10 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d10, min_support=sup , use_colnames=True)
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()

Unnamed: 0,support,itemsets
3,0.749094,(2t)
0,0.379529,(7h)
9,0.337183,(8h)
18,0.300272,"(2t, 7h)"
1,0.289402,(4p)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2t),(7h),0.749094,0.379529,0.300272,0.400846,1.056168,0.015969,1.035579
1,(7h),(2t),0.379529,0.749094,0.300272,0.791169,1.056168,0.015969,1.20148
160,(2t),(8h),0.749094,0.337183,0.223279,0.298065,0.883987,-0.029303,0.944272
161,(8h),(2t),0.337183,0.749094,0.223279,0.662189,0.883987,-0.029303,0.742741
4,(2t),(4p),0.749094,0.289402,0.211277,0.282044,0.974573,-0.005512,0.989751


#### FP-growth - divided by 20

In [13]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)

fpg_d20 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d20, min_support=sup , use_colnames=True)
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()

Unnamed: 0,support,itemsets
3,0.752264,(1t)
0,0.517437,(3h)
1,0.492074,(2p)
7,0.442708,(4h)
11,0.406476,"(1t, 3h)"


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(1t),(3h),0.752264,0.517437,0.406476,0.540337,1.044258,0.017227,1.04982
1,(3h),(1t),0.517437,0.752264,0.406476,0.785558,1.044258,0.017227,1.155256
4,(1t),(2p),0.752264,0.492074,0.380888,0.506321,1.028953,0.010718,1.028859
5,(2p),(1t),0.492074,0.752264,0.380888,0.774045,1.028953,0.010718,1.096394
93,(1t),(4h),0.752264,0.442708,0.312953,0.416014,0.939703,-0.020081,0.95429


### Task2 - transaction (PM2.5, Humidity , Time)
   - device_id = 74DA3895C538

In [14]:
## Select 
task2_df = dataset[(dataset['device_id'] == '74DA3895C538')]
task2_df.reset_index(inplace=True, drop=True)
task2_df = task2_df.loc[:,['PM2.5','Humidity','Time']]

In [15]:
task2_df.head()

Unnamed: 0,PM2.5,Humidity,Time
0,44.0,78.0,08:00:00
1,42.5,77.5,08:10:00
2,41.0,77.0,08:20:00
3,41.0,76.333333,08:30:00
4,41.0,75.666667,08:40:00


#### discretization - divided by 10

In [16]:
# discretization 
task2_d10 = task2_df.copy()
task2_d10[['PM2.5', 'Humidity']] = task2_d10[['PM2.5', 'Humidity']]/10
task2_d10[['PM2.5', 'Humidity']]= task2_d10[['PM2.5', 'Humidity']].astype(int)
task2_d10[['PM2.5', 'Humidity', 'Time']] = task2_d10[['PM2.5', 'Humidity', 'Time']].astype(str)

task2_d10['PM2.5'] = task2_d10['PM2.5']+'p'
task2_d10['Humidity'] = task2_d10['Humidity']+'h'
task2_d10['Time'] = task2_d10['Time']+'tm'


In [17]:
task2_d10.head()

Unnamed: 0,PM2.5,Humidity,Time
0,4p,7h,08:00:00tm
1,4p,7h,08:10:00tm
2,4p,7h,08:20:00tm
3,4p,7h,08:30:00tm
4,4p,7h,08:40:00tm


#### discretization - divided by 20

In [18]:
task2_d20 = task2_df.copy()
task2_d20[['PM2.5', 'Humidity']] = task2_d20[['PM2.5', 'Humidity']]/20
task2_d20[['PM2.5', 'Humidity']]= task2_d20[['PM2.5', 'Humidity']].astype(int)
task2_d20[['PM2.5', 'Humidity', 'Time']] = task2_d20[['PM2.5', 'Humidity', 'Time']].astype(str)

task2_d20['PM2.5'] = task2_d20['PM2.5']+'p'
task2_d20['Humidity'] = task2_d20['Humidity']+'h'
task2_d20['Time'] = task2_d20['Time']+'t'

In [19]:
task2_d20.head()

Unnamed: 0,PM2.5,Humidity,Time
0,2p,3h,08:00:00t
1,2p,3h,08:10:00t
2,2p,3h,08:20:00t
3,2p,3h,08:30:00t
4,2p,3h,08:40:00t


#### Turn dataframe to numpy array

In [20]:
x_d10 = task2_d10.iloc[:, 0:3].values # 
print(x_d10[0:5, :]) 

[['4p' '7h' '08:00:00tm']
 ['4p' '7h' '08:10:00tm']
 ['4p' '7h' '08:20:00tm']
 ['4p' '7h' '08:30:00tm']
 ['4p' '7h' '08:40:00tm']]


In [21]:
x_d20 = task2_d20.iloc[:, 0:3].values # 
print(x_d20[0:5, :]) 

[['2p' '3h' '08:00:00t']
 ['2p' '3h' '08:10:00t']
 ['2p' '3h' '08:20:00t']
 ['2p' '3h' '08:30:00t']
 ['2p' '3h' '08:40:00t']]


#### Apriori - divided by 10

In [22]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%


te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)
apr1_d10 = pd.DataFrame(te_ary , columns=te.columns_)


frequent_itemsets = apriori(apr1_d10, min_support=sup, use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
10,0.379529,(7h)
12,0.337183,(8h)
5,0.289402,(4p)
7,0.202672,(5p)
4,0.149683,(3p)


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
23,(7h),(4p),0.379529,0.289402,0.105525,0.278043,0.960749,-0.004311,0.984266
22,(4p),(7h),0.289402,0.379529,0.105525,0.364632,0.960749,-0.004311,0.976554
24,(4p),(8h),0.289402,0.337183,0.091486,0.316119,0.937529,-0.006096,0.969199
25,(8h),(4p),0.337183,0.289402,0.091486,0.271323,0.937529,-0.006096,0.975189
33,(7h),(5p),0.379529,0.202672,0.07654,0.201671,0.995059,-0.00038,0.998746


#### Apriori - divided by 20

In [23]:
# divided by 20
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)
apr1_d20 = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(apr1_d20, min_support=sup, use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
4,0.517437,(3h)
3,0.492074,(2p)
6,0.442708,(4h)
15,0.261322,"(2p, 3h)"
1,0.213542,(1p)


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
13,(3h),(2p),0.517437,0.492074,0.261322,0.505033,1.026335,0.006705,1.026181
12,(2p),(3h),0.492074,0.517437,0.261322,0.531063,1.026335,0.006705,1.029058
14,(4h),(2p),0.442708,0.492074,0.209239,0.472634,0.960494,-0.008606,0.963138
15,(2p),(4h),0.492074,0.442708,0.209239,0.425219,0.960494,-0.008606,0.969571
6,(3h),(1p),0.517437,0.213542,0.118433,0.228884,1.071847,0.007939,1.019896


#### FP-growth - divided by 10


In [24]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)

fpg_d10 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d10, min_support=sup , use_colnames=True)

print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()


frequent_itemsets:


Unnamed: 0,support,itemsets
0,0.379529,(7h)
7,0.337183,(8h)
1,0.289402,(4p)
4,0.202672,(5p)
3,0.149683,(3p)


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(4p),(7h),0.289402,0.379529,0.105525,0.364632,0.960749,-0.004311,0.976554
1,(7h),(4p),0.379529,0.289402,0.105525,0.278043,0.960749,-0.004311,0.984266
2,(4p),(8h),0.289402,0.337183,0.091486,0.316119,0.937529,-0.006096,0.969199
3,(8h),(4p),0.337183,0.289402,0.091486,0.271323,0.937529,-0.006096,0.975189
14,(5p),(7h),0.202672,0.379529,0.07654,0.377654,0.995059,-0.00038,0.996987


#### FP-growth - divided by 20

In [25]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)

fpg_d20 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d20, min_support=sup , use_colnames=True)

print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()


frequent_itemsets:


Unnamed: 0,support,itemsets
0,0.517437,(3h)
1,0.492074,(2p)
5,0.442708,(4h)
9,0.261322,"(2p, 3h)"
2,0.213542,(1p)


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2p),(3h),0.492074,0.517437,0.261322,0.531063,1.026335,0.006705,1.029058
1,(3h),(2p),0.517437,0.492074,0.261322,0.505033,1.026335,0.006705,1.026181
15,(2p),(4h),0.492074,0.442708,0.209239,0.425219,0.960494,-0.008606,0.969571
14,(4h),(2p),0.442708,0.492074,0.209239,0.472634,0.960494,-0.008606,0.963138
2,(3h),(1p),0.517437,0.213542,0.118433,0.228884,1.071847,0.007939,1.019896


### Task3  - transaction (PM2.5, PM10, PM1)
   - device_id = 74DA3895C538 

In [26]:
### Select 
task3_df = dataset[(dataset['device_id'] == '74DA3895C538')]
task3_df.reset_index(inplace=True, drop=True)
task3_df = task3_df.loc[:,['PM2.5','PM10','PM1']]

In [27]:
task3_df.head()

Unnamed: 0,PM2.5,PM10,PM1
0,44.0,58.0,30.0
1,42.5,56.5,29.0
2,41.0,55.0,28.0
3,41.0,54.666667,28.0
4,41.0,54.333333,28.0


#### discretization - divided by 10

In [28]:

task3_d10 = task3_df.copy()
task3_d10[['PM2.5', 'PM10', 'PM1']] = task3_d10[['PM2.5', 'PM10', 'PM1']]/10
task3_d10[['PM2.5', 'PM10', 'PM1']] = task3_d10[['PM2.5', 'PM10', 'PM1']].astype(int)
task3_d10[['PM2.5', 'PM10', 'PM1']] = task3_d10[['PM2.5', 'PM10', 'PM1']].astype(str)

task3_d10['PM2.5'] = task3_d10['PM2.5']+'P2'
task3_d10['PM10'] = task3_d10['PM10']+'P10'
task3_d10['PM1'] = task3_d10['PM1']+'P1'

In [29]:
task3_d10.head()

Unnamed: 0,PM2.5,PM10,PM1
0,4P2,5P10,3P1
1,4P2,5P10,2P1
2,4P2,5P10,2P1
3,4P2,5P10,2P1
4,4P2,5P10,2P1


In [30]:
task3_d20 = task3_df.copy()
task3_d20[['PM2.5', 'PM10', 'PM1']] = task3_d20[['PM2.5', 'PM10', 'PM1']]/20
task3_d20[['PM2.5', 'PM10', 'PM1']] = task3_d20[['PM2.5', 'PM10', 'PM1']].astype(int)
task3_d20[['PM2.5', 'PM10', 'PM1']] = task3_d20[['PM2.5', 'PM10', 'PM1']].astype(str)

task3_d20['PM2.5'] = task3_d20['PM2.5']+'P2'
task3_d20['PM10'] = task3_d20['PM10']+'P10'
task3_d20['PM1'] = task3_d20['PM1']+'P1'

In [31]:
task3_d20.head()

Unnamed: 0,PM2.5,PM10,PM1
0,2P2,2P10,1P1
1,2P2,2P10,1P1
2,2P2,2P10,1P1
3,2P2,2P10,1P1
4,2P2,2P10,1P1


#### Turn dataframe to numpy array

In [32]:
x_d10 = task3_d10.iloc[:, 0:3].values # 
print(x_d10[0:5, :]) 

[['4P2' '5P10' '3P1']
 ['4P2' '5P10' '2P1']
 ['4P2' '5P10' '2P1']
 ['4P2' '5P10' '2P1']
 ['4P2' '5P10' '2P1']]


In [33]:
x_d20 = task3_d20.iloc[:, 0:3].values # 
print(x_d20[0:5, :]) 

[['2P2' '2P10' '1P1']
 ['2P2' '2P10' '1P1']
 ['2P2' '2P10' '1P1']
 ['2P2' '2P10' '1P1']
 ['2P2' '2P10' '1P1']]


#### Apriori - divided by 10

In [34]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%


te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)
apr1_d10 = pd.DataFrame(te_ary , columns=te.columns_)

frequent_itemsets = apriori(apr1_d10, min_support=sup, use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
13,0.359149,(3P1)
18,0.289402,(4P2)
10,0.271739,(2P1)
23,0.210371,(6P10)
61,0.209692,"(6P10, 3P1)"


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
59,(3P1),(6P10),0.359149,0.210371,0.209692,0.583859,2.775372,0.134137,1.897502
58,(6P10),(3P1),0.210371,0.359149,0.209692,0.996771,2.775372,0.134137,198.450332
53,(4P2),(3P1),0.289402,0.359149,0.187953,0.649452,1.808311,0.084015,1.828143
52,(3P1),(4P2),0.359149,0.289402,0.187953,0.523329,1.808311,0.084015,1.490752
56,(5P2),(3P1),0.202672,0.359149,0.17029,0.840223,2.339487,0.0975,4.010923


#### Apriori - divided by 20

In [35]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)
apr1_d20 = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(apr1_d20, min_support=sup, use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
3,0.630888,(1P1)
8,0.492074,(2P2)
26,0.459692,"(1P1, 2P2)"
10,0.393795,(3P10)
37,0.352355,"(3P10, 2P2)"


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
19,(2P2),(1P1),0.492074,0.630888,0.459692,0.934192,1.480759,0.149248,5.608959
18,(1P1),(2P2),0.630888,0.492074,0.459692,0.728643,1.480759,0.149248,1.8718
41,(2P2),(3P10),0.492074,0.393795,0.352355,0.716061,1.818358,0.158579,2.13498
40,(3P10),(2P2),0.393795,0.492074,0.352355,0.894767,1.818358,0.158579,4.826682
21,(3P10),(1P1),0.393795,0.630888,0.320426,0.813686,1.289748,0.071985,1.981131


#### FP-growth - divided by 10

In [36]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d10).transform(x_d10)

fpg_d10 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d10, min_support=sup , use_colnames=True)
print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_10 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_10.head()

frequent_itemsets:


Unnamed: 0,support,itemsets
0,0.359149,(3P1)
1,0.289402,(4P2)
3,0.271739,(2P1)
4,0.210371,(6P10)
39,0.209692,"(6P10, 3P1)"


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
23,(3P1),(6P10),0.359149,0.210371,0.209692,0.583859,2.775372,0.134137,1.897502
22,(6P10),(3P1),0.210371,0.359149,0.209692,0.996771,2.775372,0.134137,198.450332
0,(3P1),(4P2),0.359149,0.289402,0.187953,0.523329,1.808311,0.084015,1.490752
1,(4P2),(3P1),0.289402,0.359149,0.187953,0.649452,1.808311,0.084015,1.828143
52,(5P2),(3P1),0.202672,0.359149,0.17029,0.840223,2.339487,0.0975,4.010923


#### FP-growth - divided by 20

In [37]:
sup = 0.01 # support: 1%
conf = 0.01 # confidence: 1%

te = TransactionEncoder()
te_ary = te.fit(x_d20).transform(x_d20)

fpg_d20 = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = fpgrowth(fpg_d20, min_support=sup , use_colnames=True)

print('frequent_itemsets:')
frequent_itemsets.sort_values(by ='support' , ascending=False).head()

print('association_rules:')
rules_20 = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf).sort_values(['support'],ascending=False)
rules_20.head()


frequent_itemsets:


Unnamed: 0,support,itemsets
0,0.630888,(1P1)
1,0.492074,(2P2)
17,0.459692,"(1P1, 2P2)"
3,0.393795,(3P10)
21,0.352355,"(3P10, 2P2)"


association_rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(1P1),(2P2),0.630888,0.492074,0.459692,0.728643,1.480759,0.149248,1.8718
1,(2P2),(1P1),0.492074,0.630888,0.459692,0.934192,1.480759,0.149248,5.608959
12,(3P10),(2P2),0.393795,0.492074,0.352355,0.894767,1.818358,0.158579,4.826682
13,(2P2),(3P10),0.492074,0.393795,0.352355,0.716061,1.818358,0.158579,2.13498
15,(3P10),(1P1),0.393795,0.630888,0.320426,0.813686,1.289748,0.071985,1.981131
