In [None]:
from tpot import TPOTClassifier
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [None]:
# importing data
# Load the training set from train.csv (path is relative to the working directory).
train_df = pd.read_csv("train.csv")

In [None]:
# Preview the first five rows to check the raw column layout.
train_df.head()

Unnamed: 0,id,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P
0,1,b,18.42,10.415,y,p,aa,v,0.125,t,f,0,f,g,120.0,375,1
1,2,a,21.75,11.75,u,g,c,v,0.25,f,f,0,t,g,180.0,0,1
2,3,b,30.17,1.085,y,p,c,v,0.04,f,f,0,f,g,170.0,179,1
3,4,b,22.67,2.54,y,p,c,h,2.585,t,f,0,f,g,0.0,0,0
4,5,a,36.0,1.0,u,g,c,v,2.0,t,t,11,f,g,0.0,456,0


In [None]:
# Drop the 'id' column so it is not used as a feature.
# errors='ignore' keeps the cell idempotent: re-running it after 'id' has
# already been removed no longer raises a KeyError.
train_df = train_df.drop('id', axis=1, errors='ignore')

In [None]:
# Column dtypes and non-null counts; shows which columns contain missing values.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 552 entries, 0 to 551
Data columns (total 16 columns):
A    544 non-null object
B    543 non-null float64
C    552 non-null float64
D    547 non-null object
E    547 non-null object
F    545 non-null object
G    545 non-null object
H    552 non-null float64
I    552 non-null object
J    552 non-null object
K    552 non-null int64
L    552 non-null object
M    552 non-null object
N    541 non-null float64
O    552 non-null int64
P    552 non-null int64
dtypes: float64(4), int64(3), object(9)
memory usage: 69.1+ KB


In [None]:
# Names of the categorical (object-dtype) feature columns.
cat_cols = "A D E F G I J L M".split()

In [None]:
# encoding categorical column values
# Each categorical column is replaced by its integer category code; missing
# values become -1 (the code pandas assigns to NaN).
# The numeric-dtype guard makes the cell idempotent: re-running it on columns
# that are already encoded would otherwise renumber the codes (e.g. -1 -> 0).

for header in cat_cols:
    if not pd.api.types.is_numeric_dtype(train_df[header]):
        train_df[header] = train_df[header].astype("category").cat.codes
   

In [None]:
# Scaler objects used to normalise the continuous feature values.
# NOTE(review): scaler_test is presumably meant for a test set; it is not
# used in the cells shown in this notebook.

min_max_scaler, scaler_test = MinMaxScaler(), MinMaxScaler()

In [None]:
ls = list(cat_cols)  # independent copy of the categorical column names (cat_cols itself stays untouched)

In [None]:
# Add the target column so it is dropped together with the categorical ones.
# Guarded so that re-running this cell does not append 'P' a second time.
if 'P' not in ls:
    ls.append('P')

In [None]:
# Remove the categorical columns and the target, leaving only the continuous features.
temp = train_df.drop(labels=ls, axis="columns")

In [None]:
# Count missing values per continuous column.
temp.isnull().sum()

B     9
C     0
H     0
K     0
N    11
O     0
dtype: int64

In [None]:
# Summary statistics of the continuous features (note the very different value ranges).
temp.describe()

Unnamed: 0,B,C,H,K,N,O
count,543.0,552.0,552.0,552.0,541.0,552.0
mean,31.364899,4.780136,2.259176,2.318841,175.162662,1087.576087
std,11.873665,4.938454,3.442564,4.297867,154.873746,5758.841945
min,13.75,0.0,0.0,0.0,0.0,0.0
25%,22.5,1.0,0.165,0.0,60.0,0.0
50%,28.17,2.75,1.0,0.0,152.0,3.5
75%,37.665,7.56125,2.75,3.0,268.0,364.75
max,80.25,28.0,28.5,40.0,1160.0,100000.0


In [None]:
# Fill every missing value with the mean of its own column.
# NOTE(review): the means are computed over all rows, including the rows
# that are later held out for validation.

temp_to_normalise = temp.fillna(value=temp.mean(axis=0))

In [None]:
# Names of the continuous feature columns.
temp.columns

Index(['B', 'C', 'H', 'K', 'N', 'O'], dtype='object')

In [None]:
# Rescale each continuous feature to the [0, 1] range.
# fit_transform returns a plain array, so the result is wrapped back into a
# DataFrame carrying the original column names.
# NOTE(review): the scaler is fitted on all rows, including the rows that
# are later held out for validation.

scaled_inp_features = min_max_scaler.fit_transform(temp_to_normalise)
scaled_inp_features_df = pd.DataFrame(scaled_inp_features, columns=temp.columns)

In [None]:
# Sanity check: row count and number of continuous features after scaling.
scaled_inp_features_df.shape

(552, 6)

In [None]:
# Preview the first two scaled rows.
scaled_inp_features_df.head(2)  

Unnamed: 0,B,C,H,K,N,O
0,0.070226,0.371964,0.004386,0.0,0.103448,0.00375
1,0.120301,0.419643,0.008772,0.0,0.155172,0.0


In [None]:
# Inspect the index; it must line up with categorical_col_df for the later column-wise concat.
scaled_inp_features_df.index

RangeIndex(start=0, stop=552, step=1)

In [None]:
# Independent (deep) copy of the encoded categorical feature columns only.
categorical_col_df = train_df.loc[:, cat_cols].copy()

In [None]:
# Preview the integer-encoded categorical columns.
categorical_col_df.head()

Unnamed: 0,A,D,E,F,G,I,J,L,M
0,1,2,2,0,7,1,0,0,0
1,0,1,0,1,7,0,0,1,0
2,1,2,2,1,7,0,0,0,0
3,1,2,2,1,3,1,0,0,0
4,0,1,0,1,7,1,1,0,0


In [None]:
# Number of distinct codes per categorical column (the -1 code for missing values counts as one).
categorical_col_df.nunique()

A     3
D     4
E     4
F    15
G    10
I     2
J     2
L     2
M     3
dtype: int64

In [None]:
# Column order of the assembled feature frame: continuous features first, then categorical.
final_columns = [*temp.columns, *cat_cols]

In [None]:
# Check what kind of object holds the column labels.
type(temp.columns)

pandas.core.indexes.base.Index

In [None]:
# Inspect the index; it must line up with scaled_inp_features_df for the column-wise concat.
categorical_col_df.index

RangeIndex(start=0, stop=552, step=1)

In [None]:
# training set
# Place the scaled continuous features and the encoded categorical features
# side by side (rows are aligned on the index), then set the column labels.
final_df = pd.concat(
    objs=[scaled_inp_features_df, categorical_col_df],
    axis="columns",
)
final_df.columns = final_columns


In [None]:
# Sanity check: 6 continuous + 9 categorical columns are expected.
final_df.shape

(552, 15)

In [None]:
# Sanity check: the target has one entry per row of the feature frame.
train_df['P'].shape

(552,)

In [None]:
# Target column with a fresh 0..n-1 index.
# reset_index returns a new Series here, so the column stored inside train_df
# is never mutated in place (the original used inplace=True on it).
target_series = train_df['P'].reset_index(drop=True)

In [None]:
# Preview the assembled feature frame (scaled continuous + encoded categorical).
final_df.head()

Unnamed: 0,B,C,H,K,N,O,A,D,E,F,G,I,J,L,M
0,0.070226,0.371964,0.004386,0.0,0.103448,0.00375,1,2,2,0,7,1,0,0,0
1,0.120301,0.419643,0.008772,0.0,0.155172,0.0,0,1,0,1,7,0,0,1,0
2,0.246917,0.03875,0.001404,0.0,0.146552,0.00179,1,2,2,1,7,0,0,0,0
3,0.134135,0.090714,0.090702,0.0,0.0,0.0,1,2,2,1,3,1,0,0,0
4,0.334586,0.035714,0.070175,0.275,0.0,0.00456,0,1,0,1,7,1,1,0,0


In [None]:
# Append the target as the last column of the feature frame.
# ignore_index=True discards the column labels during the concat, so they
# are restored right afterwards: feature names followed by 'P'.
final_df1 = pd.concat(
    objs=[final_df, target_series],
    axis="columns",
    ignore_index=True,
)
final_df1.columns = [*final_columns, 'P']

In [None]:
# Preview features plus target.
final_df1.head()

Unnamed: 0,B,C,H,K,N,O,A,D,E,F,G,I,J,L,M,P
0,0.070226,0.371964,0.004386,0.0,0.103448,0.00375,1,2,2,0,7,1,0,0,0,1
1,0.120301,0.419643,0.008772,0.0,0.155172,0.0,0,1,0,1,7,0,0,1,0,1
2,0.246917,0.03875,0.001404,0.0,0.146552,0.00179,1,2,2,1,7,0,0,0,0,1
3,0.134135,0.090714,0.090702,0.0,0.0,0.0,1,2,2,1,3,1,0,0,0,0
4,0.334586,0.035714,0.070175,0.275,0.0,0.00456,0,1,0,1,7,1,1,0,0,0


In [None]:
# Sanity check: 15 feature columns plus the target.
final_df1.shape

(552, 16)

In [None]:
# Confirm the column labels and their order.
final_df1.columns.values

array(['B', 'C', 'H', 'K', 'N', 'O', 'A', 'D', 'E', 'F', 'G', 'I', 'J',
       'L', 'M', 'P'], dtype=object)

In [None]:
# Same preview as the earlier final_df1.head() cell.
final_df1.head()

Unnamed: 0,B,C,H,K,N,O,A,D,E,F,G,I,J,L,M,P
0,0.070226,0.371964,0.004386,0.0,0.103448,0.00375,1,2,2,0,7,1,0,0,0,1
1,0.120301,0.419643,0.008772,0.0,0.155172,0.0,0,1,0,1,7,0,0,1,0,1
2,0.246917,0.03875,0.001404,0.0,0.146552,0.00179,1,2,2,1,7,0,0,0,0,1
3,0.134135,0.090714,0.090702,0.0,0.0,0.0,1,2,2,1,3,1,0,0,0,0
4,0.334586,0.035714,0.070175,0.275,0.0,0.00456,0,1,0,1,7,1,1,0,0,0


In [None]:
# Positional (0-based) index of every categorical column within final_df1.

cat_dims = list(map(final_df1.columns.get_loc, cat_cols))


In [None]:
# Show the categorical column positions.
cat_dims

[6, 7, 8, 9, 10, 11, 12, 13, 14]

In [None]:
# One-hot encode the categorical columns; every distinct code becomes its own
# indicator column named <column>_<code>. The -1 code used for missing values
# gets an indicator too (e.g. A_-1).
encoded_set = pd.get_dummies(final_df1, columns=cat_cols )

In [None]:
# Shape after one-hot encoding (the target column 'P' is still included).
encoded_set.shape

(552, 52)

In [None]:
# Row position separating the first 80% of rows (training) from the remaining 20% (validation).
split_limit_index = int(len(train_df) * 0.8)

In [None]:
# Positional, unshuffled split of the feature columns: rows before the split
# point form the training features, the rest the validation features.
# The target 'P' is removed from both.
sample_train = encoded_set.iloc[:split_limit_index].drop(labels='P', axis="columns")
sample_val = encoded_set.iloc[split_limit_index:].drop(labels='P', axis="columns")

In [None]:
# Preview the training features (one-hot columns included, target excluded).
sample_train.head()

Unnamed: 0,B,C,H,K,N,O,A_-1,A_0,A_1,D_-1,...,G_8,I_0,I_1,J_0,J_1,L_0,L_1,M_0,M_1,M_2
0,0.070226,0.371964,0.004386,0.0,0.103448,0.00375,0,0,1,0,...,0,0,1,1,0,1,0,1,0,0
1,0.120301,0.419643,0.008772,0.0,0.155172,0.0,0,1,0,0,...,0,1,0,1,0,0,1,1,0,0
2,0.246917,0.03875,0.001404,0.0,0.146552,0.00179,0,0,1,0,...,0,1,0,1,0,1,0,1,0,0
3,0.134135,0.090714,0.090702,0.0,0.0,0.0,0,0,1,0,...,0,0,1,1,0,1,0,1,0,0
4,0.334586,0.035714,0.070175,0.275,0.0,0.00456,0,1,0,0,...,0,0,1,0,1,1,0,1,0,0


In [None]:
# Sanity check: number of training rows and feature columns.
sample_train.shape

(441, 51)

In [None]:
# Target values split at the same row position as the features.
train_label = final_df1['P'].iloc[:split_limit_index]
val_label = final_df1['P'].iloc[split_limit_index:]

In [None]:
# Same preview as the earlier sample_train.head() cell.
sample_train.head()

Unnamed: 0,B,C,H,K,N,O,A_-1,A_0,A_1,D_-1,...,G_8,I_0,I_1,J_0,J_1,L_0,L_1,M_0,M_1,M_2
0,0.070226,0.371964,0.004386,0.0,0.103448,0.00375,0,0,1,0,...,0,0,1,1,0,1,0,1,0,0
1,0.120301,0.419643,0.008772,0.0,0.155172,0.0,0,1,0,0,...,0,1,0,1,0,0,1,1,0,0
2,0.246917,0.03875,0.001404,0.0,0.146552,0.00179,0,0,1,0,...,0,1,0,1,0,1,0,1,0,0
3,0.134135,0.090714,0.090702,0.0,0.0,0.0,0,0,1,0,...,0,0,1,1,0,1,0,1,0,0
4,0.334586,0.035714,0.070175,0.275,0.0,0.00456,0,1,0,0,...,0,0,1,0,1,1,0,1,0,0


In [None]:
# Preview the validation features; their index continues from the split point.
sample_val.head()

Unnamed: 0,B,C,H,K,N,O,A_-1,A_0,A_1,D_-1,...,G_8,I_0,I_1,J_0,J_1,L_0,L_1,M_0,M_1,M_2
441,0.253083,0.380893,0.002982,0.3,0.111207,3e-05,0,1,0,0,...,0,1,0,0,1,0,1,1,0,0
442,0.101504,0.08625,0.070175,0.275,0.172414,0.03,0,0,1,0,...,0,0,1,0,1,0,1,1,0,0
443,0.130376,0.401786,0.026316,0.1,0.0,0.00321,0,1,0,0,...,0,0,1,0,1,1,0,1,0,0
444,0.20797,0.116071,0.178421,0.05,0.318103,1e-05,0,0,1,0,...,0,1,0,0,1,0,1,1,0,0
445,0.047669,0.017857,0.005789,0.15,0.206897,0.00035,0,1,0,0,...,0,1,0,0,1,0,1,1,0,0


In [None]:
# Sanity check: one training label per training row.
train_label.shape

(441,)

In [None]:
# Sanity check: one validation label per validation row.
val_label.shape

(111,)

In [None]:
# Feature-only views of the two splits.
# 'P' has already been removed from sample_train / sample_val when they were
# created, so an unconditional drop('P') raised KeyError on a fresh run;
# errors='ignore' makes the drop a no-op when the column is absent.
train_set = sample_train.drop('P', axis=1, errors='ignore')
val_set = sample_val.drop('P', axis=1, errors='ignore')

In [None]:
# Labels for the two splits.
# sample_train / sample_val no longer contain 'P' (it was dropped when they
# were created), so indexing them with 'P' raised KeyError. The labels are
# taken from encoded_set instead, using each split's own row index so that
# features and labels stay aligned.
train_label = encoded_set.loc[sample_train.index, 'P']
val_label = encoded_set.loc[sample_val.index, 'P']

Applying TPOT classifier

In [None]:
# Optional TPOTClassifier arguments worth considering for the cell below:
# scoring='neg_log_loss', random_state=2, periodic_checkpoint_folder=<directory>

In [None]:
# Evolutionary AutoML search: 100 generations with a population of 100
# pipelines, progress reported via verbosity=2.
# random_state pins the seed of the stochastic search so that a re-run can
# reproduce the same best pipeline.
tpot1 = TPOTClassifier(generations=100, population_size=100, verbosity=2, random_state=2)

In [None]:
# Run the pipeline search on the training split and time it.
# This is a long-running cell: the recorded run took about 2h 55min of wall time.
%time tpot1.fit(sample_train.values, train_label)

Optimization Progress:   2%|▏         | 200/10100 [01:44<1:44:25,  1.58pipeline/s]

Generation 1 - Current best internal CV score: 0.8844117267209093


Optimization Progress:   3%|▎         | 300/10100 [03:05<1:59:06,  1.37pipeline/s]

Generation 2 - Current best internal CV score: 0.8889061087433812


Optimization Progress:   4%|▍         | 400/10100 [04:36<1:22:38,  1.96pipeline/s]

Generation 3 - Current best internal CV score: 0.8889061087433812


Optimization Progress:   5%|▍         | 500/10100 [05:50<2:05:42,  1.27pipeline/s]

Generation 4 - Current best internal CV score: 0.8889061087433812


Optimization Progress:   6%|▌         | 600/10100 [06:50<1:16:40,  2.07pipeline/s]

Generation 5 - Current best internal CV score: 0.8889061087433812


Optimization Progress:   7%|▋         | 700/10100 [08:11<1:03:16,  2.48pipeline/s]

Generation 6 - Current best internal CV score: 0.8889061087433812


Optimization Progress:   8%|▊         | 800/10100 [09:31<52:48,  2.94pipeline/s]  

Generation 7 - Current best internal CV score: 0.8889061087433812


Optimization Progress:   9%|▉         | 900/10100 [10:59<3:04:09,  1.20s/pipeline]

Generation 8 - Current best internal CV score: 0.8932971716388997


Optimization Progress:  10%|▉         | 1000/10100 [12:12<1:34:57,  1.60pipeline/s]

Generation 9 - Current best internal CV score: 0.8932971716388997


Optimization Progress:  11%|█         | 1100/10100 [13:17<52:04,  2.88pipeline/s]  

Generation 10 - Current best internal CV score: 0.8932971716388997


Optimization Progress:  12%|█▏        | 1200/10100 [14:37<1:48:56,  1.36pipeline/s]

Generation 11 - Current best internal CV score: 0.8935038098928064


Optimization Progress:  13%|█▎        | 1300/10100 [16:09<1:13:21,  2.00pipeline/s]

Generation 12 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  14%|█▍        | 1400/10100 [17:28<1:46:44,  1.36pipeline/s]

Generation 13 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  15%|█▍        | 1500/10100 [19:20<1:20:13,  1.79pipeline/s] 

Generation 14 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  16%|█▌        | 1600/10100 [20:45<1:27:52,  1.61pipeline/s]

Generation 15 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  17%|█▋        | 1700/10100 [22:32<3:29:45,  1.50s/pipeline]

Generation 16 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  18%|█▊        | 1800/10100 [24:15<1:25:30,  1.62pipeline/s] 

Generation 17 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  19%|█▉        | 1900/10100 [25:37<1:58:10,  1.16pipeline/s]

Generation 18 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  20%|█▉        | 2000/10100 [27:09<1:22:44,  1.63pipeline/s]

Generation 19 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  21%|██        | 2100/10100 [28:22<1:22:32,  1.62pipeline/s]

Generation 20 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  22%|██▏       | 2200/10100 [29:44<1:01:08,  2.15pipeline/s]

Generation 21 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  23%|██▎       | 2300/10100 [31:20<1:13:22,  1.77pipeline/s]

Generation 22 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  24%|██▍       | 2400/10100 [32:53<2:10:00,  1.01s/pipeline]

Generation 23 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  25%|██▍       | 2500/10100 [34:34<1:19:43,  1.59pipeline/s]

Generation 24 - Current best internal CV score: 0.8979981919152783


Optimization Progress:  26%|██▌       | 2600/10100 [36:11<1:20:40,  1.55pipeline/s]

Generation 25 - Current best internal CV score: 0.8979981919152784


Optimization Progress:  27%|██▋       | 2700/10100 [37:58<2:01:54,  1.01pipeline/s] 

Generation 26 - Current best internal CV score: 0.900142063799561


Optimization Progress:  28%|██▊       | 2800/10100 [39:42<2:21:07,  1.16s/pipeline]

Generation 27 - Current best internal CV score: 0.900142063799561


Optimization Progress:  29%|██▊       | 2900/10100 [41:44<1:59:47,  1.00pipeline/s] 

Generation 28 - Current best internal CV score: 0.900142063799561


Optimization Progress:  30%|██▉       | 3000/10100 [43:41<2:00:38,  1.02s/pipeline] 

Generation 29 - Current best internal CV score: 0.900142063799561


Optimization Progress:  31%|███       | 3100/10100 [45:22<1:20:56,  1.44pipeline/s]

Generation 30 - Current best internal CV score: 0.900142063799561


Optimization Progress:  32%|███▏      | 3200/10100 [47:13<5:44:10,  2.99s/pipeline]

Generation 31 - Current best internal CV score: 0.900142063799561


Optimization Progress:  33%|███▎      | 3300/10100 [48:45<1:30:54,  1.25pipeline/s]

Generation 32 - Current best internal CV score: 0.900142063799561


Optimization Progress:  34%|███▎      | 3400/10100 [50:15<1:24:26,  1.32pipeline/s]

Generation 33 - Current best internal CV score: 0.900142063799561


Optimization Progress:  35%|███▍      | 3500/10100 [51:57<1:12:54,  1.51pipeline/s]

Generation 34 - Current best internal CV score: 0.900142063799561


Optimization Progress:  36%|███▌      | 3600/10100 [53:52<1:43:41,  1.04pipeline/s] 

Generation 35 - Current best internal CV score: 0.900297042489991


Optimization Progress:  37%|███▋      | 3700/10100 [55:15<59:34,  1.79pipeline/s]  

Generation 36 - Current best internal CV score: 0.900297042489991


Optimization Progress:  38%|███▊      | 3800/10100 [57:06<1:40:53,  1.04pipeline/s]

Generation 37 - Current best internal CV score: 0.900297042489991


Optimization Progress:  39%|███▊      | 3900/10100 [58:55<1:35:06,  1.09pipeline/s]

Generation 38 - Current best internal CV score: 0.900297042489991


Optimization Progress:  40%|███▉      | 4000/10100 [1:01:15<1:48:28,  1.07s/pipeline]

Generation 39 - Current best internal CV score: 0.900297042489991


Optimization Progress:  41%|████      | 4100/10100 [1:03:02<1:27:01,  1.15pipeline/s]

Generation 40 - Current best internal CV score: 0.900297042489991


Optimization Progress:  42%|████▏     | 4200/10100 [1:05:06<1:21:24,  1.21pipeline/s]

Generation 41 - Current best internal CV score: 0.900297042489991


Optimization Progress:  43%|████▎     | 4300/10100 [1:07:54<1:34:35,  1.02pipeline/s] 

Generation 42 - Current best internal CV score: 0.900297042489991


Optimization Progress:  44%|████▎     | 4400/10100 [1:10:02<2:11:06,  1.38s/pipeline]

Generation 43 - Current best internal CV score: 0.9069352963967454


Optimization Progress:  45%|████▍     | 4500/10100 [1:12:25<1:45:40,  1.13s/pipeline] 

Generation 44 - Current best internal CV score: 0.9069352963967454


Optimization Progress:  46%|████▌     | 4600/10100 [1:14:22<1:09:18,  1.32pipeline/s]

Generation 45 - Current best internal CV score: 0.9069352963967454


Optimization Progress:  47%|████▋     | 4700/10100 [1:16:38<46:20,  1.94pipeline/s]  

Generation 46 - Current best internal CV score: 0.9069352963967454


Optimization Progress:  48%|████▊     | 4800/10100 [1:18:43<1:22:05,  1.08pipeline/s]

Generation 47 - Current best internal CV score: 0.9069352963967454


Optimization Progress:  49%|████▊     | 4900/10100 [1:20:40<57:58,  1.50pipeline/s]  

Generation 48 - Current best internal CV score: 0.9069352963967454


Optimization Progress:  50%|████▉     | 5000/10100 [1:22:39<1:26:03,  1.01s/pipeline]

Generation 49 - Current best internal CV score: 0.9069352963967454


Optimization Progress:  50%|█████     | 5100/10100 [1:24:36<1:10:20,  1.18pipeline/s]

Generation 50 - Current best internal CV score: 0.9069352963967454


Optimization Progress:  51%|█████▏    | 5200/10100 [1:26:32<1:32:35,  1.13s/pipeline]

Generation 51 - Current best internal CV score: 0.9069352963967454


Optimization Progress:  52%|█████▏    | 5300/10100 [1:28:34<1:35:54,  1.20s/pipeline]

Generation 52 - Current best internal CV score: 0.9069352963967454


Optimization Progress:  53%|█████▎    | 5400/10100 [1:30:34<1:56:18,  1.48s/pipeline]

Generation 53 - Current best internal CV score: 0.909234146971458


Optimization Progress:  54%|█████▍    | 5500/10100 [1:32:35<1:04:04,  1.20pipeline/s]

Generation 54 - Current best internal CV score: 0.909234146971458


Optimization Progress:  55%|█████▌    | 5600/10100 [1:34:30<1:23:24,  1.11s/pipeline]

Generation 55 - Current best internal CV score: 0.909234146971458


Optimization Progress:  56%|█████▋    | 5700/10100 [1:36:34<1:13:46,  1.01s/pipeline]

Generation 56 - Current best internal CV score: 0.909234146971458


Optimization Progress:  57%|█████▋    | 5800/10100 [1:38:38<2:09:34,  1.81s/pipeline]

Generation 57 - Current best internal CV score: 0.909234146971458


Optimization Progress:  58%|█████▊    | 5900/10100 [1:40:29<1:02:52,  1.11pipeline/s]

Generation 58 - Current best internal CV score: 0.909234146971458


Optimization Progress:  59%|█████▉    | 6000/10100 [1:42:50<55:20,  1.23pipeline/s]  

Generation 59 - Current best internal CV score: 0.909234146971458


Optimization Progress:  60%|██████    | 6100/10100 [1:45:16<3:22:39,  3.04s/pipeline]

Generation 60 - Current best internal CV score: 0.909234146971458


Optimization Progress:  61%|██████▏   | 6200/10100 [1:47:11<54:09,  1.20pipeline/s]  

Generation 61 - Current best internal CV score: 0.909234146971458


Optimization Progress:  62%|██████▏   | 6300/10100 [1:49:17<56:49,  1.11pipeline/s]  

Generation 62 - Current best internal CV score: 0.909234146971458


Optimization Progress:  63%|██████▎   | 6400/10100 [1:51:24<1:07:21,  1.09s/pipeline]

Generation 63 - Current best internal CV score: 0.909234146971458


Optimization Progress:  64%|██████▍   | 6500/10100 [1:53:37<1:01:02,  1.02s/pipeline]

Generation 64 - Current best internal CV score: 0.9114813379826939


Optimization Progress:  65%|██████▌   | 6600/10100 [1:56:02<1:13:49,  1.27s/pipeline]

Generation 65 - Current best internal CV score: 0.9114813379826939


Optimization Progress:  66%|██████▋   | 6700/10100 [1:58:25<55:21,  1.02pipeline/s]  

Generation 66 - Current best internal CV score: 0.9114813379826939


Optimization Progress:  67%|██████▋   | 6800/10100 [2:00:36<53:45,  1.02pipeline/s]  

Generation 67 - Current best internal CV score: 0.9114813379826939


Optimization Progress:  68%|██████▊   | 6900/10100 [2:02:45<58:41,  1.10s/pipeline]  

Generation 68 - Current best internal CV score: 0.9114813379826939


Optimization Progress:  69%|██████▉   | 7000/10100 [2:04:43<38:21,  1.35pipeline/s]  

Generation 69 - Current best internal CV score: 0.9114813379826939


Optimization Progress:  70%|███████   | 7100/10100 [2:06:56<50:10,  1.00s/pipeline]  

Generation 70 - Current best internal CV score: 0.9114813379826939


Optimization Progress:  71%|███████▏  | 7200/10100 [2:09:26<36:00,  1.34pipeline/s]  

Generation 71 - Current best internal CV score: 0.91372852899393


Optimization Progress:  72%|███████▏  | 7300/10100 [2:11:21<35:41,  1.31pipeline/s]  

Generation 72 - Current best internal CV score: 0.91372852899393


Optimization Progress:  73%|███████▎  | 7400/10100 [2:13:21<1:06:37,  1.48s/pipeline]

Generation 73 - Current best internal CV score: 0.91372852899393


Optimization Progress:  74%|███████▍  | 7500/10100 [2:15:38<55:24,  1.28s/pipeline]  

Generation 74 - Current best internal CV score: 0.91372852899393


Optimization Progress:  75%|███████▌  | 7600/10100 [2:17:18<36:23,  1.15pipeline/s]  

Generation 75 - Current best internal CV score: 0.91372852899393


Optimization Progress:  76%|███████▌  | 7700/10100 [2:19:17<1:07:58,  1.70s/pipeline]

Generation 76 - Current best internal CV score: 0.91372852899393


Optimization Progress:  77%|███████▋  | 7800/10100 [2:20:59<1:01:37,  1.61s/pipeline]

Generation 77 - Current best internal CV score: 0.91372852899393


Optimization Progress:  78%|███████▊  | 7900/10100 [2:22:24<22:01,  1.67pipeline/s]  

Generation 78 - Current best internal CV score: 0.91372852899393


Optimization Progress:  79%|███████▉  | 8000/10100 [2:23:48<22:22,  1.56pipeline/s]  

Generation 79 - Current best internal CV score: 0.91372852899393


Optimization Progress:  80%|████████  | 8100/10100 [2:25:31<33:39,  1.01s/pipeline]  

Generation 80 - Current best internal CV score: 0.91372852899393


Optimization Progress:  81%|████████  | 8200/10100 [2:27:11<26:52,  1.18pipeline/s]  

Generation 81 - Current best internal CV score: 0.91372852899393


Optimization Progress:  82%|████████▏ | 8300/10100 [2:28:37<37:21,  1.25s/pipeline]  

Generation 82 - Current best internal CV score: 0.91372852899393


Optimization Progress:  83%|████████▎ | 8400/10100 [2:29:57<23:27,  1.21pipeline/s]

Generation 83 - Current best internal CV score: 0.91372852899393


Optimization Progress:  84%|████████▍ | 8500/10100 [2:31:26<17:27,  1.53pipeline/s]

Generation 84 - Current best internal CV score: 0.91372852899393


Optimization Progress:  85%|████████▌ | 8600/10100 [2:33:06<30:48,  1.23s/pipeline]  

Generation 85 - Current best internal CV score: 0.91372852899393


Optimization Progress:  86%|████████▌ | 8700/10100 [2:34:34<16:12,  1.44pipeline/s]

Generation 86 - Current best internal CV score: 0.91372852899393


Optimization Progress:  87%|████████▋ | 8800/10100 [2:35:55<20:57,  1.03pipeline/s]

Generation 87 - Current best internal CV score: 0.91372852899393


Optimization Progress:  88%|████████▊ | 8900/10100 [2:37:51<19:14,  1.04pipeline/s]  

Generation 88 - Current best internal CV score: 0.91372852899393


Optimization Progress:  89%|████████▉ | 9000/10100 [2:39:07<17:43,  1.03pipeline/s]

Generation 89 - Current best internal CV score: 0.91372852899393


Optimization Progress:  90%|█████████ | 9100/10100 [2:41:04<24:50,  1.49s/pipeline]  

Generation 90 - Current best internal CV score: 0.91372852899393


Optimization Progress:  91%|█████████ | 9200/10100 [2:42:35<17:12,  1.15s/pipeline]

Generation 91 - Current best internal CV score: 0.91372852899393


Optimization Progress:  92%|█████████▏| 9300/10100 [2:43:59<07:58,  1.67pipeline/s]

Generation 92 - Current best internal CV score: 0.91372852899393


Optimization Progress:  93%|█████████▎| 9400/10100 [2:45:21<13:33,  1.16s/pipeline]

Generation 93 - Current best internal CV score: 0.91372852899393


Optimization Progress:  94%|█████████▍| 9500/10100 [2:46:35<10:55,  1.09s/pipeline]

Generation 94 - Current best internal CV score: 0.91372852899393


Optimization Progress:  95%|█████████▌| 9600/10100 [2:48:00<06:50,  1.22pipeline/s]

Generation 95 - Current best internal CV score: 0.91372852899393


Optimization Progress:  96%|█████████▌| 9700/10100 [2:49:25<04:32,  1.47pipeline/s]

Generation 96 - Current best internal CV score: 0.91372852899393


Optimization Progress:  97%|█████████▋| 9800/10100 [2:51:08<05:09,  1.03s/pipeline]

Generation 97 - Current best internal CV score: 0.91372852899393


Optimization Progress:  98%|█████████▊| 9900/10100 [2:52:46<03:47,  1.14s/pipeline]

Generation 98 - Current best internal CV score: 0.91372852899393


Optimization Progress:  99%|█████████▉| 10000/10100 [2:54:07<01:01,  1.62pipeline/s]

Generation 99 - Current best internal CV score: 0.91372852899393


                                                                                    

Generation 100 - Current best internal CV score: 0.91372852899393

Best pipeline: RandomForestClassifier(GradientBoostingClassifier(GradientBoostingClassifier(Normalizer(MaxAbsScaler(input_matrix), norm=max), learning_rate=0.001, max_depth=2, max_features=0.05, min_samples_leaf=12, min_samples_split=20, n_estimators=100, subsample=0.35000000000000003), learning_rate=0.01, max_depth=5, max_features=0.55, min_samples_leaf=15, min_samples_split=20, n_estimators=100, subsample=0.7500000000000001), bootstrap=False, criterion=entropy, max_features=0.1, min_samples_leaf=13, min_samples_split=15, n_estimators=100)
CPU times: user 2h 40min 13s, sys: 1min 51s, total: 2h 42min 4s
Wall time: 2h 55min 35s


TPOTClassifier(config_dict={'sklearn.kernel_approximation.RBFSampler': {'gamma': array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1.  ])}, 'sklearn.cluster.FeatureAgglomeration': {'linkage': ['ward', 'complete', 'average'], 'affin..., 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1.  ])}, 'sklearn.preprocessing.RobustScaler': {}},
        crossover_rate=0.1, cv=5, disable_update_check=False,
        early_stop=None, generations=100, max_eval_time_mins=5,
        max_time_mins=None, memory=None, mutation_rate=0.9, n_jobs=1,
        offspring_size=100, periodic_checkpoint_folder=None,
        population_size=100, random_state=None, scoring=None,
        subsample=1.0, verbosity=2, warm_start=False)

In [None]:
# Score the best pipeline on the held-out validation rows. The recorded value
# (~0.838) is noticeably below the best internal CV score (~0.914).
print(tpot1.score(sample_val.values, val_label.values))
# Write the best pipeline out as a standalone Python script.
tpot1.export('tpot_Binary_classification_pipeline.py')

0.8378378378378378


True

#### References
* https://epistasislab.github.io/tpot/