In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [3]:
data = pd.concat([train,test]).reset_index(drop=True)

In [4]:
data.head(3)

Unnamed: 0,ID,Datetime,Sensor1_PM2.5,Sensor2_PM2.5,Temperature,Relative_Humidity,Offset_fault
0,ID_QF0ZTQJ2SF5Q,2021-11-03 04:06:31,52.58,49.52,17.4,96.0,0.0
1,ID_4GTK689CNX5S,2021-11-08 18:43:23,35.25,33.4,25.0,75.0,0.0
2,ID_DL7VVKW9U7XQ,2021-11-07 09:50:33,19.18,23.5,24.9,75.0,0.0


In [5]:
data['created_at'] = pd.to_datetime(data['Datetime'])
data['year'] = data['created_at'].dt.year
data['year'] = data['year'].astype(float)
data['month'] = data['created_at'].dt.month
data['day'] = data['created_at'].dt.day
data['weekday'] = data['created_at'].dt.weekday
data['weekofyear'] = data['created_at'].dt.weekofyear
data['hour'] = data['created_at'].dt.hour

  data['weekofyear'] = data['created_at'].dt.weekofyear


In [6]:
# combination between time features
data['day_hour']  = data['day'].astype(str) + '-' + data['hour'].astype(str)
data['month_day'] = data['month'].astype(str) + '-' + data['day'].astype(str)

In [7]:
from sklearn.preprocessing import LabelEncoder

In [8]:
def LE(df,LE_cols = []) :
    LE = LabelEncoder()
    for le_col in LE_cols :
        data[le_col] = LE.fit_transform(data[le_col])

In [9]:
# Label Encoder
LE(data,LE_cols = ['day_hour','month_day'])

In [10]:
# Feature Engineering
data = data.sort_values('Datetime').reset_index(drop=True)

In [11]:
data.head(3)

Unnamed: 0,ID,Datetime,Sensor1_PM2.5,Sensor2_PM2.5,Temperature,Relative_Humidity,Offset_fault,created_at,year,month,day,weekday,weekofyear,hour,day_hour,month_day
0,ID_37STUCVVPZKP,2021-10-15 16:00:31,9.17,9.28,29.9,53.0,0.0,2021-10-15 16:00:31,2021.0,10,15,4,41,16,152,30
1,ID_YZZUOMF0TQIF,2021-10-15 16:00:34,9.93,11.25,29.9,53.0,0.0,2021-10-15 16:00:34,2021.0,10,15,4,41,16,152,30
2,ID_UF68QLIPG3LE,2021-10-15 16:01:01,6.53,3.15,29.9,53.0,1.0,2021-10-15 16:01:01,2021.0,10,15,4,41,16,152,30


In [12]:
def LAG(data,LagFeature,shift=1,NewFeatures=[]) :
    data[NewFeatures[0]]   = data[LagFeature]  - data[LagFeature].shift(shift)
    data[NewFeatures[1]]   = data[LagFeature].shift(shift)

In [13]:
LAG(data,LagFeature='Sensor1_PM2.5',shift=1,NewFeatures=['sensor1_diff_next','sensor1_next'])
LAG(data,LagFeature='Sensor1_PM2.5',shift=-1,NewFeatures=['sensor1_diff_before','sensor1_before'])
LAG(data,LagFeature='Sensor2_PM2.5',shift=1,NewFeatures=['sensor2_diff_next','sensor2_next'])
LAG(data,LagFeature='Sensor2_PM2.5',shift=-1,NewFeatures=['sensor2_diff_before','sensor2_before'])

In [14]:
data['SplitBy']   = data['year'].astype(int).astype(str) + '-' + data['month'].astype(str) + '-' + data['day'].astype(str)

In [15]:
data = data.sort_values('SplitBy').reset_index(drop=True)

In [16]:
data.head(3)

Unnamed: 0,ID,Datetime,Sensor1_PM2.5,Sensor2_PM2.5,Temperature,Relative_Humidity,Offset_fault,created_at,year,month,...,month_day,sensor1_diff_next,sensor1_next,sensor1_diff_before,sensor1_before,sensor2_diff_next,sensor2_next,sensor2_diff_before,sensor2_before,SplitBy
0,ID_37STUCVVPZKP,2021-10-15 16:00:31,9.17,9.28,29.9,53.0,0.0,2021-10-15 16:00:31,2021.0,10,...,30,,,-0.76,9.93,,,-1.97,11.25,2021-10-15
1,ID_FFR983O9P6PN,2021-10-15 21:28:47,66.65,65.68,21.7,83.0,0.0,2021-10-15 21:28:47,2021.0,10,...,30,12.7,53.95,15.6,51.05,20.23,45.45,22.81,42.87,2021-10-15
2,ID_IOIOHCC348SZ,2021-10-15 21:29:48,51.05,42.87,21.7,83.0,1.0,2021-10-15 21:29:48,2021.0,10,...,30,-15.6,66.65,-19.52,70.57,-22.81,65.68,-30.41,73.28,2021-10-15


In [17]:
train = data[data['ID'].isin(train['ID'].values)].reset_index(drop=True)
train['Offset_fault'] = train['Offset_fault'].astype('int')
test = data[~data['ID'].isin(train['ID'].values)].reset_index(drop=True)

In [18]:
class CFG :
  SEED = 42
  n_splits = 5

  lgb_params = {'boosting_type': 'gbdt','objective': 'binary','metric': 'auc',
                'n_estimators': 2500,'reg_lambda' :50,'num_leaves' : 81,
                'seed': SEED,'silent':True,'early_stopping_rounds': 100,
               }
  remove_features = ['ID', 'Datetime',"created_at",'SplitBy' , 'folds', 'Offset_fault',]
  TARGET_COL = 'Offset_fault'

In [19]:
class CustomSplit :
  def __init__(self,) :
    self.n_splits = CFG.n_splits

  def Split(self,Train_) :
    kf = GroupKFold(n_splits=self.n_splits)

    Train = Train_.copy()
    Train = Train.drop_duplicates('SplitBy').reset_index(drop=True)
    
    groups = Train['SplitBy']
    Train["folds"]=-1   
    for fold, (_, val_index) in enumerate(kf.split(Train,Train['Offset_fault'],groups)):
          Train.loc[val_index, "folds"] = fold
    return Train

  def apply(self,train) :
    mapper = dict(zip(self.Split(train)['SplitBy'].tolist(),
                      self.Split(train)['folds'].tolist()))

    train['folds'] = train['SplitBy'].map(mapper)
    return train

In [20]:
from sklearn.model_selection import GroupKFold

In [22]:
split = CustomSplit() 

train = split.apply(train)

In [23]:
train.head(3)

Unnamed: 0,ID,Datetime,Sensor1_PM2.5,Sensor2_PM2.5,Temperature,Relative_Humidity,Offset_fault,created_at,year,month,...,sensor1_diff_next,sensor1_next,sensor1_diff_before,sensor1_before,sensor2_diff_next,sensor2_next,sensor2_diff_before,sensor2_before,SplitBy,folds
0,ID_37STUCVVPZKP,2021-10-15 16:00:31,9.17,9.28,29.9,53.0,0,2021-10-15 16:00:31,2021.0,10,...,,,-0.76,9.93,,,-1.97,11.25,2021-10-15,3
1,ID_FFR983O9P6PN,2021-10-15 21:28:47,66.65,65.68,21.7,83.0,0,2021-10-15 21:28:47,2021.0,10,...,12.7,53.95,15.6,51.05,20.23,45.45,22.81,42.87,2021-10-15,3
2,ID_IOIOHCC348SZ,2021-10-15 21:29:48,51.05,42.87,21.7,83.0,1,2021-10-15 21:29:48,2021.0,10,...,-15.6,66.65,-19.52,70.57,-22.81,65.68,-30.41,73.28,2021-10-15,3


In [24]:
features_columns = [col for col in train.columns if col not in CFG.remove_features]
len(features_columns)

20

In [31]:
x = train[features_columns]
y = train["Offset_fault"]

In [33]:
(x.shape,y.shape)

((297177, 20), (297177,))

### Model

In [34]:
from sklearn.model_selection import train_test_split, StratifiedKFold, RepeatedStratifiedKFold
import lightgbm as lgb
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score

In [37]:
x_train, x_val, y_train, y_val = train_test_split(x,y,test_size = 0.1,stratify= y,random_state = 42)

In [38]:
(x_train.shape,y_train.shape,x_val.shape,y_val.shape)

((267459, 20), (267459,), (29718, 20), (29718,))

In [39]:
skf = RepeatedStratifiedKFold(n_splits=5, random_state=None)

In [54]:
test_score=[]
train_score=[]
sub_list = []

In [56]:
test = test[features_columns]

In [57]:
for train_index, test_index in skf.split(x_train,y_train):
    #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = x_train.iloc[train_index], x_train.iloc[test_index]
    Y_train, Y_test = y_train.iloc[train_index], y_train.iloc[test_index]
    
    
    model = LGBMClassifier() 
    #eval_dataset = lgb.Dataset(x_val, label=y_val)
    
    model.fit(X_train,Y_train,eval_set= [(X_test,Y_test)],eval_metric="accuracy")
    train_preds = model.predict(X_train)
    print("Train Accuracy score: ",accuracy_score(Y_train,train_preds))
    train_score.append(np.mean(accuracy_score(Y_train,train_preds)))
    
    test_preds = model.predict(X_test)
    print("Test Accuracy score: ", accuracy_score(Y_test,test_preds))
    test_score.append(np.mean(accuracy_score(Y_test,test_preds)))

    submission = model.predict(test)
    sub_list.append(submission) 

[1]	valid_0's binary_logloss: 0.568199
[2]	valid_0's binary_logloss: 0.495405
[3]	valid_0's binary_logloss: 0.435457
[4]	valid_0's binary_logloss: 0.385296
[5]	valid_0's binary_logloss: 0.342758
[6]	valid_0's binary_logloss: 0.306187
[7]	valid_0's binary_logloss: 0.274426
[8]	valid_0's binary_logloss: 0.246651
[9]	valid_0's binary_logloss: 0.222424
[10]	valid_0's binary_logloss: 0.20105
[11]	valid_0's binary_logloss: 0.182046
[12]	valid_0's binary_logloss: 0.165141
[13]	valid_0's binary_logloss: 0.150216
[14]	valid_0's binary_logloss: 0.136804
[15]	valid_0's binary_logloss: 0.125023
[16]	valid_0's binary_logloss: 0.114356
[17]	valid_0's binary_logloss: 0.104862
[18]	valid_0's binary_logloss: 0.0964223
[19]	valid_0's binary_logloss: 0.0888522
[20]	valid_0's binary_logloss: 0.0820075
[21]	valid_0's binary_logloss: 0.0760533
[22]	valid_0's binary_logloss: 0.0705765
[23]	valid_0's binary_logloss: 0.065745
[24]	valid_0's binary_logloss: 0.0611866
[25]	valid_0's binary_logloss: 0.0571351
[26

Train Accuracy score:  0.9974295101581085
Test Accuracy score:  0.9955507365587377
[1]	valid_0's binary_logloss: 0.568073
[2]	valid_0's binary_logloss: 0.495128
[3]	valid_0's binary_logloss: 0.435136
[4]	valid_0's binary_logloss: 0.38476
[5]	valid_0's binary_logloss: 0.341998
[6]	valid_0's binary_logloss: 0.305338
[7]	valid_0's binary_logloss: 0.273335
[8]	valid_0's binary_logloss: 0.245413
[9]	valid_0's binary_logloss: 0.220991
[10]	valid_0's binary_logloss: 0.199363
[11]	valid_0's binary_logloss: 0.180481
[12]	valid_0's binary_logloss: 0.163694
[13]	valid_0's binary_logloss: 0.148693
[14]	valid_0's binary_logloss: 0.135309
[15]	valid_0's binary_logloss: 0.123411
[16]	valid_0's binary_logloss: 0.112784
[17]	valid_0's binary_logloss: 0.103219
[18]	valid_0's binary_logloss: 0.0947079
[19]	valid_0's binary_logloss: 0.0870805
[20]	valid_0's binary_logloss: 0.0801832
[21]	valid_0's binary_logloss: 0.0740932
[22]	valid_0's binary_logloss: 0.0686531
[23]	valid_0's binary_logloss: 0.0636838
[

[100]	valid_0's binary_logloss: 0.0130916
Train Accuracy score:  0.9973453850360102
Test Accuracy score:  0.9957937635534285
[1]	valid_0's binary_logloss: 0.568118
[2]	valid_0's binary_logloss: 0.495061
[3]	valid_0's binary_logloss: 0.434967
[4]	valid_0's binary_logloss: 0.384693
[5]	valid_0's binary_logloss: 0.341987
[6]	valid_0's binary_logloss: 0.305034
[7]	valid_0's binary_logloss: 0.273174
[8]	valid_0's binary_logloss: 0.245295
[9]	valid_0's binary_logloss: 0.220958
[10]	valid_0's binary_logloss: 0.199451
[11]	valid_0's binary_logloss: 0.180512
[12]	valid_0's binary_logloss: 0.16372
[13]	valid_0's binary_logloss: 0.148934
[14]	valid_0's binary_logloss: 0.135646
[15]	valid_0's binary_logloss: 0.123727
[16]	valid_0's binary_logloss: 0.113122
[17]	valid_0's binary_logloss: 0.103623
[18]	valid_0's binary_logloss: 0.0951562
[19]	valid_0's binary_logloss: 0.0875937
[20]	valid_0's binary_logloss: 0.0806931
[21]	valid_0's binary_logloss: 0.0746118
[22]	valid_0's binary_logloss: 0.0691794


[100]	valid_0's binary_logloss: 0.0137817
Train Accuracy score:  0.9974996144265237
Test Accuracy score:  0.9955133477903237
[1]	valid_0's binary_logloss: 0.56811
[2]	valid_0's binary_logloss: 0.495218
[3]	valid_0's binary_logloss: 0.435294
[4]	valid_0's binary_logloss: 0.385112
[5]	valid_0's binary_logloss: 0.342429
[6]	valid_0's binary_logloss: 0.305902
[7]	valid_0's binary_logloss: 0.273812
[8]	valid_0's binary_logloss: 0.245869
[9]	valid_0's binary_logloss: 0.22148
[10]	valid_0's binary_logloss: 0.200056
[11]	valid_0's binary_logloss: 0.181077
[12]	valid_0's binary_logloss: 0.16433
[13]	valid_0's binary_logloss: 0.149386
[14]	valid_0's binary_logloss: 0.136106
[15]	valid_0's binary_logloss: 0.124339
[16]	valid_0's binary_logloss: 0.113705
[17]	valid_0's binary_logloss: 0.104123
[18]	valid_0's binary_logloss: 0.0955766
[19]	valid_0's binary_logloss: 0.0878997
[20]	valid_0's binary_logloss: 0.0809945
[21]	valid_0's binary_logloss: 0.0748253
[22]	valid_0's binary_logloss: 0.0692559
[2

Train Accuracy score:  0.9974388573938971
Test Accuracy score:  0.9960180961639123
[1]	valid_0's binary_logloss: 0.568104
[2]	valid_0's binary_logloss: 0.495203
[3]	valid_0's binary_logloss: 0.435177
[4]	valid_0's binary_logloss: 0.384983
[5]	valid_0's binary_logloss: 0.342216
[6]	valid_0's binary_logloss: 0.305455
[7]	valid_0's binary_logloss: 0.273483
[8]	valid_0's binary_logloss: 0.245541
[9]	valid_0's binary_logloss: 0.221258
[10]	valid_0's binary_logloss: 0.199677
[11]	valid_0's binary_logloss: 0.180807
[12]	valid_0's binary_logloss: 0.1641
[13]	valid_0's binary_logloss: 0.149108
[14]	valid_0's binary_logloss: 0.135676
[15]	valid_0's binary_logloss: 0.123764
[16]	valid_0's binary_logloss: 0.113126
[17]	valid_0's binary_logloss: 0.103588
[18]	valid_0's binary_logloss: 0.0951548
[19]	valid_0's binary_logloss: 0.0875554
[20]	valid_0's binary_logloss: 0.080863
[21]	valid_0's binary_logloss: 0.0747601
[22]	valid_0's binary_logloss: 0.069324
[23]	valid_0's binary_logloss: 0.0643503
[24]

Train Accuracy score:  0.9973453974426082
Test Accuracy score:  0.9957562954515713
[1]	valid_0's binary_logloss: 0.567991
[2]	valid_0's binary_logloss: 0.494925
[3]	valid_0's binary_logloss: 0.434912
[4]	valid_0's binary_logloss: 0.384683
[5]	valid_0's binary_logloss: 0.341971
[6]	valid_0's binary_logloss: 0.305053
[7]	valid_0's binary_logloss: 0.272971
[8]	valid_0's binary_logloss: 0.245066
[9]	valid_0's binary_logloss: 0.220747
[10]	valid_0's binary_logloss: 0.19938
[11]	valid_0's binary_logloss: 0.180299
[12]	valid_0's binary_logloss: 0.163404
[13]	valid_0's binary_logloss: 0.148406
[14]	valid_0's binary_logloss: 0.135021
[15]	valid_0's binary_logloss: 0.123116
[16]	valid_0's binary_logloss: 0.112581
[17]	valid_0's binary_logloss: 0.103105
[18]	valid_0's binary_logloss: 0.0947181
[19]	valid_0's binary_logloss: 0.0871709
[20]	valid_0's binary_logloss: 0.0803131
[21]	valid_0's binary_logloss: 0.0741847
[22]	valid_0's binary_logloss: 0.0686012
[23]	valid_0's binary_logloss: 0.0637303
[

Train Accuracy score:  0.9973781003612706
Test Accuracy score:  0.9957563747850146
[1]	valid_0's binary_logloss: 0.568254
[2]	valid_0's binary_logloss: 0.49529
[3]	valid_0's binary_logloss: 0.435336
[4]	valid_0's binary_logloss: 0.385055
[5]	valid_0's binary_logloss: 0.34242
[6]	valid_0's binary_logloss: 0.305552
[7]	valid_0's binary_logloss: 0.273511
[8]	valid_0's binary_logloss: 0.245785
[9]	valid_0's binary_logloss: 0.22143
[10]	valid_0's binary_logloss: 0.199929
[11]	valid_0's binary_logloss: 0.180948
[12]	valid_0's binary_logloss: 0.164177
[13]	valid_0's binary_logloss: 0.149256
[14]	valid_0's binary_logloss: 0.135917
[15]	valid_0's binary_logloss: 0.124157
[16]	valid_0's binary_logloss: 0.113579
[17]	valid_0's binary_logloss: 0.104161
[18]	valid_0's binary_logloss: 0.0957791
[19]	valid_0's binary_logloss: 0.0881751
[20]	valid_0's binary_logloss: 0.0813668
[21]	valid_0's binary_logloss: 0.0752292
[22]	valid_0's binary_logloss: 0.0697606
[23]	valid_0's binary_logloss: 0.0648892
[24

Train Accuracy score:  0.9974855935728407
Test Accuracy score:  0.9957376804008076
[1]	valid_0's binary_logloss: 0.568111
[2]	valid_0's binary_logloss: 0.495222
[3]	valid_0's binary_logloss: 0.435222
[4]	valid_0's binary_logloss: 0.384913
[5]	valid_0's binary_logloss: 0.342195
[6]	valid_0's binary_logloss: 0.305609
[7]	valid_0's binary_logloss: 0.273429
[8]	valid_0's binary_logloss: 0.245584
[9]	valid_0's binary_logloss: 0.221281
[10]	valid_0's binary_logloss: 0.199746
[11]	valid_0's binary_logloss: 0.180777
[12]	valid_0's binary_logloss: 0.163987
[13]	valid_0's binary_logloss: 0.149043
[14]	valid_0's binary_logloss: 0.13581
[15]	valid_0's binary_logloss: 0.124012
[16]	valid_0's binary_logloss: 0.113357
[17]	valid_0's binary_logloss: 0.103903
[18]	valid_0's binary_logloss: 0.0953979
[19]	valid_0's binary_logloss: 0.0878158
[20]	valid_0's binary_logloss: 0.0809755
[21]	valid_0's binary_logloss: 0.0749809
[22]	valid_0's binary_logloss: 0.0695092
[23]	valid_0's binary_logloss: 0.0644759
[

Train Accuracy score:  0.9973594058896933
Test Accuracy score:  0.9953077095640469
[1]	valid_0's binary_logloss: 0.568213
[2]	valid_0's binary_logloss: 0.495242
[3]	valid_0's binary_logloss: 0.435338
[4]	valid_0's binary_logloss: 0.385216
[5]	valid_0's binary_logloss: 0.342607
[6]	valid_0's binary_logloss: 0.305535
[7]	valid_0's binary_logloss: 0.273572
[8]	valid_0's binary_logloss: 0.245746
[9]	valid_0's binary_logloss: 0.22128
[10]	valid_0's binary_logloss: 0.199809
[11]	valid_0's binary_logloss: 0.180877
[12]	valid_0's binary_logloss: 0.164193
[13]	valid_0's binary_logloss: 0.149259
[14]	valid_0's binary_logloss: 0.135988
[15]	valid_0's binary_logloss: 0.124132
[16]	valid_0's binary_logloss: 0.113606
[17]	valid_0's binary_logloss: 0.104138
[18]	valid_0's binary_logloss: 0.0956939
[19]	valid_0's binary_logloss: 0.0880206
[20]	valid_0's binary_logloss: 0.0813492
[21]	valid_0's binary_logloss: 0.0752532
[22]	valid_0's binary_logloss: 0.0697051
[23]	valid_0's binary_logloss: 0.0646596
[

Train Accuracy score:  0.9973594058896933
Test Accuracy score:  0.9961676512375682
[1]	valid_0's binary_logloss: 0.56809
[2]	valid_0's binary_logloss: 0.49516
[3]	valid_0's binary_logloss: 0.435323
[4]	valid_0's binary_logloss: 0.385077
[5]	valid_0's binary_logloss: 0.342512
[6]	valid_0's binary_logloss: 0.305589
[7]	valid_0's binary_logloss: 0.273529
[8]	valid_0's binary_logloss: 0.245661
[9]	valid_0's binary_logloss: 0.221386
[10]	valid_0's binary_logloss: 0.200157
[11]	valid_0's binary_logloss: 0.181161
[12]	valid_0's binary_logloss: 0.164372
[13]	valid_0's binary_logloss: 0.14952
[14]	valid_0's binary_logloss: 0.136142
[15]	valid_0's binary_logloss: 0.124364
[16]	valid_0's binary_logloss: 0.113733
[17]	valid_0's binary_logloss: 0.104326
[18]	valid_0's binary_logloss: 0.0958744
[19]	valid_0's binary_logloss: 0.0883427
[20]	valid_0's binary_logloss: 0.0815662
[21]	valid_0's binary_logloss: 0.0753328
[22]	valid_0's binary_logloss: 0.0698612
[23]	valid_0's binary_logloss: 0.0649726
[24

Train Accuracy score:  0.9974575637478501
Test Accuracy score:  0.9958497691200389
[1]	valid_0's binary_logloss: 0.568166
[2]	valid_0's binary_logloss: 0.495158
[3]	valid_0's binary_logloss: 0.43509
[4]	valid_0's binary_logloss: 0.38483
[5]	valid_0's binary_logloss: 0.342066
[6]	valid_0's binary_logloss: 0.305471
[7]	valid_0's binary_logloss: 0.27346
[8]	valid_0's binary_logloss: 0.245386
[9]	valid_0's binary_logloss: 0.22101
[10]	valid_0's binary_logloss: 0.199475
[11]	valid_0's binary_logloss: 0.180419
[12]	valid_0's binary_logloss: 0.163632
[13]	valid_0's binary_logloss: 0.148768
[14]	valid_0's binary_logloss: 0.135457
[15]	valid_0's binary_logloss: 0.123604
[16]	valid_0's binary_logloss: 0.112885
[17]	valid_0's binary_logloss: 0.103367
[18]	valid_0's binary_logloss: 0.0949172
[19]	valid_0's binary_logloss: 0.0873237
[20]	valid_0's binary_logloss: 0.0804084
[21]	valid_0's binary_logloss: 0.0742928
[22]	valid_0's binary_logloss: 0.068816
[23]	valid_0's binary_logloss: 0.0638893
[24]	

Train Accuracy score:  0.9973407114181159
Test Accuracy score:  0.9959246242428774
[1]	valid_0's binary_logloss: 0.568352
[2]	valid_0's binary_logloss: 0.49544
[3]	valid_0's binary_logloss: 0.435546
[4]	valid_0's binary_logloss: 0.385412
[5]	valid_0's binary_logloss: 0.342737
[6]	valid_0's binary_logloss: 0.30622
[7]	valid_0's binary_logloss: 0.274236
[8]	valid_0's binary_logloss: 0.246524
[9]	valid_0's binary_logloss: 0.222027
[10]	valid_0's binary_logloss: 0.20057
[11]	valid_0's binary_logloss: 0.181618
[12]	valid_0's binary_logloss: 0.16492
[13]	valid_0's binary_logloss: 0.150222
[14]	valid_0's binary_logloss: 0.137009
[15]	valid_0's binary_logloss: 0.125305
[16]	valid_0's binary_logloss: 0.114716
[17]	valid_0's binary_logloss: 0.105274
[18]	valid_0's binary_logloss: 0.0968236
[19]	valid_0's binary_logloss: 0.0892501
[20]	valid_0's binary_logloss: 0.0824008
[21]	valid_0's binary_logloss: 0.0763982
[22]	valid_0's binary_logloss: 0.0710537
[23]	valid_0's binary_logloss: 0.0662246
[24]

Train Accuracy score:  0.997396794832848
Test Accuracy score:  0.9958498467060495
[1]	valid_0's binary_logloss: 0.568031
[2]	valid_0's binary_logloss: 0.494951
[3]	valid_0's binary_logloss: 0.434876
[4]	valid_0's binary_logloss: 0.384673
[5]	valid_0's binary_logloss: 0.341962
[6]	valid_0's binary_logloss: 0.305289
[7]	valid_0's binary_logloss: 0.273292
[8]	valid_0's binary_logloss: 0.245298
[9]	valid_0's binary_logloss: 0.220817
[10]	valid_0's binary_logloss: 0.199256
[11]	valid_0's binary_logloss: 0.180198
[12]	valid_0's binary_logloss: 0.163397
[13]	valid_0's binary_logloss: 0.148593
[14]	valid_0's binary_logloss: 0.135261
[15]	valid_0's binary_logloss: 0.123432
[16]	valid_0's binary_logloss: 0.112874
[17]	valid_0's binary_logloss: 0.103483
[18]	valid_0's binary_logloss: 0.0949134
[19]	valid_0's binary_logloss: 0.0872791
[20]	valid_0's binary_logloss: 0.0803503
[21]	valid_0's binary_logloss: 0.0741555
[22]	valid_0's binary_logloss: 0.0685649
[23]	valid_0's binary_logloss: 0.0634869
[

Train Accuracy score:  0.9974388573938971
Test Accuracy score:  0.9956629028639796
[1]	valid_0's binary_logloss: 0.568187
[2]	valid_0's binary_logloss: 0.495365
[3]	valid_0's binary_logloss: 0.435535
[4]	valid_0's binary_logloss: 0.385341
[5]	valid_0's binary_logloss: 0.342704
[6]	valid_0's binary_logloss: 0.306149
[7]	valid_0's binary_logloss: 0.274114
[8]	valid_0's binary_logloss: 0.246228
[9]	valid_0's binary_logloss: 0.221841
[10]	valid_0's binary_logloss: 0.200301
[11]	valid_0's binary_logloss: 0.181348
[12]	valid_0's binary_logloss: 0.164523
[13]	valid_0's binary_logloss: 0.14971
[14]	valid_0's binary_logloss: 0.136327
[15]	valid_0's binary_logloss: 0.12453
[16]	valid_0's binary_logloss: 0.113898
[17]	valid_0's binary_logloss: 0.104476
[18]	valid_0's binary_logloss: 0.0959997
[19]	valid_0's binary_logloss: 0.0884081
[20]	valid_0's binary_logloss: 0.081553
[21]	valid_0's binary_logloss: 0.0753731
[22]	valid_0's binary_logloss: 0.0699632
[23]	valid_0's binary_logloss: 0.0649231
[24

Train Accuracy score:  0.9975136352802068
Test Accuracy score:  0.995270320795633
[1]	valid_0's binary_logloss: 0.56818
[2]	valid_0's binary_logloss: 0.49513
[3]	valid_0's binary_logloss: 0.435086
[4]	valid_0's binary_logloss: 0.384746
[5]	valid_0's binary_logloss: 0.342128
[6]	valid_0's binary_logloss: 0.305577
[7]	valid_0's binary_logloss: 0.273488
[8]	valid_0's binary_logloss: 0.245597
[9]	valid_0's binary_logloss: 0.221151
[10]	valid_0's binary_logloss: 0.199674
[11]	valid_0's binary_logloss: 0.180635
[12]	valid_0's binary_logloss: 0.163926
[13]	valid_0's binary_logloss: 0.148914
[14]	valid_0's binary_logloss: 0.135554
[15]	valid_0's binary_logloss: 0.123608
[16]	valid_0's binary_logloss: 0.11294
[17]	valid_0's binary_logloss: 0.103432
[18]	valid_0's binary_logloss: 0.0949519
[19]	valid_0's binary_logloss: 0.0874612
[20]	valid_0's binary_logloss: 0.0805796
[21]	valid_0's binary_logloss: 0.0745228
[22]	valid_0's binary_logloss: 0.0689792
[23]	valid_0's binary_logloss: 0.0640649
[24]

Train Accuracy score:  0.99735007103866
Test Accuracy score:  0.9959806322558935
[1]	valid_0's binary_logloss: 0.568155
[2]	valid_0's binary_logloss: 0.495167
[3]	valid_0's binary_logloss: 0.435094
[4]	valid_0's binary_logloss: 0.384848
[5]	valid_0's binary_logloss: 0.34222
[6]	valid_0's binary_logloss: 0.305534
[7]	valid_0's binary_logloss: 0.273486
[8]	valid_0's binary_logloss: 0.245666
[9]	valid_0's binary_logloss: 0.221278
[10]	valid_0's binary_logloss: 0.199684
[11]	valid_0's binary_logloss: 0.180685
[12]	valid_0's binary_logloss: 0.164005
[13]	valid_0's binary_logloss: 0.148985
[14]	valid_0's binary_logloss: 0.135694
[15]	valid_0's binary_logloss: 0.123781
[16]	valid_0's binary_logloss: 0.113188
[17]	valid_0's binary_logloss: 0.10375
[18]	valid_0's binary_logloss: 0.0951997
[19]	valid_0's binary_logloss: 0.0876118
[20]	valid_0's binary_logloss: 0.0807305
[21]	valid_0's binary_logloss: 0.0747402
[22]	valid_0's binary_logloss: 0.0692475
[23]	valid_0's binary_logloss: 0.0643268
[24]

Train Accuracy score:  0.997382773979165
Test Accuracy score:  0.9959246242428774
[1]	valid_0's binary_logloss: 0.568202
[2]	valid_0's binary_logloss: 0.495263
[3]	valid_0's binary_logloss: 0.435282
[4]	valid_0's binary_logloss: 0.384964
[5]	valid_0's binary_logloss: 0.342386
[6]	valid_0's binary_logloss: 0.305768
[7]	valid_0's binary_logloss: 0.273834
[8]	valid_0's binary_logloss: 0.245851
[9]	valid_0's binary_logloss: 0.2214
[10]	valid_0's binary_logloss: 0.199849
[11]	valid_0's binary_logloss: 0.180826
[12]	valid_0's binary_logloss: 0.163966
[13]	valid_0's binary_logloss: 0.149124
[14]	valid_0's binary_logloss: 0.135784
[15]	valid_0's binary_logloss: 0.123947
[16]	valid_0's binary_logloss: 0.113352
[17]	valid_0's binary_logloss: 0.103947
[18]	valid_0's binary_logloss: 0.0953945
[19]	valid_0's binary_logloss: 0.0878088
[20]	valid_0's binary_logloss: 0.0810644
[21]	valid_0's binary_logloss: 0.0749348
[22]	valid_0's binary_logloss: 0.0695452
[23]	valid_0's binary_logloss: 0.0646267
[24

Train Accuracy score:  0.997476246337052
Test Accuracy score:  0.9953824871008748
[1]	valid_0's binary_logloss: 0.568109
[2]	valid_0's binary_logloss: 0.495011
[3]	valid_0's binary_logloss: 0.435052
[4]	valid_0's binary_logloss: 0.384818
[5]	valid_0's binary_logloss: 0.342117
[6]	valid_0's binary_logloss: 0.305267
[7]	valid_0's binary_logloss: 0.273553
[8]	valid_0's binary_logloss: 0.245584
[9]	valid_0's binary_logloss: 0.22109
[10]	valid_0's binary_logloss: 0.199509
[11]	valid_0's binary_logloss: 0.180608
[12]	valid_0's binary_logloss: 0.163782
[13]	valid_0's binary_logloss: 0.148858
[14]	valid_0's binary_logloss: 0.13546
[15]	valid_0's binary_logloss: 0.123592
[16]	valid_0's binary_logloss: 0.112997
[17]	valid_0's binary_logloss: 0.103539
[18]	valid_0's binary_logloss: 0.0950941
[19]	valid_0's binary_logloss: 0.087632
[20]	valid_0's binary_logloss: 0.0808345
[21]	valid_0's binary_logloss: 0.0747025
[22]	valid_0's binary_logloss: 0.0693266
[23]	valid_0's binary_logloss: 0.0644273
[24]

Train Accuracy score:  0.9973640795075877
Test Accuracy score:  0.9960367905481193
[1]	valid_0's binary_logloss: 0.568254
[2]	valid_0's binary_logloss: 0.4955
[3]	valid_0's binary_logloss: 0.435505
[4]	valid_0's binary_logloss: 0.385208
[5]	valid_0's binary_logloss: 0.342516
[6]	valid_0's binary_logloss: 0.305642
[7]	valid_0's binary_logloss: 0.27393
[8]	valid_0's binary_logloss: 0.246192
[9]	valid_0's binary_logloss: 0.221691
[10]	valid_0's binary_logloss: 0.200319
[11]	valid_0's binary_logloss: 0.181294
[12]	valid_0's binary_logloss: 0.164473
[13]	valid_0's binary_logloss: 0.149561
[14]	valid_0's binary_logloss: 0.1363
[15]	valid_0's binary_logloss: 0.124469
[16]	valid_0's binary_logloss: 0.113847
[17]	valid_0's binary_logloss: 0.104383
[18]	valid_0's binary_logloss: 0.0959088
[19]	valid_0's binary_logloss: 0.0883923
[20]	valid_0's binary_logloss: 0.0816111
[21]	valid_0's binary_logloss: 0.0755919
[22]	valid_0's binary_logloss: 0.0701202
[23]	valid_0's binary_logloss: 0.0652363
[24]	

[100]	valid_0's binary_logloss: 0.0138244
Train Accuracy score:  0.9974295101581085
Test Accuracy score:  0.9954011814850818
[1]	valid_0's binary_logloss: 0.56822
[2]	valid_0's binary_logloss: 0.495308
[3]	valid_0's binary_logloss: 0.435298
[4]	valid_0's binary_logloss: 0.384989
[5]	valid_0's binary_logloss: 0.342362
[6]	valid_0's binary_logloss: 0.305732
[7]	valid_0's binary_logloss: 0.273994
[8]	valid_0's binary_logloss: 0.24603
[9]	valid_0's binary_logloss: 0.221626
[10]	valid_0's binary_logloss: 0.200157
[11]	valid_0's binary_logloss: 0.181107
[12]	valid_0's binary_logloss: 0.164201
[13]	valid_0's binary_logloss: 0.149303
[14]	valid_0's binary_logloss: 0.135918
[15]	valid_0's binary_logloss: 0.124043
[16]	valid_0's binary_logloss: 0.113374
[17]	valid_0's binary_logloss: 0.103849
[18]	valid_0's binary_logloss: 0.0953535
[19]	valid_0's binary_logloss: 0.0877686
[20]	valid_0's binary_logloss: 0.0808586
[21]	valid_0's binary_logloss: 0.0747878
[22]	valid_0's binary_logloss: 0.0694072
[

[100]	valid_0's binary_logloss: 0.0129811
Train Accuracy score:  0.9974762581320571
Test Accuracy score:  0.9957936849189584
[1]	valid_0's binary_logloss: 0.568043
[2]	valid_0's binary_logloss: 0.494973
[3]	valid_0's binary_logloss: 0.435017
[4]	valid_0's binary_logloss: 0.384764
[5]	valid_0's binary_logloss: 0.34204
[6]	valid_0's binary_logloss: 0.305384
[7]	valid_0's binary_logloss: 0.273442
[8]	valid_0's binary_logloss: 0.245423
[9]	valid_0's binary_logloss: 0.220919
[10]	valid_0's binary_logloss: 0.199302
[11]	valid_0's binary_logloss: 0.180287
[12]	valid_0's binary_logloss: 0.163419
[13]	valid_0's binary_logloss: 0.148537
[14]	valid_0's binary_logloss: 0.135167
[15]	valid_0's binary_logloss: 0.123308
[16]	valid_0's binary_logloss: 0.112713
[17]	valid_0's binary_logloss: 0.103142
[18]	valid_0's binary_logloss: 0.0946842
[19]	valid_0's binary_logloss: 0.0871064
[20]	valid_0's binary_logloss: 0.0801972
[21]	valid_0's binary_logloss: 0.0741187
[22]	valid_0's binary_logloss: 0.0685872


Train Accuracy score:  0.9974341837760028
Test Accuracy score:  0.9957189860166006
[1]	valid_0's binary_logloss: 0.5682
[2]	valid_0's binary_logloss: 0.495369
[3]	valid_0's binary_logloss: 0.435436
[4]	valid_0's binary_logloss: 0.385127
[5]	valid_0's binary_logloss: 0.342585
[6]	valid_0's binary_logloss: 0.306021
[7]	valid_0's binary_logloss: 0.274362
[8]	valid_0's binary_logloss: 0.246477
[9]	valid_0's binary_logloss: 0.222159
[10]	valid_0's binary_logloss: 0.200614
[11]	valid_0's binary_logloss: 0.181702
[12]	valid_0's binary_logloss: 0.164866
[13]	valid_0's binary_logloss: 0.149905
[14]	valid_0's binary_logloss: 0.136701
[15]	valid_0's binary_logloss: 0.124803
[16]	valid_0's binary_logloss: 0.114143
[17]	valid_0's binary_logloss: 0.104544
[18]	valid_0's binary_logloss: 0.0960448
[19]	valid_0's binary_logloss: 0.0883948
[20]	valid_0's binary_logloss: 0.0814859
[21]	valid_0's binary_logloss: 0.0753052
[22]	valid_0's binary_logloss: 0.0699287
[23]	valid_0's binary_logloss: 0.0649501
[2

Train Accuracy score:  0.997476246337052
Test Accuracy score:  0.9953637927166679
[1]	valid_0's binary_logloss: 0.568137
[2]	valid_0's binary_logloss: 0.495162
[3]	valid_0's binary_logloss: 0.435229
[4]	valid_0's binary_logloss: 0.384867
[5]	valid_0's binary_logloss: 0.34227
[6]	valid_0's binary_logloss: 0.305225
[7]	valid_0's binary_logloss: 0.273222
[8]	valid_0's binary_logloss: 0.245399
[9]	valid_0's binary_logloss: 0.220921
[10]	valid_0's binary_logloss: 0.199456
[11]	valid_0's binary_logloss: 0.180536
[12]	valid_0's binary_logloss: 0.163786
[13]	valid_0's binary_logloss: 0.148889
[14]	valid_0's binary_logloss: 0.135614
[15]	valid_0's binary_logloss: 0.123768
[16]	valid_0's binary_logloss: 0.113124
[17]	valid_0's binary_logloss: 0.103618
[18]	valid_0's binary_logloss: 0.0951612
[19]	valid_0's binary_logloss: 0.0875897
[20]	valid_0's binary_logloss: 0.0807274
[21]	valid_0's binary_logloss: 0.0746916
[22]	valid_0's binary_logloss: 0.0690872
[23]	valid_0's binary_logloss: 0.0640986
[2

[99]	valid_0's binary_logloss: 0.0131624
[100]	valid_0's binary_logloss: 0.0131336
Train Accuracy score:  0.997490267190735
Test Accuracy score:  0.9957002916323936
[1]	valid_0's binary_logloss: 0.568249
[2]	valid_0's binary_logloss: 0.495266
[3]	valid_0's binary_logloss: 0.435287
[4]	valid_0's binary_logloss: 0.385073
[5]	valid_0's binary_logloss: 0.342313
[6]	valid_0's binary_logloss: 0.305245
[7]	valid_0's binary_logloss: 0.273483
[8]	valid_0's binary_logloss: 0.245581
[9]	valid_0's binary_logloss: 0.221236
[10]	valid_0's binary_logloss: 0.199808
[11]	valid_0's binary_logloss: 0.180958
[12]	valid_0's binary_logloss: 0.164007
[13]	valid_0's binary_logloss: 0.149028
[14]	valid_0's binary_logloss: 0.135666
[15]	valid_0's binary_logloss: 0.123775
[16]	valid_0's binary_logloss: 0.113168
[17]	valid_0's binary_logloss: 0.103672
[18]	valid_0's binary_logloss: 0.0951919
[19]	valid_0's binary_logloss: 0.087695
[20]	valid_0's binary_logloss: 0.0809028
[21]	valid_0's binary_logloss: 0.0747053
[

[100]	valid_0's binary_logloss: 0.0128413
Train Accuracy score:  0.9974108156865311
Test Accuracy score:  0.9957189860166006
[1]	valid_0's binary_logloss: 0.568088
[2]	valid_0's binary_logloss: 0.49502
[3]	valid_0's binary_logloss: 0.434976
[4]	valid_0's binary_logloss: 0.384474
[5]	valid_0's binary_logloss: 0.341783
[6]	valid_0's binary_logloss: 0.305205
[7]	valid_0's binary_logloss: 0.273616
[8]	valid_0's binary_logloss: 0.245606
[9]	valid_0's binary_logloss: 0.221274
[10]	valid_0's binary_logloss: 0.199723
[11]	valid_0's binary_logloss: 0.180757
[12]	valid_0's binary_logloss: 0.163946
[13]	valid_0's binary_logloss: 0.149169
[14]	valid_0's binary_logloss: 0.13587
[15]	valid_0's binary_logloss: 0.123998
[16]	valid_0's binary_logloss: 0.113373
[17]	valid_0's binary_logloss: 0.103884
[18]	valid_0's binary_logloss: 0.095431
[19]	valid_0's binary_logloss: 0.087875
[20]	valid_0's binary_logloss: 0.0811709
[21]	valid_0's binary_logloss: 0.0749471
[22]	valid_0's binary_logloss: 0.0694224
[23

Train Accuracy score:  0.9974482165557467
Test Accuracy score:  0.9957562954515713


In [58]:
print("Training Score: ",np.mean(train_score))
print("Test Score: ",np.mean(test_score))

Training Score:  0.9974068922934345
Test Score:  0.995701397491665


In [59]:
predictions = pd.DataFrame(sub_list)

In [61]:
predictions.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,127351,127352,127353,127354,127355,127356,127357,127358,127359,127360
0,0,0,0,1,0,0,1,0,1,0,...,0,1,0,0,1,0,1,0,0,0
1,0,0,0,1,0,0,1,0,1,0,...,0,1,0,0,1,0,1,0,0,0
2,0,0,0,1,0,0,1,0,1,0,...,0,1,0,0,1,0,1,0,0,0


In [None]:
#predictions = np.where(np.mean(predictions,axis=0)> 0.45, 1, 0)

In [62]:
predictions1 = predictions.T

In [63]:
predictions1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127356,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
127357,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
127358,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
127359,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
