In [25]:
import pandas as pd
import numpy as np
import os
import csv
from datetime import datetime 

#plot
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
%matplotlib inline

#model
import time
from sklearn.svm import OneClassSVM
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score,accuracy_score
from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report
from sklearn.metrics import matthews_corrcoef,cohen_kappa_score,balanced_accuracy_score

In [28]:
folder_path = './dataset/'

# os.listdir() returns entries in arbitrary, platform-dependent order. Sorting
# the names makes the concatenation order (and therefore the row order of
# `data` and the position of each frame inside `dfs`) reproducible across runs.
csv_files = sorted(
    filename for filename in os.listdir(folder_path) if filename.endswith(".csv")
)

dfs = []  # one DataFrame per CSV file, in sorted filename order
for filename in csv_files:
    file_path = os.path.join(folder_path, filename)
    df = pd.read_csv(file_path)
    dfs.append(df)

# merge DataFrame: stack all files into one frame with a fresh 0..N-1 index
data = pd.concat(dfs, ignore_index=True)

# preprocessing 

In [29]:
data

Unnamed: 0,datetime,latitude,longitude,pressure,salinity,temperature,label
0,-1.727837,-0.525536,-0.258923,2.746968,0.520970,-1.427503,0
1,-1.727837,-0.525536,-0.258923,2.547744,0.485044,-1.407491,0
2,-1.727837,-0.525536,-0.258923,2.375234,0.449119,-1.388803,0
3,-1.727837,-0.525536,-0.258923,2.207051,0.416187,-1.368130,0
4,-1.727837,-0.525536,-0.258923,2.049026,0.383256,-1.354899,0
...,...,...,...,...,...,...,...
1252904,2.761838,1.380304,-4.159339,-1.288014,0.886180,0.349335,0
1252905,2.761838,1.380304,-4.159339,-1.291476,0.886180,0.348190,0
1252906,2.761838,1.380304,-4.159339,-1.294650,0.886180,0.348190,0
1252907,2.761838,1.380304,-4.159339,-1.297535,0.886180,0.348190,0


In [30]:
# Share of rows labelled as anomalies (label == 1), expressed as a percentage.
anomaly_data = data[data['label'].eq(1)]
anomaly_ratio = len(anomaly_data) / len(data) * 100
anomaly_ratio

6.939610139283858

In [31]:
#check
data.isna().sum()

datetime       0
latitude       0
longitude      0
pressure       0
salinity       0
temperature    0
label          0
dtype: int64

In [32]:
len(dfs)

11

In [33]:
# Expose every per-file frame as a notebook-level variable df_1 ... df_N
# (N = len(dfs)); each one is an independent copy of the matching entry in `dfs`.
# NOTE(review): names created through globals() depend on the order of `dfs` and
# are invisible to linters; a dict keyed by file name would be easier to follow.
for i, df in enumerate(dfs):
    globals()[f'df_{i+1}'] = df.copy()

In [34]:
df_1

Unnamed: 0,datetime,latitude,longitude,pressure,salinity,temperature,label
0,-1.727837,-0.525536,-0.258923,2.746968,0.520970,-1.427503,0
1,-1.727837,-0.525536,-0.258923,2.547744,0.485044,-1.407491,0
2,-1.727837,-0.525536,-0.258923,2.375234,0.449119,-1.388803,0
3,-1.727837,-0.525536,-0.258923,2.207051,0.416187,-1.368130,0
4,-1.727837,-0.525536,-0.258923,2.049026,0.383256,-1.354899,0
...,...,...,...,...,...,...,...
29152,1.728131,0.670699,-0.383230,-0.969808,0.413194,0.857274,0
29153,1.728131,0.670699,-0.383230,-0.982601,0.383256,0.872324,0
29154,1.728131,0.670699,-0.383230,-0.996334,0.386250,0.874474,0
29155,1.728131,0.670699,-0.383230,-1.009503,0.386250,0.877451,0


In [35]:
df_11

Unnamed: 0,datetime,latitude,longitude,pressure,salinity,temperature,label
0,-1.450859,-0.617060,0.244396,1.121664,-1.174308,-0.929796,0
1,-1.450859,-0.617060,0.244396,1.116182,-1.163079,-0.929796,0
2,-1.450859,-0.617060,0.244396,1.110123,-1.151850,-0.925216,0
3,-1.450859,-0.617060,0.244396,1.104642,-1.146236,-0.924643,0
4,-1.450859,-0.617060,0.244396,1.099160,-1.129393,-0.918345,0
...,...,...,...,...,...,...,...
317431,2.761838,1.380304,-4.159339,-1.288014,0.886180,0.349335,0
317432,2.761838,1.380304,-4.159339,-1.291476,0.886180,0.348190,0
317433,2.761838,1.380304,-4.159339,-1.294650,0.886180,0.348190,0
317434,2.761838,1.380304,-4.159339,-1.297535,0.886180,0.348190,0


# split train and test

In [36]:
#split the data and make sure the proportion in train and test set are equal
def getRandomSplit(data, size):
    """Stratified random split of ``data`` into a train and a test frame.

    The last column of ``data`` is passed to the splitter as the stratification
    target, so both parts keep approximately the same class proportions.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to split; its last column supplies the stratification labels.
    size : float or int
        Forwarded unchanged to ``StratifiedShuffleSplit(test_size=...)``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_set, test_set)`` row subsets of ``data``. Their lengths are
        printed before returning.
    """
    from sklearn.model_selection import StratifiedShuffleSplit

    splitter = StratifiedShuffleSplit(n_splits=1, test_size=size, random_state=42)
    # n_splits=1, so the generator yields exactly one (train, test) index pair.
    train_index, test_index = next(splitter.split(data, data.iloc[:, -1]))

    train_set = data.iloc[train_index, :]
    test_set = data.iloc[test_index, :]
    print(len(train_set), len(test_set))
    return train_set, test_set

In [37]:
#split the last 10% data 

# window
def sliding_window(data, window_size, target_ratio=None, low_factor=0.85,
                   high_factor=1.15, step=10):
    """Find the latest window of rows whose anomaly rate is close to a target.

    Candidate windows of ``window_size`` consecutive rows are examined starting
    at the end of ``data`` and moving backwards ``step`` rows at a time. The
    first window whose percentage of ``label == 1`` rows lies within
    ``[target_ratio * low_factor, target_ratio * high_factor]`` is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``label`` column; rows are taken in their current order.
    window_size : int
        Number of rows in each candidate window.
    target_ratio : float, optional
        Desired anomaly percentage (0-100). When omitted, the notebook-level
        ``anomaly_ratio`` is used, which is what this function always did
        before the parameter existed.
    low_factor, high_factor : float
        Multipliers of ``target_ratio`` that bound the accepted range.
    step : int
        How many rows the window end moves back between candidates.

    Returns
    -------
    pandas.DataFrame or None
        The matching positional slice of ``data``, or ``None`` when
        ``window_size`` is not in ``1..len(data)`` or no candidate window falls
        inside the accepted range.

    Raises
    ------
    ValueError
        If ``step`` is not a positive integer.
    """
    if step < 1:
        raise ValueError("step must be a positive integer")
    if target_ratio is None:
        target_ratio = anomaly_ratio  # notebook-level global computed from `data`

    n_rows = len(data)
    if window_size <= 0 or window_size > n_rows:
        # No window of this size exists (also avoids dividing by zero below).
        return None

    lower = target_ratio * low_factor
    upper = target_ratio * high_factor

    # Prefix sums of the anomaly flags: the anomaly count of any window is then
    # prefix[end + 1] - prefix[start], instead of re-summing window_size rows
    # for every candidate position.
    is_anomaly = (data["label"] == 1).to_numpy()
    prefix = np.concatenate(([0], np.cumsum(is_anomaly)))

    # Walk from the last row backwards so the latest matching window wins.
    for end in range(n_rows - 1, window_size - 2, -step):
        start = end - window_size + 1
        ratio = (prefix[end + 1] - prefix[start]) / window_size * 100
        if lower <= ratio <= upper:
            return data.iloc[start:end + 1]
    return None


def getLastSplit(data, size):
    """Chronological split: train on earlier rows, test on a late window.

    The rows are ordered by ``datetime``; ``sliding_window`` then picks a test
    window of ``int(size * len(data))`` consecutive rows, searching from the
    end. The training set is every row whose ``datetime`` is strictly earlier
    than the first row of that window.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``datetime`` and ``label`` columns. It is not modified: a
        sorted copy is used (the previous in-place sort silently reordered the
        caller's frame).
    size : float
        Fraction of the rows to use as the test window.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_set, test_set)``. Their lengths are printed before returning.

    Raises
    ------
    ValueError
        If ``sliding_window`` finds no suitable test window.
    """
    ordered = data.sort_values("datetime", ascending=True)
    window_size = int(size * len(ordered))

    test_set = sliding_window(ordered, window_size)
    if test_set is None:
        # Previously this surfaced as an AttributeError on None further down.
        raise ValueError(
            f"no window of {window_size} rows has an anomaly rate close enough "
            "to the target; cannot build the test set"
        )

    # Look the timestamp up by column name instead of assuming "datetime" is
    # the first column; the name also no longer shadows the `time` module.
    split_time = test_set["datetime"].iloc[0]
    train_set = ordered[ordered["datetime"] < split_time]
    print(len(train_set), len(test_set))
    return train_set, test_set
# The test window is chosen so that its anomaly rate stays close to the anomaly rate of the full data.

In [38]:
# Split 1: stratified random hold-out, 10% of all rows as test set.
train_set1,test_set1 = getRandomSplit(data,0.1)
# Split 2: datetime-ordered split whose test window spans 10% of the rows.
train_set2,test_set2 = getLastSplit(data,0.1)

1127618 125291
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.587516960651289
1.58751696065

1.7255966158512253
1.7255966158512253
1.7255966158512253
1.7255966158512253
1.7255966158512253
1.7255966158512253
1.7263947641471786
1.7335780988107592
1.7407614334743395
1.7407614334743395
1.7407614334743395
1.7407614334743395
1.741559581770293
1.741559581770293
1.741559581770293
1.7431558783621997
1.7511373613217334
1.7591188442812675
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7671003272408015
1.7694947721286616
1.7694947721286616
1.7694947721286616
1.7694947721286616
1.7694947721286616
1.7694947721286616
1.770292920424615
1.771091068720568
1.771091068720568
1.771091068720568
1.771091068720568
1.771091068720568
1.771091068720568
1.7

2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0145262989863517
2.0161225955782585
2.0241040785377926
2.031287413201373
2.0320855614973263
2.0328837097932797
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.0344800063851864
2.03448000638

2.2372096735573472
2.2372096735573472
2.2372096735573472
2.2372096735573472
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2388059701492535
2.2404022667411603
2.2404022667411603
2.2412004150371136
2.2412004150371136
2.2412004150

2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652566046
2.5923856652

2.7975097773166255
2.7975097773166255
2.7975097773166255
2.7975097773166255
2.7975097773166255
2.7975097773166255
2.7975097773166255
2.7975097773166255
2.7975097773166255
2.7975097773166255
2.7975097773166255
2.7975097773166255
2.7975097773166255
2.798307925612579
2.798307925612579
2.798307925612579
2.798307925612579
2.798307925612579
2.798307925612579
2.798307925612579
2.803096815388299
2.8110782983478333
2.81746348471546
2.8198579296033204
2.8198579296033204
2.8238486710830872
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.829435709154761
2.8294357091

2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.932396839332748
2.9331949876287013
2.9331949876287013
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.9339931359246547
2.934791284220608
2.941974618884189
2.9483598052518154
2.9515523984356293
2.9571394365073034
2.96113017798707
2.96352462287493
2.9699098092425573
2.9699098092425573
2.9699098092425573
2.9699098092425573
2.9699098092425573
2.9699098092425573
2.9699098092425573
2.9699098092425573
2.969909809242557

3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627823446
3.1790246627

3.427248782823849
3.427248782823849
3.427248782823849
3.427248782823849
3.427248782823849
3.427248782823849
3.427248782823849
3.427248782823849
3.427248782823849
3.426450634527895
3.425652486231942
3.417671003272408
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.4128821134966874
3.413680261792641
3.4168728549764547
3.4168728549764547
3.4224598930481283
3.4304413760076624
3.438422858967196
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.4440098970388697
3.44400

3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6132173357809885
3.6108228908931275
3.606832149413361
3.606832149413361
3.606832149413361
3.606832149413361
3.606832149413361
3.606832149413361
3.606832149413361
3.606832149413361
3.606832149413361
3.606832149413361
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.6060340011174077
3.607630297709315
3.6132173357809885
3.606832149413361
3.6100247425971745
3.614813632372895
3.6227

3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.749700694389017
3.744911804613297
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.742517359725437
3.74251735

3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.948439620081411
3.94843962

5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.748264027456301
5.745869582568441
5.737888099608908
5.735493654721047
5.730704764945327
5.722723281985793
5.7147417990262595
5.709952909250539
5.708356612658632
5.708356612658632
5.701971426291005
5.693989943331471
5.686008460371937
5.6844121637800304
5.6844121637800304
5.6844121637800304
5.6844121637800304
5.6844121637800304
5.6836140154840775
5.682815867188124
5.682815867188124
5.682815867188124
5.682815867188124
5.682815867188124
5.688402905259797
5.6963843882193315
5.697182536515284
5.697980684811238
5.697980684811238
5.697980684811238
5.697980684811238
5.697980684811238
5.697980684811238
5.703567722882911
5.711549205842446
5.719530688801979
5.725117726873654
5.

In [39]:
#keep the same train size
# Down-sample the random-split training set to the size of the chronological
# one. A fixed random_state makes the subset, and every metric computed from
# it, reproducible; without it each execution drew a different sample.
train_set1 = train_set1.sample(n=len(train_set2), random_state=42)
len(train_set1)

1030712

In [40]:
# Anomaly percentage (label == 1) in the random-split test set.
instance = test_set1.loc[test_set1['label'] == 1]
rate = len(instance) / len(test_set1) * 100
print(rate)

6.939844043067739


In [41]:
# Anomaly percentage (label == 1) in the chronological test window.
instance = test_set2.loc[test_set2['label'] == 1]
rate = len(instance) / len(test_set2) * 100
print(rate)

5.901508500279352


# supervised

In [42]:
# metric
def computeMetric(y_tru, y_pre, plot=False):
    """Print a set of binary-classification metrics and return three of them.

    Accuracy, balanced accuracy, precision, recall, the confusion matrix, F1,
    Matthews correlation coefficient and Cohen's kappa are computed with the
    scikit-learn functions imported at the top of the notebook and printed.

    Parameters
    ----------
    y_tru : array-like
        Ground-truth labels.
    y_pre : array-like
        Predicted labels.
    plot : bool, default False
        When True, also draw the confusion matrix, normalised by the total
        number of samples, as a seaborn heatmap. The plotting code used to sit
        after the ``return`` statement and could never run; it is now reachable
        but opt-in, so existing callers see no change.

    Returns
    -------
    list of float
        ``[precision, recall, f1]``.
    """
    acc = accuracy_score(y_tru, y_pre)
    pre = precision_score(y_tru, y_pre)
    recall = recall_score(y_tru, y_pre)
    cm = confusion_matrix(y_tru, y_pre)
    f1 = f1_score(y_tru, y_pre)
    mcc = matthews_corrcoef(y_tru, y_pre)
    kappa = cohen_kappa_score(y_tru, y_pre)
    bac = balanced_accuracy_score(y_tru, y_pre)

    print("acc:",acc)
    print("balanced acc:",bac)
    print("precision:",pre)
    print("recall:",recall)
    print("cm:",cm)
    print("f1:",f1)
    print("MCC:", mcc)
    print("Kappa:",kappa)

    if plot:
        # confusion matrix heatmap; reuses `cm` instead of recomputing it
        cmap1 = sns.diverging_palette(260,-10,s=50, l=75, n=5, as_cmap=True)
        fig, ax = plt.subplots(figsize=(12,8))
        sns.heatmap(cm/np.sum(cm), cmap = cmap1, annot = True,
                    annot_kws = {'size':15}, ax = ax)

    metric = [pre, recall, f1]
    return metric

In [43]:
def classSuper(train_set, test_set, catboost_verbose=None):
    """Fit four supervised classifiers and score each one on ``test_set``.

    The models are, in order: LightGBM, scikit-learn GradientBoosting, CatBoost
    (Logloss) and k-nearest neighbours. Each is trained on all columns of
    ``train_set`` except the last and evaluated with ``computeMetric`` against
    the last column of ``test_set``.

    Parameters
    ----------
    train_set : pandas.DataFrame
        Training rows; the last column is the target.
    test_set : pandas.DataFrame
        Evaluation rows with the same column layout as ``train_set``.
    catboost_verbose : bool or int, optional
        Forwarded to ``CatBoostClassifier(verbose=...)`` when given, e.g.
        ``False`` to silence the per-iteration log. The default (``None``)
        leaves CatBoost's own default logging untouched.

    Returns
    -------
    list of list of float
        One ``[precision, recall, f1]`` entry per model, in the order above.
    """
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.neighbors import KNeighborsClassifier

    # Take the column layout from the frame that was passed in; the previous
    # version read the shape of the notebook-level `data` instead.
    n_cols = train_set.shape[1]
    X_train = train_set.iloc[:, 0:n_cols - 1]
    y_train = train_set.iloc[:, n_cols - 1]
    X_test = test_set.iloc[:, 0:n_cols - 1]
    y_true = test_set.iloc[:, n_cols - 1]

    cat_kwargs = {} if catboost_verbose is None else {"verbose": catboost_verbose}
    candidates = [
        ("lightgbm", lgb.LGBMClassifier()),
        ("GradientBoosting", GradientBoostingClassifier()),
        ("catboost", CatBoostClassifier(loss_function='Logloss', **cat_kwargs)),
        ("KNN", KNeighborsClassifier(n_jobs=-1)),
    ]

    # Named `results` so the imported `sklearn.metrics` module is not shadowed.
    results = []
    for name, clf in candidates:
        print(name)
        clf.fit(X_train, y_train)
        results.append(computeMetric(y_true, clf.predict(X_test)))
    return results
    

In [44]:
# Train and evaluate all four models on the random (stratified) split.
metrics1=classSuper(train_set1,test_set1)

lightgbm
acc: 0.9982121620866623
balanced acc: 0.9912166660080899
precision: 0.991072463768116
recall: 0.9830937320299022
cm: [[116519     77]
 [   147   8548]]
f1: 0.987066974595843
MCC: 0.9861159920502729
Kappa: 0.9861066978707577
GradientBoosting
acc: 0.9965999153969559
balanced acc: 0.9780043147863693
precision: 0.994380007174459
recall: 0.956411730879816
cm: [[116549     47]
 [   379   8316]]
f1: 0.9750263805838904
MCC: 0.973415197513034
Kappa: 0.9732028984378841
catboost
Learning rate set to 0.199317
0:	learn: 0.3474916	total: 158ms	remaining: 2m 38s
1:	learn: 0.1950704	total: 239ms	remaining: 1m 59s
2:	learn: 0.1337258	total: 315ms	remaining: 1m 44s
3:	learn: 0.0919467	total: 400ms	remaining: 1m 39s
4:	learn: 0.0730382	total: 482ms	remaining: 1m 35s
5:	learn: 0.0605283	total: 548ms	remaining: 1m 30s
6:	learn: 0.0526871	total: 616ms	remaining: 1m 27s
7:	learn: 0.0449021	total: 694ms	remaining: 1m 26s
8:	learn: 0.0385732	total: 757ms	remaining: 1m 23s
9:	learn: 0.0354250	total: 83

151:	learn: 0.0069295	total: 11s	remaining: 1m 1s
152:	learn: 0.0069087	total: 11.1s	remaining: 1m 1s
153:	learn: 0.0068936	total: 11.2s	remaining: 1m 1s
154:	learn: 0.0068843	total: 11.3s	remaining: 1m 1s
155:	learn: 0.0068754	total: 11.3s	remaining: 1m 1s
156:	learn: 0.0068700	total: 11.4s	remaining: 1m 1s
157:	learn: 0.0068669	total: 11.5s	remaining: 1m 1s
158:	learn: 0.0068657	total: 11.6s	remaining: 1m 1s
159:	learn: 0.0068572	total: 11.6s	remaining: 1m 1s
160:	learn: 0.0068469	total: 11.7s	remaining: 1m 1s
161:	learn: 0.0068426	total: 11.8s	remaining: 1m
162:	learn: 0.0068362	total: 11.9s	remaining: 1m
163:	learn: 0.0068203	total: 11.9s	remaining: 1m
164:	learn: 0.0067961	total: 12s	remaining: 1m
165:	learn: 0.0067892	total: 12.1s	remaining: 1m
166:	learn: 0.0067892	total: 12.1s	remaining: 1m
167:	learn: 0.0067892	total: 12.2s	remaining: 1m
168:	learn: 0.0067839	total: 12.3s	remaining: 1m
169:	learn: 0.0067829	total: 12.3s	remaining: 1m
170:	learn: 0.0067721	total: 12.4s	remainin

311:	learn: 0.0065394	total: 19.9s	remaining: 43.9s
312:	learn: 0.0065394	total: 20s	remaining: 43.8s
313:	learn: 0.0065394	total: 20s	remaining: 43.7s
314:	learn: 0.0065394	total: 20.1s	remaining: 43.6s
315:	learn: 0.0065394	total: 20.1s	remaining: 43.5s
316:	learn: 0.0065394	total: 20.2s	remaining: 43.5s
317:	learn: 0.0065394	total: 20.2s	remaining: 43.4s
318:	learn: 0.0065394	total: 20.3s	remaining: 43.3s
319:	learn: 0.0065394	total: 20.3s	remaining: 43.2s
320:	learn: 0.0065394	total: 20.4s	remaining: 43.1s
321:	learn: 0.0065394	total: 20.4s	remaining: 43s
322:	learn: 0.0065394	total: 20.5s	remaining: 42.9s
323:	learn: 0.0065394	total: 20.5s	remaining: 42.9s
324:	learn: 0.0065394	total: 20.6s	remaining: 42.8s
325:	learn: 0.0065394	total: 20.7s	remaining: 42.7s
326:	learn: 0.0065322	total: 20.7s	remaining: 42.7s
327:	learn: 0.0065250	total: 20.8s	remaining: 42.6s
328:	learn: 0.0065150	total: 20.9s	remaining: 42.5s
329:	learn: 0.0065107	total: 20.9s	remaining: 42.5s
330:	learn: 0.0064

471:	learn: 0.0063431	total: 28.6s	remaining: 32s
472:	learn: 0.0063431	total: 28.7s	remaining: 31.9s
473:	learn: 0.0063431	total: 28.7s	remaining: 31.9s
474:	learn: 0.0063431	total: 28.8s	remaining: 31.8s
475:	learn: 0.0063431	total: 28.8s	remaining: 31.7s
476:	learn: 0.0063431	total: 28.9s	remaining: 31.7s
477:	learn: 0.0063431	total: 28.9s	remaining: 31.6s
478:	learn: 0.0063431	total: 29s	remaining: 31.5s
479:	learn: 0.0063431	total: 29s	remaining: 31.4s
480:	learn: 0.0063431	total: 29.1s	remaining: 31.4s
481:	learn: 0.0063431	total: 29.1s	remaining: 31.3s
482:	learn: 0.0063431	total: 29.2s	remaining: 31.2s
483:	learn: 0.0063431	total: 29.2s	remaining: 31.2s
484:	learn: 0.0063431	total: 29.3s	remaining: 31.1s
485:	learn: 0.0063431	total: 29.3s	remaining: 31s
486:	learn: 0.0063431	total: 29.4s	remaining: 31s
487:	learn: 0.0063431	total: 29.5s	remaining: 30.9s
488:	learn: 0.0063431	total: 29.5s	remaining: 30.8s
489:	learn: 0.0063431	total: 29.6s	remaining: 30.8s
490:	learn: 0.0063431	

631:	learn: 0.0063340	total: 36.1s	remaining: 21s
632:	learn: 0.0063340	total: 36.2s	remaining: 21s
633:	learn: 0.0063340	total: 36.2s	remaining: 20.9s
634:	learn: 0.0063340	total: 36.2s	remaining: 20.8s
635:	learn: 0.0063340	total: 36.3s	remaining: 20.8s
636:	learn: 0.0063340	total: 36.3s	remaining: 20.7s
637:	learn: 0.0063340	total: 36.4s	remaining: 20.6s
638:	learn: 0.0063340	total: 36.4s	remaining: 20.6s
639:	learn: 0.0063340	total: 36.5s	remaining: 20.5s
640:	learn: 0.0063340	total: 36.5s	remaining: 20.4s
641:	learn: 0.0063340	total: 36.6s	remaining: 20.4s
642:	learn: 0.0063340	total: 36.6s	remaining: 20.3s
643:	learn: 0.0063340	total: 36.6s	remaining: 20.3s
644:	learn: 0.0063340	total: 36.7s	remaining: 20.2s
645:	learn: 0.0063340	total: 36.7s	remaining: 20.1s
646:	learn: 0.0063340	total: 36.8s	remaining: 20.1s
647:	learn: 0.0063340	total: 36.8s	remaining: 20s
648:	learn: 0.0063340	total: 36.9s	remaining: 19.9s
649:	learn: 0.0063340	total: 36.9s	remaining: 19.9s
650:	learn: 0.0063

790:	learn: 0.0063248	total: 43.8s	remaining: 11.6s
791:	learn: 0.0063248	total: 43.9s	remaining: 11.5s
792:	learn: 0.0063248	total: 43.9s	remaining: 11.5s
793:	learn: 0.0063248	total: 44s	remaining: 11.4s
794:	learn: 0.0063248	total: 44s	remaining: 11.4s
795:	learn: 0.0063248	total: 44.1s	remaining: 11.3s
796:	learn: 0.0063248	total: 44.1s	remaining: 11.2s
797:	learn: 0.0063248	total: 44.2s	remaining: 11.2s
798:	learn: 0.0063248	total: 44.2s	remaining: 11.1s
799:	learn: 0.0063248	total: 44.3s	remaining: 11.1s
800:	learn: 0.0063248	total: 44.3s	remaining: 11s
801:	learn: 0.0063248	total: 44.4s	remaining: 11s
802:	learn: 0.0063248	total: 44.4s	remaining: 10.9s
803:	learn: 0.0063248	total: 44.5s	remaining: 10.8s
804:	learn: 0.0063248	total: 44.6s	remaining: 10.8s
805:	learn: 0.0063248	total: 44.6s	remaining: 10.7s
806:	learn: 0.0063248	total: 44.7s	remaining: 10.7s
807:	learn: 0.0063248	total: 44.7s	remaining: 10.6s
808:	learn: 0.0063248	total: 44.8s	remaining: 10.6s
809:	learn: 0.006324

950:	learn: 0.0063248	total: 52s	remaining: 2.68s
951:	learn: 0.0063248	total: 52s	remaining: 2.62s
952:	learn: 0.0063248	total: 52s	remaining: 2.57s
953:	learn: 0.0063248	total: 52.1s	remaining: 2.51s
954:	learn: 0.0063248	total: 52.1s	remaining: 2.46s
955:	learn: 0.0063248	total: 52.2s	remaining: 2.4s
956:	learn: 0.0063248	total: 52.2s	remaining: 2.35s
957:	learn: 0.0063248	total: 52.3s	remaining: 2.29s
958:	learn: 0.0063248	total: 52.3s	remaining: 2.24s
959:	learn: 0.0063248	total: 52.4s	remaining: 2.18s
960:	learn: 0.0063248	total: 52.4s	remaining: 2.13s
961:	learn: 0.0063248	total: 52.5s	remaining: 2.07s
962:	learn: 0.0063248	total: 52.5s	remaining: 2.02s
963:	learn: 0.0063248	total: 52.5s	remaining: 1.96s
964:	learn: 0.0063248	total: 52.6s	remaining: 1.91s
965:	learn: 0.0063248	total: 52.6s	remaining: 1.85s
966:	learn: 0.0063248	total: 52.7s	remaining: 1.8s
967:	learn: 0.0063248	total: 52.7s	remaining: 1.74s
968:	learn: 0.0063248	total: 52.8s	remaining: 1.69s
969:	learn: 0.006324

In [45]:
metrics1

[[0.991072463768116, 0.9830937320299022, 0.987066974595843],
 [0.994380007174459, 0.956411730879816, 0.9750263805838904],
 [0.9969700501107097, 0.9838987924094307, 0.9903912942810836],
 [0.9920921037329922, 0.9811385853939045, 0.9865849427547126]]

In [51]:
# Carve a stratified 13% hold-out (test_set22) out of the chronological training set;
# train_set22 is the remaining 87%.
train_set22,test_set22=getRandomSplit(train_set2, 0.13)
train_set22

896719 133993


Unnamed: 0,datetime,latitude,longitude,pressure,salinity,temperature,label
109808,-0.542553,-0.939366,-0.194388,0.141589,0.712054,-0.725296,0
718577,-0.367039,0.495542,1.114812,0.123602,0.980504,-0.505567,1
1139011,0.245823,-0.477607,0.082319,-0.536432,1.178129,0.666542,0
837220,-1.559574,0.660886,-1.364975,-0.447471,-0.780539,0.790271,0
126979,0.921607,-0.918877,-0.326422,0.631437,0.848241,-0.798081,0
...,...,...,...,...,...,...,...
904813,0.714793,0.606526,0.949442,2.997420,1.433479,-1.294401,0
605492,-0.567880,0.702072,0.335692,1.642069,0.644231,-1.078659,0
1045082,-0.532770,-0.229959,0.607982,1.463554,-1.337126,-0.995642,0
273718,-1.243217,-0.835997,-1.562090,-0.274166,1.176303,0.902356,0


In [52]:
# Anomaly percentage (label == 1) in the 13% hold-out taken from train_set2.
instance = test_set22.loc[test_set22['label'] == 1]
rate = len(instance) / len(test_set22) * 100
print(rate)

7.623532572597076


In [53]:
# Train on the reduced chronological training set and evaluate on the chronological test window.
metrics3=classSuper(train_set22,test_set2)

lightgbm
acc: 0.9240162822252375
balanced acc: 0.7692277954562778
precision: 0.4025307170364937
recall: 0.5937246416012983
cm: [[111380   6516]
 [  3004   4390]]
f1: 0.47978142076502733
MCC: 0.4501152866531991
Kappa: 0.4404201117084271
GradientBoosting
acc: 0.9663341048766861
balanced acc: 0.7645864886551397
precision: 0.8344566133108677
recall: 0.5358398701649987
cm: [[117110    786]
 [  3432   3962]]
f1: 0.6526107725251195
MCC: 0.6530739566048634
Kappa: 0.6358013884596886
catboost
Learning rate set to 0.18781
0:	learn: 0.3808596	total: 82.6ms	remaining: 1m 22s
1:	learn: 0.2182577	total: 140ms	remaining: 1m 9s
2:	learn: 0.1481522	total: 191ms	remaining: 1m 3s
3:	learn: 0.1017503	total: 242ms	remaining: 1m
4:	learn: 0.0734599	total: 297ms	remaining: 59s
5:	learn: 0.0571694	total: 360ms	remaining: 59.6s
6:	learn: 0.0455613	total: 414ms	remaining: 58.8s
7:	learn: 0.0393450	total: 465ms	remaining: 57.6s
8:	learn: 0.0362531	total: 522ms	remaining: 57.5s
9:	learn: 0.0322501	total: 579ms	rem

153:	learn: 0.0067346	total: 8.82s	remaining: 48.4s
154:	learn: 0.0067235	total: 8.88s	remaining: 48.4s
155:	learn: 0.0067027	total: 8.93s	remaining: 48.3s
156:	learn: 0.0066864	total: 8.98s	remaining: 48.2s
157:	learn: 0.0066812	total: 9.03s	remaining: 48.1s
158:	learn: 0.0066737	total: 9.08s	remaining: 48s
159:	learn: 0.0066679	total: 9.13s	remaining: 47.9s
160:	learn: 0.0066625	total: 9.18s	remaining: 47.8s
161:	learn: 0.0066514	total: 9.23s	remaining: 47.8s
162:	learn: 0.0066319	total: 9.28s	remaining: 47.7s
163:	learn: 0.0066181	total: 9.34s	remaining: 47.6s
164:	learn: 0.0066080	total: 9.38s	remaining: 47.5s
165:	learn: 0.0065997	total: 9.43s	remaining: 47.4s
166:	learn: 0.0065928	total: 9.49s	remaining: 47.3s
167:	learn: 0.0065804	total: 9.54s	remaining: 47.2s
168:	learn: 0.0065735	total: 9.59s	remaining: 47.2s
169:	learn: 0.0065685	total: 9.64s	remaining: 47.1s
170:	learn: 0.0065614	total: 9.7s	remaining: 47s
171:	learn: 0.0065570	total: 9.75s	remaining: 46.9s
172:	learn: 0.006

316:	learn: 0.0055050	total: 17.8s	remaining: 38.4s
317:	learn: 0.0054967	total: 17.9s	remaining: 38.3s
318:	learn: 0.0054914	total: 17.9s	remaining: 38.3s
319:	learn: 0.0054839	total: 18s	remaining: 38.2s
320:	learn: 0.0054779	total: 18s	remaining: 38.1s
321:	learn: 0.0054757	total: 18.1s	remaining: 38.1s
322:	learn: 0.0054740	total: 18.1s	remaining: 38s
323:	learn: 0.0054660	total: 18.2s	remaining: 38s
324:	learn: 0.0054589	total: 18.3s	remaining: 37.9s
325:	learn: 0.0054509	total: 18.3s	remaining: 37.9s
326:	learn: 0.0054509	total: 18.4s	remaining: 37.8s
327:	learn: 0.0054501	total: 18.4s	remaining: 37.7s
328:	learn: 0.0054405	total: 18.4s	remaining: 37.6s
329:	learn: 0.0054382	total: 18.5s	remaining: 37.6s
330:	learn: 0.0054303	total: 18.6s	remaining: 37.5s
331:	learn: 0.0054303	total: 18.6s	remaining: 37.4s
332:	learn: 0.0054303	total: 18.6s	remaining: 37.3s
333:	learn: 0.0054303	total: 18.7s	remaining: 37.2s
334:	learn: 0.0054303	total: 18.7s	remaining: 37.1s
335:	learn: 0.005430

479:	learn: 0.0054269	total: 25.7s	remaining: 27.8s
480:	learn: 0.0054269	total: 25.7s	remaining: 27.8s
481:	learn: 0.0054269	total: 25.8s	remaining: 27.7s
482:	learn: 0.0054269	total: 25.8s	remaining: 27.7s
483:	learn: 0.0054269	total: 25.9s	remaining: 27.6s
484:	learn: 0.0054269	total: 26s	remaining: 27.6s
485:	learn: 0.0054269	total: 26s	remaining: 27.5s
486:	learn: 0.0054269	total: 26s	remaining: 27.4s
487:	learn: 0.0054269	total: 26.1s	remaining: 27.4s
488:	learn: 0.0054269	total: 26.1s	remaining: 27.3s
489:	learn: 0.0054269	total: 26.2s	remaining: 27.3s
490:	learn: 0.0054269	total: 26.2s	remaining: 27.2s
491:	learn: 0.0054269	total: 26.3s	remaining: 27.1s
492:	learn: 0.0054269	total: 26.3s	remaining: 27.1s
493:	learn: 0.0054269	total: 26.4s	remaining: 27s
494:	learn: 0.0054269	total: 26.4s	remaining: 26.9s
495:	learn: 0.0054269	total: 26.4s	remaining: 26.9s
496:	learn: 0.0054269	total: 26.5s	remaining: 26.8s
497:	learn: 0.0054269	total: 26.5s	remaining: 26.7s
498:	learn: 0.005426

640:	learn: 0.0054269	total: 33.4s	remaining: 18.7s
641:	learn: 0.0054269	total: 33.4s	remaining: 18.6s
642:	learn: 0.0054269	total: 33.5s	remaining: 18.6s
643:	learn: 0.0054269	total: 33.5s	remaining: 18.5s
644:	learn: 0.0054269	total: 33.6s	remaining: 18.5s
645:	learn: 0.0054269	total: 33.7s	remaining: 18.5s
646:	learn: 0.0054269	total: 33.8s	remaining: 18.4s
647:	learn: 0.0054269	total: 33.9s	remaining: 18.4s
648:	learn: 0.0054269	total: 33.9s	remaining: 18.3s
649:	learn: 0.0054269	total: 34s	remaining: 18.3s
650:	learn: 0.0054269	total: 34.1s	remaining: 18.3s
651:	learn: 0.0054269	total: 34.1s	remaining: 18.2s
652:	learn: 0.0054269	total: 34.2s	remaining: 18.2s
653:	learn: 0.0054269	total: 34.2s	remaining: 18.1s
654:	learn: 0.0054269	total: 34.3s	remaining: 18.1s
655:	learn: 0.0054269	total: 34.3s	remaining: 18s
656:	learn: 0.0054269	total: 34.4s	remaining: 18s
657:	learn: 0.0054269	total: 34.4s	remaining: 17.9s
658:	learn: 0.0054269	total: 34.5s	remaining: 17.8s
659:	learn: 0.0054

802:	learn: 0.0054269	total: 41s	remaining: 10.1s
803:	learn: 0.0054269	total: 41.1s	remaining: 10s
804:	learn: 0.0054269	total: 41.1s	remaining: 9.96s
805:	learn: 0.0054269	total: 41.2s	remaining: 9.91s
806:	learn: 0.0054269	total: 41.2s	remaining: 9.85s
807:	learn: 0.0054269	total: 41.2s	remaining: 9.8s
808:	learn: 0.0054269	total: 41.3s	remaining: 9.75s
809:	learn: 0.0054269	total: 41.3s	remaining: 9.69s
810:	learn: 0.0054269	total: 41.4s	remaining: 9.64s
811:	learn: 0.0054269	total: 41.4s	remaining: 9.59s
812:	learn: 0.0054269	total: 41.4s	remaining: 9.53s
813:	learn: 0.0054269	total: 41.5s	remaining: 9.48s
814:	learn: 0.0054269	total: 41.5s	remaining: 9.42s
815:	learn: 0.0054269	total: 41.6s	remaining: 9.37s
816:	learn: 0.0054269	total: 41.6s	remaining: 9.32s
817:	learn: 0.0054269	total: 41.6s	remaining: 9.26s
818:	learn: 0.0054269	total: 41.7s	remaining: 9.21s
819:	learn: 0.0054269	total: 41.7s	remaining: 9.16s
820:	learn: 0.0054269	total: 41.8s	remaining: 9.11s
821:	learn: 0.005

963:	learn: 0.0054269	total: 48.5s	remaining: 1.81s
964:	learn: 0.0054269	total: 48.5s	remaining: 1.76s
965:	learn: 0.0054269	total: 48.6s	remaining: 1.71s
966:	learn: 0.0054269	total: 48.6s	remaining: 1.66s
967:	learn: 0.0054269	total: 48.7s	remaining: 1.61s
968:	learn: 0.0054269	total: 48.7s	remaining: 1.56s
969:	learn: 0.0054269	total: 48.7s	remaining: 1.51s
970:	learn: 0.0054269	total: 48.8s	remaining: 1.46s
971:	learn: 0.0054269	total: 48.8s	remaining: 1.41s
972:	learn: 0.0054269	total: 48.9s	remaining: 1.35s
973:	learn: 0.0054269	total: 48.9s	remaining: 1.3s
974:	learn: 0.0054269	total: 48.9s	remaining: 1.25s
975:	learn: 0.0054269	total: 49s	remaining: 1.2s
976:	learn: 0.0054269	total: 49s	remaining: 1.15s
977:	learn: 0.0054269	total: 49.1s	remaining: 1.1s
978:	learn: 0.0054269	total: 49.1s	remaining: 1.05s
979:	learn: 0.0054269	total: 49.1s	remaining: 1s
980:	learn: 0.0054269	total: 49.2s	remaining: 953ms
981:	learn: 0.0054269	total: 49.2s	remaining: 902ms
982:	learn: 0.0054269	

In [54]:
# Display the metric rows stored in `metrics3` by an earlier cell.
# NOTE(review): presumably one [precision, recall, f1] row per classifier,
# in the order the classifiers were run — confirm against the producing cell.
metrics3

[[0.4025307170364937, 0.5937246416012983, 0.47978142076502733],
 [0.8344566133108677, 0.5358398701649987, 0.6526107725251195],
 [0.8078866124052765, 0.7786042737354611, 0.7929752066115703],
 [0.15691649761500082, 0.7741411955639708, 0.2609409190371991]]

In [55]:
# Run the supervised classifiers on the train_set22 / test_set22 split.
# Per the recorded output, classSuper prints acc, balanced acc, precision,
# recall, confusion matrix, f1, MCC and Kappa for each model and returns
# one [precision, recall, f1] row per model.
# NOTE(review): in the recorded run CatBoost logs every one of its 1000
# iterations and the learn loss no longer changes in the later part of the
# log; consider silencing it (e.g. verbose=0) and adding early stopping
# inside classSuper — confirm there.
metrics4 = classSuper(train_set22, test_set22)

lightgbm
acc: 0.9980596001283649
balanced acc: 0.9914051657402176
precision: 0.9909261268369661
recall: 0.983553597650514
cm: [[123686     92]
 [   168  10047]]
f1: 0.9872260980642625
MCC: 0.9861842201473545
Kappa: 0.9861761688260677
GradientBoosting
acc: 0.9965222063839155
balanced acc: 0.9797501700164617
precision: 0.9942208253067019
recall: 0.9599608418991679
cm: [[123721     57]
 [   409   9806]]
f1: 0.9767905169837633
MCC: 0.9750865248135848
Kappa: 0.9749114119327092
catboost
Learning rate set to 0.18781
0:	learn: 0.3808596	total: 62.7ms	remaining: 1m 2s
1:	learn: 0.2182577	total: 130ms	remaining: 1m 4s
2:	learn: 0.1481522	total: 183ms	remaining: 1m
3:	learn: 0.1017503	total: 235ms	remaining: 58.4s
4:	learn: 0.0734599	total: 300ms	remaining: 59.8s
5:	learn: 0.0571694	total: 361ms	remaining: 59.7s
6:	learn: 0.0455613	total: 413ms	remaining: 58.6s
7:	learn: 0.0393450	total: 463ms	remaining: 57.4s
8:	learn: 0.0362531	total: 517ms	remaining: 56.9s
9:	learn: 0.0322501	total: 578ms	rema

153:	learn: 0.0067346	total: 8.53s	remaining: 46.9s
154:	learn: 0.0067235	total: 8.59s	remaining: 46.8s
155:	learn: 0.0067027	total: 8.64s	remaining: 46.7s
156:	learn: 0.0066864	total: 8.69s	remaining: 46.7s
157:	learn: 0.0066812	total: 8.74s	remaining: 46.6s
158:	learn: 0.0066737	total: 8.79s	remaining: 46.5s
159:	learn: 0.0066679	total: 8.84s	remaining: 46.4s
160:	learn: 0.0066625	total: 8.89s	remaining: 46.3s
161:	learn: 0.0066514	total: 8.94s	remaining: 46.3s
162:	learn: 0.0066319	total: 8.99s	remaining: 46.2s
163:	learn: 0.0066181	total: 9.05s	remaining: 46.1s
164:	learn: 0.0066080	total: 9.1s	remaining: 46s
165:	learn: 0.0065997	total: 9.15s	remaining: 46s
166:	learn: 0.0065928	total: 9.2s	remaining: 45.9s
167:	learn: 0.0065804	total: 9.25s	remaining: 45.8s
168:	learn: 0.0065735	total: 9.31s	remaining: 45.8s
169:	learn: 0.0065685	total: 9.36s	remaining: 45.7s
170:	learn: 0.0065614	total: 9.42s	remaining: 45.7s
171:	learn: 0.0065570	total: 9.48s	remaining: 45.6s
172:	learn: 0.0065

312:	learn: 0.0055269	total: 16.7s	remaining: 36.6s
313:	learn: 0.0055269	total: 16.7s	remaining: 36.5s
314:	learn: 0.0055179	total: 16.8s	remaining: 36.5s
315:	learn: 0.0055120	total: 16.8s	remaining: 36.4s
316:	learn: 0.0055050	total: 16.9s	remaining: 36.3s
317:	learn: 0.0054967	total: 16.9s	remaining: 36.3s
318:	learn: 0.0054914	total: 17s	remaining: 36.2s
319:	learn: 0.0054839	total: 17s	remaining: 36.2s
320:	learn: 0.0054779	total: 17.1s	remaining: 36.1s
321:	learn: 0.0054757	total: 17.1s	remaining: 36s
322:	learn: 0.0054740	total: 17.2s	remaining: 36s
323:	learn: 0.0054660	total: 17.2s	remaining: 35.9s
324:	learn: 0.0054589	total: 17.3s	remaining: 35.9s
325:	learn: 0.0054509	total: 17.3s	remaining: 35.8s
326:	learn: 0.0054509	total: 17.4s	remaining: 35.7s
327:	learn: 0.0054501	total: 17.4s	remaining: 35.7s
328:	learn: 0.0054405	total: 17.5s	remaining: 35.6s
329:	learn: 0.0054382	total: 17.5s	remaining: 35.6s
330:	learn: 0.0054303	total: 17.6s	remaining: 35.5s
331:	learn: 0.005430

474:	learn: 0.0054269	total: 23.2s	remaining: 25.6s
475:	learn: 0.0054269	total: 23.2s	remaining: 25.6s
476:	learn: 0.0054269	total: 23.3s	remaining: 25.5s
477:	learn: 0.0054269	total: 23.3s	remaining: 25.5s
478:	learn: 0.0054269	total: 23.4s	remaining: 25.4s
479:	learn: 0.0054269	total: 23.4s	remaining: 25.3s
480:	learn: 0.0054269	total: 23.4s	remaining: 25.3s
481:	learn: 0.0054269	total: 23.5s	remaining: 25.2s
482:	learn: 0.0054269	total: 23.5s	remaining: 25.2s
483:	learn: 0.0054269	total: 23.5s	remaining: 25.1s
484:	learn: 0.0054269	total: 23.6s	remaining: 25s
485:	learn: 0.0054269	total: 23.6s	remaining: 25s
486:	learn: 0.0054269	total: 23.7s	remaining: 24.9s
487:	learn: 0.0054269	total: 23.7s	remaining: 24.9s
488:	learn: 0.0054269	total: 23.7s	remaining: 24.8s
489:	learn: 0.0054269	total: 23.8s	remaining: 24.7s
490:	learn: 0.0054269	total: 23.8s	remaining: 24.7s
491:	learn: 0.0054269	total: 23.9s	remaining: 24.6s
492:	learn: 0.0054269	total: 23.9s	remaining: 24.6s
493:	learn: 0.00

635:	learn: 0.0054269	total: 29.5s	remaining: 16.9s
636:	learn: 0.0054269	total: 29.5s	remaining: 16.8s
637:	learn: 0.0054269	total: 29.5s	remaining: 16.8s
638:	learn: 0.0054269	total: 29.6s	remaining: 16.7s
639:	learn: 0.0054269	total: 29.6s	remaining: 16.7s
640:	learn: 0.0054269	total: 29.7s	remaining: 16.6s
641:	learn: 0.0054269	total: 29.7s	remaining: 16.6s
642:	learn: 0.0054269	total: 29.8s	remaining: 16.5s
643:	learn: 0.0054269	total: 29.8s	remaining: 16.5s
644:	learn: 0.0054269	total: 29.8s	remaining: 16.4s
645:	learn: 0.0054269	total: 29.9s	remaining: 16.4s
646:	learn: 0.0054269	total: 29.9s	remaining: 16.3s
647:	learn: 0.0054269	total: 30s	remaining: 16.3s
648:	learn: 0.0054269	total: 30s	remaining: 16.2s
649:	learn: 0.0054269	total: 30s	remaining: 16.2s
650:	learn: 0.0054269	total: 30.1s	remaining: 16.1s
651:	learn: 0.0054269	total: 30.1s	remaining: 16.1s
652:	learn: 0.0054269	total: 30.2s	remaining: 16s
653:	learn: 0.0054269	total: 30.2s	remaining: 16s
654:	learn: 0.0054269	

794:	learn: 0.0054269	total: 35.7s	remaining: 9.21s
795:	learn: 0.0054269	total: 35.7s	remaining: 9.16s
796:	learn: 0.0054269	total: 35.8s	remaining: 9.11s
797:	learn: 0.0054269	total: 35.8s	remaining: 9.07s
798:	learn: 0.0054269	total: 35.9s	remaining: 9.02s
799:	learn: 0.0054269	total: 35.9s	remaining: 8.97s
800:	learn: 0.0054269	total: 35.9s	remaining: 8.93s
801:	learn: 0.0054269	total: 36s	remaining: 8.88s
802:	learn: 0.0054269	total: 36s	remaining: 8.84s
803:	learn: 0.0054269	total: 36.1s	remaining: 8.79s
804:	learn: 0.0054269	total: 36.1s	remaining: 8.74s
805:	learn: 0.0054269	total: 36.1s	remaining: 8.7s
806:	learn: 0.0054269	total: 36.2s	remaining: 8.65s
807:	learn: 0.0054269	total: 36.2s	remaining: 8.6s
808:	learn: 0.0054269	total: 36.2s	remaining: 8.56s
809:	learn: 0.0054269	total: 36.3s	remaining: 8.51s
810:	learn: 0.0054269	total: 36.3s	remaining: 8.46s
811:	learn: 0.0054269	total: 36.4s	remaining: 8.42s
812:	learn: 0.0054269	total: 36.4s	remaining: 8.37s
813:	learn: 0.0054

958:	learn: 0.0054269	total: 42.2s	remaining: 1.8s
959:	learn: 0.0054269	total: 42.2s	remaining: 1.76s
960:	learn: 0.0054269	total: 42.3s	remaining: 1.71s
961:	learn: 0.0054269	total: 42.3s	remaining: 1.67s
962:	learn: 0.0054269	total: 42.3s	remaining: 1.63s
963:	learn: 0.0054269	total: 42.4s	remaining: 1.58s
964:	learn: 0.0054269	total: 42.4s	remaining: 1.54s
965:	learn: 0.0054269	total: 42.4s	remaining: 1.49s
966:	learn: 0.0054269	total: 42.5s	remaining: 1.45s
967:	learn: 0.0054269	total: 42.5s	remaining: 1.41s
968:	learn: 0.0054269	total: 42.6s	remaining: 1.36s
969:	learn: 0.0054269	total: 42.6s	remaining: 1.32s
970:	learn: 0.0054269	total: 42.6s	remaining: 1.27s
971:	learn: 0.0054269	total: 42.7s	remaining: 1.23s
972:	learn: 0.0054269	total: 42.7s	remaining: 1.19s
973:	learn: 0.0054269	total: 42.8s	remaining: 1.14s
974:	learn: 0.0054269	total: 42.8s	remaining: 1.1s
975:	learn: 0.0054269	total: 42.8s	remaining: 1.05s
976:	learn: 0.0054269	total: 42.9s	remaining: 1.01s
977:	learn: 0.

In [56]:
# Display the rows returned by classSuper for the train_set22 / test_set22 run.
# Each row is [precision, recall, f1] for one model; the first two rows match
# the printed lightgbm and GradientBoosting scores.
# NOTE(review): the remaining rows presumably follow the run order
# (catboost, then a fourth model) — confirm in classSuper.
metrics4

[[0.9909261268369661, 0.983553597650514, 0.9872260980642625],
 [0.9942208253067019, 0.9599608418991679, 0.9767905169837633],
 [0.9965329370975731, 0.9848262359275575, 0.9906450024618414],
 [0.9935483870967742, 0.9799314733235438, 0.9866929521931986]]

# For each test split

In [None]:
# Build two train/test splits, one via getRandomSplit and one via
# getLastSplit, both called with 0.1 (presumably the held-out fraction —
# confirm in the helpers).
# NOTE(review): the random split is fed `df` while the last split is fed
# `data`; unless `df` is reassigned elsewhere, it is the last CSV read by the
# loader loop rather than the merged frame. The following cell also reads
# `train_set1`, which this cell does not define (it binds `train_df_1`).
# Confirm both are intended.
train_df_1, test_df_1 = getRandomSplit(df, 0.1)
train_set2, test_set2 = getLastSplit(data, 0.1)

In [None]:
# Draw len(train_set2) rows at random from train_set1 and rebind the name
# (presumably a pandas DataFrame, to match the size of train_set2).
# NOTE(review): no random_state is passed, so the drawn rows differ on every
# run, and the original train_set1 is lost after this cell — consider a
# seeded sample bound to a new name.
train_set1 = train_set1.sample(n=len(train_set2))