In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from performance_measures import performance_measures
import timeit
from imblearn.over_sampling import RandomOverSampler

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier 
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [3]:
# Load the fetal-health dataset from the project's `source` directory.
# A forward slash is used instead of the Windows-only backslash so the relative
# path resolves on Windows, macOS and Linux alike (and matches the forward-slash
# style of the output paths used at the end of this notebook).
FetalHealth = pd.read_csv("source/fetal_health.csv")

Analysis consists of:

1. Count plot
2. Correlation heat map

Data Preprocessing:

1. Scaling
2. Separating features (X) from the target (y), then splitting into train and test sets

In [4]:
# Split the frame into the target column and the predictor columns:
# `fetal_health` is the label, every other column is a feature.
y = FetalHealth['fetal_health']
X = FetalHealth.drop(columns=['fetal_health'])

In [5]:
col_names = X.columns.tolist()

# Candidate scalers for this experiment (six options, all disabled in this run).
# NOTE(review): presumably one of these is uncommented and its transformed output
# is fed into X_df below in the scaled variants of this notebook; confirm.
#s_scaler = preprocessing.StandardScaler()
#s_scaler = preprocessing.MinMaxScaler()
#s_scaler = preprocessing.RobustScaler()
#s_scaler = preprocessing.MaxAbsScaler()
#s_scaler = preprocessing.Normalizer()
#s_scaler = preprocessing.QuantileTransformer()

# Label for this run; it is used to name the result files written at the end.
scaler_name = "Unnormalized"

# No scaling here: the feature frame is rebuilt directly from the raw features,
# keeping the original column names.
X_df = pd.DataFrame(X, columns=col_names)

# Summary statistics, one row per feature.
X_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
baseline value,2126.0,133.303857,9.840844,106.0,126.0,133.0,140.0,160.0
accelerations,2126.0,0.003178,0.003866,0.0,0.0,0.002,0.006,0.019
fetal_movement,2126.0,0.009481,0.046666,0.0,0.0,0.0,0.003,0.481
uterine_contractions,2126.0,0.004366,0.002946,0.0,0.002,0.004,0.007,0.015
light_decelerations,2126.0,0.001889,0.00296,0.0,0.0,0.0,0.003,0.015
severe_decelerations,2126.0,3e-06,5.7e-05,0.0,0.0,0.0,0.0,0.001
prolongued_decelerations,2126.0,0.000159,0.00059,0.0,0.0,0.0,0.0,0.005
abnormal_short_term_variability,2126.0,46.990122,17.192814,12.0,32.0,49.0,61.0,87.0
mean_value_of_short_term_variability,2126.0,1.332785,0.883241,0.2,0.7,1.2,1.7,7.0
percentage_of_time_with_abnormal_long_term_variability,2126.0,9.84666,18.39688,0.0,0.0,0.0,11.0,91.0


In [6]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y,
    test_size=0.3,
    random_state=42,
)

In [7]:
# Rebalance the classes with random over-sampling (seeded for repeatability).
# Only the training split is resampled; X_test / y_test are not passed in here.
ros = RandomOverSampler(random_state=42)
X_train, y_train = ros.fit_resample(X_train, y_train)

In [8]:
# Empty results table: the model cells below each append one row to it
# (model name followed by six evaluation metrics).
performanceMeasuresDF = pd.DataFrame(
    columns=[
        'Classifier Model',
        'Accuracy',
        'F1-Score',
        'Recall',
        'Precision',
        'Jaccard-Score',
        'Kappa-Score',
    ]
)

In [9]:
lr = LogisticRegression(max_iter = 100000)
result = %timeit -o -n 5 -r 5 lr.fit(X=X_train, y=y_train) 
lr.fit(X = X_train, y = y_train)
measures = performance_measures("Logistic Regression", lr, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time = result.average

7.1 s ± 183 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.7836990595611285
F1 : 0.8052068394905271
Recall : 0.7836990595611285
Precision : 0.8587796889901756
Jaccard : 0.6898744364520928
Kappa : 0.5318700950679514


In [10]:
svmlinear = SVC(kernel = 'linear')
result = %timeit -o -n 5 -r 5 svmlinear.fit(X=X_train, y = y_train)
svmlinear.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Linear)", svmlinear, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

31.8 s ± 4.73 s per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.786833855799373
F1 : 0.808564239301457
Recall : 0.786833855799373
Precision : 0.8620195824290652
Jaccard : 0.6958038475399838
Kappa : 0.5382740619728503


In [11]:
svmrbf = SVC(kernel = 'rbf')
result = %timeit -o -n 5 -r 5 svmrbf.fit(X=X_train, y = y_train)
svmrbf.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(RBF)", svmrbf, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

473 ms ± 4.1 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8040752351097179
F1 : 0.8215023540200517
Recall : 0.8040752351097179
Precision : 0.8692294078373486
Jaccard : 0.7089542383771368
Kappa : 0.5714692559416661


Two SVM kernel variants (polynomial and sigmoid) that haven't been tried before

In [12]:
svmpoly = SVC(kernel = 'poly')
result = %timeit -o -n 5 -r 5 svmpoly.fit(X=X_train, y = y_train)
svmpoly.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Poly)",svmpoly, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

371 ms ± 5.17 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.7946708463949843
F1 : 0.8144198955903447
Recall : 0.7946708463949843
Precision : 0.865755743211786
Jaccard : 0.7009406254906687
Kappa : 0.5541698227945334


In [13]:
svmsigmoid = SVC(kernel = 'sigmoid')
result = %timeit -o -n 5 -r 5 svmsigmoid.fit(X=X_train, y = y_train)
svmsigmoid.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Sigmoid)",svmsigmoid, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

1 s ± 7.13 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.15830721003134796
F1 : 0.16164092801306398
Recall : 0.15830721003134796
Precision : 0.7512433668546041
Jaccard : 0.08814372934991296
Kappa : -0.00809166235111336


In [14]:
knn = KNeighborsClassifier(n_neighbors=5)
result = %timeit -o -n 5 -r 5 knn.fit(X=X_train, y = y_train) 
knn.fit(X=X_train, y = y_train)
measures = performance_measures("K-Nearest Neighbors",knn, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

3.31 ms ± 358 μs per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.835423197492163
F1 : 0.8464569522900693
Recall : 0.835423197492163
Precision : 0.8706680819907945
Jaccard : 0.7455669364049478
Kappa : 0.6087741121642694


In [15]:
gnb = GaussianNB()
result = %timeit -o -n 5 -r 5 gnb.fit(X_train, y_train)
gnb.fit(X_train, y_train)
measures = performance_measures("Naive Bayes",gnb, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

11.4 ms ± 2.4 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
Accuracy : 0.7633228840125392
F1 : 0.787325927249344
Recall : 0.7633228840125392
Precision : 0.8606987574723375
Jaccard : 0.6620772872259199
Kappa : 0.5119062094682232


In [16]:
dt = DecisionTreeClassifier()
result = %timeit -o -n 5 -r 5 dt.fit(X_train, y_train)
dt.fit(X_train, y_train)
measures = performance_measures("Decision Tree",dt, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

35.6 ms ± 5.2 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
Accuracy : 0.9200626959247649
F1 : 0.9194400708977777
Recall : 0.9200626959247649
Precision : 0.9191279938791931
Jaccard : 0.8568797923982472
Kappa : 0.7809419938600743


In [17]:
rfc = RandomForestClassifier(n_estimators = 100, random_state = 42)
result = %timeit -o -n 5 -r 5 rfc.fit(X_train, y_train)
rfc.fit(X_train, y_train)
measures = performance_measures("Random Forest",rfc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

631 ms ± 29.3 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.9373040752351097
F1 : 0.9373326249297671
Recall : 0.9373040752351097
Precision : 0.9376395149699887
Jaccard : 0.88668851725909
Kappa : 0.8300660558278287


In [18]:
abc = AdaBoostClassifier(n_estimators = 100, random_state = 42)
result = %timeit -o -n 5 -r 5 abc.fit(X_train, y_train)
abc.fit(X_train, y_train)
measures = performance_measures("Ada Boost",abc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

704 ms ± 35.8 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8871473354231975
F1 : 0.8922203194307321
Recall : 0.8871473354231975
Precision : 0.9037482268486758
Jaccard : 0.8123084124727266
Kappa : 0.7184448762189628


In [19]:
gbc = GradientBoostingClassifier(n_estimators = 100, random_state = 42, learning_rate = 1.0, max_depth = 1)
result = %timeit -o -n 5 -r 5 gbc.fit(X_train, y_train)
gbc.fit(X_train, y_train)
measures = performance_measures("Gradient Boost",gbc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

1.55 s ± 119 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8824451410658307
F1 : 0.889478732362703
Recall : 0.8824451410658307
Precision : 0.9123061851569174
Jaccard : 0.8067182439299724
Kappa : 0.7229556034183284


In [20]:
xgb = XGBClassifier(use_label_encoder = False, eval_metric = 'mlogloss')
result = %timeit -o -n 5 -r 5 xgb.fit(X_train, y_train - 1)
xgb.fit(X_train, y_train - 1)
measures = performance_measures("XG Boost",xgb, X_test, y_test - 1, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



712 ms ± 75 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Parameters: { "use_label_encoder" } are not used.



Accuracy : 0.9545454545454546
F1 : 0.9544474452495844
Recall : 0.9545454545454546
Precision : 0.954673114614897
Jaccard : 0.9150425054580357
Kappa : 0.8764729838898124


In [21]:
lgbm = LGBMClassifier()
result = %timeit -o -n 5 -r 5 lgbm.fit(X_train, y_train)
lgbm.fit(X_train, y_train)
measures = performance_measures("LightGBM",lgbm, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002116 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000435 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001418 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001343 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001433 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001315 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000712 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000433 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001476 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001317 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001329 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001357 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000458 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001338 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001298 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000462 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000918 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001471 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002725 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000894 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000959 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001318 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001482 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000772 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001406 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


561 ms ± 38 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001394 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1262
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


Accuracy : 0.9561128526645768
F1 : 0.9555792458398668
Recall : 0.9561128526645768
Precision : 0.9554740609172917
Jaccard : 0.9172395981761186
Kappa : 0.8783404614672151


In [22]:
# Overall ranking score: the plain (equally weighted) sum of the six metric columns,
# added left to right starting from Accuracy.
performanceMeasuresDF["Weighted_Score"] = sum(
    (
        performanceMeasuresDF[metric]
        for metric in ("F1-Score", "Recall", "Precision", "Jaccard-Score", "Kappa-Score")
    ),
    performanceMeasuresDF["Accuracy"],
)

In [23]:
# Rank the models from best to worst by their combined score.
performanceTable = performanceMeasuresDF.sort_values(
    "Weighted_Score",
    ascending=False,
)

In [24]:
# Export the ranked table to an Excel workbook named after this run's scaler label;
# the row index is left out of the file.
performanceTable.to_excel(f"scripts/new_results/{scaler_name}.xlsx", index=False)

In [25]:
# Append the accumulated mean fit time for this run as a new line in the timings
# log for this scaler label, so earlier entries in the file are kept.
with open(f"scripts/new_results/{scaler_name} Timings.txt", "+a") as timings_log:
    print(exec_time, file=timings_log)