In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from performance_measures import performance_measures
import timeit
from imblearn.over_sampling import RandomOverSampler

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier 
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [3]:
# Load the fetal-health dataset from a path relative to the working directory.
# A forward slash is used as the separator so the same path resolves on
# Windows, Linux and macOS (a backslash is only a separator on Windows).
FetalHealth = pd.read_csv("source/fetal_health.csv")

Analysis consists of:

1. Count plot
2. Correlation heat map

Data preprocessing:

1. Scaling
2. Separating features (X) from the target (y), then splitting into train and test sets

In [4]:
# Target: the 'fetal_health' column. Features: every remaining column.
y = FetalHealth['fetal_health']
X = FetalHealth.drop(columns=['fetal_health'])

In [5]:
# Remember the feature names: the scalers return a bare array, so the names
# are re-attached when the DataFrame is rebuilt below.
col_names = X.columns.tolist()

# Scaler selection: exactly one assignment is active, the others are kept as
# ready-to-use alternatives for experimentation.
#s_scaler = preprocessing.StandardScaler()
s_scaler = preprocessing.MinMaxScaler()
#s_scaler = preprocessing.RobustScaler()
#s_scaler = preprocessing.MaxAbsScaler()
#s_scaler = preprocessing.Normalizer()
#s_scaler = preprocessing.QuantileTransformer()

# Label used to name the result files written at the end of the notebook;
# it is set by hand, so keep it in sync with the active scaler above.
scaler_name = "MinMax Scaler"

# Set to True to bypass scaling and use the raw feature values.
noscale = False

# NOTE(review): the scaler is fitted on the full feature matrix, before the
# train/test split, so test rows influence the scaling parameters.
X_df = X if noscale else s_scaler.fit_transform(X)
X_df = pd.DataFrame(X_df, columns=col_names)

# Summary statistics, one row per feature.
X_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
baseline value,2126.0,0.505627,0.182238,0.0,0.37037,0.5,0.62963,1.0
accelerations,2126.0,0.167277,0.203452,0.0,0.0,0.105263,0.315789,1.0
fetal_movement,2126.0,0.01971,0.097018,0.0,0.0,0.0,0.006237,1.0
uterine_contractions,2126.0,0.291094,0.196405,0.0,0.133333,0.266667,0.466667,1.0
light_decelerations,2126.0,0.125964,0.197347,0.0,0.0,0.0,0.2,1.0
severe_decelerations,2126.0,0.003293,0.0573,0.0,0.0,0.0,0.0,1.0
prolongued_decelerations,2126.0,0.031703,0.11799,0.0,0.0,0.0,0.0,1.0
abnormal_short_term_variability,2126.0,0.466535,0.229238,0.0,0.266667,0.493333,0.653333,1.0
mean_value_of_short_term_variability,2126.0,0.166586,0.129888,0.0,0.073529,0.147059,0.220588,1.0
percentage_of_time_with_abnormal_long_term_variability,2126.0,0.108205,0.202164,0.0,0.0,0.0,0.120879,1.0


In [6]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y,
    test_size=0.3,
    random_state=42,
)

In [7]:
# Random over-sampling of the training split only (the test split is left
# untouched). The resampled data replaces X_train / y_train, which every
# classifier cell below trains on.
ros = RandomOverSampler(random_state=42)
X_train, y_train = ros.fit_resample(X=X_train, y=y_train)

In [8]:
# Empty results table: the classifier cells below each append one row to it.
measure_columns = [
    'Classifier Model',
    'Accuracy',
    'F1-Score',
    'Recall',
    'Precision',
    'Jaccard-Score',
    'Kappa-Score',
]
performanceMeasuresDF = pd.DataFrame(columns=measure_columns)

In [9]:
lr = LogisticRegression(max_iter = 100000)
result = %timeit -o -n 5 -r 5 lr.fit(X=X_train, y=y_train) 
lr.fit(X = X_train, y = y_train)
measures = performance_measures("Logistic Regression", lr, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time = result.average

138 ms ± 4.8 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
Accuracy : 0.8087774294670846
F1 : 0.8270736293766521
Recall : 0.8087774294670846
Precision : 0.8784964773992987
Jaccard : 0.7175013317318161
Kappa : 0.5851423881122049


In [10]:
svmlinear = SVC(kernel = 'linear')
result = %timeit -o -n 5 -r 5 svmlinear.fit(X=X_train, y = y_train)
svmlinear.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Linear)", svmlinear, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

252 ms ± 4.62 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8103448275862069
F1 : 0.8293138098337391
Recall : 0.8103448275862069
Precision : 0.881994333575948
Jaccard : 0.7213720825587152
Kappa : 0.5890138205668775


In [11]:
svmrbf = SVC(kernel = 'rbf')
result = %timeit -o -n 5 -r 5 svmrbf.fit(X=X_train, y = y_train)
svmrbf.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(RBF)", svmrbf, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

336 ms ± 74.3 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8557993730407524
F1 : 0.8687914819256769
Recall : 0.8557993730407524
Precision : 0.9064054771749809
Jaccard : 0.7777105378491422
Kappa : 0.6737044572673805


Two SVM kernel variants that haven't been tried before: polynomial and sigmoid.

In [12]:
svmpoly = SVC(kernel = 'poly')
result = %timeit -o -n 5 -r 5 svmpoly.fit(X=X_train, y = y_train)
svmpoly.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Poly)",svmpoly, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

219 ms ± 4.08 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8808777429467085
F1 : 0.8890960294684581
Recall : 0.8808777429467085
Precision : 0.9097445356496517
Jaccard : 0.8090003794098621
Kappa : 0.7141897188934931


In [13]:
svmsigmoid = SVC(kernel = 'sigmoid')
result = %timeit -o -n 5 -r 5 svmsigmoid.fit(X=X_train, y = y_train)
svmsigmoid.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Sigmoid)",svmsigmoid, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

897 ms ± 91.8 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.45768025078369906
F1 : 0.5240576224452486
Recall : 0.45768025078369906
Precision : 0.6807826030003284
Jaccard : 0.36607044723405247
Kappa : 0.03002447480237813


In [14]:
knn = KNeighborsClassifier(n_neighbors=5)
result = %timeit -o -n 5 -r 5 knn.fit(X=X_train, y = y_train) 
knn.fit(X=X_train, y = y_train)
measures = performance_measures("K-Nearest Neighbors",knn, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

3.35 ms ± 441 μs per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8667711598746082
F1 : 0.875611438525708
Recall : 0.8667711598746082
Precision : 0.8953476051687655
Jaccard : 0.7898556958072873
Kappa : 0.6791902556184595


In [15]:
gnb = GaussianNB()
result = %timeit -o -n 5 -r 5 gnb.fit(X_train, y_train)
gnb.fit(X_train, y_train)
measures = performance_measures("Naive Bayes",gnb, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

5.42 ms ± 985 μs per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.677115987460815
F1 : 0.7132858379901645
Recall : 0.677115987460815
Precision : 0.8556846660487221
Jaccard : 0.5637272958162386
Kappa : 0.40643389741713754


In [16]:
dt = DecisionTreeClassifier()
result = %timeit -o -n 5 -r 5 dt.fit(X_train, y_train)
dt.fit(X_train, y_train)
measures = performance_measures("Decision Tree",dt, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

23.2 ms ± 2.06 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
Accuracy : 0.9247648902821317
F1 : 0.9243306467823504
Recall : 0.9247648902821317
Precision : 0.9241468552894992
Jaccard : 0.8645456118094799
Kappa : 0.7943745761460005


In [17]:
rfc = RandomForestClassifier(n_estimators = 100, random_state = 42)
result = %timeit -o -n 5 -r 5 rfc.fit(X_train, y_train)
rfc.fit(X_train, y_train)
measures = performance_measures("Random Forest",rfc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

645 ms ± 43.9 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.9373040752351097
F1 : 0.9373326249297671
Recall : 0.9373040752351097
Precision : 0.9376395149699887
Jaccard : 0.88668851725909
Kappa : 0.8300660558278287


In [18]:
abc = AdaBoostClassifier(n_estimators = 100, random_state = 42)
result = %timeit -o -n 5 -r 5 abc.fit(X_train, y_train)
abc.fit(X_train, y_train)
measures = performance_measures("Ada Boost",abc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

667 ms ± 6.95 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8871473354231975
F1 : 0.8922203194307321
Recall : 0.8871473354231975
Precision : 0.9037482268486758
Jaccard : 0.8123084124727266
Kappa : 0.7184448762189628


In [19]:
gbc = GradientBoostingClassifier(n_estimators = 100, random_state = 42, learning_rate = 1.0, max_depth = 1)
result = %timeit -o -n 5 -r 5 gbc.fit(X_train, y_train)
gbc.fit(X_train, y_train)
measures = performance_measures("Gradient Boost",gbc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

1.45 s ± 88.9 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8824451410658307
F1 : 0.889478732362703
Recall : 0.8824451410658307
Precision : 0.9123061851569174
Jaccard : 0.8067182439299724
Kappa : 0.7229556034183284


In [20]:
xgb = XGBClassifier(use_label_encoder = False, eval_metric = 'mlogloss')
result = %timeit -o -n 5 -r 5 xgb.fit(X_train, y_train - 1)
xgb.fit(X_train, y_train - 1)
measures = performance_measures("XG Boost",xgb, X_test, y_test - 1, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



643 ms ± 4.81 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Parameters: { "use_label_encoder" } are not used.



Accuracy : 0.9545454545454546
F1 : 0.9544474452495844
Recall : 0.9545454545454546
Precision : 0.954673114614897
Jaccard : 0.9150425054580357
Kappa : 0.8764729838898124


In [21]:
lgbm = LGBMClassifier()
result = %timeit -o -n 5 -r 5 lgbm.fit(X_train, y_train)
lgbm.fit(X_train, y_train)
measures = performance_measures("LightGBM",lgbm, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000461 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001275 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001321 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001234 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000373 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000432 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001798 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000427 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001301 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000420 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000406 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000435 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001434 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000385 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000404 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001444 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001224 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001219 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001708 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000389 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000824 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001256 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001253 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001221 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000647 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


542 ms ± 24.9 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000714 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


Accuracy : 0.9561128526645768
F1 : 0.9555792458398668
Recall : 0.9561128526645768
Precision : 0.9554740609172917
Jaccard : 0.9172395981761186
Kappa : 0.8783404614672151


In [22]:
# Combined ranking score: the plain (equally weighted) sum of the six metric
# columns, added left to right in the order listed here.
score_columns = ["Accuracy", "F1-Score", "Recall", "Precision", "Jaccard-Score", "Kappa-Score"]
performanceMeasuresDF["Weighted_Score"] = sum(
    (performanceMeasuresDF[column] for column in score_columns[1:]),
    performanceMeasuresDF[score_columns[0]],
)

In [23]:
# Rank the classifiers from highest to lowest combined score.
performanceTable = performanceMeasuresDF.sort_values(
    "Weighted_Score",
    ascending=False,
)

In [24]:
# Save the ranked table as an Excel workbook named after the scaler label;
# the row index is not written.
performanceTable.to_excel(f"scripts/new_results/{scaler_name}.xlsx", index=False)

In [25]:
# Append the accumulated mean fit time (one line per notebook run) to the
# timings log for this scaler. Mode "+a" opens the file for appending and
# creates it if it does not exist yet.
timings_path = f"scripts/new_results/{scaler_name} Timings.txt"
with open(timings_path, "+a") as timings_file:
    print(exec_time, file=timings_file)