In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from performance_measures import performance_measures
import timeit
from imblearn.over_sampling import RandomOverSampler

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier 
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [3]:
FetalHealth = pd.read_csv(r"source\fetal_health.csv")

Analysis consists of :

1. Count plot
2. Correlation heat map

Data Preprocessing :

1. Scaling
2. X,y and test, train division

In [4]:
X = FetalHealth.drop(['fetal_health'], axis = 1)
y = FetalHealth['fetal_health']

In [5]:
col_names = list(X.columns)
# Three different scaling methods

#s_scaler = preprocessing.StandardScaler()
#s_scaler = preprocessing.MinMaxScaler()
#s_scaler = preprocessing.RobustScaler()
#s_scaler = preprocessing.MaxAbsScaler()
#s_scaler = preprocessing.Normalizer()
s_scaler = preprocessing.QuantileTransformer()

scaler_name = "Quantile Transformer"
noscale = False
if(noscale):
    X_df = X
else:
    X_df= s_scaler.fit_transform(X)

X_df = pd.DataFrame(X_df, columns=col_names)   
X_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
baseline value,2126.0,0.499974,0.288711,0.0,0.245746,0.496496,0.735235,1.0
accelerations,2126.0,0.411646,0.372958,0.0,0.0,0.525526,0.772272,1.0
fetal_movement,2126.0,0.309915,0.399023,0.0,0.0,0.0,0.767267,1.0
uterine_contractions,2126.0,0.487896,0.30579,0.0,0.249249,0.443944,0.797798,1.0
light_decelerations,2126.0,0.332402,0.39772,0.0,0.0,0.0,0.737738,1.0
severe_decelerations,2126.0,0.003293,0.0573,0.0,0.0,0.0,0.0,1.0
prolongued_decelerations,2126.0,0.080242,0.265596,0.0,0.0,0.0,0.0,1.0
abnormal_short_term_variability,2126.0,0.499999,0.288802,0.0,0.248749,0.503504,0.748248,1.0
mean_value_of_short_term_variability,2126.0,0.499758,0.288987,0.0,0.255756,0.513013,0.741241,1.0
percentage_of_time_with_abnormal_long_term_variability,2126.0,0.329969,0.398106,0.0,0.0,0.0,0.752252,1.0


In [6]:
X_train, X_test, y_train,y_test = train_test_split(X_df,y,test_size=0.3,random_state=42)

In [7]:
ros = RandomOverSampler(random_state=42)
X_train,y_train = ros.fit_resample(X_train,y_train)

In [8]:
performanceMeasuresDF = pd.DataFrame(columns=['Classifier Model', 'Accuracy','F1-Score','Recall','Precision','Jaccard-Score','Kappa-Score'])

In [9]:
lr = LogisticRegression(max_iter = 100000)
result = %timeit -o -n 5 -r 5 lr.fit(X=X_train, y=y_train) 
lr.fit(X = X_train, y = y_train)
measures = performance_measures("Logistic Regression", lr, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time = result.average

115 ms ± 4.27 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
Accuracy : 0.799373040752351
F1 : 0.8211757581308344
Recall : 0.799373040752351
Precision : 0.8745049209354433
Jaccard : 0.7121302265067361
Kappa : 0.5657694639116055


In [10]:
svmlinear = SVC(kernel = 'linear')
result = %timeit -o -n 5 -r 5 svmlinear.fit(X=X_train, y = y_train)
svmlinear.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Linear)", svmlinear, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

251 ms ± 4.58 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8087774294670846
F1 : 0.8276172305210416
Recall : 0.8087774294670846
Precision : 0.8712274567537726
Jaccard : 0.7231933742040757
Kappa : 0.5743084344202225


In [11]:
svmrbf = SVC(kernel = 'rbf')
result = %timeit -o -n 5 -r 5 svmrbf.fit(X=X_train, y = y_train)
svmrbf.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(RBF)", svmrbf, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

269 ms ± 21.4 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.85423197492163
F1 : 0.8658701173109076
Recall : 0.85423197492163
Precision : 0.8918567077055578
Jaccard : 0.7775490628463335
Kappa : 0.6571795049573599


Two variants which haven't been tried before

In [12]:
svmpoly = SVC(kernel = 'poly')
result = %timeit -o -n 5 -r 5 svmpoly.fit(X=X_train, y = y_train)
svmpoly.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Poly)",svmpoly, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

217 ms ± 1.8 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8761755485893417
F1 : 0.8821209495621278
Recall : 0.8761755485893417
Precision : 0.8933290760839968
Jaccard : 0.8039326789816233
Kappa : 0.689137514185622


In [13]:
svmsigmoid = SVC(kernel = 'sigmoid')
result = %timeit -o -n 5 -r 5 svmsigmoid.fit(X=X_train, y = y_train)
svmsigmoid.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Sigmoid)",svmsigmoid, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

766 ms ± 66 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.329153605015674
F1 : 0.407072716983754
Recall : 0.329153605015674
Precision : 0.6952136234633696
Jaccard : 0.2600865028641128
Kappa : 0.047448947555691956


In [14]:
knn = KNeighborsClassifier(n_neighbors=5)
result = %timeit -o -n 5 -r 5 knn.fit(X=X_train, y = y_train) 
knn.fit(X=X_train, y = y_train)
measures = performance_measures("K-Nearest Neighbors",knn, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

2.74 ms ± 398 μs per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8307210031347962
F1 : 0.8441423502718315
Recall : 0.8307210031347962
Precision : 0.8716908499460924
Jaccard : 0.7471289522570472
Kappa : 0.6032041278195922


In [15]:
gnb = GaussianNB()
result = %timeit -o -n 5 -r 5 gnb.fit(X_train, y_train)
gnb.fit(X_train, y_train)
measures = performance_measures("Naive Bayes",gnb, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

4.16 ms ± 252 μs per loop (mean ± std. dev. of 5 runs, 5 loops each)
Accuracy : 0.6598746081504702
F1 : 0.6973158449808338
Recall : 0.6598746081504702
Precision : 0.8537542319074346
Jaccard : 0.5443481871191831
Kappa : 0.3884921002999102


In [16]:
dt = DecisionTreeClassifier()
result = %timeit -o -n 5 -r 5 dt.fit(X_train, y_train)
dt.fit(X_train, y_train)
measures = performance_measures("Decision Tree",dt, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

20.6 ms ± 764 μs per loop (mean ± std. dev. of 5 runs, 5 loops each)
Accuracy : 0.9231974921630094
F1 : 0.9226885170304304
Recall : 0.9231974921630094
Precision : 0.9223537709247842
Jaccard : 0.8622403190625623
Kappa : 0.7894474527869824


In [17]:
rfc = RandomForestClassifier(n_estimators = 100, random_state = 42)
result = %timeit -o -n 5 -r 5 rfc.fit(X_train, y_train)
rfc.fit(X_train, y_train)
measures = performance_measures("Random Forest",rfc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

549 ms ± 10.8 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.9341692789968652
F1 : 0.9336789071948222
Recall : 0.9341692789968652
Precision : 0.9336802525506525
Jaccard : 0.8808512066923518
Kappa : 0.819672131147541


In [18]:
abc = AdaBoostClassifier(n_estimators = 100, random_state = 42)
result = %timeit -o -n 5 -r 5 abc.fit(X_train, y_train)
abc.fit(X_train, y_train)
measures = performance_measures("Ada Boost",abc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

608 ms ± 20 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8871473354231975
F1 : 0.8922203194307321
Recall : 0.8871473354231975
Precision : 0.9037482268486758
Jaccard : 0.8123084124727266
Kappa : 0.7184448762189628


In [19]:
gbc = GradientBoostingClassifier(n_estimators = 100, random_state = 42, learning_rate = 1.0, max_depth = 1)
result = %timeit -o -n 5 -r 5 gbc.fit(X_train, y_train)
gbc.fit(X_train, y_train)
measures = performance_measures("Gradient Boost",gbc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

1.31 s ± 74.5 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8824451410658307
F1 : 0.889478732362703
Recall : 0.8824451410658307
Precision : 0.9123061851569174
Jaccard : 0.8067182439299724
Kappa : 0.7229556034183284


In [20]:
xgb = XGBClassifier(use_label_encoder = False, eval_metric = 'mlogloss')
result = %timeit -o -n 5 -r 5 xgb.fit(X_train, y_train - 1)
xgb.fit(X_train, y_train - 1)
measures = performance_measures("XG Boost",xgb, X_test, y_test - 1, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



582 ms ± 7.85 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Parameters: { "use_label_encoder" } are not used.



Accuracy : 0.9545454545454546
F1 : 0.9544474452495844
Recall : 0.9545454545454546
Precision : 0.954673114614897
Jaccard : 0.9150425054580357
Kappa : 0.8764729838898124


In [21]:
lgbm = LGBMClassifier()
result = %timeit -o -n 5 -r 5 lgbm.fit(X_train, y_train)
lgbm.fit(X_train, y_train)
measures = performance_measures("LightGBM",lgbm, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000890 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000394 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001165 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000392 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001173 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000386 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000697 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000401 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000764 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001107 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000408 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000390 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000397 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000822 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000434 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001181 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000355 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001024 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000377 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


495 ms ± 26.8 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000383 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


Accuracy : 0.9561128526645768
F1 : 0.9555792458398668
Recall : 0.9561128526645768
Precision : 0.9554740609172917
Jaccard : 0.9172395981761186
Kappa : 0.8783404614672151


In [22]:
performanceMeasuresDF["Weighted_Score"] = performanceMeasuresDF["Accuracy"] + performanceMeasuresDF["F1-Score"] + performanceMeasuresDF["Recall"] + performanceMeasuresDF["Precision"] + performanceMeasuresDF["Jaccard-Score"] + performanceMeasuresDF["Kappa-Score"]

In [23]:
performanceTable = performanceMeasuresDF.sort_values(by = "Weighted_Score",ascending= False)

In [24]:
performanceTable.to_excel("scripts/new_results/"+ scaler_name + '.xlsx', index = False)

In [25]:
with open(r"scripts/new_results/" + scaler_name + r" Timings.txt", "+a") as file:
    file.write(str(exec_time) + "\n")