In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from performance_measures import performance_measures
import timeit
from imblearn.over_sampling import RandomOverSampler

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier 
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [3]:
FetalHealth = pd.read_csv(r"source\fetal_health.csv")

Analysis consists of :

1. Count plot
2. Correlation heat map

Data Preprocessing :

1. Scaling
2. X,y and test, train division

In [4]:
X = FetalHealth.drop(['fetal_health'], axis = 1)
y = FetalHealth['fetal_health']

In [5]:
col_names = list(X.columns)
# Three different scaling methods

s_scaler = preprocessing.StandardScaler()
#s_scaler = preprocessing.MinMaxScaler()
#s_scaler = preprocessing.RobustScaler()
#s_scaler = preprocessing.MaxAbsScaler()
#s_scaler = preprocessing.Normalizer()
#s_scaler = preprocessing.QuantileTransformer()

scaler_name = "Standard Scaler"
noscale = False
if(noscale):
    X_df = X
else:
    X_df= s_scaler.fit_transform(X)

X_df = pd.DataFrame(X_df, columns=col_names)   
X_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
baseline value,2126.0,1.06949e-15,1.000235,-2.775197,-0.742373,-0.030884,0.680604,2.713428
accelerations,2126.0,-4.010589e-17,1.000235,-0.822388,-0.822388,-0.304881,0.730133,4.093929
fetal_movement,2126.0,-1.336863e-17,1.000235,-0.20321,-0.20321,-0.20321,-0.138908,10.10654
uterine_contractions,2126.0,-1.336863e-16,1.000235,-1.482465,-0.803434,-0.124404,0.894142,3.610264
light_decelerations,2126.0,-5.347452e-17,1.000235,-0.638438,-0.638438,-0.638438,0.375243,4.429965
severe_decelerations,2126.0,6.684315e-18,1.000235,-0.057476,-0.057476,-0.057476,-0.057476,17.398686
prolongued_decelerations,2126.0,1.336863e-17,1.000235,-0.268754,-0.268754,-0.268754,-0.268754,8.20857
abnormal_short_term_variability,2126.0,-7.352747000000001e-17,1.000235,-2.035639,-0.872088,0.11693,0.81506,2.327675
mean_value_of_short_term_variability,2126.0,6.684315e-17,1.000235,-1.282833,-0.716603,-0.150373,0.415857,6.417893
percentage_of_time_with_abnormal_long_term_variability,2126.0,-5.347452e-17,1.000235,-0.535361,-0.535361,-0.535361,0.062707,4.412293


In [6]:
X_train, X_test, y_train,y_test = train_test_split(X_df,y,test_size=0.3,random_state=42)

In [7]:
ros = RandomOverSampler(random_state=42)
X_train,y_train = ros.fit_resample(X_train,y_train)

In [8]:
performanceMeasuresDF = pd.DataFrame(columns=['Classifier Model', 'Accuracy','F1-Score','Recall','Precision','Jaccard-Score','Kappa-Score'])

In [9]:
lr = LogisticRegression(max_iter = 100000)
result = %timeit -o -n 5 -r 5 lr.fit(X=X_train, y=y_train) 
lr.fit(X = X_train, y = y_train)
measures = performance_measures("Logistic Regression", lr, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time = result.average

137 ms ± 23.2 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
Accuracy : 0.829153605015674
F1 : 0.8444897352804529
Recall : 0.829153605015674
Precision : 0.8815067739511789
Jaccard : 0.7458678319168522
Kappa : 0.613055792033207


In [10]:
svmlinear = SVC(kernel = 'linear')
result = %timeit -o -n 5 -r 5 svmlinear.fit(X=X_train, y = y_train)
svmlinear.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Linear)", svmlinear, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

395 ms ± 7.08 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8275862068965517
F1 : 0.8422017413975483
Recall : 0.8275862068965517
Precision : 0.8758525503219734
Jaccard : 0.7419029578083128
Kappa : 0.6044436678859887


In [11]:
svmrbf = SVC(kernel = 'rbf')
result = %timeit -o -n 5 -r 5 svmrbf.fit(X=X_train, y = y_train)
svmrbf.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(RBF)", svmrbf, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

259 ms ± 5.84 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8667711598746082
F1 : 0.8768344274947355
Recall : 0.8667711598746082
Precision : 0.9039672986490344
Jaccard : 0.7899066397764395
Kappa : 0.688969694188901


Two variants which haven't been tried before

In [12]:
svmpoly = SVC(kernel = 'poly')
result = %timeit -o -n 5 -r 5 svmpoly.fit(X=X_train, y = y_train)
svmpoly.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Poly)",svmpoly, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

252 ms ± 3.04 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8573667711598746
F1 : 0.8667069617363671
Recall : 0.8573667711598746
Precision : 0.8870851445554866
Jaccard : 0.7765662062274762
Kappa : 0.6574668428753481


In [13]:
svmsigmoid = SVC(kernel = 'sigmoid')
result = %timeit -o -n 5 -r 5 svmsigmoid.fit(X=X_train, y = y_train)
svmsigmoid.fit(X=X_train, y = y_train)
measures = performance_measures("Support Vector Machine(Sigmoid)",svmsigmoid, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

398 ms ± 7.74 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.6520376175548589
F1 : 0.7068440350281285
Recall : 0.6520376175548589
Precision : 0.8459066400633176
Jaccard : 0.5585236266514508
Kappa : 0.37212797177067214


In [14]:
knn = KNeighborsClassifier(n_neighbors=5)
result = %timeit -o -n 5 -r 5 knn.fit(X=X_train, y = y_train) 
knn.fit(X=X_train, y = y_train)
measures = performance_measures("K-Nearest Neighbors",knn, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

3.34 ms ± 404 μs per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.85423197492163
F1 : 0.8646910717286094
Recall : 0.85423197492163
Precision : 0.8872843890423953
Jaccard : 0.7749695896500054
Kappa : 0.652735264337678


In [15]:
gnb = GaussianNB()
result = %timeit -o -n 5 -r 5 gnb.fit(X_train, y_train)
gnb.fit(X_train, y_train)
measures = performance_measures("Naive Bayes",gnb, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

4.65 ms ± 807 μs per loop (mean ± std. dev. of 5 runs, 5 loops each)
Accuracy : 0.6520376175548589
F1 : 0.6906620704515873
Recall : 0.6520376175548589
Precision : 0.8521497798109492
Jaccard : 0.5357928003380658
Kappa : 0.3780829977913507


In [16]:
dt = DecisionTreeClassifier()
result = %timeit -o -n 5 -r 5 dt.fit(X_train, y_train)
dt.fit(X_train, y_train)
measures = performance_measures("Decision Tree",dt, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

24.9 ms ± 2.21 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
Accuracy : 0.9231974921630094
F1 : 0.9224049290373703
Recall : 0.9231974921630094
Precision : 0.921934242181235
Jaccard : 0.8618978128490614
Kappa : 0.7883211678832117


In [17]:
rfc = RandomForestClassifier(n_estimators = 100, random_state = 42)
result = %timeit -o -n 5 -r 5 rfc.fit(X_train, y_train)
rfc.fit(X_train, y_train)
measures = performance_measures("Random Forest",rfc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

594 ms ± 6.88 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.9357366771159875
F1 : 0.9356363710608147
Recall : 0.9357366771159875
Precision : 0.9358424304461027
Jaccard : 0.8839321095023362
Kappa : 0.8253583565338728


In [18]:
abc = AdaBoostClassifier(n_estimators = 100, random_state = 42)
result = %timeit -o -n 5 -r 5 abc.fit(X_train, y_train)
abc.fit(X_train, y_train)
measures = performance_measures("Ada Boost",abc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

704 ms ± 83.2 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8871473354231975
F1 : 0.8922203194307321
Recall : 0.8871473354231975
Precision : 0.9037482268486758
Jaccard : 0.8123084124727266
Kappa : 0.7184448762189628


In [19]:
gbc = GradientBoostingClassifier(n_estimators = 100, random_state = 42, learning_rate = 1.0, max_depth = 1)
result = %timeit -o -n 5 -r 5 gbc.fit(X_train, y_train)
gbc.fit(X_train, y_train)
measures = performance_measures("Gradient Boost",gbc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

1.41 s ± 8.58 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Accuracy : 0.8824451410658307
F1 : 0.889478732362703
Recall : 0.8824451410658307
Precision : 0.9123061851569174
Jaccard : 0.8067182439299724
Kappa : 0.7229556034183284


In [20]:
xgb = XGBClassifier(use_label_encoder = False, eval_metric = 'mlogloss')
result = %timeit -o -n 5 -r 5 xgb.fit(X_train, y_train - 1)
xgb.fit(X_train, y_train - 1)
measures = performance_measures("XG Boost",xgb, X_test, y_test - 1, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



Parameters: { "use_label_encoder" } are not used.



683 ms ± 68.7 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)


Parameters: { "use_label_encoder" } are not used.



Accuracy : 0.9545454545454546
F1 : 0.9544474452495844
Recall : 0.9545454545454546
Precision : 0.954673114614897
Jaccard : 0.9150425054580357
Kappa : 0.8764729838898124


In [21]:
lgbm = LGBMClassifier()
result = %timeit -o -n 5 -r 5 lgbm.fit(X_train, y_train)
lgbm.fit(X_train, y_train)
measures = performance_measures("LightGBM",lgbm, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000641 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001281 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001338 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001298 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000424 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000634 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001267 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001222 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000446 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000425 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000635 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001121 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001683 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001286 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000656 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000368 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001310 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001482 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001386 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001477 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000989 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


600 ms ± 30.3 ms per loop (mean ± std. dev. of 5 runs, 5 loops each)
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1274
[LightGBM] [Info] Number of data points in the train set: 3477, number of used features: 21
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


Accuracy : 0.9561128526645768
F1 : 0.9555792458398668
Recall : 0.9561128526645768
Precision : 0.9554740609172917
Jaccard : 0.9172395981761186
Kappa : 0.8783404614672151


In [22]:
performanceMeasuresDF["Weighted_Score"] = performanceMeasuresDF["Accuracy"] + performanceMeasuresDF["F1-Score"] + performanceMeasuresDF["Recall"] + performanceMeasuresDF["Precision"] + performanceMeasuresDF["Jaccard-Score"] + performanceMeasuresDF["Kappa-Score"]

In [23]:
performanceTable = performanceMeasuresDF.sort_values(by = "Weighted_Score",ascending= False)

In [24]:
performanceTable.to_excel("scripts/new_results/"+ scaler_name + '.xlsx', index = False)

In [25]:
with open(r"scripts/new_results/" + scaler_name + r" Timings.txt", "+a") as file:
    file.write(str(exec_time) + "\n")