In [126]:
import joblib
import pandas as pd
import numpy as np

from sklearn.metrics import confusion_matrix, f1_score, ConfusionMatrixDisplay, classification_report, precision_recall_fscore_support, precision_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

# models
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# counter 
from collections import Counter

# time 
import time
import os
import psutil
import pickle


In [127]:
def object_to_cat(df):
    tmp = df.copy()
    for col in tmp.columns:
        if np.dtype(tmp[col]) == "object":
            tmp[col] = tmp[col].astype("category")
    return tmp

df = pd.read_csv("./data/german_clean.csv")
df = object_to_cat(df)


In [128]:
SEED = 42
TARGET = 'class'
FEATURES = df.columns.drop(TARGET)

NUMERICAL = df[FEATURES].select_dtypes('number').columns
CATEGORICAL = pd.Index(np.setdiff1d(FEATURES, NUMERICAL))

X = df.drop(columns=TARGET)
y = df[TARGET]


In [129]:
print('original dataset class distribution:', sorted(Counter(y).items()))

original dataset class distribution: [(0, 297), (1, 698)]


#### Pipeline

In [130]:
# result lists
# indexs = 0:over, 1:under, 2:normal 
trainin_time = []
memory_usage=[]
training_test_time=[]
memory_test_usage=[]


In [131]:
numerical_pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', MinMaxScaler())
])

categorical_pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OneHotEncoder())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_pipe, NUMERICAL),
        ('cat', categorical_pipe, CATEGORICAL)
    ])

Hold out the testing set

In [132]:
# Split train-test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=SEED,
                                                    stratify=df[TARGET])


In [133]:
print("train size: ",X_train.shape, y_train.shape)
print("test size: ",X_test.shape, y_test.shape)
print('normal class distribution:', sorted(Counter(y_train).items()))

train size:  (796, 20) (796,)
test size:  (199, 20) (199,)
normal class distribution: [(0, 238), (1, 558)]


### Over-sampling

In [134]:
# oversampling usign random oversampling technique 
from imblearn.over_sampling import RandomOverSampler

ros = RandomOverSampler(random_state=SEED)
X_oversampled, y_oversampled = ros.fit_resample(X_train, y_train)

print('over sampled class distribution:',sorted(Counter(y_oversampled).items()))


over sampled class distribution: [(0, 558), (1, 558)]


### Models

In [135]:
# logistic regression1
over_sampled_LR = Pipeline([
    ('preprocessor', preprocessor),
    ('model', LogisticRegression(solver='liblinear', random_state=0))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

over_sampled_LR.fit(X_oversampled, y_oversampled)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
lr_exec_time = end - start
trainin_time.append(lr_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

# SVC2
over_sampled_SVC = Pipeline([
    ('preprocessor', preprocessor),
    ('model', SVC(C=1.0,kernel='rbf',degree=3,gamma='scale',coef0=0.0,shrinking=True,probability=False,tol=0.001,cache_size=200,
                class_weight=None,
                verbose=False,
                max_iter=-1,
                decision_function_shape='ovr',
                break_ties=False,
                random_state=None))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

over_sampled_SVC.fit(X_oversampled, y_oversampled)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
svc_exec_time = end - start
trainin_time.append(svc_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

# GaussianNaiveBayes3 
over_sampled_GNB = Pipeline([
    ('preprocessor', preprocessor),
    ('model', GaussianNB())
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

over_sampled_GNB.fit(X_oversampled, y_oversampled)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
gnb_exec_time = end - start
trainin_time.append(gnb_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

# KNN4 
over_sampled_KNN = Pipeline([
    ('preprocessor', preprocessor),
    ('model', KNeighborsClassifier(n_neighbors=33))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

over_sampled_KNN.fit(X_oversampled, y_oversampled)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
knn_exec_time = end - start
trainin_time.append(knn_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)


# RF5
over_sampled_RF = Pipeline([
    ('preprocessor', preprocessor),
    ('model', RandomForestClassifier(max_depth=None, max_features=10, n_estimators=15, random_state=2))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

over_sampled_RF.fit(X_oversampled, y_oversampled)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
rf_exec_time = end - start
trainin_time.append(rf_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

filename = 'models/over_sampled_LR.pkl'
joblib.dump(over_sampled_LR, open(filename, 'wb'))

filename = 'models/over_sampled_GNB.pkl'
joblib.dump(over_sampled_GNB, open(filename, 'wb'))

filename = 'models/over_sampled_KNN.pkl'
joblib.dump(over_sampled_KNN, open(filename, 'wb'))

filename = 'models/over_sampled_RF.pkl'
joblib.dump(over_sampled_RF, open(filename, 'wb'))

filename = 'models/over_sampled_SVC.pkl'
joblib.dump(over_sampled_SVC, open(filename, 'wb'))



In [136]:
#LR
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = over_sampled_LR.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
lr_exc_time = end-start
training_test_time.append(lr_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)

#GNB
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = over_sampled_GNB.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
gnb_exc_time = end-start
training_test_time.append(gnb_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)

#KNN
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = over_sampled_KNN.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
knn_exc_time = end-start
training_test_time.append(knn_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)

#RF
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = over_sampled_RF.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
rf_exc_time = end-start
training_test_time.append(rf_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)


#SVC
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = over_sampled_SVC.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
svc_exc_time = end-start
training_test_time.append(svc_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)



### Under-sampling

In [137]:
# under-sampling using random undersampling technique 
from imblearn.under_sampling import RandomUnderSampler

rus = RandomUnderSampler(random_state=SEED)
X_undersampled, y_undersampled = rus.fit_resample(X_train, y_train)

print('over sampled class distribution:',sorted(Counter(y_undersampled).items()))


over sampled class distribution: [(0, 238), (1, 238)]


In [138]:
# logistic regression1
under_sampled_LR = Pipeline([
    ('preprocessor', preprocessor),
    ('model', LogisticRegression(solver='liblinear', random_state=0))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

under_sampled_LR.fit(X_undersampled, y_undersampled)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
lr_exec_time = end - start
trainin_time.append(lr_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

# SVC2
under_sampled_SVC = Pipeline([
    ('preprocessor', preprocessor),
    ('model', SVC(C=1.0,kernel='rbf',degree=3,gamma='scale',coef0=0.0,shrinking=True,probability=False,tol=0.001,cache_size=200,
                class_weight=None,
                verbose=False,
                max_iter=-1,
                decision_function_shape='ovr',
                break_ties=False,
                random_state=None))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

under_sampled_SVC.fit(X_undersampled, y_undersampled)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
svc_exec_time = end - start
trainin_time.append(svc_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

# GaussianNaiveBayes3 
under_sampled_GNB = Pipeline([
    ('preprocessor', preprocessor),
    ('model', GaussianNB())
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

under_sampled_GNB.fit(X_undersampled, y_undersampled)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
gnb_exec_time = end - start
trainin_time.append(gnb_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

# KNN4 
under_sampled_KNN = Pipeline([
    ('preprocessor', preprocessor),
    ('model', KNeighborsClassifier(n_neighbors=33))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

under_sampled_KNN.fit(X_undersampled, y_undersampled)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
knn_exec_time = end - start
trainin_time.append(knn_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

# RF5
under_sampled_RF = Pipeline([
    ('preprocessor', preprocessor),
    ('model', RandomForestClassifier(max_depth=None, max_features=10, n_estimators=15, random_state=2))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

under_sampled_RF.fit(X_undersampled, y_undersampled)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
rf_exec_time = end - start
trainin_time.append(rf_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

filename = 'models/under_sampled_LR.pkl'
joblib.dump(under_sampled_LR, open(filename, 'wb'))

filename = 'models/under_sampled_GNB.pkl'
joblib.dump(under_sampled_GNB, open(filename, 'wb'))

filename = 'models/under_sampled_KNN.pkl'
joblib.dump(under_sampled_KNN, open(filename, 'wb'))

filename = 'models/under_sampled_RF.pkl'
joblib.dump(under_sampled_RF, open(filename, 'wb'))

filename = 'models/under_sampled_SVC.pkl'
joblib.dump(under_sampled_SVC, open(filename, 'wb'))


In [139]:
#LR
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = under_sampled_LR.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
lr_exc_time = end-start
training_test_time.append(lr_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)

#GNB
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = under_sampled_GNB.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
gnb_exc_time = end-start
training_test_time.append(gnb_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)

#KNN
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = under_sampled_KNN.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
knn_exc_time = end-start
training_test_time.append(knn_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)

#RF
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = under_sampled_RF.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
rf_exc_time = end-start
training_test_time.append(rf_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)


#SVC
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = under_sampled_SVC.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
svc_exc_time = end-start
training_test_time.append(svc_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)



### Normal dataset

In [140]:
# logistic regression1
normal_LR = Pipeline([
    ('preprocessor', preprocessor),
    ('model', LogisticRegression(solver='liblinear', random_state=0))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

normal_LR.fit(X_train, y_train)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
lr_exec_time = end - start
trainin_time.append(lr_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

# SVC2
normal_SVC = Pipeline([
    ('preprocessor', preprocessor),
    ('model', SVC(C=1.0,kernel='rbf',degree=3,gamma='scale',coef0=0.0,shrinking=True,probability=False,tol=0.001,cache_size=200,
                class_weight=None,
                verbose=False,
                max_iter=-1,
                decision_function_shape='ovr',
                break_ties=False,
                random_state=None))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

normal_SVC.fit(X_train, y_train)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
svc_exec_time = end - start
trainin_time.append(svc_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

# GaussianNaiveBayes3 
normal_GNB = Pipeline([
    ('preprocessor', preprocessor),
    ('model', GaussianNB())
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

normal_GNB.fit(X_train, y_train)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
gnb_exec_time = end - start
trainin_time.append(gnb_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

# KNN4 
normal_KNN = Pipeline([
    ('preprocessor', preprocessor),
    ('model', KNeighborsClassifier(n_neighbors=33))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

normal_KNN.fit(X_train, y_train)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
knn_exec_time = end - start
trainin_time.append(knn_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)


# RF5
normal_RF = Pipeline([
    ('preprocessor', preprocessor),
    ('model', RandomForestClassifier(max_depth=None, max_features=10, n_estimators=15, random_state=2))
])

start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

normal_RF.fit(X_train, y_train)

end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
rf_exec_time = end - start
trainin_time.append(rf_exec_time)
mem_used = finished_mem - start_mem
memory_usage.append(mem_used)

filename = 'models/normal_LR.pkl'
joblib.dump(normal_LR, open(filename, 'wb'))

filename = 'models/normal_GNB.pkl'
joblib.dump(normal_GNB, open(filename, 'wb'))

filename = 'models/normal_KNN.pkl'
joblib.dump(normal_KNN, open(filename, 'wb'))

filename = 'models/normal_RF.pkl'
joblib.dump(normal_RF, open(filename, 'wb'))

filename = 'models/normal_SVC.pkl'
joblib.dump(normal_SVC, open(filename, 'wb'))

In [141]:
#LR
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = normal_LR.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
lr_exc_time = end-start
training_test_time.append(lr_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)

#GNB
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = normal_GNB.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
gnb_exc_time = end-start
training_test_time.append(gnb_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)

#KNN
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = normal_KNN.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
knn_exc_time = end-start
training_test_time.append(knn_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)

#RF
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = normal_RF.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
rf_exc_time = end-start
training_test_time.append(rf_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)


#SVC
# timer start
start = time.time()
start_mem = psutil.Process().memory_info().rss / (1024 * 1024)

y_pred = normal_SVC.predict(X_test)

# timer stop 
end = time.time()
finished_mem = psutil.Process().memory_info().rss / (1024 * 1024)
svc_exc_time = end-start
training_test_time.append(svc_exec_time)
mem_used = finished_mem - start_mem
memory_test_usage.append(mem_used)



In [142]:
training_test_time

[0.1060798168182373,
 0.14803624153137207,
 0.13208436965942383,
 0.46352362632751465,
 0.5287573337554932,
 0.10094308853149414,
 0.08794927597045898,
 0.08295440673828125,
 0.38629937171936035,
 0.13292407989501953,
 0.14991283416748047,
 0.11093449592590332,
 0.09693670272827148,
 0.39081883430480957,
 0.3679349422454834]

### Evaluation

Training Time

In [143]:
trainin_time

[0.1060798168182373,
 0.5287573337554932,
 0.14803624153137207,
 0.13208436965942383,
 0.46352362632751465,
 0.10094308853149414,
 0.13292407989501953,
 0.08794927597045898,
 0.08295440673828125,
 0.38629937171936035,
 0.14991283416748047,
 0.3679349422454834,
 0.11093449592590332,
 0.09693670272827148,
 0.39081883430480957]

In [144]:
memory_usage

[4.41796875,
 -0.5546875,
 1.390625,
 0.0703125,
 0.77734375,
 0.73046875,
 0.390625,
 0.01171875,
 0.00390625,
 0.25,
 0.08984375,
 0.78125,
 0.03515625,
 0.0,
 0.05859375]

In [145]:
training_test_time

[0.1060798168182373,
 0.14803624153137207,
 0.13208436965942383,
 0.46352362632751465,
 0.5287573337554932,
 0.10094308853149414,
 0.08794927597045898,
 0.08295440673828125,
 0.38629937171936035,
 0.13292407989501953,
 0.14991283416748047,
 0.11093449592590332,
 0.09693670272827148,
 0.39081883430480957,
 0.3679349422454834]

In [146]:
memory_test_usage

[0.2421875,
 0.14453125,
 2.375,
 0.0390625,
 0.03515625,
 0.046875,
 0.0,
 0.73828125,
 0.0,
 0.0,
 0.03125,
 0.0,
 0.6015625,
 0.0,
 0.0078125]

Accuracy

In [149]:
models = [over_sampled_LR,over_sampled_GNB,over_sampled_KNN,over_sampled_SVC, over_sampled_RF,
          under_sampled_LR,under_sampled_GNB,under_sampled_KNN,under_sampled_SVC, under_sampled_RF,
          normal_RF,normal_GNB,normal_KNN,normal_SVC, normal_RF]
confusion_met = []
false_negatives = []
false_positives = []
true_negatives=[]
true_positives=[]

cls_acc = []
cls_precision = []
cls_recall = []
cls_f1 = []


In [150]:
for model in models:
    y_pred = model.predict(X_test)

    accuracy = model.score(X_test, y_test)
    cls_acc.append(accuracy)

    score = precision_recall_fscore_support(y_test, y_pred, average='weighted')

    cls_precision.append(score[0])
    cls_recall.append(score[1])
    cls_f1.append(score[2])
    confusion_met.append(classification_report(y_test, y_pred, target_names=['bad','good']))
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    false_negatives.append(fn)
    false_positives.append(fp)
    true_negatives.append(tn)
    true_positives.append(tp)


In [151]:
false_negatives


[41, 47, 47, 29, 22, 37, 43, 44, 36, 41, 14, 40, 4, 11, 14]

In [152]:
false_positives

[15, 15, 18, 26, 35, 16, 15, 16, 11, 15, 30, 17, 49, 37, 30]

In [153]:
true_positives

[99, 93, 93, 111, 118, 103, 97, 96, 104, 99, 126, 100, 136, 129, 126]

In [156]:
true_negatives

[44, 44, 41, 33, 24, 43, 44, 43, 48, 44, 29, 42, 10, 22, 29]

In [157]:
trainin_time

[0.1060798168182373,
 0.5287573337554932,
 0.14803624153137207,
 0.13208436965942383,
 0.46352362632751465,
 0.10094308853149414,
 0.13292407989501953,
 0.08794927597045898,
 0.08295440673828125,
 0.38629937171936035,
 0.14991283416748047,
 0.3679349422454834,
 0.11093449592590332,
 0.09693670272827148,
 0.39081883430480957]

In [158]:
cls_acc

[0.7185929648241206,
 0.6884422110552764,
 0.6733668341708543,
 0.7236180904522613,
 0.7135678391959799,
 0.7336683417085427,
 0.7085427135678392,
 0.6984924623115578,
 0.7638190954773869,
 0.7185929648241206,
 0.7788944723618091,
 0.7135678391959799,
 0.7336683417085427,
 0.7587939698492462,
 0.7788944723618091]

In [159]:
cls_precision

[0.7644227328593431,
 0.7491609451408447,
 0.7275675058338376,
 0.7278084559837095,
 0.6972686678633292,
 0.7682862843629915,
 0.7592416103506036,
 0.7495523594986426,
 0.8056431224445207,
 0.7644227328593431,
 0.7681789987504608,
 0.7531539289107618,
 0.7289536485516386,
 0.7443643922423362,
 0.7681789987504608]

In [160]:
cls_recall

[0.7185929648241206,
 0.6884422110552764,
 0.6733668341708543,
 0.7236180904522613,
 0.7135678391959799,
 0.7336683417085427,
 0.7085427135678392,
 0.6984924623115578,
 0.7638190954773869,
 0.7185929648241206,
 0.7788944723618091,
 0.7135678391959799,
 0.7336683417085427,
 0.7587939698492462,
 0.7788944723618091]

In [161]:
cls_f1

[0.7295950443380508,
 0.7015745393634841,
 0.6867165052571309,
 0.725547659531659,
 0.7021906215622237,
 0.7429896908223794,
 0.7202986010723328,
 0.7106537252472406,
 0.7728867841194178,
 0.7295950443380508,
 0.7675286874222731,
 0.7241123241840463,
 0.6700181624667065,
 0.7349578240734791,
 0.7675286874222731]

In [162]:
for cm in confusion_met:
    print(cm,'\n')

              precision    recall  f1-score   support

         bad       0.52      0.75      0.61        59
        good       0.87      0.71      0.78       140

    accuracy                           0.72       199
   macro avg       0.69      0.73      0.70       199
weighted avg       0.76      0.72      0.73       199
 

              precision    recall  f1-score   support

         bad       0.48      0.75      0.59        59
        good       0.86      0.66      0.75       140

    accuracy                           0.69       199
   macro avg       0.67      0.71      0.67       199
weighted avg       0.75      0.69      0.70       199
 

              precision    recall  f1-score   support

         bad       0.47      0.69      0.56        59
        good       0.84      0.66      0.74       140

    accuracy                           0.67       199
   macro avg       0.65      0.68      0.65       199
weighted avg       0.73      0.67      0.69       199
 

             