In [1]:
# Make the project folder the working directory; the HDF5 paths used further
# down are relative to it.
# NOTE(review): the path looks like a mounted Google Drive in Colab — the drive
# presumably has to be mounted before this cell runs; confirm.
%cd '/content/drive/My Drive/ML/Final'

/content/drive/My Drive/ML/Final


In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import cross_val_score,train_test_split
from sklearn.metrics import confusion_matrix, classification_report, plot_confusion_matrix,accuracy_score,f1_score
import matplotlib.pyplot as plt

import joblib
import pickle
import h5py
import numpy as np
import os
import glob
import cv2

In [4]:
# Locations (relative to the working directory) of the HDF5 files that hold
# the training feature matrix and the matching label vector.
train_normal, train_label = (
    'H5PY/train_normal_64.h5',
    'H5PY/labels_train_64_128.h5',
)

In [6]:
# Load the feature matrix and the label vector fully into memory.
# Opening the files with `with` guarantees both handles are closed even when a
# read fails, and copying into NumPy arrays keeps the data usable after the
# files are closed.
with h5py.File(train_normal, 'r') as h5f_train, \
        h5py.File(train_label, 'r') as h5f_labels:
    data = np.array(h5f_train['dataset'])
    labels = np.array(h5f_labels['dataset'])

# Features and labels are indexed with the same row indices later on, so they
# must describe the same number of samples.
assert data.shape[0] == labels.shape[0], (
    'sample count mismatch: {} feature rows vs {} labels'.format(
        data.shape[0], labels.shape[0]))

In [10]:
# Sanity check: (number of samples, number of features) of the loaded feature matrix.
data.shape

(13729, 12288)

In [11]:
# Sanity check: one label per sample, so the length should equal data.shape[0].
labels.shape

(13729,)

In [13]:
# Candidate scikit-learn classifiers to compare. The estimators that accept a
# seed get a fixed random_state so repeated runs give the same results.
models = [
    LogisticRegression(random_state=42, max_iter=100000),
    DecisionTreeClassifier(random_state=42),
    KNeighborsClassifier(),
    GaussianNB(),
    SVC(random_state=42, max_iter=100000),
]

In [14]:
# Per-model containers, index-aligned with `models`: one list of fold scores
# for each metric, plus a placeholder for the model's display name.
results_accuracy = [[] for _ in models]
results_f1 = [[] for _ in models]
names = [''] * len(models)

In [None]:
# Evaluate every model with stratified 5-fold cross-validation, collecting one
# accuracy and one F1 score per model and fold.

# Start from empty score lists so that re-running this cell does not append a
# second set of fold scores to the ones already collected.
results_accuracy = [[] for _ in models]
results_f1 = [[] for _ in models]
# Display names used as tick labels / row labels in the cells below.
names = [model.__class__.__name__ for model in models]

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for train_idx, val_idx in skf.split(data, labels):
    train_X, val_X = data[train_idx], data[val_idx]
    train_Y, val_Y = labels[train_idx], labels[val_idx]

    for i, model in enumerate(models):
        model.fit(train_X, train_Y)
        pred = model.predict(val_X)

        score_accuracy = accuracy_score(val_Y, pred)
        # Macro averaging gives every class equal weight. Micro-averaged F1 on
        # single-label targets is numerically identical to accuracy, which
        # would make the F1 comparison a copy of the accuracy comparison.
        score_f1 = f1_score(val_Y, pred, average='macro')

        results_accuracy[i].append(score_accuracy)
        results_f1[i].append(score_f1)


In [None]:
# Box plot of the per-fold accuracy scores, one box per classifier.
fig, ax = plt.subplots(figsize=(10, 5))
ax.boxplot(results_accuracy, labels=names)
ax.set_title('Algorithm Comparison with normal extraction accuracy')
plt.show()

In [None]:
# Print mean and standard deviation of the fold accuracies for each classifier.
score_accuracy = np.array(results_accuracy)
for model_name, fold_scores in zip(names, score_accuracy):
    print('{}: {} ({})'.format(model_name, fold_scores.mean(), fold_scores.std()))

In [None]:
# Box plot of the per-fold F1 scores, one box per classifier.
fig, ax = plt.subplots(figsize=(10, 5))
ax.boxplot(results_f1, labels=names)
ax.set_title('Algorithm Comparison with normal extraction f1 score')
plt.show()

In [None]:
# Print mean and standard deviation of the fold F1 scores for each classifier.
score_f1 = np.array(results_f1)
for model_name, fold_scores in zip(names, score_f1):
    print('{}: {} ({})'.format(model_name, fold_scores.mean(), fold_scores.std()))