In [1]:
import h5py
import numpy as np

import gc
import pandas as pd

# machine learning
from sklearn.utils import shuffle

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


from sklearn.metrics import classification_report, accuracy_score

import pickle



def load_split(path, seed=0):
    """Load one HDF5 split and return shuffled ``(features, class_ids)``.

    Reads the ``points`` and ``labels`` datasets fully into memory, flattens
    every sample of ``points`` into a single row, reduces ``labels`` to one
    integer per sample with ``argmax`` over axis 1, and shuffles both arrays
    in unison.

    Parameters
    ----------
    path : str
        Location of the ``.hdf5`` file to read.
    seed : int, default 0
        ``random_state`` forwarded to ``sklearn.utils.shuffle``.

    Returns
    -------
    tuple
        ``(features, class_ids)`` with matching first dimension.

    Raises
    ------
    KeyError
        If the file has no ``points`` or ``labels`` dataset.
    """
    # Open read-only explicitly instead of relying on the installed h5py
    # version's default mode; the context manager closes the file even if
    # one of the reads below fails.
    with h5py.File(path, 'r') as h5_file:
        points = h5_file['points'][:]
        label_matrix = h5_file['labels'][:]

    features = points.reshape([len(points), -1])
    # argmax over axis 1 -> column index of the largest entry per row
    # (assumes labels are stored one-hot -- TODO confirm against the writer).
    class_ids = np.argmax(label_matrix, axis=1)

    return shuffle(features, class_ids, random_state=seed)


# One shuffle per split is enough: the metrics computed from these arrays
# do not depend on row order.
X_train, Y_train = load_split('./TL_prepared_data/train/data.hdf5')
X_test, Y_test = load_split('./TL_prepared_data/test/data.hdf5')

# The raw arrays were local to load_split; collect now so their memory is
# returned before the models are trained.
gc.collect()

X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

((46028, 4096), (46028,), (43632, 4096), (43632,))

In [2]:
# Linear Discriminant Analysis

# fit() returns the estimator itself, so construction and training chain.
lda = LinearDiscriminantAnalysis().fit(X_train, Y_train)

# Training-set accuracy, expressed in percent with two decimals.
train_fraction_correct = lda.score(X_train, Y_train)
acc_lda = round(train_fraction_correct * 100, 2)

# Persist the fitted model.
with open('./models/no_nn/lda.pkl', 'wb') as model_file:
    pickle.dump(lda, model_file)

# Evaluate on the held-out test split.
Y_pred = lda.predict(X_test)
test_acc_lda = accuracy_score(Y_test, Y_pred)
print("acuracy:", test_acc_lda)
print(classification_report(Y_test, Y_pred))

acuracy: 0.7696644664466447
              precision    recall  f1-score   support

           0       0.73      0.76      0.75     10908
           1       0.77      0.79      0.78     10908
           2       0.78      0.68      0.73     10908
           3       0.80      0.85      0.82     10908

    accuracy                           0.77     43632
   macro avg       0.77      0.77      0.77     43632
weighted avg       0.77      0.77      0.77     43632



In [3]:
# Logistic Regression

# NOTE(review): the recorded run of this cell stopped with
# "TOTAL NO. of ITERATIONS REACHED LIMIT", i.e. the solver did not converge
# with the default settings. Consider a larger max_iter or scaling the
# features before trusting the reported scores.
logreg = LogisticRegression().fit(X_train, Y_train)

# Training-set accuracy, expressed in percent with two decimals.
train_fraction_correct = logreg.score(X_train, Y_train)
acc_log = round(train_fraction_correct * 100, 2)

# Persist the fitted model.
with open('./models/no_nn/log_reg.pkl', 'wb') as model_file:
    pickle.dump(logreg, model_file)

# Evaluate on the held-out test split.
Y_pred = logreg.predict(X_test)
test_acc_log = accuracy_score(Y_test, Y_pred)
print("acuracy:", test_acc_log)
print(classification_report(Y_test, Y_pred))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


acuracy: 0.7911853685368537
              precision    recall  f1-score   support

           0       0.80      0.77      0.78     10908
           1       0.76      0.78      0.77     10908
           2       0.81      0.74      0.78     10908
           3       0.80      0.87      0.83     10908

    accuracy                           0.79     43632
   macro avg       0.79      0.79      0.79     43632
weighted avg       0.79      0.79      0.79     43632



In [4]:
# Support Vector Machines

svc = SVC()
svc.fit(X_train, Y_train)

# Training-set accuracy in percent; collected for the summary table.
acc_svc = round(svc.score(X_train, Y_train) * 100, 2)

# Persist the fitted model.
with open('./models/no_nn/svc.pkl', 'wb') as f:
    pickle.dump(svc, f)

# Predict the test split exactly once and reuse the result for both metrics
# (a second predict() over the full test set only repeats the same work).
Y_pred = svc.predict(X_test)
test_acc_svc = accuracy_score(Y_test, y_pred=Y_pred)
print("acuracy:", test_acc_svc)
print(classification_report(Y_test, y_pred=Y_pred))

acuracy: 0.9337871287128713
              precision    recall  f1-score   support

           0       0.97      0.94      0.96     10908
           1       0.99      0.91      0.95     10908
           2       0.96      0.92      0.94     10908
           3       0.84      0.96      0.90     10908

    accuracy                           0.93     43632
   macro avg       0.94      0.93      0.93     43632
weighted avg       0.94      0.93      0.93     43632



In [5]:
# KNeighbors

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, Y_train)

# Training-set accuracy in percent; collected for the summary table.
acc_knn = round(knn.score(X_train, Y_train) * 100, 2)

# Persist the fitted model.
with open('./models/no_nn/knn.pkl', 'wb') as f:
    pickle.dump(knn, f)

# Predict the test split exactly once and reuse the result for both metrics
# (a second predict() over the full test set only repeats the same work).
Y_pred = knn.predict(X_test)
test_acc_knn = accuracy_score(Y_test, y_pred=Y_pred)
print("acuracy:", test_acc_knn)
print(classification_report(Y_test, y_pred=Y_pred))

acuracy: 0.9023881554822148
              precision    recall  f1-score   support

           0       0.95      0.92      0.94     10908
           1       0.94      0.90      0.92     10908
           2       0.93      0.88      0.90     10908
           3       0.81      0.91      0.86     10908

    accuracy                           0.90     43632
   macro avg       0.91      0.90      0.90     43632
weighted avg       0.91      0.90      0.90     43632



In [6]:
# Gaussian Naive Bayes

gaussian = GaussianNB()
gaussian.fit(X_train, Y_train)

# Training-set accuracy in percent; collected for the summary table.
acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)

# Persist the fitted model.
with open('./models/no_nn/gaussian.pkl', 'wb') as f:
    pickle.dump(gaussian, f)

# Predict the test split exactly once and reuse the result for both metrics.
Y_pred = gaussian.predict(X_test)
test_acc_gaussian = accuracy_score(Y_test, y_pred=Y_pred)
print("acuracy:", test_acc_gaussian)
print(classification_report(Y_test, y_pred=Y_pred))

acuracy: 0.688027136046938
              precision    recall  f1-score   support

           0       0.69      0.70      0.70     10908
           1       0.72      0.63      0.67     10908
           2       0.70      0.61      0.65     10908
           3       0.65      0.81      0.72     10908

    accuracy                           0.69     43632
   macro avg       0.69      0.69      0.69     43632
weighted avg       0.69      0.69      0.69     43632



In [7]:
# Perceptron

perceptron = Perceptron()
perceptron.fit(X_train, Y_train)

# Training-set accuracy in percent; collected for the summary table.
acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)

# Persist the fitted model.
with open('./models/no_nn/perceptron.pkl', 'wb') as f:
    pickle.dump(perceptron, f)

# Predict the test split exactly once and reuse the result for both metrics.
Y_pred = perceptron.predict(X_test)
test_acc_perceptron = accuracy_score(Y_test, y_pred=Y_pred)
print("acuracy:", test_acc_perceptron)
print(classification_report(Y_test, y_pred=Y_pred))

acuracy: 0.7131004767143381
              precision    recall  f1-score   support

           0       0.84      0.58      0.68     10908
           1       0.58      0.88      0.70     10908
           2       0.75      0.69      0.72     10908
           3       0.81      0.70      0.75     10908

    accuracy                           0.71     43632
   macro avg       0.74      0.71      0.71     43632
weighted avg       0.74      0.71      0.71     43632



In [8]:
# Linear SVC

# random_state pins the solver's pseudo-random number generator so repeated
# runs give the same model; 0 matches the seed used for the data shuffle.
linear_svc = LinearSVC(random_state=0)
linear_svc.fit(X_train, Y_train)

# Training-set accuracy in percent; collected for the summary table.
acc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)

# Persist the fitted model.
with open('./models/no_nn/linear_svc.pkl', 'wb') as f:
    pickle.dump(linear_svc, f)

# Predict the test split exactly once and reuse the result for both metrics.
Y_pred = linear_svc.predict(X_test)
test_acc_linear_svc = accuracy_score(Y_test, y_pred=Y_pred)
print("acuracy:", test_acc_linear_svc)
print(classification_report(Y_test, y_pred=Y_pred))



acuracy: 0.7981298129812981
              precision    recall  f1-score   support

           0       0.79      0.78      0.79     10908
           1       0.81      0.79      0.80     10908
           2       0.79      0.74      0.77     10908
           3       0.80      0.88      0.83     10908

    accuracy                           0.80     43632
   macro avg       0.80      0.80      0.80     43632
weighted avg       0.80      0.80      0.80     43632



In [9]:
# Stochastic Gradient Descent

# SGD training is stochastic; a fixed random_state makes the fit repeatable.
# 0 matches the seed used for the data shuffle.
sgd = SGDClassifier(random_state=0)
sgd.fit(X_train, Y_train)

# Training-set accuracy in percent; collected for the summary table.
acc_sgd = round(sgd.score(X_train, Y_train) * 100, 2)

# Persist the fitted model.
with open('./models/no_nn/sgd.pkl', 'wb') as f:
    pickle.dump(sgd, f)

# Predict the test split exactly once and reuse the result for both metrics.
Y_pred = sgd.predict(X_test)
test_acc_sgd = accuracy_score(Y_test, y_pred=Y_pred)
print("acuracy:", test_acc_sgd)
print(classification_report(Y_test, y_pred=Y_pred))

acuracy: 0.7842638430509717
              precision    recall  f1-score   support

           0       0.73      0.81      0.77     10908
           1       0.86      0.67      0.76     10908
           2       0.75      0.77      0.76     10908
           3       0.81      0.88      0.85     10908

    accuracy                           0.78     43632
   macro avg       0.79      0.78      0.78     43632
weighted avg       0.79      0.78      0.78     43632



In [10]:
# Decision Tree

# A fixed random_state makes tree construction repeatable across runs;
# 0 matches the seed used for the data shuffle.
decision_tree = DecisionTreeClassifier(random_state=0)
decision_tree.fit(X_train, Y_train)

# Training-set accuracy in percent; collected for the summary table.
acc_decision_tree = round(decision_tree.score(X_train, Y_train) * 100, 2)

# Persist the fitted model.
with open('./models/no_nn/decision_tree.pkl', 'wb') as f:
    pickle.dump(decision_tree, f)

# Predict the test split exactly once and reuse the result for both metrics.
Y_pred = decision_tree.predict(X_test)
test_acc_decision_tree = accuracy_score(Y_test, y_pred=Y_pred)
print("acuracy:", test_acc_decision_tree)
print(classification_report(Y_test, y_pred=Y_pred))

acuracy: 0.8519206087275394
              precision    recall  f1-score   support

           0       0.85      0.87      0.86     10908
           1       0.91      0.89      0.90     10908
           2       0.85      0.82      0.84     10908
           3       0.80      0.82      0.81     10908

    accuracy                           0.85     43632
   macro avg       0.85      0.85      0.85     43632
weighted avg       0.85      0.85      0.85     43632



In [11]:
# Random Forest

# The forest is built from random bootstrap samples and feature subsets;
# a fixed random_state makes the fit repeatable. 0 matches the seed used
# for the data shuffle.
random_forest = RandomForestClassifier(n_estimators=100, random_state=0)
random_forest.fit(X_train, Y_train)

# Training-set accuracy in percent; collected for the summary table.
# Scored once -- the result of a second score() call would just be discarded.
acc_random_forest = round(random_forest.score(X_train, Y_train) * 100, 2)

# Persist the fitted model.
with open('./models/no_nn/random_forest.pkl', 'wb') as f:
    pickle.dump(random_forest, f)

# Predict the test split exactly once and reuse the result for both metrics.
Y_pred = random_forest.predict(X_test)
test_acc_random_forest = accuracy_score(Y_test, y_pred=Y_pred)
print("acuracy:", test_acc_random_forest)
print(classification_report(Y_test, y_pred=Y_pred))

acuracy: 0.9329620462046204
              precision    recall  f1-score   support

           0       0.95      0.94      0.95     10908
           1       0.99      0.92      0.96     10908
           2       0.96      0.91      0.93     10908
           3       0.85      0.95      0.90     10908

    accuracy                           0.93     43632
   macro avg       0.94      0.93      0.93     43632
weighted avg       0.94      0.93      0.93     43632



In [12]:
# Training-set accuracy (percent) of every classical model, best first.

# Each display name sits next to its score so the two cannot drift apart.
train_scores = [
    ('Support Vector Machines', acc_svc),
    ('KNN', acc_knn),
    ('Logistic Regression', acc_log),
    ('Random Forest', acc_random_forest),
    ('Naive Bayes', acc_gaussian),
    ('Perceptron', acc_perceptron),
    ('Stochastic Gradient Decent', acc_sgd),
    ('Linear SVC', acc_linear_svc),
    ('Decision Tree', acc_decision_tree),
    ('Linear Discriminant Analysis', acc_lda),
]
models = pd.DataFrame(train_scores, columns=['Model', 'Score'])
models.sort_values('Score', ascending=False)

Unnamed: 0,Model,Score
3,Random Forest,100.0
8,Decision Tree,100.0
1,KNN,95.21
0,Support Vector Machines,94.5
7,Linear SVC,85.37
9,Linear Discriminant Analysis,84.63
6,Stochastic Gradient Decent,80.2
2,Logistic Regression,80.14
5,Perceptron,73.04
4,Naive Bayes,69.14


In [13]:
# Test-set accuracy (fraction correct) of every classical model, best first.

# Each display name sits next to its score so the two cannot drift apart.
test_scores = [
    ('Support Vector Machines', test_acc_svc),
    ('KNN', test_acc_knn),
    ('Logistic Regression', test_acc_log),
    ('Random Forest', test_acc_random_forest),
    ('Naive Bayes', test_acc_gaussian),
    ('Perceptron', test_acc_perceptron),
    ('Stochastic Gradient Decent', test_acc_sgd),
    ('Linear SVC', test_acc_linear_svc),
    ('Decision Tree', test_acc_decision_tree),
    ('Linear Discriminant Analysis', test_acc_lda),
]
models = pd.DataFrame(test_scores, columns=['Model', 'Score'])
models.sort_values('Score', ascending=False)

Unnamed: 0,Model,Score
0,Support Vector Machines,0.933787
3,Random Forest,0.932962
1,KNN,0.902388
8,Decision Tree,0.851921
7,Linear SVC,0.79813
2,Logistic Regression,0.791185
6,Stochastic Gradient Decent,0.784264
9,Linear Discriminant Analysis,0.769664
5,Perceptron,0.7131
4,Naive Bayes,0.688027


In [1]:
# Test-set accuracies, hard-coded so the comparison table can be rebuilt
# without retraining. The classical-model values equal the "acuracy:" lines
# printed by the training cells.
# NOTE(review): `lda` and `sgd` are also the names the training cells use for
# the fitted estimators; in a shared kernel these assignments replace them.

# Linear classifiers
lda = 0.7696644664466447        # Linear Discriminant Analysis
logistic = 0.7911853685368537   # Logistic Regression
linear = 0.7981298129812981     # Linear SVC
perc = 0.7131004767143381       # Perceptron
sgd = 0.7842638430509717        # Stochastic Gradient Descent

# Kernel, neighbour and probabilistic classifiers
svm = 0.9337871287128713        # Support Vector Machines
kn = 0.9023881554822148         # KNeighbors
gaus = 0.688027136046938        # Gaussian Naive Bayes

# Tree-based classifiers
dtree = 0.8519206087275394      # Decision Tree
rf = 0.9329620462046204         # Random Forest

# NOTE(review): no visible cell produces the CNN score -- confirm its source.
cnn = 0.9375916758342501

In [2]:
# Test results: classical models plus the CNN, best first.
import pandas as pd

# Each display name sits next to its score so the two cannot drift apart.
all_scores = [
    ('Support Vector Machines', svm),
    ('KNN', kn),
    ('Logistic Regression', logistic),
    ('Random Forest', rf),
    ('Gaussian Naive Bayes', gaus),
    ('Perceptron', perc),
    ('Stochastic Gradient Decent', sgd),
    ('Linear SVC', linear),
    ('Decision Tree', dtree),
    ('Linear Discriminant Analysis', lda),
    ('Convolutional Neural Network', cnn),
]
models = pd.DataFrame(all_scores, columns=['Model', 'Score'])
models.sort_values('Score', ascending=False)

Unnamed: 0,Model,Score
10,Convolutional Neural Network,0.937592
0,Support Vector Machines,0.933787
3,Random Forest,0.932962
1,KNN,0.902388
8,Decision Tree,0.851921
7,Linear SVC,0.79813
2,Logistic Regression,0.791185
6,Stochastic Gradient Decent,0.784264
9,Linear Discriminant Analysis,0.769664
5,Perceptron,0.7131


In [19]:
import json

# Load the stored results file.
# JSON text is UTF-8, so decode it explicitly rather than with whatever the
# platform's locale default happens to be.
with open('./models/result.json', encoding='utf-8') as f:
    data = json.load(f)


In [20]:
# Show the top-level keys of the loaded results to see which runs are stored.
data.keys()

dict_keys(['ACDC', 'clinical_no_TL_resnet152', 'clinical_TL', 'ACDC_resnet18', 'ACDC_Unet', 'clinical_no_TL_resnet16', 'clinical_TL_resnet1', 'clinical_TL_resnet34', 'ACDC_resnet50', 'clinical_no_TL_Unet', 'ACDC_resnet34', 'clinical_TL_resnet50', 'clinical_no_TL', 'clinical_no_TL_resnet18', 'clinical_TL_resnet18', 'clinical_no_TL_resnet101', 'ACDC_resnet1', 'ACDC_resnet101', 'clinical_TL_resnet101', 'clinical_no_TL_resnet34', 'clinical_no_TL_resnet50', 'clinical_TL_Unet'])

In [21]:
# Keep only the 'clinical_no_TL_<name>' entries, keyed by the bare <name>.
# startswith() plus len(prefix) keeps the test and the slice in agreement:
# a key that merely contains the prefix somewhere in the middle is skipped
# instead of being cut at a fixed offset.
prefix = 'clinical_no_TL_'
dict_ = {
    key[len(prefix):]: value
    for key, value in data.items()
    if key.startswith(prefix)
}

In [22]:
# Display the filtered mapping built from the 'clinical_no_TL_' entries.
dict_

{'resnet152': 0.9432526585991932,
 'resnet16': 0.9441464979831315,
 'Unet': 0.9408920058672534,
 'resnet18': 0.943458929226256,
 'resnet101': 0.9435276861019436,
 'resnet34': 0.9421525485881922,
 'resnet50': 0.941487898789879}

In [23]:
import pandas as pd

# One row per entry of dict_ (name -> score), highest score first.
name_score_pairs = list(dict_.items())
models = pd.DataFrame(name_score_pairs, columns=['Model', 'Score'])
models.sort_values('Score', ascending=False)

Unnamed: 0,Model,Score
1,resnet16,0.944146
4,resnet101,0.943528
3,resnet18,0.943459
0,resnet152,0.943253
5,resnet34,0.942153
6,resnet50,0.941488
2,Unet,0.940892
