In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import time
from datetime import datetime
import os

# Model architecture
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from predict_proba import predict_proba

# Ensemble model class
from ensemblemodel import EnsembleModel
from ensemblemodel import ScoreMetrics, PrintConfusionMatrix

import numpy as np
import tensorflow as tf
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from dataloader import DataLoader
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# ResNet Baseline

In [2]:
# Specify fold 1 to 10, we cannot loop this due to memory limit

fold = 1

X_train = np.load(f'./data/input/xtrain_{fold}.npy')
Y_train = np.load(f'./data/input/ytrain_{fold}.npy')
X_test = np.load(f'./data/input/xtest_{fold}.npy')
Y_test = np.load(f'./data/input/ytest_{fold}.npy')

X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)

In [3]:
base_model = tf.keras.applications.resnet.ResNet50(input_shape=(224,224,3), include_top=False, weights='imagenet')

In [None]:
full_dataset_prediction = predict_proba(X_train, Y_train, X_test, base_model, validation_data=(X_val, Y_val), output_layer="fc")
print(fold)
PrintConfusionMatrix(Y_test, full_dataset_prediction[:, 1], threshold=0.5)

In [None]:
X_train = np.load('./data/input/full_xtrain.npy')
Y_train = np.load('./data/input/full_ytrain.npy')

X_test = np.load('./data/input/xtest.npy')
Y_test = np.load('./data/input/ytest.npy')

X = np.concatenate((X_train, X_test), axis=0)
Y = np.concatenate((Y_train, Y_test), axis=0)

X_external = np.load('./data/input/xexternal.npy')
Y_external = np.load('./data/input/yexternal.npy')

In [None]:
external_prediction = predict_proba(X, Y, X_external, base_model, validation_data=(X_test, Y_test), output_layer="fc")
PrintConfusionMatrix(Y_external, external_prediction[:,1], threshold=0.5)

# CS50-3 Model

In [None]:
# Specify fold 1 to 10, we cannot loop this due to memory limit

fold = 1

X_train = np.load(f'./data/input/xtrain_{fold}.npy')
Y_train = np.load(f'./data/input/ytrain_{fold}.npy')
X_test = np.load(f'./data/input/xtest_{fold}.npy')
Y_test = np.load(f'./data/input/ytest_{fold}.npy')

In [None]:
base_model = tf.keras.applications.resnet.ResNet50(input_shape=(224,224,3), include_top=False, weights='imagenet')

In [None]:
full_dataset_prediction = predict_proba(X_train, Y_train, X_test, base_model, validation_data=None, output_layer="svm")
print(fold)
PrintConfusionMatrix(Y_test, full_dataset_prediction, threshold=0.5)

In [2]:
X_train = np.load('./data/input/full_xtrain.npy')
Y_train = np.load('./data/input/full_ytrain.npy')

X_test = np.load('./data/input/xtest.npy')
Y_test = np.load('./data/input/ytest.npy')

X = np.concatenate((X_train, X_test), axis=0)
Y = np.concatenate((Y_train, Y_test), axis=0)

X_external = np.load('./data/input/xexternal.npy')
Y_external = np.load('./data/input/yexternal.npy')

In [None]:
external_prediction = predict_proba(X, Y, X_external, base_model, validation_data=None, output_layer="svm")
PrintConfusionMatrix(Y_external, external_prediction, threshold=0.5)

# Random Forest Model

In [None]:
for fold in range(1, 11):
    X_train = np.load(f'./data/input/xtrain_{fold}.npy')
    Y_train = np.load(f'./data/input/ytrain_{fold}.npy')
    X_test = np.load(f'./data/input/xtest_{fold}.npy')
    Y_test = np.load(f'./data/input/ytest_{fold}.npy')

    layer = RandomForestClassifier(random_state=42, n_jobs=-1)
    layer.fit(X_train, Y_train)
    full_dataset_prediction = layer.predict(X_test)
    print(fold)
    PrintConfusionMatrix(Y_test, full_dataset_prediction, threshold=0.5)

In [2]:
X_train = np.load('./data/input/full_xtrain.npy')
Y_train = np.load('./data/input/full_ytrain.npy')

X_test = np.load('./data/input/xtest.npy')
Y_test = np.load('./data/input/ytest.npy')

X = np.concatenate((X_train, X_test), axis=0)
Y = np.concatenate((Y_train, Y_test), axis=0)

X_external = np.load('./data/input/xexternal.npy')
Y_external = np.load('./data/input/yexternal.npy')

In [3]:
layer = RandomForestClassifier(random_state=42, n_jobs=-1)
layer.fit(X, Y)
full_dataset_prediction = layer.predict(X_external)

PrintConfusionMatrix(Y_external, full_dataset_prediction, threshold=0.5)

----------------  ## CONFUSION MATRIX ##  ----------------
                                    True Labels
    Predicted Labels  |  True Positive  |  True Negative
 Predicted Positive   |        153     |        45   
 Predicted Negative   |        27     |        140   
----------------------------------------------------------
Accuracy: 0.803
Precision: 0.773
Recall: 0.850
F1: 0.810
Specifity: 0.757
Neg-Pred Value: 0.838
ROC_AUC_Score: 0.803
Gini: 1.607
