In [1]:
import numpy as np 
import pandas as pd 
import os
from glob import glob
from cv2 import resize
from cv2 import imread
from cv2 import INTER_CUBIC

In [2]:
def load_images(path):
    """Load every 3-channel image matching a glob pattern.

    Parameters
    ----------
    path : str
        Glob pattern (for example ``"some_dir/*"``) that is expanded with
        ``glob``.

    Returns
    -------
    imglist : list
        The images returned by ``imread``, in sorted path order. Files that
        ``imread`` cannot decode (it returns ``None`` for those) and arrays
        that are not 3-dimensional with exactly 3 channels are skipped.
    shapes : numpy.ndarray
        Integer array of shape ``(len(imglist), 2)`` holding the first two
        shape entries (height, width) of every kept image. The array is
        ``(0, 2)`` when no image was kept.
    """
    imglist, imgshapelist = [], []
    # glob yields paths in arbitrary, filesystem-dependent order; sorting keeps
    # the later seeded shuffle / split reproducible across machines.
    for impath in sorted(glob(path)):
        img = imread(impath)
        # imread signals an unreadable or non-image file by returning None
        if img is None:
            continue
        if img.ndim == 3 and img.shape[2] == 3:
            imglist.append(img)
            imgshapelist.append(img.shape)
    # reshape(-1, 3) keeps the array 2-D even when nothing was loaded, so the
    # column slice below cannot fail on an empty result.
    return imglist, np.array(imgshapelist, dtype=int).reshape(-1, 3)[:, :2]

# Load both classes: each call gives the image list plus an array whose two
# columns are indexed below as [height, width].
cposl, cpshape = load_images("../input/covidct/CT_COVID/*")
cnegl, cnshape = load_images("../input/covidct/CT_NonCOVID/*")

# Height/width ratio of every image, pooled over both classes.
concatenated = np.concatenate([cpshape, cnshape])
concatenatedhwratios = concatenated[:, 0] / concatenated[:, 1]

# Quartiles of the pooled ratios. The accepted band is centred on the median
# (q2) and extends 1.5 * IQR to either side.
q1, q2, q3 = (np.quantile(concatenatedhwratios, q, axis=0) for q in (.25, .50, .75))
qr = 1.5 * (q3 - q1)
ql, qu = q2 - qr, q2 + qr

# NOTE: an earlier, now disabled, experiment derived the resize target from the
# mean [height, width] of the in-band images and mentioned 300x425.

# Per-class boolean masks: True where the image's ratio lies strictly inside (ql, qu).
cphwratios = cpshape[:, 0] / cpshape[:, 1]
cpfilter = (cphwratios > ql) & (cphwratios < qu)

cnhwratios = cnshape[:, 0] / cnshape[:, 1]
cnfilter = (cnhwratios > ql) & (cnhwratios < qu)

from itertools import compress
def filter_resize_array(listofimages, filterofwanteds, targetdim, interpolation = INTER_CUBIC):
    """Resize the selected images and stack them into a single array.

    Parameters
    ----------
    listofimages : sequence
        Images to choose from.
    filterofwanteds : sequence of bool
        Selector aligned with ``listofimages``; only images whose selector is
        truthy are resized and returned.
    targetdim : tuple
        Target size forwarded to ``resize`` as its ``dsize`` argument, which
        OpenCV interprets as ``(width, height)``.
    interpolation : int, optional
        OpenCV interpolation flag; defaults to ``INTER_CUBIC``.

    Returns
    -------
    numpy.ndarray
        The resized images stacked along a new first axis.
    """
    # cv2.resize's third positional parameter is `dst`, not the interpolation
    # flag, so the flag must be passed by keyword for it to take effect.
    return np.array([
        resize(im, targetdim, interpolation=interpolation)
        for im in compress(listofimages, filterofwanteds)
    ])

# Resize the non-outlier images of each class to the 224x224 model input size.
cpos = filter_resize_array(listofimages=cposl, filterofwanteds=cpfilter, targetdim=(224, 224))
cneg = filter_resize_array(listofimages=cnegl, filterofwanteds=cnfilter, targetdim=(224, 224))


In [3]:
# Stack positives first, then negatives, so the first len(cpos) rows are the positives.
X = np.concatenate([cpos, cneg])

# One-hot labels: positive = [1, 0], negative = [0, 1].
Y = np.zeros((X.shape[0], 2), dtype=int)
Y[:len(cpos), 0] = 1
Y[len(cpos):, 1] = 1

# Shuffle images and labels with the same seeded permutation.
np.random.seed(1)
p = np.random.permutation(len(Y))
X = X[p]
Y = Y[p]

# Hold out 15% of the samples, then scale pixel values by 1/255.
from sklearn.model_selection import train_test_split
xtr, xte, ytr, yte = train_test_split(X, Y, train_size=0.85, random_state=1)
xtr, xte = xtr / 255, xte / 255

In [11]:
from keras.applications import VGG19
# Full VGG19 including its classifier head, with ImageNet weights. Building it
# downloads the weight file when it is not cached, so this cell needs network access.
vgg_19 = VGG19(include_top=True, weights="imagenet")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels.h5


Exception: URL fetch failure on https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels.h5: None -- [Errno -3] Temporary failure in name resolution

In [8]:
from keras.models import Sequential
from keras.layers import Dense, LeakyReLU, ReLU, Dropout
from keras.optimizers import Adam

In [10]:
# Reuse every VGG19 layer except its last one as a frozen feature extractor.
model = Sequential()
for layer in vgg_19.layers[:-1]:
    layer.trainable = False
    model.add(layer)

# Trainable classification head ending in a 2-way softmax.
for head_layer in (
    Dense(256), LeakyReLU(),
    Dense(128), ReLU(),
    Dense(64), ReLU(),
    Dense(2, activation="softmax"),
):
    model.add(head_layer)

adam_optimizer = Adam(learning_rate=1e-4)
model.compile(
    optimizer=adam_optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

NameError: name 'vgg_19' is not defined

In [None]:
epochs = 128
# First training run; the held-out split doubles as validation data.
# `use_multiprocessing` only applies to generator / keras.utils.Sequence inputs
# and is not accepted by newer Keras `fit` signatures, so it is not passed for
# these in-memory arrays.
history = model.fit(xtr, ytr, epochs=epochs, validation_data=(xte, yte))

In [None]:
import matplotlib.pyplot as plt

def _plot_metric(train_values, val_values, metric_name, title):
    """Plot one training/validation curve pair against the epoch index."""
    plt.figure(figsize=(12,4))
    plt.plot(range(len(train_values)), train_values, label=f"Training {metric_name}")
    plt.plot(range(len(val_values)), val_values, label=f"Validation {metric_name}")
    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel(metric_name)
    plt.legend()
    plt.show()


def summarize(history, epochs):
    """Plot loss/accuracy curves and print statistics for the last quarter of training.

    Parameters
    ----------
    history : object
        Object whose ``history`` attribute maps ``"loss"``, ``"val_loss"``,
        ``"accuracy"`` and ``"val_accuracy"`` to per-epoch sequences of floats.
    epochs : int
        Number of epochs to summarize. It is clamped to the number of epochs
        actually recorded, so a run that recorded fewer epochs than requested
        can still be summarized.

    Raises
    ------
    ValueError
        If there is no recorded epoch to summarize.
    """
    recorded = history.history
    n_epochs = min(epochs, len(recorded["loss"]))
    if n_epochs <= 0:
        raise ValueError("summarize needs at least one recorded epoch")

    tl = recorded["loss"][:n_epochs]
    vl = recorded["val_loss"][:n_epochs]
    ta = recorded["accuracy"][:n_epochs]
    va = recorded["val_accuracy"][:n_epochs]

    _plot_metric(tl, vl, "Loss", "Losses by Epoch Count")
    _plot_metric(ta, va, "Accuracy", "Accuracies by Epoch Count")

    # Keep only the final quarter of the epochs; for n_epochs >= 1 this slice
    # is never empty, so the means below are always defined.
    tail_start = 3*n_epochs//4
    tl, vl, ta, va = (series[tail_start:] for series in (tl, vl, ta, va))

    # The banner reports the number of epochs actually averaged; it differs
    # from n_epochs//4 whenever n_epochs is not a multiple of 4.
    print(f"""
    For The Last Quartile [Last {len(tl)} Epochs]

    Training Loss Mean       = {sum(tl)/len(tl):.3}
    Test Loss Mean           = {sum(vl)/len(vl):.3}

    Training Accuracy Mean   = {sum(ta)/len(ta):.3}
    Test Accuracy Mean       = {sum(va)/len(va):.3}


    Test Min Loss            = {min(vl):.3}
    Test Max Accuracy        = {max(va):.3}
    """)

# Plot the curves and print the last-quartile statistics for the training run above.
summarize(history, epochs)

In [None]:
# Save the model to the working directory; the file name embeds the epoch count.
model.save(f"./model{epochs}epochs.h5")

In [None]:
# Unfreeze the last 8 layers: the 7 head layers stacked on top of VGG19 (already
# trainable) plus the pretrained layer directly beneath them.
for layer in model.layers[-8:]:
    layer.trainable = True

# Keras captures each layer's `trainable` state when the model is compiled, so
# the model is compiled again (same loss, optimizer settings and metrics as
# before) for the unfreezing to affect the next fit() call. This also starts
# the optimizer from a fresh state.
model.compile(loss = "categorical_crossentropy", optimizer = Adam(learning_rate = 0.0001), metrics = ["accuracy"])

In [None]:
# Second training run on the same data.
# `use_multiprocessing` only applies to generator / keras.utils.Sequence inputs
# and is not accepted by newer Keras `fit` signatures, so it is not passed for
# these in-memory arrays.
history = model.fit(xtr, ytr, epochs=epochs, validation_data=(xte, yte))

In [None]:
# Report the second training run with the same plots and last-quartile statistics.
print("Results after retraining along with VGG19's already trained last layer;\n")
summarize(history, epochs)

In [None]:
# Save the retrained model; the name uses epochs*2 because fit() was called twice
# with `epochs` epochs each.
model.save(f"./model{epochs*2}epochs.h5")