In [67]:
import pandas as pd
import numpy as np
import math

In [63]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import RepeatedStratifiedKFold

In [119]:
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,Flatten,MaxPooling2D,Dropout,Dense,Activation,BatchNormalization

In [121]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
import tensorflow_hub as hub
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

In [60]:
from tqdm import tqdm
from tabulate import tabulate

In [35]:
# selecting random number of images from multiple folders and copying it to the destination folder

import os
import shutil
import glob
import random

ctscan_covid_data = random.sample(glob.glob("Hamza_Custom_Data/CT_Scan/Covid/*.png"), 330)
ctscan_normal_data = random.sample(glob.glob("Hamza_Custom_Data/CT_Scan/Normal/*"), 330)
    
ultrasound_covid_data = random.sample(glob.glob("Hamza_Custom_Data/Ultra_Sound/Covid/*"), 330)
ultrasound_normal_data = random.sample(glob.glob("Hamza_Custom_Data/Ultra_Sound/Normal/*.jpg"), 330)

xray_covid_data = random.sample(glob.glob("Hamza_Custom_Data/X_Ray/Covid/*.png"), 330)
xray_normal_data = random.sample(glob.glob("Hamza_Custom_Data/X_Ray/Normal/*.png"), 330)

    
data_list = [ctscan_covid_data, ctscan_normal_data, ultrasound_covid_data, ultrasound_normal_data, xray_covid_data,\
             xray_normal_data]

if os.path.isdir('Data/CT_Scan'):
    shutil.rmtree('Data/CT_Scan')
    
if os.path.isdir('Data/Ultra_Sound'):
    shutil.rmtree('Data/Ultra_Sound')
    
if os.path.isdir('Data/X_Ray'):
    shutil.rmtree('Data/X_Ray')


for data,label in zip(data_list,range(len(data_list))):
    if label == 0:
        dest = 'Data/CT_Scan/Covid'
    elif label == 1:
        dest = 'Data/CT_Scan/Normal'
    elif label == 2:
        dest = 'Data/Ultra_Sound/Covid'
    elif label == 3:
        dest = 'Data/Ultra_Sound/Normal'
    elif label == 4:
        dest = 'Data/X_Ray/Covid'
    elif label == 5:
        dest = 'Data/X_Ray/Normal'
    else:
        break
    
    os.makedirs(dest)
    for imgs in data:
        shutil.copy(imgs, dest)

In [36]:
class Dataset:
    def __init__(self, data_root: str, *, test_size: float, img_size: int, seed: int = 0) -> None:
        self.label2index = {}
        self.index2label = {}
        
        # Discover the class label names.
        class_labels = os.listdir(data_root)
        self.nclasses = len(class_labels)
        X, y = [], []
        
        for label_index, label in enumerate(class_labels):
            # Load the images for this class label.
            self.label2index[label_index] = label
            self.index2label[label] = label_index
            
            img_names = os.listdir(os.path.join(data_root, label))
            for img_name in img_names:
                img_path = os.path.join(data_root, label, img_name)
                img = load_img(img_path, target_size=(img_size, img_size, 3))
                X.append(img_to_array(img))
                y.append(label_index)
        
        X = np.array(X)
        y = np.array(y)
        one_hot_y = to_categorical(y, num_classes=self.nclasses)
        
        # Make a stratified split.
        self.X, self.X_test, self.labels, self.labels_test, self.y, self.y_test = train_test_split(
            X, y, one_hot_y, test_size=test_size, random_state=seed, stratify=y)

In [42]:
# 660 * 0.7 = 462 
# X shape in 3 dimensions
# Y has 2 classes (Covid, Normal)

ctscan_data = Dataset("Data/CT_Scan", test_size=0.3, img_size=224)
print(ctscan_data.X.shape, ctscan_data.y.shape)

ultrasound_data = Dataset("Data/Ultra_Sound", test_size=0.3, img_size=224)
print(ultrasound_data.X.shape, ultrasound_data.y.shape)

xray_data = Dataset("Data/X_Ray", test_size=0.3, img_size=224)
print(xray_data.X.shape, xray_data.y.shape)

(462, 224, 224, 3) (462, 2)
(462, 224, 224, 3) (462, 2)
(462, 224, 224, 3) (462, 2)


In [43]:
final_data = [ctscan_data,ultrasound_data,xray_data]

In [50]:
# feature extractor model resnet 101 v2
model = hub.KerasLayer("https://tfhub.dev/google/bit/m-r101x1/1", trainable=False)

for data,label in zip(final_data,range(len(final_data))):    
    if label == 0:            
        ctscan_embedding = model(data.X)
        ctscan_test_embedding = model(data.X_test)
        print(ctscan_embedding.shape, ctscan_test_embedding.shape)
    
    elif label == 1:        
        ultrasound_embedding = model(data.X)
        ultrasound_test_embedding = model(data.X_test)
        print(ultrasound_embedding.shape, ultrasound_test_embedding.shape)
    
    elif label == 2:        
        xray_embedding = model(data.X)
        xray_test_embedding = model(data.X_test)
        print(xray_embedding.shape, xray_test_embedding.shape)
        
    else:
        break

(462, 2048) (198, 2048)
(462, 2048) (198, 2048)
(462, 2048) (198, 2048)


In [113]:
def make_model(nclasses: int):
    model = Sequential()
    model.add(Conv2D(filters = 16, kernel_size = (3,3), padding='same', input_shape=(2,2,512), activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(1,1))
    model.add(Conv2D(24, (3,3), padding='same', activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(1,1))
    model.add(Conv2D(32, (3,3), padding='same', activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(1,1))
    model.add(Conv2D(48, (3,3), padding='same', activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(1,1))
    model.add(Conv2D(64, (3,3), padding='same', activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(1,1))
    model.add(Flatten())
    model.add(Dense(128,activation="relu"))
    model.add(Dropout(0.3))
    model.add(Dense(128,activation="relu"))
    model.add(Dense(nclasses, activation="sigmoid"))
    model.summary()
    return model

In [114]:
def evaluate_model(nclasses, X, y, X_test, y_test, *, epochs: int, batch_size: int, learning_rate: float, 
                   **model_params) -> tuple:
    
    # Math to compute the learning rate schedule. We will divide our
    # learning rate by a factor of 10 every 30% of the optimizer's
    # total steps.
    steps_per_epoch = math.ceil(len(X) / batch_size)
    third_of_total_steps = math.floor(epochs * steps_per_epoch / 3)
    
    # Make and compile the model.
    #model = model_maker(nclasses, **model_params)
    model = make_model(nclasses)
    model.compile(
        optimizer=Adam(
            learning_rate=ExponentialDecay(
                learning_rate,
                decay_steps=third_of_total_steps,
                decay_rate=0.1,
                staircase=True
            )
        ),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    
    # Train the model on the training set and evaluate it on the test set.
    history = model.fit(X, y, batch_size=batch_size, epochs=epochs, verbose=0)
    _, train_acc = model.evaluate(X, y, batch_size=batch_size, verbose=0)
    _, test_acc = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
    return model, train_acc, test_acc

In [115]:
def cv_evaluate_model(
    X, y, labels, *, nfolds: int, nrepeats: int, epochs: int, batch_size: int,
    learning_rate: float, model_maker, verbose: bool = True, seed: int = 0,
    **model_params
) -> dict:
    """
    Performs `nfolds` cross-validated training and evaluation of a
    model hyperparameter configuration. Returns a dictionary of
    statistics about the outcome of the cross-validated experiment.
    """
    _, nclasses = y.shape
    train_accs, test_accs = [], []
    
    # Train and evaluate the model for each fold.
    for train_index, test_index in tqdm(
        RepeatedStratifiedKFold(
            n_splits=nfolds, n_repeats=nrepeats, random_state=seed
        ).split(X, labels),
        total=nfolds*nrepeats, disable=not verbose
    ):
        
        # Select the data for this fold.
        X_train_fold = tf.gather(X, train_index) 
        y_train_fold = tf.gather(y, train_index)
        X_test_fold = tf.gather(X, test_index)
        y_test_fold = tf.gather(y, test_index)
        
        # Train and evaluate the model.
        _, train_acc, test_acc = evaluate_model(
            nclasses,
            X_train_fold,
            y_train_fold,
            X_test_fold,
            y_test_fold,
            epochs=epochs,
            batch_size=batch_size,
            learning_rate=learning_rate,
            model_maker=model_maker,
            **model_params
        )
        train_accs.append(train_acc)
        test_accs.append(test_acc)
    
    # Aggregate.
    results = {
        "train_mean": np.mean(train_accs),
        "train_std": np.std(train_accs),
        "test_mean": np.mean(test_accs),
        "test_std": np.std(test_accs)
    }
    
    # Report.
    if verbose:
        print(
            tabulate(
                [
                    ["Train", results["train_mean"], results["train_std"]],
                    ["Test", results["test_mean"], results["test_std"]]
                ],
                headers=["Set", "Accuracy", "Standard Deviation"]
            )
        )
    
    return results

In [116]:
X_list = [ctscan_embedding, ultrasound_embedding, xray_embedding]
y_list = [ctscan_data.y, ultrasound_data.y, xray_data.y]
data_labels = [ctscan_data.labels, ultrasound_data.labels, xray_data.label2index]

In [130]:
ctscan_embedding2d = tf.reshape(ctscan_embedding, [-1,2,2,512])
ultrasound_embedding2d = tf.reshape(ultrasound_embedding, [-1,2,2,512])
xray_embedding2d = tf.reshape(xray_embedding, [-1,2,2,512])
print(ctscan_embedding2d.shape)

X_list = [ctscan_embedding2d, ultrasound_embedding2d, xray_embedding2d]

(462, 2, 2, 512)


In [131]:
default_cv_evaluate_params = {
    "X": X_list[0],
    "y": y_list[0],
    "labels": data_labels[0],
    "nfolds": 10,
    "nrepeats": 3,
    "model_maker": make_model,
    "epochs": 200,
    "batch_size": 32,
    "verbose": False,
    "learning_rate": 3e-3 #0.003
}

In [None]:
_ = cv_evaluate_model(
    **{
        **default_cv_evaluate_params,
        "verbose": True
    }
)

  0%|                                                                                           | 0/30 [00:00<?, ?it/s]

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_68 (Conv2D)           (None, 2, 2, 16)          73744     
_________________________________________________________________
batch_normalization_65 (Batc (None, 2, 2, 16)          64        
_________________________________________________________________
max_pooling2d_65 (MaxPooling (None, 2, 2, 16)          0         
_________________________________________________________________
conv2d_69 (Conv2D)           (None, 2, 2, 24)          3480      
_________________________________________________________________
batch_normalization_66 (Batc (None, 2, 2, 24)          96        
_________________________________________________________________
max_pooling2d_66 (MaxPooling (None, 2, 2, 24)          0         
_________________________________________________________________
conv2d_70 (Conv2D)           (None, 2, 2, 32)        

  3%|██▋                                                                               | 1/30 [01:54<55:12, 114.21s/it]

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_73 (Conv2D)           (None, 2, 2, 16)          73744     
_________________________________________________________________
batch_normalization_70 (Batc (None, 2, 2, 16)          64        
_________________________________________________________________
max_pooling2d_70 (MaxPooling (None, 2, 2, 16)          0         
_________________________________________________________________
conv2d_74 (Conv2D)           (None, 2, 2, 24)          3480      
_________________________________________________________________
batch_normalization_71 (Batc (None, 2, 2, 24)          96        
_________________________________________________________________
max_pooling2d_71 (MaxPooling (None, 2, 2, 24)          0         
_________________________________________________________________
conv2d_75 (Conv2D)           (None, 2, 2, 32)        

In [129]:
X = X_list[0]
y = y_list[0]
labels = data_labels[0]
train_accs, test_accs = [], []
history = []
    
# Train and evaluate the model for each fold.
for train_index, test_index in tqdm(
    RepeatedStratifiedKFold(
        n_splits=10, n_repeats=3, random_state=0
    ).split(X, labels),
    total=10*3, disable = not True #(verbose - True)
):

    # Select the data for this fold.
    X_train = tf.gather(X, train_index) 
    y_train = tf.gather(y, train_index)
    X_test = tf.gather(X, test_index)
    y_test = tf.gather(y, test_index)
    
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)
    
    #Define Model
    model = Sequential()
    model.add(Conv2D(filters = 16, kernel_size = (3,3), padding='same', input_shape=(2,2,512), activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(1,1))
    model.add(Conv2D(24, (3,3), padding='same', activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(1,1))
    model.add(Conv2D(32, (3,3), padding='same', activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(1,1))
    model.add(Conv2D(48, (3,3), padding='same', activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(1,1))
    model.add(Conv2D(64, (3,3), padding='same', activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(1,1))
    model.add(Flatten())
    model.add(Dense(128,activation="relu"))
    model.add(Dropout(0.3))
    model.add(Dense(128,activation="relu"))
    model.add(Dense(2, activation="sigmoid"))

    #Learning Rate
    steps_per_epoch = math.ceil(len(X_train) / 64) #batch - 64
    third_of_total_steps = math.floor(100 * steps_per_epoch / 3) #epoch - 100
    
    # Train and evaluate the model.
    model.compile(
        optimizer=Adam(
            learning_rate=ExponentialDecay(
                0.0003,
                decay_steps=third_of_total_steps,
                decay_rate=0.1,
                staircase=True
            )
        ),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    
    # Train the model on the training set and evaluate it on the test set.
    history = (model.fit(X_train, y_train, batch_size=64, epochs=100, verbose=1, validation_data=(X_test, y_test)))
    train_loss, train_acc = model.evaluate(X_test, y_test, batch_size=64, verbose=0)
    test_loss, test_acc = model.evaluate(X_test, y_test, batch_size=64, verbose=0)
    
    train_accs.append(train_acc)
    test_accs.append(test_acc)

# Aggregate.
results = {
    "Train_Acc": np.mean(train_accs),
    "Train_std": np.std(train_accs),
    "Test_Acc": np.mean(test_accs),
    "Test_std": np.std(test_accs)
}

# Report.
if verbose:
    print(
        tabulate(
            [
                ["Train", results["Train_Acc"], results["Train_std"]],
                ["Test", results["Test_Acc"], results["Test_std"]]
            ],
            headers=["Set", "Accuracy", "Standard Deviation"]
        )
    )

  0%|                                                                                           | 0/30 [00:00<?, ?it/s]

(415, 2048)
(47, 2048)
(415, 2)
(47, 2)


  0%|                                                                                           | 0/30 [00:06<?, ?it/s]

Epoch 1/100





ValueError: in user code:

    C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)
    C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py:835 run_step  **
        outputs = model.train_step(data)
    C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py:787 train_step
        y_pred = self(x, training=True)
    C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\base_layer.py:1020 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\input_spec.py:229 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer sequential_15 is incompatible with the layer: : expected min_ndim=4, found ndim=2. Full shape received: (None, 2048)
