In [1]:
import os
import time
import glob
import cv2
import tqdm
import numpy as np
import pandas as pd
import tensorflow as tf 
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')

try:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
except:
    print('No GPU detected')

In [3]:
training_data_path = r'.\Pneumonia\train' 
testing_data_path = r'.\Pneumonia\test'
validation_data_path = r'.\Pneumonia\val'

In [4]:
def createDataframe(path):
    files_normal = glob.glob(os.path.join(path, 'NORMAL') + r'\*.*')
    files_pneunomia = glob.glob(os.path.join(path, 'PNEUMONIA') + r'\*.*')
    
    files_normal = files_normal
    files_pneunomia = files_pneunomia
    
    def transformImage(file_name: str):
        img = cv2.imread(file_name)
        img = cv2.resize(img, (120, 120))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if (img.shape[2] == 1):
            print('Anomaly detected!!!')
        return img

    images = []
    labels = []
    for file in tqdm.tqdm(files_normal):
        img = transformImage(file)
        images.append(img)
        labels.append(0)
    for file in tqdm.tqdm(files_pneunomia):
        img = transformImage(file)
        images.append(img)
        labels.append(1)

    dataframe = pd.DataFrame()
    dataframe['images'] = images
    dataframe['labels'] = labels
    dataframe = dataframe.sample(frac=1).reset_index(drop=True)
    return dataframe

dataframe_training = createDataframe(training_data_path)
dataframe_validation = createDataframe(validation_data_path)

100%|██████████| 1000/1000 [00:41<00:00, 24.27it/s]
100%|██████████| 1000/1000 [00:19<00:00, 52.44it/s]
100%|██████████| 308/308 [00:10<00:00, 28.41it/s]
100%|██████████| 308/308 [00:03<00:00, 82.27it/s]


In [5]:
X_train, y_train = np.stack(dataframe_training['images'] / 255).astype('float32'), dataframe_training['labels']
X_test, y_test = np.stack(dataframe_validation['images'] / 255).astype('float32'), dataframe_validation['labels']

X_train = np.reshape(X_train, (X_train.shape[0], 120, 120, 3))
y_train = tf.keras.utils.to_categorical(y_train)

X_test = np.reshape(X_test, (X_test.shape[0], 120, 120, 3))
y_test = tf.keras.utils.to_categorical(y_test)

In [6]:
def buildModel():
    input_layer = tf.keras.layers.Input(shape=(120, 120, 3), name='input_layer_1')
    model = tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', name='conv_b1_1')(input_layer)
    model = tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', name='conv_b1_2')(model)
    model = tf.keras.layers.MaxPooling2D((2, 2), name='pool_1')(model)
    
    model = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu', name='conv_b2_1')(model)
    model = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu', name='conv_b2_2')(model)
    model = tf.keras.layers.MaxPooling2D((2, 2), name='pool_2')(model)
    
    model = tf.keras.layers.Conv2D(filters=128, kernel_size=3, padding='same', activation='relu', name='conv_b3_1')(model)
    model = tf.keras.layers.Conv2D(filters=128, kernel_size=3, padding='same', activation='relu', name='conv_b3_2')(model)
    model = tf.keras.layers.MaxPooling2D((2, 2), name='pool_3')(model)
    
    model = tf.keras.layers.Conv2D(filters=256, kernel_size=3, padding='same', activation='relu', name='conv_b4_1')(model)
    model = tf.keras.layers.BatchNormalization(name='bn_1')(model)
    model = tf.keras.layers.Conv2D(filters=256, kernel_size=3, padding='same', activation='relu', name='conv_b4_2')(model)
    model = tf.keras.layers.BatchNormalization(name='bn_2')(model)
    model = tf.keras.layers.Conv2D(filters=256, kernel_size=3, padding='same', activation='relu', name='conv_b4_3')(model)
    model = tf.keras.layers.MaxPooling2D((2, 2), name='pool_4')(model)
    
    model = tf.keras.layers.Conv2D(filters=512, kernel_size=3, padding='same', activation='relu', name='conv_b5_1')(model)
    model = tf.keras.layers.BatchNormalization(name='bn_3')(model)
    model = tf.keras.layers.Conv2D(filters=512, kernel_size=3, padding='same', activation='relu', name='conv_b5_2')(model)
    model = tf.keras.layers.BatchNormalization(name='bn_4')(model)
    model = tf.keras.layers.Conv2D(filters=512, kernel_size=3, padding='same', activation='relu', name='conv_b5_3')(model)
    model = tf.keras.layers.MaxPooling2D((2, 2), name='pool_5')(model)
    
    model = tf.keras.layers.Flatten()(model)
    model = tf.keras.layers.Dense(1024, activation='relu', name='fc_1')(model)
    model = tf.keras.layers.Dropout(0.4)(model)
    model = tf.keras.layers.Dense(512, activation='relu', name='fc_2')(model)
    model = tf.keras.layers.Dropout(0.4)(model)
    output_layer = tf.keras.layers.Dense(2, activation='softmax', name='fc_3')(model)
    
    model = tf.keras.models.Model(input_layer, output_layer)
    return model

In [7]:
model_conv = buildModel()
model_conv.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer_1 (InputLayer)   [(None, 120, 120, 3)]     0         
_________________________________________________________________
conv_b1_1 (Conv2D)           (None, 120, 120, 32)      896       
_________________________________________________________________
conv_b1_2 (Conv2D)           (None, 120, 120, 32)      9248      
_________________________________________________________________
pool_1 (MaxPooling2D)        (None, 60, 60, 32)        0         
_________________________________________________________________
conv_b2_1 (Conv2D)           (None, 60, 60, 64)        18496     
_________________________________________________________________
conv_b2_2 (Conv2D)           (None, 60, 60, 64)        36928     
_________________________________________________________________
pool_2 (MaxPooling2D)        (None, 30, 30, 64)       

In [8]:
class TimeLogger(tf.keras.callbacks.Callback):
    def __init__(self, name:str):
        self.log_name = name
        self.start_time = None
        self.log_time = []
        self.log_epoch = []
        
    def on_epoch_begin(self, epoch, logs=None):
        self.start_time = time.time()
    
    def on_epoch_end(self, epoch, logs=None):
        self.log_epoch.append(epoch)
        self.log_time.append(time.time() - self.start_time)
        
    def on_train_end(self, logs=None):
        dataframe_log = pd.DataFrame()
        dataframe_log['epochs'] = np.array(self.log_epoch) 
        dataframe_log['duration'] = np.array(self.log_time)
        dataframe_log.to_csv('log_duration\\' + self.log_name + '.csv', index=False)
        
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, verbose=1, min_lr=1e-7)

In [9]:
model_conv.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), 
                       loss=tf.keras.losses.CategoricalCrossentropy(),
                           metrics=['accuracy'])

In [10]:
%%time
duration_callback = TimeLogger('Conv_duration_log')
history = model_conv.fit(X_train, y_train,
                          epochs=50, batch_size=64,
                              validation_data=(X_test, y_test),
                                  callbacks=[early_stopping, reduce_lr, duration_callback], verbose=1)
pd.DataFrame.from_dict(history.history).to_csv(r'log_history\history_conv.csv', index=False)
tf.keras.models.save_model(model_conv, r'log_weights\model_conv.h5')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0019999999552965165.
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0003999999724328518.
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 00032: ReduceLROnPlateau reducing learning rate to 7.999999215826393e-05.
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 00038: ReduceLROnPlateau reducing learning rate to 1.599999814061448e-05.
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 00041: ReduceLROnPlateau reducing learning rate to 3.199999628122896e-06.
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 00044: ReduceLROnPlateau reducing learning rate to 6.399999165296323e-07.
W