# Import libraries and set up the environment

In [2]:
# Standard library
import os
import pickle
import warnings
from pathlib import Path

# Silence TensorFlow's native INFO/WARNING log lines. This has to be set
# *before* the first tensorflow import: the native logger picks the value up
# while the library is loading, so setting it afterwards has no visible effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Third-party
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dropout, Flatten, Dense, Conv2D, MaxPool2D, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping

# Hide Python-level warnings for the rest of the notebook (applies to every
# warning category, so real deprecation notices are hidden as well).
warnings.filterwarnings('ignore')

# Define functions to load data

In [3]:
# Define functions to create a DataFrame with the filepath and the labels of the pictures
def proc_img(filepath, random_state=None):
    """Build a shuffled DataFrame of image paths and their class labels.

    The label of an image is the name of the directory that directly
    contains it, e.g. ``.../COVID19/img.png`` -> ``COVID19``.

    Parameters
    ----------
    filepath : iterable of str or os.PathLike
        Paths of the image files.
    random_state : int, optional
        Seed forwarded to ``DataFrame.sample`` so the shuffle can be
        reproduced. The default ``None`` keeps the original behaviour
        (a different row order on every call).

    Returns
    -------
    pandas.DataFrame
        Columns ``Filepath`` (str) and ``Label``, rows shuffled, index
        reset to ``0..n-1``.
    """
    # Materialise once so one-shot iterators are not exhausted by the
    # label extraction before the path column is built.
    paths = list(filepath)
    labels = [os.path.basename(os.path.dirname(p)) for p in paths]

    path_col = pd.Series(paths, name='Filepath').astype(str)
    label_col = pd.Series(labels, name='Label')

    # Concatenate filepath and labels
    df = pd.concat([path_col, label_col], axis=1)

    # Shuffle the DataFrame and reset index
    return df.sample(frac=1, random_state=random_state).reset_index(drop=True)


def proc_df(input_dir):
    """Return the shuffled Filepath/Label DataFrame for one directory.

    Every entry reported by ``os.listdir(input_dir)`` is joined to the
    directory path with a ``/`` and handed to ``proc_img``.
    """
    root = str(Path(input_dir))
    entries = []
    for name in os.listdir(input_dir):
        entries.append(f"{root}/{name}")
    return proc_img(entries)

In [3]:
# import datasets: one DataFrame per class directory
normal_df = proc_df(r"/kaggle/input/segmented-datasets/NORMAL")
covid_df = proc_df(r"/kaggle/input/segmented-datasets/COVID19")
pneumonia_df = proc_df(r"/kaggle/input/segmented-datasets/PNEUMONIA")
tb_df = proc_df(r"/kaggle/input/segmented-datasets/TUBERCULOSIS")

# ignore_index=True gives the combined frame a unique 0..n-1 index; without it
# every class frame contributes its own 0..k-1 labels and the index contains
# duplicates, which makes any label-based selection ambiguous.
dataset_df = pd.concat([normal_df, covid_df, pneumonia_df, tb_df], ignore_index=True)
dataset_l = len(dataset_df.Label)
print(dataset_l)

7132


# Dividing train, validation, and test data

In [6]:
# extract test data: 10% of the full dataset, fixed seed for reproducibility
test_df = dataset_df.sample(n=int(dataset_l * 0.1), random_state=1109)
print(len(test_df.Label))
# extract other data: anti-join on all shared columns, i.e. keep only the rows
# of dataset_df that do not appear in test_df
no_test_df = dataset_df.merge(test_df, how='outer', indicator=True).loc[lambda x: x['_merge'] == 'left_only']
no_test_df = no_test_df.drop(columns='_merge')
# extract validation data: another 10% of the full dataset size
valid_df = no_test_df.sample(n=int(dataset_l * 0.1), random_state=1109)
print(len(valid_df))
# extract training data: stacking the validation rows onto no_test_df makes
# them appear twice, so drop_duplicates(keep=False) removes both copies.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
train_df = pd.concat([no_test_df, valid_df]).drop_duplicates(keep=False)
print(len(train_df))

# The three splits must partition the dataset exactly.
assert len(train_df) + len(valid_df) + len(test_df) == dataset_l, "splits do not add up to the full dataset"

# save data
test_df.to_csv('/kaggle/working/seg_test_df.csv',sep=',', index=False, header=True)
valid_df.to_csv('/kaggle/working/seg_valid_df.csv',sep=',', index=False, header=True)
train_df.to_csv('/kaggle/working/seg_train_df.csv',sep=',', index=False, header=True)

713
713
5706


# Image generators


In [4]:
# Read the three split tables back from CSV (test, validation, train order).
test_df, valid_df, train_df = (
    pd.read_csv(csv_path, sep=',')
    for csv_path in (
        '/kaggle/input/models/seg_test_df.csv',
        '/kaggle/input/models/seg_valid_df.csv',
        '/kaggle/input/models/seg_train_df.csv',
    )
)
print(test_df)

                                              Filepath         Label
0    /kaggle/input/segmented-datasets/PNEUMONIA/per...     PNEUMONIA
1    /kaggle/input/segmented-datasets/COVID19/COVID...       COVID19
2    /kaggle/input/segmented-datasets/PNEUMONIA/per...     PNEUMONIA
3    /kaggle/input/segmented-datasets/PNEUMONIA/per...     PNEUMONIA
4    /kaggle/input/segmented-datasets/TUBERCULOSIS/...  TUBERCULOSIS
..                                                 ...           ...
708  /kaggle/input/segmented-datasets/PNEUMONIA/per...     PNEUMONIA
709  /kaggle/input/segmented-datasets/PNEUMONIA/per...     PNEUMONIA
710  /kaggle/input/segmented-datasets/NORMAL/NORMAL...        NORMAL
711  /kaggle/input/segmented-datasets/TUBERCULOSIS/...  TUBERCULOSIS
712  /kaggle/input/segmented-datasets/NORMAL/NORMAL...        NORMAL

[713 rows x 2 columns]


In [5]:
# Image generators
# One independent ImageDataGenerator per split, all configured identically.
# NOTE(review): mobilenet_v2.preprocess_input is documented to map pixels to
# [-1, 1]; applying rescale=1/255 on top of it likely shrinks the inputs a
# second time. Confirm this is intended before retraining -- any models saved
# earlier were presumably trained with exactly this pipeline.
train_generator, valid_generator, test_generator = (
    ImageDataGenerator(
        preprocessing_function=preprocess_input,
        rescale=1. / 255
    )
    for _ in range(3)
)

# Every split is read as single-channel 224 x 224 images (size chosen based on
# the previous result) in shuffled batches of 32 with one-hot labels.
# The flows are created in train, valid, test order.
train_images, valid_images, test_images = (
    generator.flow_from_dataframe(
        dataframe=frame,
        x_col='Filepath',
        y_col='Label',
        target_size=(224, 224),
        color_mode="grayscale",
        batch_size=32,
        class_mode="categorical",
        shuffle=True,
        seed=1109,
    )
    for generator, frame in (
        (train_generator, train_df),
        (valid_generator, valid_df),
        (test_generator, test_df),
    )
)

Found 5706 validated image filenames belonging to 4 classes.
Found 713 validated image filenames belonging to 4 classes.
Found 713 validated image filenames belonging to 4 classes.


# Model selection
## Building the models

In [6]:
# Import the backbones from tensorflow.keras, the same package that provides
# Sequential and the layers used in this notebook; mixing standalone `keras`
# objects into tf.keras models can fail depending on the installed versions.
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.inception_v3 import InceptionV3

In [7]:
# Self-designed CNN: two conv/pool stages followed by a dense classifier head.
seg_self_model = Sequential(
    [
        # convolution + pooling, stage 1 (single-channel 224 x 224 input)
        Conv2D(filters=64, strides=1, kernel_size=(5, 5), activation='relu',
               input_shape=(224, 224, 1,)),
        MaxPool2D(3, 3),
        # convolution + pooling, stage 2
        Conv2D(filters=30, kernel_size=(3, 3), activation='relu'),
        MaxPool2D(2, 2),
        # flatten the feature maps into one vector
        Flatten(),
        # fully-connected stack, progressively narrower
        Dense(1024, activation='relu'),
        Dropout(0.1),
        Dense(256, activation='relu'),
        Dense(64, activation='relu'),
        Dense(16, activation='relu'),
        # output layer: one softmax unit per class
        Dense(4, activation='softmax'),
    ],
    name="self_designed",
)
# loss function, optimizer and reported metric
seg_self_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
seg_self_model.summary()

# function to create full model with different existing models
def create_full_model(base_model, base_model_name, optimizer='adam'):
    """Wrap a convolutional backbone with a 4-class classification head.

    The head is: global average pooling -> dropout(0.5) -> Dense(512, relu)
    -> dropout(0.5) -> Dense(4, softmax). The model is compiled with
    categorical cross-entropy and accuracy as the metric, and its summary
    is printed.

    Parameters
    ----------
    base_model : Keras model/layer
        Feature extractor placed first in the Sequential stack.
    base_model_name : str
        Name given to the resulting Sequential model.
    optimizer : str or Keras optimizer, default 'adam'
        Passed straight to ``model.compile``.

    Returns
    -------
    The compiled Sequential model.
    """
    model = Sequential(name=base_model_name)
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.5))
    model.add(Dense(512, activation = "relu"))
    model.add(Dropout(0.5))
    model.add(Dense(4, activation = 'softmax'))
    # loss function
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, 
                  metrics=['accuracy'])
    # summary() prints the table itself and returns None; wrapping it in
    # print(f'...') emitted a stray "None" line after every summary.
    model.summary()
    return model

# MobileNet backbone: no ImageNet weights, no classifier top, 224 x 224 x 1 input
base_mobilenet_model = MobileNet(
    include_top=False,
    weights=None,
    input_shape=(224, 224, 1,),
)
seg_mobilenet_model = create_full_model(
    base_model=base_mobilenet_model,
    base_model_name="MobileNet",
    optimizer='adam',
)

# InceptionResNetV2 backbone, same configuration
base_inceptionresnetv2_model = InceptionResNetV2(
    include_top=False,
    weights=None,
    input_shape=(224, 224, 1,),
)
seg_inceptionresnetv2_model = create_full_model(
    base_model=base_inceptionresnetv2_model,
    base_model_name="InceptionResNetV2",
    optimizer='adam',
)

# InceptionV3 backbone, same configuration
base_inceptionv3_model = InceptionV3(
    include_top=False,
    weights=None,
    input_shape=(224, 224, 1,),
)
seg_inceptionv3_model = create_full_model(
    base_model=base_inceptionv3_model,
    base_model_name="InceptionV3",
    optimizer='adam',
)


Model: "self_designed"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 220, 220, 64)      1664      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 73, 73, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 71, 71, 30)        17310     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 35, 35, 30)        0         
_________________________________________________________________
flatten (Flatten)            (None, 36750)             0         
_________________________________________________________________
dense (Dense)                (None, 1024)              37633024  
_________________________________________________________________
dropout (Dropout)            (None, 1024)            

## Early stopping and step sizes

In [9]:
# Early stopping and hyperparameters
# Hyper parameters: number of *full* batches per split. Integer division drops
# any final partial batch, so passing these as `steps` skips the leftover images.
STEP_SIZE_TRAIN = train_images.n // train_images.batch_size
STEP_SIZE_VALID = valid_images.n // valid_images.batch_size
# Stop the training when val_loss has not improved for 5 consecutive epochs
# (patience=5); restore_best_weights=True asks Keras to roll the model back to
# the weights of the best epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

## Training the models

In [29]:
# self-designed simple CNN model: train for up to 20 epochs with early stopping
seg_self_model_history = seg_self_model.fit(
    train_images,
    epochs=20,
    validation_data=valid_images,
    verbose=1,
    callbacks=[early_stop],  # Keras documents `callbacks` as a list
)
seg_self_model_result = seg_self_model_history.history
# Evaluate on the complete validation set. Passing steps=STEP_SIZE_VALID
# (n // batch_size) silently skipped the final partial batch.
seg_self_model.evaluate(valid_images)
seg_self_model.save("seg_self_model.h5")
# Save the per-epoch history. NOTE: despite the .txt extension this file is a
# binary pickle and must be read back with pickle.load.
with open('seg_self_model_result.txt', 'wb') as file_pi:
    pickle.dump(seg_self_model_result, file_pi)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20


In [11]:
# mobilenet: train for up to 20 epochs with early stopping
seg_mobilenet_model_history = seg_mobilenet_model.fit(
    train_images,
    epochs=20,
    validation_data=valid_images,
    verbose=1,
    callbacks=[early_stop],  # Keras documents `callbacks` as a list
)
seg_mobilenet_model_result = seg_mobilenet_model_history.history
# Evaluate on the complete validation set. Passing steps=STEP_SIZE_VALID
# (n // batch_size) silently skipped the final partial batch.
seg_mobilenet_model.evaluate(valid_images)
seg_mobilenet_model.save("seg_mobilenet_model.h5")
# Save the per-epoch history. NOTE: despite the .txt extension this file is a
# binary pickle and must be read back with pickle.load.
with open('seg_mobilenet_model_result.txt', 'wb') as file_pi:
    pickle.dump(seg_mobilenet_model_result, file_pi)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20


In [10]:
# inception resnet v2: train for up to 20 epochs with early stopping
seg_inceptionresnetv2_model_history = seg_inceptionresnetv2_model.fit(
    train_images,
    epochs=20,
    validation_data=valid_images,
    verbose=1,
    callbacks=[early_stop],  # Keras documents `callbacks` as a list
)
seg_inceptionresnetv2_model_result = seg_inceptionresnetv2_model_history.history
# Evaluate on the complete validation set. Passing steps=STEP_SIZE_VALID
# (n // batch_size) silently skipped the final partial batch.
seg_inceptionresnetv2_model.evaluate(valid_images)
seg_inceptionresnetv2_model.save("seg_inceptionresnetv2_model.h5")
# Save the per-epoch history. NOTE: despite the .txt extension this file is a
# binary pickle and must be read back with pickle.load.
with open('seg_inceptionresnetv2_model_result.txt', 'wb') as file_pi:
    pickle.dump(seg_inceptionresnetv2_model_result, file_pi)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20


In [8]:
# inception v3: train for up to 20 epochs with early stopping
seg_inceptionv3_model_history = seg_inceptionv3_model.fit(
    train_images,
    epochs=20,
    validation_data=valid_images,
    verbose=1,
    callbacks=[early_stop],  # Keras documents `callbacks` as a list
)
seg_inceptionv3_model_result = seg_inceptionv3_model_history.history
# Evaluate on the complete validation set. Passing steps=STEP_SIZE_VALID
# (n // batch_size) silently skipped the final partial batch.
seg_inceptionv3_model.evaluate(valid_images)
seg_inceptionv3_model.save("seg_inceptionv3_model.h5")
# Save the per-epoch history. NOTE: despite the .txt extension this file is a
# binary pickle and must be read back with pickle.load.
with open('seg_inceptionv3_model_result.txt', 'wb') as file_pi:
    pickle.dump(seg_inceptionv3_model_result, file_pi)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20


## Testing the models

In [8]:
# Load the four saved models from disk, replacing any in-memory models bound
# to the same names.
(
    seg_self_model,
    seg_mobilenet_model,
    seg_inceptionresnetv2_model,
    seg_inceptionv3_model,
) = (
    keras.models.load_model(model_path)
    for model_path in (
        "/kaggle/input/models/seg_self_model.h5",
        "/kaggle/input/models/seg_mobilenet_model.h5",
        "/kaggle/input/models/seg_inceptionresnetv2_model.h5",
        "/kaggle/input/models/seg_inceptionv3_model.h5",
    )
)

# Score every model on the held-out test images. Only the last call's return
# value is shown as the cell output; the others just log their progress.
seg_self_model.evaluate(test_images)
seg_mobilenet_model.evaluate(test_images)
seg_inceptionresnetv2_model.evaluate(test_images)
seg_inceptionv3_model.evaluate(test_images)



[0.18400059640407562, 0.946704089641571]