# Import libraries and setting environment

In [2]:
import numpy as np
import pandas as pd
import os
import pickle
from pathlib import Path
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dropout, Flatten, Dense, Conv2D, MaxPool2D, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow import keras
import matplotlib.pyplot as plt
import warnings

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.filterwarnings('ignore')

# Define functions to load data

In [3]:
# Define functions to create a DataFrame with the filepath and the labels of the pictures
def proc_img(filepath):
    labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1], filepath))

    filepath = pd.Series(filepath, name='Filepath').astype(str)
    labels = pd.Series(labels, name='Label')

    # Concatenate filepath and labels
    df = pd.concat([filepath, labels], axis=1)

    # Shuffle the DataFrame and reset index
    df = df.sample(frac=1).reset_index(drop=True)

    return df


def proc_df(input_dir):
    path = Path(input_dir)
    images = os.listdir(input_dir)
    filepath = [str(path) + '/' + img_path for img_path in images]
    df = proc_img(filepath)
    return df

In [3]:
# import datasets
normal_df = proc_df(r"/kaggle/input/yihui-data/dataset/NORMAL")
covid_df = proc_df(r"/kaggle/input/yihui-data/dataset/COVID19")
pneumonia_df = proc_df(r"/kaggle/input/yihui-data/dataset/PNEUMONIA")
tb_df = proc_df(r"/kaggle/input/yihui-data/dataset/TUBERCULOSIS")

dataset_df = pd.concat([normal_df, covid_df, pneumonia_df, tb_df])
dataset_l = len(dataset_df.Label)
print(dataset_l)

7132


# Dividing train, validation, and test data

In [4]:
# extract test data
test_df = dataset_df.sample(n=int(dataset_l * 0.1), random_state=1109)
print(len(test_df.Label))
# extract other data
no_test_df = dataset_df.merge(test_df, how='outer', indicator=True).loc[lambda x: x['_merge'] == 'left_only']
no_test_df = no_test_df.iloc[:, :2]
# extract validation data
valid_df = no_test_df.sample(n=int(dataset_l * 0.1), random_state=1109)
print(len(valid_df))
# extract training data
train_df = no_test_df.append(valid_df).drop_duplicates(keep=False)
print(len(train_df))

# save data
test_df.to_csv('/kaggle/working/test_df.csv',sep=',', index=False, header=True)
valid_df.to_csv('/kaggle/working/valid_df.csv',sep=',', index=False, header=True)
train_df.to_csv('/kaggle/working/train_df.csv',sep=',', index=False, header=True)

713
713
5706


# Image generators


In [6]:
# load data
test_df = pd.read_csv('/kaggle/input/models/test_df.csv',sep=',')
valid_df = pd.read_csv('/kaggle/input/models/valid_df.csv',sep=',')
train_df = pd.read_csv('/kaggle/input/models/train_df.csv',sep=',')
print(test_df)

                                              Filepath         Label
0    /kaggle/input/yihui-data/dataset/PNEUMONIA/per...     PNEUMONIA
1    /kaggle/input/yihui-data/dataset/COVID19/COVID...       COVID19
2    /kaggle/input/yihui-data/dataset/PNEUMONIA/per...     PNEUMONIA
3    /kaggle/input/yihui-data/dataset/PNEUMONIA/per...     PNEUMONIA
4    /kaggle/input/yihui-data/dataset/TUBERCULOSIS/...  TUBERCULOSIS
..                                                 ...           ...
708  /kaggle/input/yihui-data/dataset/PNEUMONIA/per...     PNEUMONIA
709  /kaggle/input/yihui-data/dataset/PNEUMONIA/per...     PNEUMONIA
710  /kaggle/input/yihui-data/dataset/NORMAL/IM-059...        NORMAL
711  /kaggle/input/yihui-data/dataset/TUBERCULOSIS/...  TUBERCULOSIS
712  /kaggle/input/yihui-data/dataset/NORMAL/NORMAL...        NORMAL

[713 rows x 2 columns]


In [7]:
# Image generator
# Train generator
train_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rescale=1. / 255
)
valid_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rescale=1. / 255
)
test_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rescale=1. / 255
)

# Here we used 224 * 224 based on the previous result
train_images = train_generator.flow_from_dataframe(
    x_col='Filepath',
    y_col='Label',
    dataframe=train_df,
    target_size=(224, 224),
    color_mode="grayscale",
    batch_size=32,
    class_mode="categorical",
    shuffle=True, seed=1109
)

valid_images = valid_generator.flow_from_dataframe(
    x_col='Filepath',
    y_col='Label',
    dataframe=valid_df,
    target_size=(224, 224),
    color_mode="grayscale",
    batch_size=32,
    class_mode="categorical",
    shuffle=True, seed=1109
)

test_images = test_generator.flow_from_dataframe(
    x_col='Filepath',
    y_col='Label',
    dataframe=test_df,
    target_size=(224, 224),
    color_mode="grayscale",
    batch_size=32,
    class_mode="categorical",
    shuffle=True, seed=1109
)

Found 5706 validated image filenames belonging to 4 classes.
Found 713 validated image filenames belonging to 4 classes.
Found 713 validated image filenames belonging to 4 classes.


# Model selection
## Building the models

In [54]:
from keras.applications.mobilenet import MobileNet
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.inception_v3 import InceptionV3

In [55]:
# self-designed CNN model
self_model = Sequential(name="self_designed")
# convolutions and pooling
self_model.add(Conv2D(filters=64, strides=1, kernel_size=(5, 5), activation='relu', 
                      input_shape=(224, 224, 1,)))
self_model.add(MaxPool2D(3, 3))
self_model.add(Conv2D(filters=30, kernel_size=(3, 3), activation='relu'))
self_model.add(MaxPool2D(2, 2))
# flatten
self_model.add(Flatten())
# fully-connected
self_model.add(Dense(1024, activation='relu'))
self_model.add(Dropout(0.1))
self_model.add(Dense(256, activation='relu'))
self_model.add(Dense(64, activation='relu'))
# output layer
self_model.add(Dense(16, activation='relu'))
self_model.add(Dense(4, activation='softmax'))
# loss function
self_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
self_model.summary()

# function to create full model with different existing models
def create_full_model(base_model, base_model_name, optimizer='adam'):
    model = Sequential(name=base_model_name)
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.5))
    model.add(Dense(512, activation = "relu"))
    model.add(Dropout(0.5))
    model.add(Dense(4, activation = 'softmax'))
    # loss function
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, 
                  metrics=['accuracy'])
    print(f'{model.summary()}')
    return model

base_mobilenet_model = MobileNet(input_shape=(224, 224, 1,), 
                                 include_top = False, weights = None)
mobilenet_model = create_full_model(base_mobilenet_model, 
                                    "MobileNet", 
                                    optimizer='adam')

base_inceptionresnetv2_model = InceptionResNetV2(input_shape=(224, 224, 1,), 
                                 include_top = False, weights = None)
inceptionresnetv2_model = create_full_model(base_inceptionresnetv2_model, 
                                            "InceptionResNetV2",
                                            optimizer='adam')

base_inceptionv3_model = InceptionV3(input_shape=(224, 224, 1,), 
                                 include_top = False, weights = None)
inceptionv3_model = create_full_model(base_inceptionv3_model, 
                                      "InceptionV3",
                                      optimizer='adam')


Model: "self_designed"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3293 (Conv2D)         (None, 220, 220, 64)      1664      
_________________________________________________________________
max_pooling2d_114 (MaxPoolin (None, 73, 73, 64)        0         
_________________________________________________________________
conv2d_3294 (Conv2D)         (None, 71, 71, 30)        17310     
_________________________________________________________________
max_pooling2d_115 (MaxPoolin (None, 35, 35, 30)        0         
_________________________________________________________________
flatten_25 (Flatten)         (None, 36750)             0         
_________________________________________________________________
dense_260 (Dense)            (None, 1024)              37633024  
_________________________________________________________________
dropout_105 (Dropout)        (None, 1024)            

## Additional info

In [8]:
# Early stopping and hyperparameters
# Hyper parameters
STEP_SIZE_TRAIN = train_images.n // train_images.batch_size
STEP_SIZE_VALID = valid_images.n // valid_images.batch_size
# Stop the training when there is no improvement after 3 epochs trainings.
early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

## Training the models

In [9]:
# self-designed simple CNN model
self_model_history = self_model.fit(train_images, epochs=10, validation_data=valid_images, verbose=1, 
                    callbacks = early_stop)
self_model_result = self_model_history.history
self_model.evaluate(valid_images, steps=STEP_SIZE_VALID) # Evaluate the model
self_model.save("self_model.h5")
# save txt
with open('self_model_result.txt', 'wb') as file_pi:
    pickle.dump(self_model_result, file_pi)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [20]:
# mobilenet
mobilenet_model_history = mobilenet_model.fit(train_images, epochs=10, validation_data=valid_images, verbose=1, 
                    callbacks = early_stop)
mobilenet_model_result = mobilenet_model_history.history
mobilenet_model.evaluate(valid_images, steps=STEP_SIZE_VALID) # Evaluate the model
mobilenet_model.save("mobilenet_model.h5")
# save txt
with open('mobilenet_model_result.txt', 'wb') as file_pi:
    pickle.dump(mobilenet_model_result, file_pi)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10


In [38]:
# inception resnet v2
inceptionresnetv2_model_history = inceptionresnetv2_model.fit(train_images, epochs=10, validation_data=valid_images, verbose=1, 
                    callbacks = early_stop)
inceptionresnetv2_model_result = inceptionresnetv2_model_history.history
inceptionresnetv2_model.evaluate(valid_images, steps=STEP_SIZE_VALID) # Evaluate the model
inceptionresnetv2_model.save("inceptionresnetv2_model.h5")
with open('inceptionresnetv2_model_result.txt', 'wb') as file_pi:
    pickle.dump(inceptionresnetv2_model_result, file_pi)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


In [39]:
# inception v3
inceptionv3_model_history = inceptionv3_model.fit(train_images, epochs=10, validation_data=valid_images, verbose=1, 
                    callbacks = early_stop)
inceptionv3_model_result = inceptionv3_model_history.history
inceptionv3_model.evaluate(valid_images, steps=STEP_SIZE_VALID) # Evaluate the model
inceptionv3_model.save("inceptionv3_model.h5")
with open('inceptionv3_model_result.txt', 'wb') as file_pi:
    pickle.dump(inceptionv3_model_result, file_pi)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Testing the models

In [8]:
self_model = keras.models.load_model("/kaggle/input/models/self_model.h5")
mobilenet_model = keras.models.load_model("/kaggle/input/models/mobilenet_model.h5")
inceptionresnetv2_model = keras.models.load_model("/kaggle/input/models/inceptionresnetv2_model.h5")
inceptionv3_model = keras.models.load_model("/kaggle/input/models/inceptionv3_model.h5")
densenet121_model = keras.models.load_model("/kaggle/input/models/densenet121_model.h5")

# Evaluate the label of the test_images
self_model.evaluate(test_images)
mobilenet_model.evaluate(test_images)
inceptionresnetv2_model.evaluate(test_images)
inceptionv3_model.evaluate(test_images)
densenet121_model.evaluate(test_images)



[0.18400059640407562, 0.946704089641571]