In [1]:
# Base packages
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import shutil

# More packages
import cv2
import matplotlib.pyplot as plt

# Keras
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras.layers import Activation, Dropout, Flatten, Dense, Input
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import ResNet50
from keras.applications.resnet50 import preprocess_input

In [2]:
# Load the dataset-level summary table and the per-image metadata table.
summary = pd.read_csv(
    '/kaggle/input/coronahack-chest-xraydataset/Chest_xray_Corona_dataset_Summary.csv'
)
df = pd.read_csv(
    '/kaggle/input/coronahack-chest-xraydataset/Chest_xray_Corona_Metadata.csv'
)

# Encode the string labels as integers. 'Pnemonia' (sic) is the key this
# mapping matches on, so it must stay spelled exactly like this.
replace_dict = dict(Pnemonia=1, Normal=0)
df['Label'] = df['Label'].replace(replace_dict)

# Partition the metadata rows by the value of the Dataset_type column.
is_train_row = df['Dataset_type'] == 'TRAIN'
is_test_row = df['Dataset_type'] == 'TEST'
train_df = df.loc[is_train_row]
test_df = df.loc[is_test_row]

In [3]:
# Image directories for the TRAIN and TEST partitions.
# Absolute paths are used (matching the absolute CSV paths read earlier in the
# notebook) so the copy step does not depend on the kernel's working directory.
dataset_image_root = '/kaggle/input/coronahack-chest-xraydataset/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset'
training_data_path = dataset_image_root + '/train'
testing_data_path = dataset_image_root + '/test'

In [4]:
# Functions for making and removing subdirectories
def create_dir(base_dir='/kaggle/working'):
    """Create the ``<split>/<class>`` folders for the train, val and test splits.

    Every combination of split (``train``, ``val``, ``test``) and class
    (``Pneumonia``, ``Normal``) is created under ``base_dir``. Folders that
    already exist are left untouched, so the call is safe to repeat and a
    pre-existing folder no longer prevents the remaining ones from being made.

    Args:
        base_dir: Directory under which the split folders are created.
            Defaults to ``/kaggle/working``.

    Raises:
        OSError: If a folder cannot be created for a reason other than it
            already existing (e.g. missing permissions).
    """
    for split in ('train', 'val', 'test'):
        for label in ('Pneumonia', 'Normal'):
            # exist_ok=True replaces the old blanket try/except, which aborted
            # at the first existing folder and skipped all the later ones.
            os.makedirs(os.path.join(base_dir, split, label), exist_ok=True)
def remove_dir(base_dir='/kaggle/working'):
    """Delete the train, val and test folder trees under ``base_dir``.

    Each split is removed independently: a split folder that does not exist is
    skipped without affecting the others. The ``val`` tree is removed as well,
    so stale validation images do not survive a re-run.

    Args:
        base_dir: Directory that contains the split folders.
            Defaults to ``/kaggle/working``.

    Raises:
        OSError: If an existing tree cannot be deleted (anything other than
            the folder being absent).
    """
    for split in ('train', 'val', 'test'):
        try:
            shutil.rmtree(os.path.join(base_dir, split))
        except FileNotFoundError:
            # Nothing to clean for this split (e.g. first run); keep going.
            continue

In [5]:
# Separate dataframes per label within the train and test partitions.
# Label codes follow the integer encoding applied to df['Label'] earlier.
pneumonia_code, normal_code = 1, 0

train_pneumonia_df = train_df.loc[train_df['Label'] == pneumonia_code]
train_normal_df = train_df.loc[train_df['Label'] == normal_code]

test_pneumonia_df = test_df.loc[test_df['Label'] == pneumonia_code]
test_normal_df = test_df.loc[test_df['Label'] == normal_code]

In [6]:
# Number of training rows per class, and their total.
ntrain_p, ntrain_n = len(train_pneumonia_df), len(train_normal_df)
tntrain = ntrain_p + ntrain_n

# Reserve 10% of each class (rounded by round()) for validation.
nval_p, nval_n = (round(0.1 * class_count) for class_count in (ntrain_p, ntrain_n))

# One count per line: pneumonia first, then normal.
print(nval_p, nval_n, sep='\n')

In [7]:
# Carve the validation subsets off the head of each class and keep the
# remainder for training.
#
# Both halves are re-derived from train_df instead of slicing the
# train_*_df frames in place, so re-running this cell on its own no longer
# shrinks the training set a second time.
# NOTE(review): rows are taken in metadata order with no shuffling; confirm
# that the first 10% of each class is representative.
_pneumonia_all = train_df[train_df.Label == 1]
_normal_all = train_df[train_df.Label == 0]

val_pneumonia_df = _pneumonia_all.iloc[:nval_p]
train_pneumonia_df = _pneumonia_all.iloc[nval_p:]

val_normal_df = _normal_all.iloc[:nval_n]
train_normal_df = _normal_all.iloc[nval_n:]

In [8]:
# Copy the image files into per-split / per-class folders so they can be read
# with flow_from_directory. (flow_from_dataframe could skip this copy step;
# this notebook deliberately uses the directory layout.)
def copy_images(image_names, source_dir, target_dir):
    """Copy every named image from ``source_dir`` into ``target_dir``.

    Args:
        image_names: Iterable of file names; each is converted with ``str()``
            before being joined onto ``source_dir``.
        source_dir: Directory the files are read from.
        target_dir: Existing directory the files are copied into.

    Raises:
        OSError: Propagated from ``shutil.copy`` (e.g. a listed file is
            missing from ``source_dir``).
    """
    for image_name in image_names:
        shutil.copy(os.path.join(source_dir, str(image_name)), target_dir)


# Remove the previous output folders, then (re)create the class folders.
remove_dir()
create_dir()

# File-name lists for every split / class combination.
training_images_pneumonia = train_pneumonia_df.X_ray_image_name.values.tolist()
training_images_normal = train_normal_df.X_ray_image_name.values.tolist()

val_images_pneumonia = val_pneumonia_df.X_ray_image_name.values.tolist()
val_images_normal = val_normal_df.X_ray_image_name.values.tolist()

testing_images_pneumonia = test_pneumonia_df.X_ray_image_name.values.tolist()
testing_images_normal = test_normal_df.X_ray_image_name.values.tolist()

# (file names, source directory, destination directory) for each copy pass.
# Validation images are read from the training directory, same as training images.
copy_jobs = [
    (training_images_pneumonia, training_data_path, '/kaggle/working/train/Pneumonia'),
    (training_images_normal, training_data_path, '/kaggle/working/train/Normal'),
    (val_images_pneumonia, training_data_path, '/kaggle/working/val/Pneumonia'),
    (val_images_normal, training_data_path, '/kaggle/working/val/Normal'),
    (testing_images_pneumonia, testing_data_path, '/kaggle/working/test/Pneumonia'),
    (testing_images_normal, testing_data_path, '/kaggle/working/test/Normal'),
]

for job_names, job_source, job_target in copy_jobs:
    copy_images(job_names, job_source, job_target)

In [9]:
# Model configuration: batch size and logging verbosity.
batch_size = 32
verbosity = 1

# Input image geometry: 224 x 224 pixels with 3 channels.
img_width = img_height = 224
img_num_channels = 3
input_shape = (img_width, img_height, img_num_channels)

In [10]:
# Options shared by the training and validation directory iterators.
directory_flow_options = dict(target_size=(224,224),
                              batch_size=batch_size,
                              class_mode='binary')

# Training images: rescale pixel values by 1/255 and apply random augmentation.
# NOTE(review): Keras documents rotation_range in degrees, so 0.2 is a very
# small rotation -- confirm this is the intended value.
augmentation_options = dict(rescale=1./255,
                            shear_range=0.2,
                            rotation_range=0.2,
                            width_shift_range=0.2,
                            height_shift_range=0.2,
                            zoom_range=0.2,
                            horizontal_flip=True,
                            vertical_flip=True,
                            fill_mode='nearest')
train_datagen = ImageDataGenerator(**augmentation_options)
train_generator = train_datagen.flow_from_directory('/kaggle/working/train',
                                                    **directory_flow_options)

# Validation images: rescale only, no augmentation.
valid_datagen = ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_directory('/kaggle/working/val',
                                                    **directory_flow_options)

In [11]:
# Model 1: ImageNet-pretrained ResNet50 backbone (global max pooling, no
# classification top) followed by a small dense head with a sigmoid output.
base_model = ResNet50(input_shape=input_shape, include_top=False, weights="imagenet", pooling="max")

# Freeze the backbone up front so that only the new head is trained by the
# first compile/fit.
base_model.trainable = False

inputs = Input(shape=input_shape)

# The image generators in this notebook deliver RGB pixels rescaled to [0, 1],
# whereas the ImageNet ResNet50 weights expect resnet50.preprocess_input
# applied to 0-255 pixels. Undo the rescale and apply it inside the model, so
# train, validation and test inputs are all treated identically.
x = keras.layers.Lambda(lambda t: preprocess_input(t * 255.0),
                        name="resnet50_preprocess")(inputs)

# training=False keeps the backbone in inference mode when it is called.
x = base_model(x, training=False)
x = Dropout(0.3)(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.3)(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.3)(x)
outputs = Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs, outputs)

# `learning_rate` replaces the deprecated `lr` keyword of the Adam optimizer.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [12]:
# Print the layer-by-layer summary of `model`.
model.summary()

In [13]:
# Whole batches per epoch; a trailing partial batch is dropped by the floor division.
train_steps = train_generator.samples // batch_size
valid_steps = valid_generator.samples // batch_size

# Stage 1: train with the backbone frozen (base_model.trainable was set to
# False before the model was compiled). The History object is kept so the
# curves can be inspected afterwards.
history_frozen = model.fit(train_generator,
                           steps_per_epoch=train_steps,
                           epochs=50,
                           validation_data=valid_generator,
                           validation_steps=valid_steps,
                           verbose=verbosity)

# Stage 2: unfreeze the backbone and fine-tune with a 10x smaller learning
# rate. The model is recompiled so the change to `trainable` takes effect.
# `learning_rate` replaces the deprecated `lr` keyword.
base_model.trainable = True
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.00001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

history_finetune = model.fit(train_generator,
                             steps_per_epoch=train_steps,
                             epochs=5,
                             validation_data=valid_generator,
                             validation_steps=valid_steps,
                             verbose=verbosity)

In [14]:
# Test images get the same 1/255 rescale as the validation images and no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)
test_flow_options = {
    'target_size': (224,224),
    'batch_size': batch_size,
    'class_mode': 'binary',
}
test_generator = test_datagen.flow_from_directory('/kaggle/working/test', **test_flow_options)

# Left as the cell's last expression so the returned metrics are displayed.
model.evaluate(test_generator)