In [1]:
# Silence TensorFlow's native INFO/WARNING log output. The variable has to be
# set before TensorFlow is imported, otherwise the import-time logging is
# already configured and the setting has no effect.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adamax

from sklearn.metrics import roc_auc_score, f1_score, classification_report, confusion_matrix

# Data loading and inspection

In [2]:
# Load the per-image metadata table, using the first CSV column as the index,
# and preview the first rows.
metadata_csv = "/kaggle/input/coronahack-chest-xraydataset/Chest_xray_Corona_Metadata.csv"
df_meta = pd.read_csv(metadata_csv, index_col=0)
df_meta.head()

Let's display one example image from each of the two classes.

In [3]:
# Show the first "Normal" and the first "Pnemonia" image side by side.
image_root = ("/kaggle/input/coronahack-chest-xraydataset/"
              "Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/")

normal = df_meta[df_meta["Label"] == "Normal"].iloc[0]
pnemonia = df_meta[df_meta["Label"] == "Pnemonia"].iloc[0]

fig, axes = plt.subplots(1, 2, figsize=(25, 5))
fig.suptitle(" Normal(left) and Pnemonia (right) example images", fontsize="25")
for ax, example in zip(axes, (normal, pnemonia)):
    # Images live in a sub-folder named after the lower-cased dataset split.
    image_path = image_root + example["Dataset_type"].lower() + "/" + example["X_ray_image_name"]
    # cmap only applies to single-channel images; without it those would be
    # rendered with matplotlib's default false-colour map instead of grey.
    ax.imshow(plt.imread(image_path), cmap="gray")
    ax.set_title(example["Label"], fontsize=20)

How are the images distributed among the classes?

In [4]:
# Bar chart of the number of training images per label.
is_train = df_meta["Dataset_type"] == "TRAIN"
train_label_counts = df_meta[is_train]["Label"].value_counts()

plt.figure(figsize=(10, 10))
count_ax = train_label_counts.plot.bar(rot=0)  # draws on the figure created above
count_ax.tick_params(labelsize=25)

# Data preprocessing

Let's proceed with image augmentation.

In [5]:
# Both generators run the Xception input preprocessing. The training generator
# additionally applies random zoom, brightness and shift augmentation and
# reserves 10% of its data as a validation subset.
xception_preprocess = tf.keras.applications.xception.preprocess_input

train_datagen = ImageDataGenerator(
    preprocessing_function=xception_preprocess,
    zoom_range=0.1,
    brightness_range=[0.5, 1.3],
    width_shift_range=0.1,
    height_shift_range=0.1,
    validation_split=0.1,
)
test_datagen = ImageDataGenerator(preprocessing_function=xception_preprocess)

The last thing to do before we work on the model is to read the images through the generators.

In [6]:
BATCH_SIZE = 64
path = "../input/coronahack-chest-xraydataset/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/"

# `path` already ends with a separator; os.path.join avoids producing "//train".
train_dir = os.path.join(path, "train")
test_dir = os.path.join(path, "test")

# validation_split slices the dataframe rows by position, so shuffle the rows
# once (with a fixed seed) before splitting. Otherwise the validation subset
# would depend on how the metadata file happens to be ordered.
train_df = df_meta[df_meta["Dataset_type"] == "TRAIN"].sample(frac=1, random_state=1234)
test_df = df_meta[df_meta["Dataset_type"] == "TEST"]

# Validation images must not be randomly augmented. Use a generator that only
# applies the same preprocessing as train_datagen. Its validation_split has to
# equal the one train_datagen was built with (0.1) so that the 'training' and
# 'validation' subsets stay complementary.
VAL_SPLIT = 0.1
val_datagen = ImageDataGenerator(
    preprocessing_function=train_datagen.preprocessing_function,
    validation_split=VAL_SPLIT,
)

# Options shared by all three flows.
flow_kwargs = dict(
    x_col='X_ray_image_name',
    y_col='Label',
    color_mode='rgb',
    class_mode='categorical',
    target_size=(150, 150),
    batch_size=BATCH_SIZE,
)

train_images = train_datagen.flow_from_dataframe(
    dataframe=train_df, directory=train_dir, subset='training',
    shuffle=True, seed=1234, **flow_kwargs)

val_images = val_datagen.flow_from_dataframe(
    dataframe=train_df, directory=train_dir, subset='validation',
    shuffle=True, seed=1234, **flow_kwargs)

# shuffle=False keeps the test batches in dataframe order, so predictions can
# be matched to labels afterwards.
test_images = test_datagen.flow_from_dataframe(
    dataframe=test_df, directory=test_dir, shuffle=False, **flow_kwargs)

# Model definition

We apply transfer learning, using the Xception network with ImageNet weights as the base model.

In [7]:
# Transfer-learning model: an ImageNet-initialised Xception backbone without
# its classification top and with global average pooling, followed by a
# 2-unit classification head.
inputs = tf.keras.layers.Input((150, 150, 3))
base_model = tf.keras.applications.xception.Xception(
    include_top=False, weights="imagenet", input_shape=(150, 150, 3), pooling='avg')
x = base_model(inputs)
# The two classes are mutually exclusive and the labels are one-hot encoded
# (class_mode='categorical'), so the head uses softmax. Two independent
# sigmoids would not produce scores that sum to 1.
output = layers.Dense(2, activation='softmax')(x)
model = tf.keras.models.Model(inputs=inputs, outputs=output)

# Model training and evaluation

In [8]:
# Compile with the Adamax optimiser and binary cross-entropy, then train for
# 30 epochs while tracking metrics on the validation generator.
optimizer = Adamax(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=30,
)

In [9]:
# Plot the training history: accuracy on the left panel, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(30, 10))
# NOTE(review): the style is chosen by its position in plt.style.available,
# which can differ between matplotlib versions - consider naming it explicitly.
with plt.style.context(plt.style.available[8]):
    fig.suptitle("Accuracy and loss of train and validation set for each epoch.", fontsize=25)
    # (axes, key in history.history, y-axis label) for each panel.
    panels = [
        (axes[0], "accuracy", "Accuracy"),
        (axes[1], "loss", "Loss"),
    ]
    for ax, metric, ylabel in panels:
        ax.plot(history.history[metric], label=f"Train {metric}")
        ax.plot(history.history[f"val_{metric}"], label=f"Validation {metric}")
        ax.legend(fontsize=25)
        ax.tick_params(axis="both", labelsize=20)
        ax.set_xlabel("Epochs", fontsize=25)
        # Each panel gets its own y-label. Previously "Loss" was written onto
        # the accuracy panel and the loss panel was left unlabelled.
        ax.set_ylabel(ylabel, fontsize=25)
    

In [10]:
# Per-class scores for every test image, one row per image and one column per
# class. Rows follow the generator's order (it was created with shuffle=False).
probs = model.predict(test_images)

# Column / class index the generator assigned to the positive class.
positive_idx = test_images.class_indices["Pnemonia"]

# Take the ground truth from the generator itself so that it is guaranteed to
# line up with the predictions, even if the generator skipped rows whose image
# files were missing. 1 = pneumonia, 0 = normal.
gt = (np.asarray(test_images.classes) == positive_idx).astype(int)
# Hard class decisions for the report, F1 score and confusion matrix.
preds = (np.argmax(probs, axis=1) == positive_idx).astype(int)
# Continuous positive-class scores: ROC AUC is a ranking metric and must be
# computed from scores, not from already-thresholded 0/1 predictions.
scores = probs[:, positive_idx]

print("Results on test set:")
print(classification_report(gt, preds, target_names=["Normal", "Pneumonia"]))
print("ROC AUC score:   ", roc_auc_score(gt, scores))
print("F1 score:", f1_score(gt, preds))

In [11]:
# Confusion matrix of the test predictions. scikit-learn puts the true classes
# on the rows and the predicted classes on the columns; label both axes so the
# heatmap can be read without knowing that convention.
conf_matrix = confusion_matrix(gt, preds)
class_names = ["Normal", "Pneumonia"]  # order matches the 0/1 encoding of gt
ax = sns.heatmap(conf_matrix, xticklabels=class_names, yticklabels=class_names, annot=True, fmt='g')
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title('Confusion Matrix')
plt.show()