In [3]:
import pandas as pd
import numpy as np
import os, os.path as op
import tensorflow as tf
import tensorflow.keras as keras
import plotly.graph_objects as go

from sklearn.model_selection import train_test_split

import glob

data: https://www.kaggle.com/praveengovi/coronahack-chest-xraydataset 

In [30]:
df = pd.read_csv('Chest_xray_Corona_Metadata.csv')

train_df = df[df['Dataset_type'] == 'TRAIN']
test_df = df[df['Dataset_type'] == 'TEST']


In [27]:
train_path = 'Coronahack-Chest-XRay-Dataset\\Coronahack-Chest-XRay-Dataset\\train'
test_path = 'Coronahack-Chest-XRay-Dataset\\Coronahack-Chest-XRay-Dataset\\test' 

In [31]:
filled_train_df = train_df.fillna('Unknown')
filled_test_df = test_df.fillna('Unknown')

final_train_df = filled_train_df[['X_ray_image_name', 'Label']]
final_test_df = test_df.fillna('Unknown')

final_train_df.shape

(5286, 2)

In [33]:
train_data, valid_data = train_test_split(final_train_df, test_size=0.2, stratify=final_train_df['Label'], random_state=42)
print(train_data['Label'].value_counts())
print(valid_data['Label'].value_counts())

Pnemonia    3155
Normal      1073
Name: Label, dtype: int64
Pnemonia    789
Normal      269
Name: Label, dtype: int64


In [34]:
train_data.head()

Unnamed: 0,X_ray_image_name,Label
3743,person436_virus_886.jpeg,Pnemonia
1081,NORMAL2-IM-1102-0001.jpeg,Normal
2762,person1670_bacteria_4425.jpeg,Pnemonia
1701,person1239_virus_2099.jpeg,Pnemonia
746,NORMAL2-IM-0600-0001.jpeg,Normal


In [55]:
gen =keras.preprocessing.image.ImageDataGenerator(rescale=1/255.)

train_img = gen.flow_from_dataframe(
    dataframe=train_data,
    directory=train_path,
    x_col='X_ray_image_name',
    y_col='Label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary', 
    color_mode='grayscale',
    shuffle=True
)

Found 0 validated image filenames belonging to 0 classes.




In [56]:
test_img = gen.flow_from_dataframe(
    dataframe=final_test_df,
    directory=test_path,
    x_col='X_ray_image_name',
    y_col='Label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    color_mode='grayscale',
    shuffle=False
)

Found 624 validated image filenames belonging to 2 classes.


In [35]:
valid_data.head()

Unnamed: 0,X_ray_image_name,Label
3481,person367_bacteria_1665.jpeg,Pnemonia
1128,NORMAL2-IM-1122-0001.jpeg,Normal
1897,person1343_virus_2316.jpeg,Pnemonia
2314,person1497_bacteria_3912.jpeg,Pnemonia
1072,NORMAL2-IM-1024-0001.jpeg,Normal


In [52]:
valid_img = gen.flow_from_dataframe(
    dataframe=valid_data,
    directory=train_path,
    x_col='X_ray_image_name',
    y_col='Label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary', 
    color_mode='grayscale',
    shuffle=True)

Found 1058 validated image filenames belonging to 2 classes.


In [4]:
def create_model():
    model = keras.Sequential([
        keras.layers.Conv2D(filters= 10, kernel_size=(3,3), strides = 1, padding='valid',input_shape=(224,224,1), activation='selu'),
        keras.layers.MaxPooling2D(pool_size=(2,2)),
        keras.layers.Conv2D(filters= 10, kernel_size=(3,3), strides = 1, padding='valid', activation='selu'),
        keras.layers.MaxPooling2D(pool_size=(2,2)),
        keras.layers.Conv2D(filters= 10, kernel_size=(3,3), strides = 1, padding='valid', activation='selu'),
        keras.layers.Flatten(),
        keras.layers.Dense(1, activation='sigmoid')
    ])
    return model

In [5]:
create_model().summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 222, 222, 10)      100       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 111, 111, 10)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 109, 109, 10)      910       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 54, 54, 10)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 52, 52, 10)        910       
_________________________________________________________________
flatten (Flatten)            (None, 27040)             0         
_________________________________________________________________
dense (Dense)                (None, 1)                 2

In [47]:
metrics = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'), 
    'accuracy',
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc', curve='ROC')
]
callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)]
callbacks_baseline = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)]

In [48]:
model = create_model()
model.compile(loss = 'binary_crossentropy', optimizer= 'Adam', metrics = metrics)

In [53]:
history = model.fit(train_img, epochs=10, validation_data=valid_img, steps_per_epoch=len(train_img), validation_steps=len(valid_img), callbacks=callbacks_baseline)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [57]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('fivethirtyeight')


In [68]:
def plot_learning_curves(*histories):
    """
    Returns separate loss curves for training and validation metrics.
    """ 
    from plotly.subplots import make_subplots
    fig = make_subplots(rows=3, cols=1, subplot_titles=('Loss', 'Recall', 'AUC'))
    for history in histories:
        loss = history[1].history.get('loss')
        val_loss = history[1].history.get('val_loss')

        auc = history[1].history.get('auc')
        val_auc = history[1].history.get('val_auc')

        recall = history[1].history.get('recall')
        val_recall = history[1].history.get('val_recall')

        epochs = np.arange(1, len(history[1].history.get('loss')) + 1)

        fig.add_trace(go.Scatter(x=epochs, y=loss, name=history[0] + " train"), row=1, col=1)
        fig.add_trace(go.Scatter(x=epochs, y=val_loss, name=history[0] + " val"), row=1, col=1)
        fig.add_trace(go.Scatter(x=epochs, y=recall, name=history[0] + " train"), row=2, col=1)
        fig.add_trace(go.Scatter(x=epochs, y=val_recall, name=history[0] + " val"), row=2, col=1)
        fig.add_trace(go.Scatter(x=epochs, y=auc, name=history[0] + " train"), row=3, col=1)
        fig.add_trace(go.Scatter(x=epochs, y=val_auc, name=history[0] + "val"), row=3, col=1)

    # Update xaxis properties
    fig.update_xaxes(title_text="Epochs", row=1, col=1)
    fig.update_xaxes(title_text="Epochs", row=2, col=1)
    fig.update_xaxes(title_text="Epochs", row=3, col=1)

    # Update yaxis properties
    fig.update_yaxes(title_text="Loss", row=1, col=1)
    fig.update_yaxes(title_text="Recall", row=2, col=1)
    fig.update_yaxes(title_text="AUC", row=3, col=1)

    fig.update_layout(title_text="Learning Curves", height=1500)

    fig.show()


def plot_roc_curve(*datas):
    from sklearn.metrics import roc_curve
    fig = go.Figure()
    for data in datas:
        name = data[0]
        y_true = data[1]
        pred = data[2]

        fpr, tpr, _ = roc_curve(y_true, pred)
        fig.add_trace(go.Scatter(x=fpr, y=tpr, name=name))

    fig.update_yaxes(title_text="TPR")
    fig.update_xaxes(title_text="FPR")
    fig.update_layout(title_text="ROC Curve")
    fig.show()

In [65]:
plot_learning_curves(['train',history])

In [69]:
pred = model.predict(test_img)
y_true = [0 if x == 'Normal' else 1 for x in final_test_df['Label']]

plot_roc_curve(['train', y_true, pred])

In [70]:
model.save('covid.h5')