In [1]:
#import libraries
import os
import matplotlib.pyplot as plt
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import linear_model, datasets,metrics
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.layers import Dense, Dropout, Flatten,BatchNormalization, Conv2D, MaxPooling2D

In [2]:
# Set the path to the data directory
data_address = os.path.join(os.getcwd(),'dlfinal')
image_address = os.path.join(data_address, 'Coronahack-Chest-XRay-Dataset', 'Coronahack-Chest-XRay-Dataset')
training_label = pd.read_csv(os.path.join(data_address, 'training_label.csv'))
testing_label = pd.read_csv(os.path.join(data_address, 'testing_label.csv'))
# Get the image paths
train_images_normal = [os.path.join(image_address, 'train', filename) for filename in training_label[training_label.Label == 'Normal'].X_ray_image_name.tolist()]
train_images_sick = [os.path.join(image_address, 'train', filename) for filename in training_label[training_label.Label == 'Pnemonia'].X_ray_image_name.tolist()]
test_images_normal = [os.path.join(image_address, 'test', filename) for filename in testing_label[testing_label.Label == 'Normal'].X_ray_image_name.tolist()]
test_images_sick = [os.path.join(image_address, 'test', filename) for filename in testing_label[testing_label.Label == 'Pnemonia'].X_ray_image_name.tolist()]


In [3]:
# modeling parameters
train_arrays = []
size = (256, 256)
batch_size = 32
datagen = ImageDataGenerator(
    rotation_range=20,        # Degree range for random rotations
    width_shift_range=0.2,    # Range for horizontal shift
    height_shift_range=0.2,   # Range for vertical shift
    shear_range=0.2,          # Shear intensity range
    zoom_range=0.2,           # Range for random zoom
    horizontal_flip=True,     # Randomly flip images horizontally
    vertical_flip=False       # Do not flip images vertically
)

In [4]:
# Load the training data
training_label.dropna()
for x in training_label['X_ray_image_name']:
    try:
        img_path = os.path.join(image_address, 'train', x)
        img = image.load_img(img_path, target_size=size)
        img_array = image.img_to_array(img)
        train_arrays.append(img_array)
    except Exception as e:
        print(f"Error loading image: {img_path}")
        # Handle the exception or skip the image if necessary

In [5]:
# Convert train_arrays and label to NumPy arrays
train_images = np.array(train_arrays)
label_mapping = {"Pnemonia": 1, "Normal": 0}
label = training_label['Label'].map(label_mapping)
train_images = train_images.astype('float32') / 255.0


In [6]:
# Split the data into training and validation sets
augmented_images = datagen.flow(train_images, label, batch_size=batch_size)
augmented_train_images = []
augmented_labels = []

for images, labels in augmented_images:
    augmented_train_images.append(images)
    augmented_labels.append(labels)
    if len(augmented_train_images) >= (len(train_images) / batch_size):
        break

augmented_train_images = np.concatenate(augmented_train_images)
augmented_labels = np.concatenate(augmented_labels)

In [7]:
labels.shape

(6,)

In [8]:
train_images.shape

(5286, 256, 256, 3)

In [9]:
augmented_train_images.shape

(5286, 256, 256, 3)

In [10]:
# Split the data into training and validation sets
train_images = augmented_train_images
labels = augmented_labels
del(augmented_train_images)
del(augmented_labels)

train_images, test_images, train_labels, test_labels = train_test_split(train_images, labels, test_size=0.2, random_state=42)
final_train_images, val_images, final_train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, random_state=42)



In [11]:
final_train_images.shape

(3382, 256, 256, 3)

In [12]:
final_train_labels.shape

(3382,)

In [13]:
val_images.shape

(846, 256, 256, 3)

In [14]:
val_labels.shape

(846,)

In [15]:
input_shape = (256,256,3)

In [18]:
#Using Keras Sequential API
model = Sequential()
model.add(Conv2D(32, kernel_size=(5, 5),
                 activation='relu',
                 input_shape=input_shape))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='softmax'))

In [19]:
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
my_callbacks = [
    EarlyStopping(patience=3),
    ModelCheckpoint(filepath='model.{epoch:02d}-{val_loss:.2f}.h5', 
                    save_best_only=True,
                    mode='max',
                    monitor='val_accuracy'),
    TensorBoard(log_dir='logs'),
]

In [20]:
history = model.fit(final_train_images, final_train_labels,
          batch_size=batch_size,
          epochs=20,
          verbose=1,
          validation_data=(val_images, val_labels),
          callbacks=my_callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


In [23]:
model.evaluate(test_images,test_labels)



[0.0, 0.7381852269172668]

In [32]:
del train_images

In [33]:

prediction = model.predict(test_images)



In [34]:
from sklearn.metrics import f1_score, recall_score, precision_score
f1_score(prediction,test_labels)

0.8493746601413812

In [35]:
recall_score(prediction,test_labels)

0.7381852551984878

In [36]:
precision_score(prediction,test_labels)

1.0