## Importing Libraries 

In [4]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
import cv2 
from matplotlib import pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from keras.layers import Conv2D, Flatten, MaxPooling2D, Dense, Dropout, BatchNormalization, Activation, Input
from keras.models import Sequential, Model
import seaborn as sns
from keras import regularizers
from keras.optimizers import RMSprop

## Loading Data 

In [5]:
res = 216
def load_data(directory):
    images = []
    labels = []
    for folder in tqdm(os.listdir(directory)):
        label = folder
        data_path = os.path.join(directory, folder)
        for filename in tqdm(os.listdir(data_path), position = 0, leave = True, desc = f"Loading images from {directory.split('/')[5]} folder"):
            img = cv2.imread(os.path.join(data_path, filename))
            img = cv2.resize(img, (res, res))
            img = np.array(img)
            images.append(img)
            labels.append(label)
    return np.array(images), np.array(labels)

In [3]:
x_train, y_train = load_data('./train/')
x_test, y_test = load_data('./test/')


  0%|                                                                                            | 0/3 [00:00<?, ?it/s]


NotADirectoryError: [WinError 267] The directory name is invalid: './train/.DS_Store'

In [None]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size = 0.2, shuffle = True, stratify = y_train)

In [None]:
x_train.shape, y_train.shape

In [None]:
x_test.shape, y_test.shape

In [None]:
x_val.shape, y_val.shape

<div style="background-color: #D1E6F9; padding: 20px;">
    <h2 style="color: #0056b3;"> Visualizing the Data 📊 </h2>

In [None]:
def visualize_data(data, labels):
    normal_indices = np.where(labels == 'NORMAL')[0][:5]
    pneumonia_indices = np.where(labels == 'PNEUMONIA')[0][:5]
    fig, axs = plt.subplots(2, 5, figsize=(12, 6))
    fig.suptitle('Examples of Pneumonia and Normal Images')
    for i, idx in enumerate(normal_indices):
        ax = axs[0, i]
        ax.imshow(data[idx])
        ax.set_title('Normal')
        ax.axis('off')

    for i, idx in enumerate(pneumonia_indices):
        ax = axs[1, i]
        ax.imshow(data[idx])
        ax.set_title('Pneumonia')
        ax.axis('off')

    plt.tight_layout()

    plt.show()

In [None]:
visualize_data(x_train, y_train)

In [None]:
train_counts = np.unique(y_train, return_counts=True)
test_counts = np.unique(y_test, return_counts=True)
val_counts = np.unique(y_val, return_counts=True)

In [None]:
fig, axs = plt.subplots(3, 1, figsize=(5, 9))
sns.barplot(ax = axs[0], x=train_counts[0], y=train_counts[1], color='blue', alpha=0.8, label='Training')
axs[0].set_xlabel('Classes')
axs[0].set_ylabel('Counting')
axs[0].set_title('Training')
sns.barplot(ax = axs[1], x=test_counts[0], y=test_counts[1], color='red', alpha=0.8, label='Testing')
axs[1].set_xlabel('Classes')
axs[1].set_ylabel('Counting')
axs[1].set_title('Training')
sns.barplot(ax = axs[2], x=val_counts[0], y=val_counts[1], color='green', alpha=0.8, label='Validating')
axs[2].set_xlabel('Classes')
axs[2].set_ylabel('Counting')
axs[2].set_title('Validatin')
plt.title('Class Distribution')
plt.tight_layout()
plt.show()

<div style="background-color: #D1E6F9; padding: 20px;">
    <h3 style="color: #0056b3;"> Transforming the categorical data to numerical data </h3>

In [None]:
y_train = np.where(y_train == 'NORMAL', 0, 1)
y_test = np.where(y_test == 'NORMAL', 0, 1)
y_val = np.where(y_val == 'NORMAL', 0, 1)

<div style="background-color: #D1E6F9; padding: 20px;">
    <h2 style="color: #0056b3;"> Data Augmentation 📈 </h2>

In [None]:
train_gen = ImageDataGenerator(featurewise_center=False,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=[0.8, 1.2],
    shear_range=0.0,
    zoom_range=0.2,
    horizontal_flip=True,
    rescale=1/255,
    data_format=None,
    dtype=None,
)


val_gen = ImageDataGenerator(rescale = 1/255.)

test_gen = ImageDataGenerator(rescale = 1/255.)

train_gen.fit(x_train)
val_gen.fit(x_val)
test_gen.fit(x_test)

train_set = train_gen.flow(x_train, y_train)
val_set = val_gen.flow(x_val, y_val)
test_set = test_gen.flow(x_test, y_test)

<div style="background-color: #D1E6F9; padding: 20px;">
    <h2 style="color: #0056b3;"> Modeling 🤖 </h2>

In [None]:
def build_cnn():
    l1l2 = regularizers.L1L2(l1=1e-4, l2=1e-3)
    classifier = Sequential()
    classifier.add(Conv2D(64, (3, 3), input_shape = (res, res, 3), activation = 'relu', kernel_regularizer = l1l2, padding = 'valid'))
    classifier.add(MaxPooling2D((2, 2)))
    classifier.add(BatchNormalization())
    
    classifier.add(Conv2D(32, (3, 3), activation = 'relu', kernel_regularizer = l1l2, padding = 'valid'))
    classifier.add(MaxPooling2D((2, 2)))
    classifier.add(BatchNormalization())
    
    classifier.add(Conv2D(16, (3, 3), activation = 'relu', kernel_regularizer = l1l2, padding = 'valid'))
    classifier.add(MaxPooling2D((2, 2)))
    classifier.add(BatchNormalization())
    
    classifier.add(Flatten())
    classifier.add(Dense(units = 128, activation = 'relu'))
    classifier.add(Dropout(0.4))
    classifier.add(Dense(units = 1, activation = 'sigmoid'))
    classifier.summary()
    return classifier

In [None]:
classifier = build_cnn()
rmsprop = RMSprop(learning_rate = 0.0001)
classifier.compile(optimizer = rmsprop, loss = 'binary_crossentropy', metrics = ['binary_accuracy'])

<div style="background-color: #D1E6F9; padding: 20px;">
    <p style="color: #0056b3;"> Setting GPU to train the model </p>

In [None]:
tf.test.is_gpu_available()

<div style="background-color: #D1E6F9; padding: 20px;">
    <h3 style="color: #0056b3;"> Training the model  </h3>

In [None]:
earlystopping = EarlyStopping(monitor = 'val_loss', patience = 4, restore_best_weights = True)
lr_reduc = ReduceLROnPlateau(
    monitor='val_loss',
    factor = 0.2,                                     
    patience = 2,                                   
    min_delt = 1e-7,                                
    cooldown = 0,                               
    verbose = 1) 

In [None]:
history = classifier.fit(train_set,
                         validation_data = val_set,
                         verbose = 1, epochs = 15,
                         callbacks = [earlystopping, lr_reduc], 
                         batch_size = 32,
                         steps_per_epoch = len(x_train)/32,
                         validation_steps = len(x_val)/32)

<div style="background-color: #D1E6F9; padding: 20px;">
    <h2 style="color: #0056b3;">Model evaluation 🔎 </h2>

In [None]:
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
plt.plot(history.history['binary_accuracy'], label='Training accuracy')
plt.plot(history.history['val_binary_accuracy'], label='Validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.ylim([0.3, 1])
plt.show()

In [None]:
test_loss, test_accuracy = classifier.evaluate(test_set)

In [None]:
y_pred = classifier.predict(test_set)
y_pred = (y_pred > 0.5)

In [None]:
cm = confusion_matrix(y_test, y_pred)

plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.colorbar()
plt.xlabel("Previsão")
plt.ylabel("Valor Real")
plt.xticks([0, 1])
plt.yticks([0, 1])
plt.show()

In [None]:
print(classification_report(y_test, y_pred))