## 1 Library Setup 

In [None]:
import keras
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils.class_weight import compute_class_weight 
from keras.models import Sequential
from keras.layers import  Dense, Dropout, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from keras import regularizers
from keras.callbacks import ReduceLROnPlateau

from tensorflow.keras.applications import InceptionV3

In [None]:
print(tf.__version__)
print(keras.__version__)

2.12.0
2.12.0


## 1.2. Parameter Setup for Model Training

In [3]:
BATCH_SIZE = 48
image_height = 299
image_width = 299

In [4]:
# Data agumentation and pre-processing using tensorflow
data_generator_1 = ImageDataGenerator(
                            rescale=1./255,
                            rotation_range=5,
                            width_shift_range=0.05,
                            height_shift_range=0.05,
                            shear_range=0.05,
                            zoom_range=0.05,
                            brightness_range = [0.95,1.05],
                            horizontal_flip=False,
                            vertical_flip=False,
                            fill_mode='nearest'                                   
                        )

data_generator_2 = ImageDataGenerator (rescale=1./255)
data_generator_3 = ImageDataGenerator (rescale=1./255)

In [None]:
train_generator = data_generator_1.flow_from_directory(
    directory = "/kaggle/input/chest-xray-pneumonia/chest_xray/train",
    color_mode = "rgb",
    target_size = (image_height, image_width), 
    class_mode = "categorical",
    batch_size = BATCH_SIZE,
    shuffle = True,
    seed = 42)

val_generator=data_generator_2.flow_from_directory(
    directory = "/kaggle/input/chest-xray-pneumonia/chest_xray/val",
    color_mode = "rgb",
    target_size = (image_height, image_width),
    class_mode = "categorical",
    batch_size = BATCH_SIZE,
    shuffle = True,
    seed = 42
)
test_generator = data_generator_3.flow_from_directory(
    directory = "/kaggle/input/chest-xray-pneumonia/chest_xray/test", 
    color_mode = "rgb",
    target_size = (image_height, image_width),
    class_mode = "categorical",
    batch_size = BATCH_SIZE,
    shuffle = True,
    seed = 42)


Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


# 3. EDA

In [6]:
dict_class = train_generator.class_indices
print('Dictionary: {}'.format(dict_class))
class_names = list(dict_class.keys())  # storing class/breed names in a list
print('Class labels: {}'.format(class_names))

Dictionary: {'NORMAL': 0, 'PNEUMONIA': 1}
Class labels: ['NORMAL', 'PNEUMONIA']


In [None]:
frequency = np.unique(train_generator.classes, return_counts=True)

plt.title("Trainning dataset", fontsize='16')
plt.pie(frequency[1], labels = class_names, autopct='%1.0f%%');

In [8]:
# Dataset characteristics
print("Dataset Characteristics of Train Data Set:")
print("Number of images:", len(train_generator.classes))
print("Number of normal images:", len([label for label in train_generator.classes if label == 0]))
print("Number of pneumonia images:", len([label for label in train_generator.classes if label == 1]))
print()
print("Dataset Characteristics of Validation Data Set:")
print("Number of images:", len(val_generator.classes))
print("Number of normal images:", len([label for label in val_generator.classes if label == 0]))
print("Number of pneumonia images:", len([label for label in val_generator.classes if label == 1]))
print()
print("Dataset Characteristics of Test Data Set:")
print("Number of images:", len(test_generator.classes))
print("Number of normal images:", len([label for label in test_generator.classes if label == 0]))
print("Number of pneumonia images:", len([label for label in test_generator.classes if label == 1]))
print()


Dataset Characteristics of Train Data Set:
Number of images: 5216
Number of normal images: 1341
Number of pneumonia images: 3875

Dataset Characteristics of Validation Data Set:
Number of images: 16
Number of normal images: 8
Number of pneumonia images: 8

Dataset Characteristics of Test Data Set:
Number of images: 624
Number of normal images: 234
Number of pneumonia images: 390



## 3.2 Class Weights

<div style=" background-color:#fce4f6;text-align:left; padding: 13px 13px; border-radius: 8px; color: black; font-size: 16px">
    
**Why Use Class weight?**
During training, the model minimizes a loss function by adjusting its parameters. Without class weights:

* The loss from the majority class dominates, causing the model to prioritize correct predictions for that class.
* Minority class samples are often misclassified because their contribution to the overall loss is minimal.

By introducing **class weights** , we explicitly adjust the loss function so that:

* The minority class contributes more to the total loss.
* The majority class contributes less.

This forces the model to pay more attention to the minority class and reduces the bias caused by class imbalance.


In [None]:
class_weights = compute_class_weight(class_weight = "balanced", classes= np.unique(train_generator.classes), y= train_generator.classes)
class_weights = dict(zip(np.unique(train_generator.classes), class_weights))
class_weights

{0: 1.9448173005219984, 1: 0.6730322580645162}

## 3.3 Image Samples

In [None]:
img, label = next(train_generator)


plt.figure(figsize=[10, 5])
for i in range(10):
    plt.subplot(2, 5, i+1)
    plt.imshow(img[i])
    plt.axis('off')
    plt.title(class_names[np.argmax(label[i])])    
plt.show()
     

## 4.1. What is Transfer Learning?
<div style=" background-color:#fce4f6;text-align:left; padding: 13px 13px; border-radius: 8px; color: black; font-size: 16px">
In this project, transfer learning was employed using  powerful model InceptionV3  pre-trained on the ImageNet dataset. Transfer learning leverages the knowledge embedded in these models, enabling them to act as feature extractors for the task of classifying chest X-rays for pneumonia detection.
<br><br>

**Why InceptionV3?**
* **InceptionV3:** Known for its efficiency and capability to capture spatial hierarchies, this model utilizes factorized convolutions and auxiliary classifiers to enhance feature learning and prevent overfitting.

<div style=" background-color:#fce4f6;text-align:left; padding: 13px 13px; border-radius: 8px; color: black; font-size: 16px">

**Transfert Learning workload:**

1. **Loading the Pre-trained Models:**
   * Both models were loaded without their top classification layers   (`include_top=False`), making them adaptable to our binary classification task.
   * The models leveraged their pre-trained weights on ImageNet to extract general features from chest X-ray images.

2. **Freezing the Base Layers:**
   * Initially, the pre-trained layers were frozen to retain their learned weights and prevent overfitting during training.
   * This allowed the models to act as fixed feature extractors, capturing high-level patterns from the input images.

3. **Adding Custom Classification Layers:**
   * A series of custom layers were added to both models:
     * Global Average Pooling to reduce the spatial dimensions of the feature maps while retaining key information.
     * Fully connected layers with ReLU activations and dropout regularization to adapt the extracted features to the binary classification task.
     * A final softmax layer to output probabilities for the two classes: Pneumonia and No Pneumonia.

In [None]:
# Define the epochs for training 
EPOCHS = 10

# Define early stopping criteria
early_stopping = EarlyStopping(monitor='val_accuracy', #Monitor validation accuracy
                               patience=2, #wait 2 epochs before stopping
                               verbose=1,
                               restore_best_weights=True #Revert to the best parameters after stopping
                              )

#The ReduceLROnPlateau callback reduces the learning rate when the validation accuracy stops improving.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy',# Monitor validation accuracy
                              factor=0.001,# Multiply the learning rate by 0.001
                              patience=10,# Wait 10 epochs without improvement
                              verbose=1)


train_data = train_generator

## 4.2. InceptionV3 

In [None]:
# Load the pre-trained InceptionV3 model without the top classification layer
base_model_Inception = InceptionV3(weights='imagenet', include_top=False, input_shape=(image_height, image_width, 3))

# Set the layers of the base model as non-trainable (freeze them)
for layer in base_model_Inception.layers:
        layer.trainable = False  
        
# Create a new model and add the InceptionV3 base model
model_Inception = Sequential()
model_Inception.add(base_model_Inception)

 # Add a global average pooling layer and output layer for classification
model_Inception.add(GlobalAveragePooling2D())
model_Inception.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model_Inception.add(Dropout(0.4))
model_Inception.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model_Inception.add(Dropout(0.2))
        
model_Inception.add(Dense(2, activation='softmax'))

# Compile the model
model_Inception.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy'])

# Train the model with EarlyStopping
history_Inception = model_Inception.fit(train_data, epochs=EPOCHS, validation_data=test_generator, callbacks=[early_stopping], class_weight=class_weights)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 5: early stopping


In [21]:
test_loss_Inception, test_accuracy_Inception = model_Inception.evaluate(test_generator, steps=len(test_generator))
print(f'Validation Loss: {test_loss_Inception:.4f}')
print(f'Validation Accuracy: {test_accuracy_Inception:.4f}')

Validation Loss: 0.4491
Validation Accuracy: 0.8782


In [32]:
model_Inception.save('pneumonia-model.keras')

In [1]:
import tensorflow as tf
Loaded_model = tf.keras.models.load_model('pneumonia-model.keras')

In [2]:
import tensorflow as tf
from tensorflow.keras.utils import img_to_array, load_img
import numpy as np

def preprocess_image(image_path, target_size=(299, 299)):
    # Load the image 
    img = load_img(image_path, target_size=target_size)  # Resize the image
    img_array = img_to_array(img)  # Convert to NumPy array
    img_array = img_array / 255.0  # Rescale pixel values
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    return img_array


In [3]:
dic = {'NORMAL': 0, 'PNEUMONIA': 1}
target = {(v, k) for k, v in dic.items()}
target

{(0, 'NORMAL'), (1, 'PNEUMONIA')}

In [None]:
import os
# Path to your image

def preprocess_image(image_path, target_size=(299, 299)):
    # Load the image 
    img = load_img(image_path, target_size=target_size)  # Resize the image
    img_array = img_to_array(img)  # Convert to NumPy array
    img_array = img_array / 255.0  # Rescale pixel values
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    return img_array
image_path = os.path.join('./chest_xray/train/NORMAL/IM-0115-0001.jpeg')

# Preprocess the image
preprocessed_image = preprocess_image(image_path, target_size=(299, 299))

# Make predictions
predictions = Loaded_model.predict(preprocessed_image)

# Decode the predictions
predicted_class = np.argmax(predictions, axis=1)  # Get the class with the highest probability
predicted_probability = np.max(predictions)  # Get the probability of the predicted class

print(f"Predicted Class: {predicted_class[0]}")
print(f"Predicted Probability: {predicted_probability:.2f}")


Predicted Class: 0
Predicted Probability: 0.99
