# Pneumonia Classification Improved

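This notebook implements an improved pneumonia classifier for chest X-ray images. It trains and compares two binary models (NORMAL vs. PNEUMONIA): a transfer-learning model built on a pretrained VGG16 base, and a custom CNN trained from scratch.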

In [None]:
# Importing necessary libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix
import cv2
import random

In [None]:
# Setting the base directory
base_dir = '../input/chest-xray-pneumonia/chest_xray/'

# One directory per (split, class) pair, each resolved against base_dir.
(
    train_pneumonia_dir,
    train_normal_dir,
    test_pneumonia_dir,
    test_normal_dir,
    val_pneumonia_dir,
    val_normal_dir,
) = (
    os.path.join(base_dir, relative_dir)
    for relative_dir in (
        'train/PNEUMONIA/',
        'train/NORMAL/',
        'test/PNEUMONIA/',
        'test/NORMAL/',
        'val/PNEUMONIA/',
        'val/NORMAL/',
    )
)

In [None]:
# Function to preprocess images
def preprocess_image(image_list, img_size=224):
    """Load, resize and scale images, deriving a binary label from each path.

    Parameters
    ----------
    image_list : iterable of str
        Paths of the image files to load.
    img_size : int, default 224
        Each image is resized to ``(img_size, img_size)``.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        ``X`` holds the loaded images as float32 divided by 255, and ``y``
        holds one label per loaded image: 0 when 'NORMAL' appears in the
        image's containing folder name or in its file name, 1 otherwise.
        Both arrays are empty when nothing could be loaded.

    Notes
    -----
    Files that cannot be processed are reported with ``print`` and skipped,
    so the returned arrays may be shorter than ``image_list``.
    """
    X = []
    y = []
    for image in image_list:
        try:
            img = cv2.imread(image)
            if img is None:
                # imread signals failure by returning None; say so explicitly
                # instead of letting resize fail with an opaque OpenCV error.
                raise ValueError("file is missing or is not a readable image")
            img = cv2.resize(img, (img_size, img_size))
            img = img.astype(np.float32) / 255.0

            # Only the class folder and the file name decide the label, so an
            # unrelated ancestor directory containing 'NORMAL' cannot turn
            # every sample into class 0.
            class_dir = os.path.basename(os.path.dirname(image))
            file_name = os.path.basename(image)
            label = 0 if ('NORMAL' in class_dir or 'NORMAL' in file_name) else 1
        except Exception as e:
            # Best-effort loading: report the problem and move on.
            print(f"Error processing image {image}: {e}")
            continue
        # Append image and label together so X and y always stay aligned.
        X.append(img)
        y.append(label)
    return np.array(X), np.array(y)

In [None]:
# Splitting and preprocessing the dataset

# Seed every RNG used by this notebook so that the shuffle order below, and
# the augmentation and weight initialisation in later cells, are repeatable
# across Restart & Run All.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)


def _list_image_paths(directory):
    """Return the full path of every entry in ``directory``, sorted by name.

    ``os.listdir`` returns entries in arbitrary order; sorting makes the
    seeded shuffle below produce the same order on every machine.
    """
    return [os.path.join(directory, f) for f in sorted(os.listdir(directory))]


train_pn = _list_image_paths(train_pneumonia_dir)
train_normal = _list_image_paths(train_normal_dir)
test_pn = _list_image_paths(test_pneumonia_dir)
test_normal = _list_image_paths(test_normal_dir)
val_pn = _list_image_paths(val_pneumonia_dir)
val_normal = _list_image_paths(val_normal_dir)

train_imgs = train_pn + train_normal
test_imgs = test_pn + test_normal
val_imgs = val_pn + val_normal

# Mix the two classes within each split (the lists are class-ordered above).
random.shuffle(train_imgs)
random.shuffle(test_imgs)
random.shuffle(val_imgs)

X_learn, y_learn = preprocess_image(train_imgs)
X_test, y_test = preprocess_image(test_imgs)
X_validation, y_validation = preprocess_image(val_imgs)

In [None]:
# Data augmentation
# Random geometric transforms applied to the batches produced by datagen.flow().
augmentation_options = {
    'rotation_range': 20,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Model building - VGG16 pretrained model
# Convolutional base with ImageNet weights and no classifier head; it is
# frozen so that only the layers stacked on top of it are trained.
base_model = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
base_model.trainable = False

# Frozen base followed by a small dense head ending in one sigmoid unit.
vgg16_layers = [
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
]
model_vgg16 = Sequential(vgg16_layers)

model_vgg16.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Callbacks for early stopping and model checkpoint
# Stop once val_loss has not improved for 3 epochs and restore the best weights.
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=3,
    monitor='val_loss',
)
# Keep only the best model seen so far in the checkpoint file.
vgg16_checkpoint_path = 'best_vgg16_model.h5'
model_checkpoint = ModelCheckpoint(filepath=vgg16_checkpoint_path, save_best_only=True)

In [None]:
# Training the VGG16 model
# Augmented training batches; the validation arrays are passed through as-is.
vgg16_train_batches = datagen.flow(X_learn, y_learn, batch_size=32)
vgg16_callbacks = [early_stopping, model_checkpoint]

history_vgg16 = model_vgg16.fit(
    vgg16_train_batches,
    validation_data=(X_validation, y_validation),
    epochs=10,
    callbacks=vgg16_callbacks,
)

In [None]:
# Evaluate the VGG16 model
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
vgg16_results = model_vgg16.evaluate(X_test, y_test)
vgg16_test_loss = vgg16_results[0]
vgg16_test_accuracy = vgg16_results[1]
print(f"VGG16 Model - Test Loss: {vgg16_test_loss}, Test Accuracy: {vgg16_test_accuracy}")

In [None]:
# Model building - Conventional CNN model
# Two conv/pool stages followed by a dense head ending in one sigmoid unit.
model_cnn = Sequential()
model_cnn.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model_cnn.add(MaxPooling2D((2, 2)))
model_cnn.add(Conv2D(64, (3, 3), activation='relu'))
model_cnn.add(MaxPooling2D((2, 2)))
model_cnn.add(Flatten())
model_cnn.add(Dense(128, activation='relu'))
model_cnn.add(Dropout(0.5))
model_cnn.add(Dense(1, activation='sigmoid'))

model_cnn.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Training the conventional CNN model
# Use callbacks dedicated to this run. The shared `model_checkpoint` callback
# writes to 'best_vgg16_model.h5', so reusing it here could overwrite the
# VGG16 checkpoint with CNN weights (and it may still carry the best score
# recorded during the VGG16 run -- NOTE(review): confirm for your Keras version).
cnn_early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
cnn_model_checkpoint = ModelCheckpoint('best_cnn_model.h5', save_best_only=True)

history_cnn = model_cnn.fit(
    datagen.flow(X_learn, y_learn, batch_size=32),
    epochs=10,
    validation_data=(X_validation, y_validation),
    callbacks=[cnn_early_stopping, cnn_model_checkpoint]
)

In [None]:
# Evaluate the conventional CNN model
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
cnn_results = model_cnn.evaluate(X_test, y_test)
cnn_test_loss = cnn_results[0]
cnn_test_accuracy = cnn_results[1]
print(f"CNN Model - Test Loss: {cnn_test_loss}, Test Accuracy: {cnn_test_accuracy}")

In [None]:
# Predictions and classification report
# Sigmoid outputs above 0.5 are mapped to class 1, everything else to class 0.
vgg16_probabilities = model_vgg16.predict(X_test)
pred_vgg16 = (vgg16_probabilities > 0.5).astype(int)
cnn_probabilities = model_cnn.predict(X_test)
pred_cnn = (cnn_probabilities > 0.5).astype(int)

# Print one titled report per model, VGG16 first.
for report_title, predicted_labels in (
    ("VGG16 Model Classification Report", pred_vgg16),
    ("Conventional CNN Model Classification Report", pred_cnn),
):
    print(report_title)
    print(classification_report(y_test, predicted_labels))

In [None]:
# Confusion matrix for VGG16
# Drawn with matplotlib only: the import cell does not import seaborn, so the
# previous `sns.heatmap` call raised NameError on a fresh kernel.
cm_vgg16 = confusion_matrix(y_test, pred_vgg16)

fig_vgg16, ax_vgg16 = plt.subplots(figsize=(8, 8))
heatmap_vgg16 = ax_vgg16.imshow(cm_vgg16, cmap='Blues')
fig_vgg16.colorbar(heatmap_vgg16, ax=ax_vgg16)

# Write the count in every cell; switch to white text on dark cells.
text_threshold_vgg16 = cm_vgg16.max() / 2
for (row, col), count in np.ndenumerate(cm_vgg16):
    ax_vgg16.text(
        col, row, f"{count:d}",
        ha='center', va='center',
        color='white' if count > text_threshold_vgg16 else 'black',
    )

# One tick per class index on each axis.
ax_vgg16.set_xticks(range(cm_vgg16.shape[1]))
ax_vgg16.set_yticks(range(cm_vgg16.shape[0]))
ax_vgg16.set_title("VGG16 Confusion Matrix")
ax_vgg16.set_ylabel('Actual')
ax_vgg16.set_xlabel('Predicted')
plt.show()

In [None]:
# Confusion matrix for CNN
# Drawn with matplotlib only: the import cell does not import seaborn, so the
# previous `sns.heatmap` call raised NameError on a fresh kernel.
cm_cnn = confusion_matrix(y_test, pred_cnn)

fig_cnn, ax_cnn = plt.subplots(figsize=(8, 8))
heatmap_cnn = ax_cnn.imshow(cm_cnn, cmap='Blues')
fig_cnn.colorbar(heatmap_cnn, ax=ax_cnn)

# Write the count in every cell; switch to white text on dark cells.
text_threshold_cnn = cm_cnn.max() / 2
for (row, col), count in np.ndenumerate(cm_cnn):
    ax_cnn.text(
        col, row, f"{count:d}",
        ha='center', va='center',
        color='white' if count > text_threshold_cnn else 'black',
    )

# One tick per class index on each axis.
ax_cnn.set_xticks(range(cm_cnn.shape[1]))
ax_cnn.set_yticks(range(cm_cnn.shape[0]))
ax_cnn.set_title("CNN Confusion Matrix")
ax_cnn.set_ylabel('Actual')
ax_cnn.set_xlabel('Predicted')
plt.show()

In [None]:
# Save the models
# Each trained model is written to its own HDF5 file, VGG16 first.
for trained_model, output_path in (
    (model_vgg16, 'vgg16_model.h5'),
    (model_cnn, 'cnn_model.h5'),
):
    trained_model.save(output_path)

print("Models saved successfully.")