In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem at this location
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import numpy as np
import cv2
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from imblearn.over_sampling import SMOTE
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt


In [None]:
# Define dataset paths.
# The split folders are joined as *relative* names: when a later argument to
# os.path.join is an absolute path, everything before it (here base_dir) is
# discarded, which made the previous join a no-op duplicate of the literal.
base_dir = '/content/extracted_files/chest_xray_VGG16_SMOTE'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')


In [None]:
# Image generators for the train and test sets. No augmentation is configured:
# the only transformation is multiplying every pixel value by 1/255.
RESCALE_FACTOR = 1./255
train_datagen = ImageDataGenerator(rescale=RESCALE_FACTOR)
test_datagen = ImageDataGenerator(rescale=RESCALE_FACTOR)


In [None]:
# Flow training images from the directory
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),  # Resize the images to save memory (e.g., 128x128 instead of 224x224)
    batch_size=32,
    class_mode='binary',  # Binary classification (Normal vs Pneumonia)
    shuffle=True,
    seed=42  # Seed the shuffle so the order of the collected samples is repeatable between runs
)


Found 5232 images belonging to 2 classes.


In [None]:
# Stream the test images from disk. Shuffling is off so batches come in a
# fixed order (later cells compare predictions against test_generator.classes).
test_generator = test_datagen.flow_from_directory(
    directory=test_dir,
    class_mode='binary',  # Binary classification (Normal vs Pneumonia)
    target_size=(128, 128),  # Resize the images to save memory
    batch_size=32,
    shuffle=False,
)

Found 624 images belonging to 2 classes.


In [None]:
# Start with two separate, empty accumulators: one for images, one for labels
X_train = []
y_train = []


In [None]:
# Collect every batch from the train generator into in-memory arrays.
# Batches are accumulated in cell-local lists instead of being appended to
# X_train / y_train directly: once those names hold numpy arrays, re-running
# this cell would otherwise fail because ndarray has no .append method.
image_batches, label_batches = [], []
for i in range(len(train_generator)):
    # Indexing the generator yields one (images, labels) batch
    img_batch, label_batch = train_generator[i]
    image_batches.append(img_batch)
    label_batches.append(label_batch)

# Stack the per-batch arrays along the sample axis
X_train = np.concatenate(image_batches, axis=0)
y_train = np.concatenate(label_batches, axis=0)

# Check the shapes of the resulting arrays
print(X_train.shape, y_train.shape)

(5232, 128, 128, 3) (5232,)


In [None]:
# Flatten each image into a single feature row for SMOTE
n_samples = X_train.shape[0]
X_train_flattened = X_train.reshape(n_samples, -1)


In [None]:
# Oversample the flattened training data with SMOTE (fixed random_state)
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train_flattened, y_train)
X_train_resampled, y_train_resampled = resampled




In [None]:
# Reshape the images back to their original shape.
# The per-image shape is read from X_train instead of being hard-coded, so it
# stays in sync with whatever target_size the generator produced.
X_train_resampled = X_train_resampled.reshape((-1,) + X_train.shape[1:])


In [None]:
# Show the shapes of the resampled images and labels side by side
print(*(arr.shape for arr in (X_train_resampled, y_train_resampled)))

(7766, 128, 128, 3) (7766,)


In [None]:
# VGG16 base with ImageNet weights, created without its top classifier and
# with an input shape of 128x128x3
vgg_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(128, 128, 3),
)


In [None]:
# Freeze the convolutional base.
# Setting `trainable` on the model is recursive in Keras: every layer inside
# vgg_base becomes non-trainable, so no explicit per-layer loop is needed.
vgg_base.trainable = False


In [None]:
# Assemble the classifier layer by layer: VGG16 base followed by a dense head
model = Sequential()
model.add(vgg_base)
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # Binary classification (Normal vs Pneumonia)

In [None]:
# Configure training (Adam optimizer, binary cross-entropy loss, accuracy
# metric), then print the model summary
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# Train the model.
# Keras' `validation_split` takes the LAST fraction of the arrays, before any
# shuffling. NOTE(review): SMOTE appears to append its synthetic samples after
# the original ones, in which case that tail slice holds only synthetic
# minority-class images and the validation metrics say little about real
# data — confirm against the imbalanced-learn version in use.
# A shuffled, stratified hold-out keeps both classes in the validation set and
# keeps the training portion class-balanced.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train_resampled, y_train_resampled,
    test_size=0.2,  # Use 20% of the data for validation
    stratify=y_train_resampled,
    random_state=42,
)

history = model.fit(
    X_fit, y_fit,
    validation_data=(X_val, y_val),
    epochs=10,  # Adjust as needed
    batch_size=32,
    verbose=1
)

Epoch 1/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1738s[0m 9s/step - accuracy: 0.8509 - loss: 0.3551 - val_accuracy: 0.9897 - val_loss: 0.0450
Epoch 2/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1743s[0m 9s/step - accuracy: 0.9615 - loss: 0.1040 - val_accuracy: 0.9878 - val_loss: 0.0523
Epoch 3/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1748s[0m 9s/step - accuracy: 0.9678 - loss: 0.0931 - val_accuracy: 0.9981 - val_loss: 0.0120
Epoch 4/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1710s[0m 9s/step - accuracy: 0.9750 - loss: 0.0778 - val_accuracy: 0.9865 - val_loss: 0.0545
Epoch 5/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1710s[0m 9s/step - accuracy: 0.9705 - loss: 0.0677 - val_accuracy: 0.9762 - val_loss: 0.0803
Epoch 6/10
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1695s[0m 8s/step - accuracy: 0.9728 - loss: 0.0732 - val_accuracy: 0.9891 - val_loss: 0.0428
Epoch 7/10
[1m1

In [None]:
# Score the model on the test generator and report accuracy as a percentage
evaluation = model.evaluate(test_generator, verbose=1)
test_loss, test_accuracy = evaluation
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


  self._warn_if_super_not_called()


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 7s/step - accuracy: 0.6190 - loss: 1.8258
Test Accuracy: 78.04%


In [None]:
# Predict on the test data, then threshold the sigmoid outputs at 0.5 to get
# integer class labels
probabilities = model.predict(test_generator)
predictions = (probabilities > 0.5).astype("int32")


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 7s/step


In [None]:
# Confusion Matrix (first argument: true labels, second: predicted labels).
# confusion_matrix and classification_report are already imported in the
# notebook's import cell, so they are not re-imported here.
print("Confusion Matrix")
print(confusion_matrix(test_generator.classes, predictions))


Confusion Matrix
[[102 132]
 [  5 385]]


In [None]:
# Take the display names from the generator's own class-name -> index mapping,
# ordered by index, so the report labels cannot drift from the integer labels
# stored in test_generator.classes.
class_indices = test_generator.class_indices
target_names = sorted(class_indices, key=class_indices.get)

print("Classification Report")
print(classification_report(test_generator.classes, predictions, target_names=target_names))


Classification Report
              precision    recall  f1-score   support

      NORMAL       0.95      0.44      0.60       234
   PNEUMONIA       0.74      0.99      0.85       390

    accuracy                           0.78       624
   macro avg       0.85      0.71      0.72       624
weighted avg       0.82      0.78      0.75       624

