In [None]:

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

import tensorflow as tf
from tensorflow.keras import layers, metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.applications import Xception
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# import tensorflow as tf
# from tensorflow.keras import layers
# import numpy as np
# import pandas as pd
# import os
from tqdm import tqdm
from tensorflow.keras.preprocessing import image

In [None]:
# import os
# import numpy as np
# import pandas as pd
# import tensorflow as tf
# from tensorflow.keras.preprocessing import image
# from tensorflow.keras.applications import Xception
from tensorflow.keras import Sequential, layers, metrics
# from tensorflow.keras.optimizers import Adamax
# from tensorflow.keras.callbacks import EarlyStopping
# from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# import matplotlib.pyplot as plt
# import seaborn as sns

In [None]:
# import os
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
# from sklearn.model_selection import train_test_split
# from tqdm import tqdm
import random

In [None]:
import warnings 
warnings.filterwarnings("ignore")

# Loading Dataset

In [None]:
# ===========================
# Step 1: Load Original Dataset
# ===========================
DATA_DIR = '/kaggle/input/chest-xray-pneumonia/chest_xray'
folders = ['train', 'val', 'test']
IMAGE_EXTENSIONS = ('.jpeg', '.jpg', '.png', '.bmp')


def collect_image_paths(data_dir, folders, categories=('NORMAL', 'PNEUMONIA'),
                        extensions=IMAGE_EXTENSIONS):
    """Gather image file paths and their class labels from a folder tree.

    The tree is expected to look like ``data_dir/<folder>/<category>/<file>``.

    Args:
        data_dir: Root directory of the dataset.
        folders: Sub-folders of ``data_dir`` to scan (e.g. the predefined splits).
        categories: Class sub-folder names; each one is also used as the label.
        extensions: Lower-case file suffixes accepted as images. Anything else
            (hidden files such as ``.DS_Store``, stray metadata) is skipped so it
            cannot break image decoding later in the notebook.

    Returns:
        ``(filepaths, labels)`` -- two parallel lists. Files inside each
        directory are visited in sorted order so repeated runs produce the
        same ordering.
    """
    filepaths, labels = [], []
    for folder in folders:
        for category in categories:
            cat_path = os.path.join(data_dir, folder, category)
            if not os.path.isdir(cat_path):
                # Report loudly instead of failing: some copies of a dataset
                # ship without every split folder.
                print(f"Warning: directory not found, skipped: {cat_path}")
                continue
            for fname in sorted(os.listdir(cat_path)):
                if not fname.lower().endswith(extensions):
                    continue
                filepaths.append(os.path.join(cat_path, fname))
                labels.append(category)
    return filepaths, labels


filepaths, labels = collect_image_paths(DATA_DIR, folders)

df = pd.DataFrame({'filepath': filepaths, 'label': labels})
print("Original dataset size:", len(df))
print(df['label'].value_counts())

# Visualizing original class distribution

In [None]:
# ===========================
# Step 2: Visualize Original Class Distribution
# ===========================
# One bar per class label, counted over the full dataframe built in Step 1.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='label', palette='viridis', ax=ax)
ax.set_title("Original Class Distribution")
plt.show()

# Augmenting only the 'NORMAL' class images

In [None]:

# ===========================
# Step 3: Augment Only NORMAL Class
# ===========================
# Grows the dataset toward `target_size` total images by adding one augmented
# copy per selected NORMAL image; PNEUMONIA images are left untouched.
AUG_SEED = 42                 # fixed so Restart & Run All reproduces the same augmentations
AUG_IMAGE_SHAPE = (256, 256)  # (height, width) every image is resized to

normal_df = df[df['label'] == 'NORMAL']
pneumonia_df = df[df['label'] == 'PNEUMONIA']

target_size = 7000
current_size = len(df)
# Clamp at zero: a negative count would turn the slice below into
# "all but the last k paths" instead of "no paths".
images_needed = max(0, target_size - current_size)
print(f"Current total: {current_size}, Need to add: {images_needed}")

# Number of new images to generate from NORMAL class
normal_to_add = images_needed

datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Shuffle a private list of paths (not the array backing the dataframe) with a
# dedicated, seeded RNG so the selection is reproducible.
aug_rng = random.Random(AUG_SEED)
normal_images = normal_df['filepath'].tolist()
aug_rng.shuffle(normal_images)

if normal_images and normal_to_add > len(normal_images):
    # More copies requested than there are source images: use every image once,
    # then top up by sampling with replacement so the target is still reached.
    shortfall = normal_to_add - len(normal_images)
    source_paths = normal_images + aug_rng.choices(normal_images, k=shortfall)
else:
    source_paths = normal_images[:normal_to_add]

augmented_images = []
augmented_labels = []

for idx, img_path in enumerate(tqdm(source_paths, desc="Augmenting NORMAL class")):
    try:
        img = load_img(img_path, target_size=AUG_IMAGE_SHAPE)
        x = np.expand_dims(img_to_array(img), axis=0)
        # flow() yields batches indefinitely; take exactly one augmented sample.
        # A per-image seed keeps the run reproducible without giving every
        # image the same transform.
        batch = next(datagen.flow(x, batch_size=1, seed=AUG_SEED + idx))
        augmented_images.append(batch[0].astype(np.uint8))
        augmented_labels.append('NORMAL')
    except Exception as e:
        # Best effort: report the unreadable file and continue with the rest.
        print(f"Error augmenting {img_path}: {e}")

# Convert augmented data to arrays. The explicit dtype and reshape keep the
# (n, height, width, 3) rank even when n == 0, so the arrays can still be
# concatenated with the original images.
augmented_images = np.array(augmented_images, dtype=np.uint8).reshape(
    (-1,) + AUG_IMAGE_SHAPE + (3,)
)
augmented_labels = np.array(augmented_labels, dtype=str)
print("Augmented new NORMAL images:", len(augmented_labels))

In [None]:

# ===========================
# Step 3b: Sanity-Check Augmentation Output
# ===========================
# Validates the arrays produced by the Step 3 cell above rather than running the
# augmentation a second time (a second run would only regenerate and overwrite
# `augmented_images` / `augmented_labels`).
assert len(augmented_images) == len(augmented_labels), "images/labels length mismatch"

if len(augmented_labels):
    assert augmented_images.shape[1:] == (256, 256, 3), augmented_images.shape
    assert np.all(augmented_labels == 'NORMAL'), "only NORMAL images should be augmented"

# Class counts the combined dataset will have once the augmented copies are added.
projected_counts = df['label'].value_counts()
projected_counts['NORMAL'] = projected_counts.get('NORMAL', 0) + len(augmented_labels)

print("Augmented new NORMAL images:", len(augmented_labels))
print("Projected class counts after augmentation:")
print(projected_counts)

# Combining original + augmented dataset for training

In [None]:
# ===========================
# Step 4: Combine Original + Augmented Data
# ===========================
IMG_SHAPE = (256, 256)  # (height, width) used for every decoded image

# Decode the original images straight into one preallocated uint8 array.
# Pixel values are whole numbers in 0..255, so uint8 is lossless and uses a
# quarter of the memory a float32 array would; conversion to float happens
# in the normalisation step.
all_images = np.empty((len(df),) + IMG_SHAPE + (3,), dtype=np.uint8)
all_labels = []
loaded = 0

for filepath, label in tqdm(zip(df['filepath'], df['label']), total=len(df),
                            desc="Loading Original Images"):
    try:
        img = load_img(filepath, target_size=IMG_SHAPE)
    except Exception as e:
        # Same best-effort policy as the augmentation step: report and skip,
        # so one unreadable file does not abort the whole load.
        print(f"Error loading {filepath}: {e}")
        continue
    all_images[loaded] = np.asarray(img, dtype=np.uint8)
    all_labels.append(label)
    loaded += 1

# Drop the unused tail left behind by skipped files.
all_images = all_images[:loaded]
all_labels = np.array(all_labels, dtype=str)

# Combine both. The reshape gives an empty augmentation result (a 1-D array)
# the image rank that concatenate requires.
augmented_block = np.asarray(augmented_images, dtype=np.uint8).reshape(
    (-1,) + IMG_SHAPE + (3,)
)
final_images = np.concatenate((all_images, augmented_block), axis=0)
final_labels = np.concatenate(
    (all_labels, np.asarray(augmented_labels, dtype=str)), axis=0
)

print(f"\nFinal Dataset Size after augmentation: {len(final_labels)}")

# Visualizing new class distribution

In [None]:
# ===========================
# Step 5: Visualize New Class Distribution
# ===========================
# Same bar chart as Step 2, now counted over the combined label array.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(x=final_labels, palette='coolwarm', ax=ax)
ax.set_title("Class Distribution After Augmentation")
plt.show()

# Train / Validation / Test Split (70/15/15)

In [None]:
# ===========================
# Step 6: Train/Val/Test Split (70/15/15)
# ===========================
# NOTE(review): final_images already contains the augmented NORMAL copies, so an
# augmented image and the original it was derived from can end up in different
# splits, which may make validation/test scores optimistic. Splitting before
# augmenting would avoid that -- confirm whether it matters for this study.
SPLIT_SEED = 42

# First cut: 70% train, 30% held out, class proportions preserved.
X_train, X_temp, y_train, y_temp = train_test_split(
    final_images,
    final_labels,
    test_size=0.3,
    stratify=final_labels,
    random_state=SPLIT_SEED,
)

# Second cut: halve the held-out part into validation and test (15% each).
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=SPLIT_SEED,
)

for split_name, split_images, split_labels in (
    ("Train", X_train, y_train),
    ("Validation", X_val, y_val),
    ("Test", X_test, y_test),
):
    print(f"{split_name}:", split_images.shape, len(split_labels))

In [None]:
# ===========================
# Step 7: Verify New Distribution
# ===========================
# One count plot per split, side by side, to confirm stratification kept the
# class proportions comparable.
fig, axs = plt.subplots(1, 3, figsize=(15, 5))

split_panels = (
    (y_train, 'viridis', "Train Distribution"),
    (y_val, 'coolwarm', "Validation Distribution"),
    (y_test, 'magma', "Test Distribution"),
)
for ax, (split_labels, palette, title) in zip(axs, split_panels):
    sns.countplot(x=split_labels, ax=ax, palette=palette)
    ax.set_title(title)

plt.tight_layout()
plt.show()

# Normalisation

In [None]:
# ===========================
# Step 8: Normalize Pixel Values (0-1)
# ===========================
def scale_to_unit_range(images):
    """Return `images` as float32 scaled from the 0..255 range into 0..1.

    The call is idempotent: an array whose maximum is already <= 1.0 is treated
    as normalised and is only cast to float32. This makes the cell safe to
    re-run, whereas a plain ``X = X / 255`` would divide again on every run.

    Args:
        images: Array-like of pixel values.

    Returns:
        A float32 numpy array with the same shape as `images`.
    """
    images = np.asarray(images)
    if images.size and images.max() > 1.0:
        return images.astype('float32') / 255.0
    return images.astype('float32', copy=False)


X_train = scale_to_unit_range(X_train)
X_val = scale_to_unit_range(X_val)
X_test = scale_to_unit_range(X_test)

# Label Encoding

In [None]:
# ===========================
# Step 9: Encode Labels to 0/1
# ===========================
# The encoder is fitted on the training labels only; the same mapping is then
# applied to every split.
le = LabelEncoder()
le.fit(y_train)
y_train_enc, y_val_enc, y_test_enc = (
    le.transform(split_labels) for split_labels in (y_train, y_val, y_test)
)

class_codes = le.transform(le.classes_)
print("Encoded labels:", {name: code for name, code in zip(le.classes_, class_codes)})

In [None]:
# ===========================
# Step 10: Verify Everything
# ===========================
# Final check before modelling: array shapes and per-split counts of the
# encoded labels.
print("\nFinal Data Shapes:")
encoded_splits = (
    ("train", X_train, y_train_enc),
    ("val", X_val, y_val_enc),
    ("test", X_test, y_test_enc),
)
for tag, split_images, split_codes in encoded_splits:
    print(f"X_{tag}:", split_images.shape, f"y_{tag}:", split_codes.shape)

fig, axs = plt.subplots(1, 3, figsize=(15, 5))
encoded_panels = (
    (y_train_enc, 'viridis', "Train Distribution"),
    (y_val_enc, 'coolwarm', "Validation Distribution"),
    (y_test_enc, 'magma', "Test Distribution"),
)
for ax, (split_codes, palette, title) in zip(axs, encoded_panels):
    sns.countplot(x=split_codes, ax=ax, palette=palette)
    ax.set_title(title)
plt.tight_layout()
plt.show()

# Xception Net

In [None]:
# ===========================
# Step 11: Build Xception Model
# ===========================
# Frozen ImageNet Xception backbone (global-average pooled) followed by a small
# dense head that ends in a single sigmoid unit for the binary label.
base_model = Xception(weights='imagenet', include_top=False, pooling='avg', input_shape=(256, 256, 3))
base_model.trainable = False

model = Sequential([
    layers.Input(shape=(256, 256, 3)),
    # The image arrays were scaled to [0, 1] in the normalisation step, but the
    # ImageNet Xception weights expect inputs in [-1, 1] (the range produced by
    # xception.preprocess_input). Remap inside the model so the frozen
    # backbone sees the input range it was trained on.
    layers.Rescaling(scale=2.0, offset=-1.0),
    base_model,
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(220, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(60, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=Adamax(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy', metrics.Precision(), metrics.Recall(), metrics.AUC()]
)

model.summary()

In [None]:
# ===========================
# Step 12: Train Xception Model
# ===========================
# Stop once validation loss has not improved for 5 epochs and roll back to the
# best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

fit_options = dict(
    validation_data=(X_val, y_val_enc),
    epochs=20,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)
history = model.fit(X_train, y_train_enc, **fit_options)

In [None]:
# ===========================
# Step 13: Evaluate Xception Model
# ===========================
# evaluate() returns the loss followed by the compiled metrics in the order
# they were passed to compile(): accuracy, precision, recall, AUC.
test_loss, test_acc, test_prec, test_rec, test_auc = model.evaluate(X_test, y_test_enc, verbose=1)
# The leading symbol was a mis-decoded chart emoji ("ðŸ“Š"); restored here.
print(f"\n📊 Test Accuracy: {test_acc:.4f}")
print(f"Precision: {test_prec:.4f}")
print(f"Recall: {test_rec:.4f}")
print(f"AUC: {test_auc:.4f}")

In [None]:
# ===========================
# Step 14: Plot Xception Training Metrics
# ===========================
# Left panel: accuracy per epoch; right panel: loss per epoch (train vs. validation).
fig, curve_axes = plt.subplots(1, 2, figsize=(14, 5))

for ax, (metric_key, metric_label) in zip(curve_axes, (('accuracy', 'Accuracy'), ('loss', 'Loss'))):
    ax.plot(history.history[metric_key], label=f'Train {metric_label}')
    ax.plot(history.history[f'val_{metric_key}'], label=f'Val {metric_label}')
    ax.set_title(f'Model {metric_label}')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(metric_label)
    ax.legend()

plt.show()

# DenseNet121

In [None]:
from tensorflow.keras.applications import DenseNet121

# Frozen ImageNet DenseNet121 backbone (global-average pooled) with the same
# dense head used for the other backbones in this notebook.
base_model = DenseNet121(weights='imagenet', include_top=False, pooling='avg', input_shape=(256, 256, 3))
base_model.trainable = False

model = Sequential([
    layers.Input(shape=(256, 256, 3)),
    # The image arrays are already in [0, 1]; the ImageNet DenseNet weights
    # additionally expect each RGB channel to be standardised with the ImageNet
    # mean / standard deviation (what densenet.preprocess_input does after its
    # own 0-1 scaling). Apply that remaining step inside the model.
    layers.Normalization(
        axis=-1,
        mean=[0.485, 0.456, 0.406],
        variance=[0.229 ** 2, 0.224 ** 2, 0.225 ** 2],
    ),
    base_model,
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(220, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(60, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])


In [None]:
# Compile the DenseNet121 classifier with the same optimiser, loss and metric
# set as the other backbones so their results are comparable.
tracked_metrics = ['accuracy', metrics.Precision(), metrics.Recall(), metrics.AUC()]
model.compile(
    optimizer=Adamax(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=tracked_metrics,
)

model.summary()

In [None]:
# ===========================
# Train DenseNet121 Model
# ===========================
# Stop once validation loss has not improved for 5 epochs and roll back to the
# best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

fit_options = dict(
    validation_data=(X_val, y_val_enc),
    epochs=20,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)
history = model.fit(X_train, y_train_enc, **fit_options)

In [None]:
# ===========================
# Evaluate DenseNet121 Model
# ===========================
# evaluate() returns the loss followed by the compiled metrics in the order
# they were passed to compile(): accuracy, precision, recall, AUC.
test_loss, test_acc, test_prec, test_rec, test_auc = model.evaluate(X_test, y_test_enc, verbose=1)
# The leading symbol was a mis-decoded chart emoji ("ðŸ“Š"); restored here.
print(f"\n📊 Test Accuracy: {test_acc:.4f}")
print(f"Precision: {test_prec:.4f}")
print(f"Recall: {test_rec:.4f}")
print(f"AUC: {test_auc:.4f}")

In [None]:
# ===========================
# Plot DenseNet121 Training Metrics
# ===========================
# Left panel: accuracy per epoch; right panel: loss per epoch (train vs. validation).
fig, curve_axes = plt.subplots(1, 2, figsize=(14, 5))

for ax, (metric_key, metric_label) in zip(curve_axes, (('accuracy', 'Accuracy'), ('loss', 'Loss'))):
    ax.plot(history.history[metric_key], label=f'Train {metric_label}')
    ax.plot(history.history[f'val_{metric_key}'], label=f'Val {metric_label}')
    ax.set_title(f'Model {metric_label}')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(metric_label)
    ax.legend()

plt.show()

# ResNet50

In [None]:
from tensorflow.keras.applications import ResNet50

# Frozen ImageNet ResNet50 backbone (global-average pooled) with the same dense
# head used for the other backbones in this notebook.
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg', input_shape=(256, 256, 3))
base_model.trainable = False

model = Sequential([
    layers.Input(shape=(256, 256, 3)),
    # The image arrays are in [0, 1], but the ImageNet ResNet50 weights expect
    # what resnet50.preprocess_input produces from 0-255 RGB input (channels
    # reordered to BGR and zero-centred per channel, no scaling). Restore the
    # 0-255 range and apply the official preprocessing inside the model.
    layers.Rescaling(255.0),
    layers.Lambda(tf.keras.applications.resnet50.preprocess_input),
    base_model,
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(220, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(60, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=Adamax(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy', metrics.Precision(), metrics.Recall(), metrics.AUC()]
)

model.summary()

In [None]:
# ===========================
# Train ResNet50 Model
# ===========================
# Stop once validation loss has not improved for 5 epochs and roll back to the
# best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

fit_options = dict(
    validation_data=(X_val, y_val_enc),
    epochs=20,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)
history = model.fit(X_train, y_train_enc, **fit_options)

In [None]:
# ===========================
# Evaluate ResNet50 Model
# ===========================
# evaluate() returns the loss followed by the compiled metrics in the order
# they were passed to compile(): accuracy, precision, recall, AUC.
test_loss, test_acc, test_prec, test_rec, test_auc = model.evaluate(X_test, y_test_enc, verbose=1)
# The leading symbol was a mis-decoded chart emoji ("ðŸ“Š"); restored here.
print(f"\n📊 Test Accuracy: {test_acc:.4f}")
print(f"Precision: {test_prec:.4f}")
print(f"Recall: {test_rec:.4f}")
print(f"AUC: {test_auc:.4f}")

In [None]:
# ===========================
# Plot ResNet50 Training Metrics
# ===========================
# Left panel: accuracy per epoch; right panel: loss per epoch (train vs. validation).
fig, curve_axes = plt.subplots(1, 2, figsize=(14, 5))

for ax, (metric_key, metric_label) in zip(curve_axes, (('accuracy', 'Accuracy'), ('loss', 'Loss'))):
    ax.plot(history.history[metric_key], label=f'Train {metric_label}')
    ax.plot(history.history[f'val_{metric_key}'], label=f'Val {metric_label}')
    ax.set_title(f'Model {metric_label}')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(metric_label)
    ax.legend()

plt.show()

# InceptionV3

In [None]:
from tensorflow.keras.applications import InceptionV3

# Frozen ImageNet InceptionV3 backbone (global-average pooled) with the same
# dense head used for the other backbones in this notebook.
base_model = InceptionV3(weights='imagenet', include_top=False, pooling='avg', input_shape=(256, 256, 3))
base_model.trainable = False

model = Sequential([
    layers.Input(shape=(256, 256, 3)),
    # The image arrays are in [0, 1], but the ImageNet InceptionV3 weights
    # expect inputs in [-1, 1] (the range produced by
    # inception_v3.preprocess_input). Remap inside the model so the frozen
    # backbone sees the input range it was trained on.
    layers.Rescaling(scale=2.0, offset=-1.0),
    base_model,
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(220, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(60, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=Adamax(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy', metrics.Precision(), metrics.Recall(), metrics.AUC()]
)

model.summary()

In [None]:
# ===========================
# Train InceptionV3 Model
# ===========================
# Stop once validation loss has not improved for 5 epochs and roll back to the
# best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

fit_options = dict(
    validation_data=(X_val, y_val_enc),
    epochs=20,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)
history = model.fit(X_train, y_train_enc, **fit_options)

In [None]:
# ===========================
# Evaluate InceptionV3 Model
# ===========================
# evaluate() returns the loss followed by the compiled metrics in the order
# they were passed to compile(): accuracy, precision, recall, AUC.
test_loss, test_acc, test_prec, test_rec, test_auc = model.evaluate(X_test, y_test_enc, verbose=1)
# The leading symbol was a mis-decoded chart emoji ("ðŸ“Š"); restored here.
print(f"\n📊 Test Accuracy: {test_acc:.4f}")
print(f"Precision: {test_prec:.4f}")
print(f"Recall: {test_rec:.4f}")
print(f"AUC: {test_auc:.4f}")

In [None]:
# ===========================
# Plot InceptionV3 Training Metrics
# ===========================
# Left panel: accuracy per epoch; right panel: loss per epoch (train vs. validation).
fig, curve_axes = plt.subplots(1, 2, figsize=(14, 5))

for ax, (metric_key, metric_label) in zip(curve_axes, (('accuracy', 'Accuracy'), ('loss', 'Loss'))):
    ax.plot(history.history[metric_key], label=f'Train {metric_label}')
    ax.plot(history.history[f'val_{metric_key}'], label=f'Val {metric_label}')
    ax.set_title(f'Model {metric_label}')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(metric_label)
    ax.legend()

plt.show()