In [1]:
!pip install kaggle



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!mkdir ~/.kaggle

In [4]:
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json

In [5]:
! chmod 600 ~/.kaggle/kaggle.json

In [6]:
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

Dataset URL: https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia
License(s): other
Downloading chest-xray-pneumonia.zip to /content
 99% 2.28G/2.29G [00:21<00:00, 223MB/s]
100% 2.29G/2.29G [00:21<00:00, 114MB/s]


In [7]:
!unzip -q chest-xray-pneumonia.zip -d /content/chest_xray
print("Dataset extracted to /content/chest_xray")

Dataset extracted to /content/chest_xray


In [None]:
!pip install tensorflow



In [8]:

# ✅ Continue from where you stopped
# Dataset has been downloaded and extracted to: /content/chest_xray

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix
import shutil
import random


In [9]:
# Paths
base_path = '/content/chest_xray/chest_xray'
train_dir = os.path.join(base_path, 'train')
val_dir = os.path.join(base_path, 'val')
test_dir = os.path.join(base_path, 'test')

In [10]:
# ✅ Efficient Data Preparation for Stage 1 (Normal vs Pneumonia)
# Optimized for lower computation with comprehensive augmentation

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# ⚡ Optimized Configuration
img_size = 192  # Reduced from 224 for 30% less computation
batch_size = 64  # Increased for better GPU utilization

# 🔧 Enable Mixed Precision for 50% faster training
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# 🎯 Comprehensive Data Augmentation - All Angles & Rotations
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=360,  # Full rotation coverage
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=[0.7, 1.3],
    horizontal_flip=True,
    shear_range=0.15,
    brightness_range=[0.8, 1.2],
    fill_mode='reflect'
)

val_test_datagen = ImageDataGenerator(rescale=1./255)

# 📊 Generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True
)

val_generator = val_test_datagen.flow_from_directory(
    val_dir,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary'
)

test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False
)

# 🚀 Additional tf.data Optimizations
def optimize_dataset(generator):
    dataset = tf.data.Dataset.from_generator(
        lambda: generator,
        output_signature=(
            tf.TensorSpec(shape=(None, img_size, img_size, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(None,), dtype=tf.float32)
        )
    )
    return dataset.cache().prefetch(tf.data.AUTOTUNE)

# Apply optimizations
train_dataset = optimize_dataset(train_generator)
val_dataset = optimize_dataset(val_generator)
test_dataset = optimize_dataset(test_generator)

# 🎯 Performance Optimizations
tf.config.optimizer.set_jit(True)  # Enable XLA
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_memory_growth(gpus[0], True)

print(f"✅ Optimized: {img_size}x{img_size}, batch={batch_size}, 360° rotation, mixed precision")

Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Found 624 images belonging to 2 classes.
✅ Optimized: 192x192, batch=64, 360° rotation, mixed precision


In [11]:
# ✅ Stage 1 Model: ResNet50 for Normal vs Pneumonia
from tensorflow.keras.applications import ResNet50

base_model_1 = ResNet50(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))

x = base_model_1.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
output_1 = Dense(1, activation='sigmoid')(x)

model_stage1 = Model(inputs=base_model_1.input, outputs=output_1)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [12]:
# Freeze base layers
for layer in base_model_1.layers:
    layer.trainable = False

model_stage1.compile(optimizer='adam',
                     loss='binary_crossentropy',
                     metrics=['accuracy'])

callbacks = [
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
]

In [14]:
# Train Stage 1 model
history_stage1 = model_stage1.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    callbacks=callbacks
)

Epoch 1/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1224s[0m 15s/step - accuracy: 0.7572 - loss: 0.5114 - val_accuracy: 0.5625 - val_loss: 0.6747
Epoch 2/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1205s[0m 15s/step - accuracy: 0.7502 - loss: 0.5223 - val_accuracy: 0.5625 - val_loss: 0.7037
Epoch 3/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1204s[0m 15s/step - accuracy: 0.7484 - loss: 0.5133 - val_accuracy: 0.6250 - val_loss: 0.6303
Epoch 4/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1204s[0m 15s/step - accuracy: 0.7652 - loss: 0.4921 - val_accuracy: 0.6250 - val_loss: 0.6438
Epoch 5/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1200s[0m 15s/step - accuracy: 0.7678 - loss: 0.4899 - val_accuracy: 0.6250 - val_loss: 0.6625
Epoch 6/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1222s[0m 15s/step - accuracy: 0.7659 - loss: 0.4948 - val_accuracy: 0.5625 - val_loss: 0.6561


In [15]:
# Save predictions from Stage 1 for Stage 2 input
filenames = test_generator.filenames
true_labels = test_generator.classes
pred_probs = model_stage1.predict(test_generator)
pred_labels = (pred_probs > 0.5).astype(int).flatten()

  self._warn_if_super_not_called()


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 14s/step


In [16]:
# Collect pneumonia cases only for Stage 2
pneumonia_indices = np.where(pred_labels == 1)[0]
pneumonia_filenames = [filenames[i] for i in pneumonia_indices]
pneumonia_preds = pred_probs[pneumonia_indices]

In [17]:
# ✅ Stage 2 Preparation: Bacterial vs Viral classification setup
# Construct custom dataframe for Stage 2 filtering from original test set
stage2_dir = '/content/stage2_filtered'
if os.path.exists(stage2_dir):
    shutil.rmtree(stage2_dir)
os.makedirs(stage2_dir + '/bacteria', exist_ok=True)
os.makedirs(stage2_dir + '/virus', exist_ok=True)

In [18]:
# Copy only pneumonia cases to new dir and separate by label
for i in pneumonia_indices:
    fname = filenames[i]
    label = true_labels[i]
    full_path = os.path.join(test_dir, fname)
    if 'bacteria' in fname.lower():
        shutil.copy(full_path, os.path.join(stage2_dir, 'bacteria', os.path.basename(fname)))
    elif 'virus' in fname.lower():
        shutil.copy(full_path, os.path.join(stage2_dir, 'virus', os.path.basename(fname)))


In [19]:
# Stage 2 Data Generator
stage2_datagen = ImageDataGenerator(rescale=1./255)
stage2_generator = stage2_datagen.flow_from_directory(
    stage2_dir,
    target_size=(img_size, img_size),
    batch_size=1,
    class_mode='binary',
    shuffle=False
)

Found 336 images belonging to 2 classes.


In [20]:
# ✅ Stage 2 Model: InceptionV3 for Bacterial vs Viral
base_model_2 = InceptionV3(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))

y2 = base_model_2.output
y2 = GlobalAveragePooling2D()(y2)
y2 = Dropout(0.5)(y2)
y2 = Dense(128, activation='relu')(y2)
output_2 = Dense(1, activation='sigmoid')(y2)

model_stage2 = Model(inputs=base_model_2.input, outputs=output_2)

for layer in base_model_2.layers:
    layer.trainable = False

model_stage2.compile(optimizer='adam',
                     loss='binary_crossentropy',
                     metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [21]:
# Train Stage 2
history_stage2 = model_stage2.fit(
    stage2_generator,
    epochs=10,
    callbacks=[EarlyStopping(monitor='loss', patience=2, restore_best_weights=True)]
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 201ms/step - accuracy: 0.6221 - loss: 5.5282
Epoch 2/10
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 201ms/step - accuracy: 0.6039 - loss: 6.3847
Epoch 3/10
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 198ms/step - accuracy: 0.6280 - loss: 5.9957


In [22]:
# Predict on Stage 2 filtered samples
stage2_probs = model_stage2.predict(stage2_generator)
stage2_preds = (stage2_probs > 0.5).astype(int).flatten()


[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 194ms/step


In [23]:
# ✅ Diagnosis and Treatment Recommendations
# 0: Bacteria → Antibiotics recommended
# 1: Virus → Supportive treatment only

diagnosis_report = []
for fname, pred in zip(stage2_generator.filenames, stage2_preds):
    if pred == 0:
        diagnosis = "Bacterial Pneumonia"
        treatment = "Prescribe antibiotics such as Azithromycin or Amoxicillin."
    else:
        diagnosis = "Viral Pneumonia"
        treatment = "Provide supportive care. Antivirals if confirmed and early."
    diagnosis_report.append({
        'Filename': fname,
        'Diagnosis': diagnosis,
        'Treatment': treatment
    })


In [24]:
# Save report
diagnosis_df = pd.DataFrame(diagnosis_report)
diagnosis_df.to_csv("diagnosis_treatment_stage2.csv", index=False)
print("\n✅ Diagnosis and treatment report saved to diagnosis_treatment_stage2.csv")



✅ Diagnosis and treatment report saved to diagnosis_treatment_stage2.csv
