In [None]:
# 📦 1. Imports
import kagglehub
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.metrics import classification_report
import numpy as np

# 📂 2. Download Dataset
path = kagglehub.dataset_download("prashant268/chest-xray-covid19-pneumonia")
dataset = Path(path)
train_dir = dataset / "Data/train"
test_dir  = dataset / "Data/test"

# 📸 3. Preprocessing (Friend-style: VGG16 preprocess)
from tensorflow.keras.applications.vgg16 import preprocess_input

train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_batches = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=(224, 224),
    batch_size=32,
    shuffle=True,
    class_mode='categorical'
)

test_batches = test_datagen.flow_from_directory(
    directory=test_dir,
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,
    class_mode='categorical'
)

# 🧠 4. Model Definition (Friend-style, deep CNN)
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(train_batches.num_classes, activation='softmax')
])

# 🧪 5. Compile Model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 🛠 6. Callbacks (basic)
callbacks = [
    ModelCheckpoint('best_model.keras', save_best_only=True, monitor='loss'),
    ReduceLROnPlateau(monitor='loss', factor=0.3, patience=3, min_lr=1e-6, verbose=1),
    EarlyStopping(monitor='loss', patience=10, restore_best_weights=True, verbose=1)
]

# 🚀 7. Train Model
history = model.fit(
    train_batches,
    epochs=10,
    steps_per_epoch=len(train_batches),
    callbacks=callbacks
)

# ✅ 8. Evaluate Model
loss, acc = model.evaluate(test_batches, steps=len(test_batches))
print(f"\n✅ Test Loss: {loss:.4f}")
print(f"✅ Test Accuracy: {acc:.4f}")

# 📊 9. Classification Report
predictions = model.predict(test_batches, steps=len(test_batches))
predicted_classes = np.argmax(predictions, axis=1)
true_classes = test_batches.classes
labels = list(test_batches.class_indices.keys())

print("\n📄 Classification Report:\n")
print(classification_report(true_classes, predicted_classes, target_names=labels))


Found 5144 images belonging to 3 classes.
Found 1288 images belonging to 3 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 589ms/step - accuracy: 0.7066 - loss: 6.7792 - learning_rate: 1.0000e-04
Epoch 2/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 432ms/step - accuracy: 0.9100 - loss: 0.2845 - learning_rate: 1.0000e-04
Epoch 3/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 388ms/step - accuracy: 0.9296 - loss: 0.2158 - learning_rate: 1.0000e-04
Epoch 4/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 420ms/step - accuracy: 0.9381 - loss: 0.1831 - learning_rate: 1.0000e-04
Epoch 5/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 420ms/step - accuracy: 0.9521 - loss: 0.1465 - learning_rate: 1.0000e-04
Epoch 6/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 386ms/step - accuracy: 0.9589 - loss: 0.1206 - learning_rate: 1.0000e-04
Epoch 7/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 421ms/step - a

In [None]:
from collections import Counter

# Get class indices from train_gen (already loaded)
class_indices = train_batches.class_indices
inv_class_indices = {v: k for k, v in class_indices.items()}  # reverse lookup

# Count the number of samples per class
counts = Counter(train_batches.classes)

# Print each class with its count
print("\n📊 Current Class Distribution:")
for class_id, count in counts.items():
    print(f"  → {inv_class_indices[class_id]}: {count} samples")



📊 Current Class Distribution:
  → COVID19: 460 samples
  → NORMAL: 1266 samples
  → PNEUMONIA: 3418 samples


In [None]:
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

tta_steps = 5
predictions = []

# Create TTA generator with the same preprocessing
tta_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

for i in range(tta_steps):
    tta_gen = tta_datagen.flow_from_directory(
        directory=test_dir,
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        shuffle=False
    )
    preds = model.predict(tta_gen, verbose=0)
    predictions.append(preds)

# Average predictions across TTA runs
final_preds = np.mean(predictions, axis=0)
final_pred_classes = np.argmax(final_preds, axis=1)
true_classes = tta_gen.classes
labels = list(tta_gen.class_indices.keys())

from sklearn.metrics import classification_report
print("\n📄 TTA Classification Report:\n")
print(classification_report(true_classes, final_pred_classes, target_names=labels))


Found 1288 images belonging to 3 classes.


  self._warn_if_super_not_called()


Found 1288 images belonging to 3 classes.
Found 1288 images belonging to 3 classes.
Found 1288 images belonging to 3 classes.
Found 1288 images belonging to 3 classes.

📄 TTA Classification Report:

              precision    recall  f1-score   support

     COVID19       0.99      0.96      0.97       116
      NORMAL       0.85      0.97      0.90       317
   PNEUMONIA       0.99      0.94      0.96       855

    accuracy                           0.95      1288
   macro avg       0.94      0.95      0.95      1288
weighted avg       0.95      0.95      0.95      1288



In [None]:
from sklearn.metrics import accuracy_score

tta_accuracy = accuracy_score(true_classes, final_pred_classes)
print(f"\n✅ TTA Accuracy: {tta_accuracy:.4f}")



✅ TTA Accuracy: 0.9472
