# 🏥 Medical AI Bot - High Accuracy Training (DenseNet121) 🏥

This notebook is designed to train a **highly accurate** medical image classifier.
We use **DenseNet121**, a powerful architecture for X-ray analysis, and automatically download high-quality data from Kaggle.

### 🚀 Step 1: Initialize & Authenticate
1.  Upload your **`kaggle.json`** file below (Get it from your [Kaggle Account](https://www.kaggle.com/account) -> API -> Create New Token).

In [None]:
!pip install -q tf-keras kaggle
import os
from google.colab import files

# Force TensorFlow to use Keras 2 (legacy) format
os.environ['TF_USE_LEGACY_KERAS'] = '1'

# Upload kaggle.json
if not os.path.exists('kaggle.json'):
    print("Upload your kaggle.json file:")
    files.upload()

# Configure Kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
print("✅ Kaggle Configured Successfully!")

### 📥 Step 2: Download & Prepare Data
We will download TWO datasets to ensure we have enough diversity:
1.  **COVID-19 Radiography Database** (Good for COVID/Normal)
2.  **Chest X-Ray Images (Pneumonia)** (Good for Pneumonia)

In [None]:
print("⏳ Downloading Datasets... Please wait.")

# 1. Download COVID-19 Radiography Database
if not os.path.exists('covid19-radiography-database.zip'):
    !kaggle datasets download -d tawsifurrahman/covid19-radiography-database
    !unzip -q covid19-radiography-database.zip
    print("✅ COVID-19 Database Downloaded.")

# 2. Download Pneumonia Dataset
if not os.path.exists('chest-xray-pneumonia.zip'):
    !kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
    !unzip -q chest-xray-pneumonia.zip
    print("✅ Pneumonia Database Downloaded.")

In [None]:
import shutil
import random
from tqdm import tqdm

# Setup Dataset Directory (recreated from scratch so re-running the cell is idempotent)
DATASET_DIR = 'dataset'
if os.path.exists(DATASET_DIR):
    shutil.rmtree(DATASET_DIR)
os.makedirs(DATASET_DIR)

# In this cell the list is only used to create and report the class folders.
CLASSES = ['Normal', 'COVID-19', 'Pneumonia']
for c in CLASSES:
    os.makedirs(os.path.join(DATASET_DIR, c), exist_ok=True)

# Cap applied to the Normal and Pneumonia classes to keep them close to the COVID count.
MAX_IMAGES_PER_CLASS = 4000
# Fixed seed so the sampled subset is identical on every run.
SAMPLING_SEED = 42


def _copy_class_images(src_dir, class_name, extension, desc, limit=None, rng=None):
    """Copy the images of one class from ``src_dir`` into ``DATASET_DIR/class_name``.

    Args:
        src_dir: Folder holding the source images.
        class_name: Name of the destination sub-folder inside ``DATASET_DIR``.
        extension: Lower-case file extension to keep (e.g. ``'.png'``).
        desc: Label shown on the tqdm progress bar.
        limit: Maximum number of images to copy; ``None`` copies everything.
        rng: ``random.Random`` instance used to sample when ``limit`` is exceeded.

    Returns:
        The number of images copied.
    """
    dst_dir = os.path.join(DATASET_DIR, class_name)
    # os.listdir order is arbitrary; sorting makes the (seeded) sample reproducible.
    image_names = sorted(f for f in os.listdir(src_dir) if f.lower().endswith(extension))
    if limit is not None and len(image_names) > limit:
        sampler = rng if rng is not None else random.Random(SAMPLING_SEED)
        image_names = sampler.sample(image_names, limit)
    for name in tqdm(image_names, desc=desc):
        shutil.copy(os.path.join(src_dir, name), os.path.join(dst_dir, name))
    return len(image_names)


print("📂 Organizing Data...")
sampling_rng = random.Random(SAMPLING_SEED)

# --- 1. Process COVID-19 Images ---
# Use ALL COVID images (usually ~3600)
covid_src = os.path.join('COVID-19_Radiography_Dataset', 'COVID', 'images')
_copy_class_images(covid_src, 'COVID-19', '.png', "Copying COVID")

# --- 2. Process Normal Images ---
# We limit Normal images to match COVID count roughly to avoid imbalance
normal_src = os.path.join('COVID-19_Radiography_Dataset', 'Normal', 'images')
_copy_class_images(normal_src, 'Normal', '.png', "Copying Normal",
                   limit=MAX_IMAGES_PER_CLASS, rng=sampling_rng)

# --- 3. Process Pneumonia Images ---
# Same cap as Normal; sampled (not sliced) so the subset does not depend on listing order
pneum_src = os.path.join('chest_xray', 'train', 'PNEUMONIA')
_copy_class_images(pneum_src, 'Pneumonia', '.jpeg', "Copying Pneumonia",
                   limit=MAX_IMAGES_PER_CLASS, rng=sampling_rng)

print("\n✅ Data Preparation Complete!")
for c in CLASSES:
    print(f"   {c}: {len(os.listdir(os.path.join(DATASET_DIR, c)))} images")

### 🧠 Step 3: Build & Train Model (DenseNet121)

In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import numpy as np

# Configuration
IMG_SIZE = (224, 224)  # target size for every image; also used as the model's input shape
BATCH_SIZE = 32  # batch size of both the training and validation pipelines
EPOCHS = 30  # number of epochs passed to model.fit
DATASET_DIR = "dataset"  # root folder holding one sub-folder per class
# The position of a name in this list is its label index: get_label compares the
# image's parent folder name against this list to build the one-hot label.
CLASSES = ['COVID-19', 'Normal', 'Pneumonia'] # Match old index order if possible, or print it.

# Lets tf.data pick the parallelism / prefetch buffer sizes at runtime.
AUTOTUNE = tf.data.AUTOTUNE

def get_label(file_path):
    """Build the one-hot label for an image from the name of its parent folder.

    Args:
        file_path: String tensor holding the path ``.../<class folder>/<file>``.

    Returns:
        A boolean tensor with one entry per name in ``CLASSES``; the entry is
        True where the parent folder name equals that class name.
    """
    path_components = tf.strings.split(file_path, os.path.sep)
    # The second-to-last component is the class folder containing the file.
    class_folder = path_components[-2]
    return tf.equal(class_folder, CLASSES)

def decode_img(img):
    """Turn raw encoded image bytes into a float32, 3-channel tensor of size ``IMG_SIZE``.

    Args:
        img: Scalar string tensor with the encoded file contents.

    Returns:
        The decoded image, converted to float32 and resized to ``IMG_SIZE``.
    """
    # expand_animations=False keeps animated files as a single frame so the
    # result always has a static rank that resize can handle.
    decoded = tf.io.decode_image(img, channels=3, expand_animations=False)
    # NOTE(review): this only converts the pixels to float32; it does not apply
    # keras.applications.densenet.preprocess_input. Confirm that inference code
    # uses the same preprocessing as this pipeline.
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return tf.image.resize(as_float, IMG_SIZE)

def process_path(file_path):
    """Load one sample: read the file at ``file_path`` and pair the image with its label.

    Args:
        file_path: String tensor holding the path of an image file.

    Returns:
        Tuple ``(image, label)`` as produced by ``decode_img`` and ``get_label``.
    """
    raw_bytes = tf.io.read_file(file_path)
    return decode_img(raw_bytes), get_label(file_path)

def get_dataset(directory, validation_split=0.2):
    """Split the image files under ``directory`` into disjoint train/validation file lists.

    Args:
        directory: Root folder laid out as ``<directory>/<class>/<image>``.
        validation_split: Fraction of the files reserved for validation.

    Returns:
        Tuple ``(train_ds, val_ds)`` of ``tf.data.Dataset`` objects yielding file paths.
    """
    list_ds = tf.data.Dataset.list_files(str(directory) + '/*/*', shuffle=False)
    file_count = len(list_ds)

    # The shuffle must produce the SAME order every time the dataset is iterated.
    # With the default reshuffle_each_iteration=True, skip() and take() below
    # would each see a different permutation, so validation files would leak
    # into the training set. The buffer covers the whole list because the
    # unshuffled listing is grouped by class folder.
    list_ds = list_ds.shuffle(file_count, seed=42, reshuffle_each_iteration=False)

    val_size = int(file_count * validation_split)
    train_ds = list_ds.skip(val_size)
    val_ds = list_ds.take(val_size)

    return train_ds, val_ds

train_ds, val_ds = get_dataset(DATASET_DIR)

def augment(image, label):
    """Apply random training-time augmentation to an image; the label passes through unchanged.

    Args:
        image: Image tensor to augment.
        label: Label belonging to ``image``.

    Returns:
        Tuple ``(augmented_image, label)``.
    """
    flipped = tf.image.random_flip_left_right(image)
    brightened = tf.image.random_brightness(flipped, 0.2)
    contrasted = tf.image.random_contrast(brightened, 0.8, 1.2)
    return contrasted, label

# Training pipeline: decode once and cache the decoded images, then shuffle and
# augment after the cache so every epoch sees fresh order and fresh augmentation.
train_generator = (
    train_ds
    .map(process_path, num_parallel_calls=AUTOTUNE)
    .cache()
    .shuffle(buffer_size=1000)
    .map(augment, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

# Validation pipeline: no shuffling or augmentation; the finished batches are cached.
val_generator = (
    val_ds
    .map(process_path, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(AUTOTUNE)
)

print("Dataset pipeline ready (optimized with tf.data).")



In [None]:
def build_model(num_classes=None):
    """Build and compile the DenseNet121-based classifier.

    Args:
        num_classes: Size of the softmax output layer. Defaults to
            ``len(CLASSES)`` so the head always matches the label encoding.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    if num_classes is None:
        num_classes = len(CLASSES)

    base_model = DenseNet121(
        weights='imagenet',
        include_top=False,
        input_shape=IMG_SIZE + (3,)
    )

    # Fine-tune only the last 40 layers of the backbone; everything before stays frozen.
    base_model.trainable = True
    for layer in base_model.layers[:-40]:
        layer.trainable = False

    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation='softmax')  # one output per class
    ])

    # categorical_crossentropy matches the one-hot labels produced by get_label.
    model.compile(
        optimizer=optimizers.Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

model = build_model()
model.summary()

In [None]:
# Callbacks
# Keep the best model (by validation accuracy) on disk as 'best_model.h5'.
checkpoint = ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Stop once validation accuracy has not improved for 8 epochs.
early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=8,
    restore_best_weights=True,
    verbose=1,
)
# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement (floor 1e-6).
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)
training_callbacks = [checkpoint, early_stop, reduce_lr]

print("🚀 Starting Training...")
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    callbacks=training_callbacks,
)

### 📊 Step 4: Evaluate & Save

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# 1. Plot Accuracy/Loss
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train')
plt.plot(history.history['val_accuracy'], label='Validation')
plt.title('Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train')
plt.plot(history.history['val_loss'], label='Validation')
plt.title('Loss')
plt.legend()
plt.show()

# 2. Confusion Matrix
print("Generating Classification Report...")
# val_generator is a tf.data.Dataset, which has neither .reset() nor .classes
# (those belong to the Keras ImageDataGenerator iterators). Predictions and
# labels are collected from the same batches so they stay aligned.
pred_batches, true_batches = [], []
for batch_images, batch_labels in val_generator:
    pred_batches.append(model.predict_on_batch(batch_images))
    # Labels are one-hot vectors; argmax recovers the class index.
    true_batches.append(np.argmax(batch_labels.numpy(), axis=1))

if not pred_batches:
    raise ValueError("Validation dataset is empty; nothing to evaluate.")

preds = np.concatenate(pred_batches)
y_pred = np.argmax(preds, axis=1)
y_true = np.concatenate(true_batches)

# Passing explicit label ids keeps the report and matrix aligned with CLASSES
# even if a class happens to be absent from the validation split.
label_ids = list(range(len(CLASSES)))
print(classification_report(y_true, y_pred, labels=label_ids, target_names=CLASSES))

cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=CLASSES, yticklabels=CLASSES)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Save Final Model
FINAL_MODEL_PATH = 'model.h5'
model.save(FINAL_MODEL_PATH)
print(f"✅ Model Saved as '{FINAL_MODEL_PATH}'")

# Download
# Imported here (not reused from an earlier cell) so `files` is guaranteed to be
# the Colab module at this point.
from google.colab import files
files.download(FINAL_MODEL_PATH)