# 🏥 Medical AI Bot - 4-Class Training (DenseNet121) 🏥

This notebook trains a **DenseNet121** chest X-ray classifier with **4 classes**:
- ✅ COVID-19
- ✅ Normal
- ✅ Pneumonia
- ✅ **Tuberculosis (TB)** ← NEW!

### 🚀 Step 1: Initialize & Authenticate
1.  Run the cell below and, when prompted, upload your **`kaggle.json`** API token (create one from your [Kaggle Account](https://www.kaggle.com/account) → API → Create New Token).

In [None]:
!pip install -q tf-keras kaggle
import os
from google.colab import files

# Force TensorFlow to use Keras 2 (legacy) format
os.environ['TF_USE_LEGACY_KERAS'] = '1'

# Upload kaggle.json
if not os.path.exists('kaggle.json'):
    print("Upload your kaggle.json file:")
    files.upload()

# Configure Kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
print("✅ Kaggle Configured Successfully!")

### 📥 Step 2: Download & Prepare Data (4 Classes)
We download THREE datasets:
1.  **COVID-19 Radiography Database** (COVID-19 + Normal images)
2.  **Chest X-Ray Pneumonia** (Pneumonia images)
3.  **Tuberculosis (TB) Chest X-ray Database** (TB images)

In [None]:
print("⏳ Downloading Datasets... Please wait.")

# 1. Download COVID-19 Radiography Database
if not os.path.exists('covid19-radiography-database.zip'):
    !kaggle datasets download -d tawsifurrahman/covid19-radiography-database
    !unzip -q covid19-radiography-database.zip
    print("✅ COVID-19 Database Downloaded.")

# 2. Download Pneumonia Dataset
if not os.path.exists('chest-xray-pneumonia.zip'):
    !kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
    !unzip -q chest-xray-pneumonia.zip
    print("✅ Pneumonia Database Downloaded.")

# 3. Download Tuberculosis Dataset (NEW)
if not os.path.exists('tuberculosis-tb-chest-xray-dataset.zip'):
    !kaggle datasets download -d tawsifurrahman/tuberculosis-tb-chest-xray-dataset
    !unzip -q tuberculosis-tb-chest-xray-dataset.zip
    print("✅ Tuberculosis Database Downloaded.")

In [None]:
import shutil
import random
from tqdm import tqdm

# Setup Dataset Directory (removed and recreated on every run)
DATASET_DIR = 'dataset'
if os.path.exists(DATASET_DIR):
    shutil.rmtree(DATASET_DIR)
os.makedirs(DATASET_DIR)

CLASSES = ['COVID-19', 'Normal', 'Pneumonia', 'Tuberculosis']
for c in CLASSES:
    os.makedirs(os.path.join(DATASET_DIR, c), exist_ok=True)

# Fixed seed so that sub-sampled classes contain the same files on every run.
SAMPLE_SEED = 42


def copy_class_images(src_dir, class_name, extension, desc, limit=None):
    """Copy the images of one class from `src_dir` into DATASET_DIR/<class_name>.

    Args:
        src_dir: Directory holding the source images.
        class_name: Name of the destination sub-folder inside DATASET_DIR.
        extension: File extension to keep (compared case-insensitively),
            e.g. '.png'.
        desc: Label shown on the tqdm progress bar.
        limit: Optional maximum number of images to copy. When the folder
            holds more matching files than `limit`, a seeded random sample of
            exactly `limit` files is copied; otherwise every file is copied.

    Returns:
        The number of files copied.
    """
    dst_dir = os.path.join(DATASET_DIR, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # sample below reproducible across runs and machines.
    candidates = sorted(
        f for f in os.listdir(src_dir) if f.lower().endswith(extension)
    )
    if limit is not None and len(candidates) > limit:
        # A local Random instance leaves the global RNG state untouched.
        candidates = random.Random(SAMPLE_SEED).sample(candidates, limit)
    for f in tqdm(candidates, desc=desc):
        shutil.copy(os.path.join(src_dir, f), os.path.join(dst_dir, f))
    return len(candidates)


print("📂 Organizing Data (4 Classes)...")

# --- 1. Process COVID-19 Images ---
covid_src = os.path.join('COVID-19_Radiography_Dataset', 'COVID', 'images')
copy_class_images(covid_src, 'COVID-19', '.png', "Copying COVID")

# --- 2. Process Normal Images (capped at 4000) ---
normal_src = os.path.join('COVID-19_Radiography_Dataset', 'Normal', 'images')
copy_class_images(normal_src, 'Normal', '.png', "Copying Normal", limit=4000)

# --- 3. Process Pneumonia Images (capped at 4000) ---
pneum_src = os.path.join('chest_xray', 'train', 'PNEUMONIA')
copy_class_images(pneum_src, 'Pneumonia', '.jpeg', "Copying Pneumonia", limit=4000)

# --- 4. Process Tuberculosis Images (NEW) ---
tb_src = os.path.join('TB_Chest_Radiography_Database', 'Tuberculosis')
copy_class_images(tb_src, 'Tuberculosis', '.png', "Copying Tuberculosis")

print("\n✅ Data Preparation Complete!")
for c in CLASSES:
    print(f"   {c}: {len(os.listdir(os.path.join(DATASET_DIR, c)))} images")

### 🧠 Step 3: Build & Train Model (DenseNet121 - 4 Classes)

In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import numpy as np

# Configuration
IMG_SIZE = (224, 224)   # (height, width) every image is resized to
BATCH_SIZE = 64
EPOCHS = 30             # upper bound; the EarlyStopping callback may end training sooner
SEED = 42
DATASET_DIR = "dataset"
CLASSES = ['COVID-19', 'Normal', 'Pneumonia', 'Tuberculosis']
NUM_CLASSES = len(CLASSES)  # 4

# Seed the Python, NumPy and TensorFlow random generators in one call so that
# shuffling, augmentation, dropout and weight initialisation are repeatable
# from a fresh kernel.
tf.keras.utils.set_random_seed(SEED)

print(f"Training {NUM_CLASSES}-class model: {CLASSES}")
AUTOTUNE = tf.data.AUTOTUNE

def get_label(file_path):
    """Return the index within CLASSES of the folder that contains `file_path`.

    The class name is taken from the second-to-last path component, i.e. the
    directory the file sits in.
    """
    path_components = tf.strings.split(file_path, os.path.sep)
    class_dir = path_components[-2]
    # Element-wise comparison against CLASSES gives a boolean vector; argmax
    # returns the position of its first True entry.
    is_match = class_dir == CLASSES
    return tf.argmax(is_match)

def decode_img(img):
    """Decode encoded image bytes into a resized 3-channel uint8 tensor.

    The dataset folders mix PNG and JPEG files, so the format-detecting
    `tf.io.decode_image` is used rather than the JPEG-specific decoder.

    Args:
        img: Scalar string tensor with the raw contents of an image file.

    Returns:
        A uint8 tensor of shape IMG_SIZE + (3,).
    """
    # expand_animations=False makes the decoder always return a rank-3
    # [height, width, channels] tensor, which tf.image.resize needs when
    # tracing inside Dataset.map.
    img = tf.io.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE)
    # Cast back to uint8 after resizing (presumably to keep the cached
    # dataset small; scaling to float happens later in the pipeline).
    return tf.cast(img, tf.uint8)

def process_path(file_path):
    """Turn an image path into an `(image, one_hot_label)` training pair.

    The image is read from disk and decoded with `decode_img`; the label is
    derived from the path with `get_label` and one-hot encoded over
    NUM_CLASSES.
    """
    class_index = get_label(file_path)
    one_hot_label = tf.one_hot(class_index, NUM_CLASSES)
    raw_bytes = tf.io.read_file(file_path)
    image = decode_img(raw_bytes)
    return image, one_hot_label

# Build dataset
list_ds = tf.data.Dataset.list_files(str(DATASET_DIR + '/*/*'), shuffle=False)
image_count = len(list_ds)

# Shuffle the file list ONCE, with a buffer covering every file.
# reshuffle_each_iteration=False is essential: with the default (True) the
# list is re-shuffled every time it is iterated, so `skip` and `take` below
# would see different orderings and validation images would leak into the
# training set.
list_ds = list_ds.shuffle(image_count, seed=42, reshuffle_each_iteration=False)

# 80/20 split: the first `val_size` shuffled paths are held out for validation.
val_size = int(image_count * 0.2)
train_ds = list_ds.skip(val_size)
val_ds = list_ds.take(val_size)

def augment_and_scale(image, label):
    """Scale a uint8 image to float32 in [0, 1] and apply training augmentation.

    Augmentation consists of a random horizontal flip and a random brightness
    shift of at most 0.2.

    Args:
        image: uint8 image tensor as produced by `process_path`.
        label: Label tensor, passed through unchanged.

    Returns:
        `(image, label)` with `image` as float32 in [0, 1].
    """
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, 0.2)
    # The brightness shift can push pixels outside [0, 1]; clip so training
    # inputs stay in the same range as the validation inputs.
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image, label

def scale_only(image, label):
    """Convert `image` to float32 without augmentation; `label` is returned as-is."""
    scaled = tf.image.convert_image_dtype(image, tf.float32)
    return scaled, label

# Training pipeline: decode -> cache decoded images -> shuffle -> augment ->
# batch -> prefetch. Caching happens before augmentation, so the random
# transforms are re-applied on every pass over the cached images.
train_ds = (
    train_ds
    .map(process_path, num_parallel_calls=AUTOTUNE)
    .cache()
    .shuffle(buffer_size=2000)
    .map(augment_and_scale, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation pipeline: same decoding and caching, but no shuffling and no
# augmentation (scaling only).
val_ds = (
    val_ds
    .map(process_path, num_parallel_calls=AUTOTUNE)
    .cache()
    .map(scale_only, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

def build_model():
    """Create and compile the DenseNet121-based classifier.

    An ImageNet-initialised DenseNet121 (without its top) is used as the
    backbone; all but its last 40 layers are frozen. On top sits a head of
    global average pooling, batch normalisation, dropout, a 512-unit ReLU
    layer, batch normalisation, dropout and a softmax over NUM_CLASSES.

    Returns:
        A compiled `tf.keras.Model` (Adam at 1e-4, categorical cross-entropy,
        accuracy metric).
    """
    input_shape = IMG_SIZE + (3,)

    backbone = DenseNet121(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape,
    )

    # Mark the whole backbone trainable, then freeze everything except its
    # final 40 layers.
    backbone.trainable = True
    for frozen_layer in backbone.layers[:-40]:
        frozen_layer.trainable = False

    inputs = tf.keras.Input(shape=input_shape)
    features = backbone(inputs)

    # Classification head
    pooled = layers.GlobalAveragePooling2D()(features)
    pooled = layers.BatchNormalization()(pooled)
    pooled = layers.Dropout(0.5)(pooled)
    hidden = layers.Dense(512, activation='relu')(pooled)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.Dropout(0.3)(hidden)
    outputs = layers.Dense(NUM_CLASSES, activation='softmax')(hidden)  # 4 classes!

    classifier = tf.keras.Model(inputs, outputs)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=optimizers.Adam(learning_rate=1e-4),
        metrics=['accuracy'],
    )
    return classifier

model = build_model()
model.summary()

# Callbacks
# Write 'best_model.h5' whenever validation accuracy reaches a new maximum.
checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Stop once validation accuracy has not improved for 8 epochs.
early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=8,
    restore_best_weights=True,
    verbose=1,
)
# Multiply the learning rate by 0.2 after 3 epochs without a better
# validation loss, never going below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

print("🚀 Starting 4-Class Training...")
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    callbacks=[checkpoint, early_stop, reduce_lr]
)

# In Keras 2, EarlyStopping(restore_best_weights=True) restores the best
# weights only when it actually halts training. If all EPOCHS run, the model
# may still hold the last-epoch weights. Reload the best checkpoint so the
# exported model matches the best validation accuracy.
if os.path.exists('best_model.h5'):
    model.load_weights('best_model.h5')

# Save Final Model
model.save('model.h5')
print("✅ Model Saved as 'model.h5'")


### 📊 Step 4: Evaluate & Download

In [None]:
# Show training results (taken from the per-epoch metrics recorded by fit)
print("\n\n=== FINAL RESULTS ===")

best_val_accuracy = max(history.history['val_accuracy'])
print(f"Best Val Accuracy: {best_val_accuracy:.4f}")

final_train_accuracy = history.history['accuracy'][-1]
print(f"Final Train Accuracy: {final_train_accuracy:.4f}")

print(f"Classes: {CLASSES}")
print("\nDownloading model.h5...")

# Trigger a browser download of the saved model from the Colab runtime.
from google.colab import files
files.download('model.h5')