# 🏥 Medical AI Bot - 6-Class Training (DenseNet121) 🏥

This notebook trains a **DenseNet121** chest X-ray classifier with **6 classes**:
- ✅ COVID-19
- ✅ Normal
- ✅ Pneumonia
- ✅ Tuberculosis (TB)
- ✅ **Lung Cancer** ← NEW!
- ✅ **Pleural Effusion** ← NEW!

### 🚀 Step 1: Initialize & Authenticate
1.  Run the cell below and, when prompted, upload your **`kaggle.json`** API token (create one from your [Kaggle Account](https://www.kaggle.com/account) -> API -> Create New Token).

In [None]:
!pip install -q tf-keras kaggle
import os
from google.colab import files

# Force TensorFlow to use Keras 2 (legacy) format
os.environ['TF_USE_LEGACY_KERAS'] = '1'

# Upload kaggle.json
if not os.path.exists('kaggle.json'):
    print("Upload your kaggle.json file:")
    files.upload()

# Configure Kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
print("✅ Kaggle Configured Successfully!")

### 📥 Step 2: Download & Prepare Data (6 Classes)
We download FOUR datasets:
1.  **COVID-19 Radiography Database** (COVID-19 + Normal images)
2.  **Chest X-Ray Pneumonia** (Pneumonia images)
3.  **Tuberculosis (TB) Chest X-ray Database** (TB images)
4.  **X-ray Lung Diseases (9 classes)** (Lung Cancer + Pleural Effusion images)

In [None]:
print("⏳ Downloading Datasets... Please wait.")

# 1. Download COVID-19 Radiography Database
if not os.path.exists('covid19-radiography-database.zip'):
    !kaggle datasets download -d tawsifurrahman/covid19-radiography-database
    !unzip -q covid19-radiography-database.zip
    print("✅ COVID-19 Database Downloaded.")

# 2. Download Pneumonia Dataset
if not os.path.exists('chest-xray-pneumonia.zip'):
    !kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
    !unzip -q chest-xray-pneumonia.zip
    print("✅ Pneumonia Database Downloaded.")

# 3. Download Tuberculosis Dataset
if not os.path.exists('tuberculosis-tb-chest-xray-dataset.zip'):
    !kaggle datasets download -d tawsifurrahman/tuberculosis-tb-chest-xray-dataset
    !unzip -q tuberculosis-tb-chest-xray-dataset.zip
    print("✅ Tuberculosis Database Downloaded.")

# 4. Download Lung Diseases 9-class Dataset (for Lung Cancer + Pleural Effusion)
if not os.path.exists('x-ray-lung-diseases-images-9-classes.zip'):
    !kaggle datasets download -d fernando-feltrin/x-ray-lung-diseases-images-9-classes
    !unzip -q x-ray-lung-diseases-images-9-classes.zip -d lung_diseases_9class
    print("✅ Lung Diseases 9-Class Database Downloaded.")

# List the 9-class dataset structure
print("\n📂 9-class dataset folders:")
if os.path.exists('lung_diseases_9class'):
    for item in sorted(os.listdir('lung_diseases_9class')):
        full = os.path.join('lung_diseases_9class', item)
        if os.path.isdir(full):
            count = len([f for f in os.listdir(full) if not f.startswith('.')])
            print(f'   {item}: {count} images')

In [None]:
import shutil
import random
from tqdm import tqdm
import glob

# Target layout: dataset/<class name>/<image files>
DATASET_DIR = 'dataset'
CLASSES = [
    'COVID-19',
    'Lung Cancer',
    'Normal',
    'Pleural Effusion',
    'Pneumonia',
    'Tuberculosis',
]

# Start from an empty tree so files from a previous run never mix with this one.
if os.path.exists(DATASET_DIR):
    shutil.rmtree(DATASET_DIR)
os.makedirs(DATASET_DIR)

# One sub-folder per class.
for class_name in CLASSES:
    class_dir = os.path.join(DATASET_DIR, class_name)
    os.makedirs(class_dir, exist_ok=True)

print("📂 Organizing Data (6 Classes)...")

def copy_images(src_dir, dst_dir, max_count=None, extensions=('.png', '.jpg', '.jpeg')):
    """Copy image files from ``src_dir`` into ``dst_dir`` without losing files.

    Several source folders may be merged into the same destination. When a
    file with the same name but different content is already present, the new
    file is stored under a suffixed name instead of overwriting it. A file
    whose identical copy is already present is skipped, so repeating a call
    does not create duplicates.

    Args:
        src_dir: Directory whose direct entries are scanned (not recursive).
        dst_dir: Destination directory; created if it does not exist.
        max_count: If truthy and more images are found, only a random sample
            of this many images is copied.
        extensions: Filename suffixes (matched case-insensitively) that count
            as images.

    Returns:
        The number of source images selected for copying.

    Raises:
        FileNotFoundError: If ``src_dir`` does not exist.
    """
    import filecmp  # local import keeps this block self-contained

    os.makedirs(dst_dir, exist_ok=True)

    # str.endswith needs a tuple; accept any iterable of suffixes.
    suffixes = tuple(extensions)
    # Sorted so the candidate list does not depend on os.listdir's arbitrary order.
    files_list = sorted(f for f in os.listdir(src_dir) if f.lower().endswith(suffixes))
    if max_count and len(files_list) > max_count:
        files_list = random.sample(files_list, max_count)

    # Source folder name, used to build a distinct name on a collision.
    src_tag = os.path.basename(os.path.normpath(src_dir))

    for f in tqdm(files_list, desc=f'  -> {os.path.basename(dst_dir)}'):
        src_path = os.path.join(src_dir, f)
        target = os.path.join(dst_dir, f)
        stem, ext = os.path.splitext(f)

        already_present = False
        attempt = 0
        while os.path.exists(target):
            if filecmp.cmp(src_path, target, shallow=False):
                already_present = True  # byte-identical copy exists; nothing to do
                break
            attempt += 1
            target = os.path.join(dst_dir, f'{stem}__{src_tag}_{attempt}{ext}')

        if not already_present:
            shutil.copy(src_path, target)
    return len(files_list)

# Seed the RNG so the random subsets drawn by copy_images(max_count=...) are
# repeatable from run to run (for a given directory listing order).
random.seed(42)

# --- 1. COVID-19 Images ---
covid_src = os.path.join('COVID-19_Radiography_Dataset', 'COVID', 'images')
copy_images(covid_src, os.path.join(DATASET_DIR, 'COVID-19'))

# --- 2. Normal Images (capped at 4000) ---
normal_src = os.path.join('COVID-19_Radiography_Dataset', 'Normal', 'images')
copy_images(normal_src, os.path.join(DATASET_DIR, 'Normal'), max_count=4000)

# --- 3. Pneumonia Images (capped at 4000) ---
pneum_src = os.path.join('chest_xray', 'train', 'PNEUMONIA')
copy_images(pneum_src, os.path.join(DATASET_DIR, 'Pneumonia'), max_count=4000)

# --- 4. Tuberculosis Images ---
tb_src = os.path.join('TB_Chest_Radiography_Database', 'Tuberculosis')
copy_images(tb_src, os.path.join(DATASET_DIR, 'Tuberculosis'))

# --- 5 & 6. Classes taken from the 9-class dataset ---
NINE_CLASS_DIR = 'lung_diseases_9class'


def _matching_folders(keywords):
    """Return entries of NINE_CLASS_DIR whose lower-cased name contains any keyword."""
    return [
        item for item in sorted(os.listdir(NINE_CLASS_DIR))
        if any(kw in item.lower() for kw in keywords)
    ]


def _copy_folders_into_class(folders, class_name):
    """Copy the images of every directory in ``folders`` into DATASET_DIR/class_name."""
    for folder in folders:
        src = os.path.join(NINE_CLASS_DIR, folder)
        if os.path.isdir(src):
            copy_images(src, os.path.join(DATASET_DIR, class_name))


# --- 5. Lung Cancer Images (from 9-class dataset) ---
# Folder names containing 'cancer', 'tumor', 'mass', 'nodule' or 'encapsulated'
lung_cancer_candidates = []
if os.path.exists(NINE_CLASS_DIR):
    lung_cancer_candidates = _matching_folders(['cancer', 'tumor', 'mass', 'nodule', 'encapsulated'])
    print(f'\n  Lung Cancer candidate folders: {lung_cancer_candidates}')
    _copy_folders_into_class(lung_cancer_candidates, 'Lung Cancer')

# --- 6. Pleural Effusion Images (from 9-class dataset) ---
# Folder names containing 'effusion' or 'pleural'
effusion_candidates = []
if os.path.exists(NINE_CLASS_DIR):
    effusion_candidates = _matching_folders(['effusion', 'pleural'])
    print(f'  Pleural Effusion candidate folders: {effusion_candidates}')
    _copy_folders_into_class(effusion_candidates, 'Pleural Effusion')

print("\n✅ Data Preparation Complete!")
empty_classes = []
for c in CLASSES:
    count = len(os.listdir(os.path.join(DATASET_DIR, c)))
    print(f"   {c}: {count} images")
    if count == 0:
        empty_classes.append(c)

# A class with no images would still get an output unit in the model but could
# never be learned, so make the problem visible before training starts.
if empty_classes:
    print(f"⚠️  No images were copied for: {empty_classes}. "
          "Check the dataset folder names listed above before training.")

### 🧠 Step 3: Build & Train Model (DenseNet121 - 6 Classes)

In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import numpy as np

# --- Configuration -----------------------------------------------------------
# Image geometry and training schedule
IMG_SIZE = (224, 224)   # (height, width) every image is resized to
BATCH_SIZE = 64
EPOCHS = 30

# Data location and label set; a label's index is its position in CLASSES
DATASET_DIR = "dataset"
CLASSES = [
    'COVID-19',
    'Lung Cancer',
    'Normal',
    'Pleural Effusion',
    'Pneumonia',
    'Tuberculosis',
]
NUM_CLASSES = len(CLASSES)  # 6

# Lets tf.data choose parallelism / prefetch sizes
AUTOTUNE = tf.data.AUTOTUNE

print(f"Training {NUM_CLASSES}-class model: {CLASSES}")

def get_label(file_path):
    """Return the index in CLASSES of the folder that directly contains ``file_path``.

    The path is split on the OS separator and its second-to-last component
    (the parent folder name) is compared against every entry of CLASSES.
    NOTE(review): a folder name that is not in CLASSES gives an all-False
    comparison, for which argmax presumably returns index 0 - confirm.
    """
    path_parts = tf.strings.split(file_path, os.path.sep)
    class_folder = path_parts[-2]
    matches = class_folder == CLASSES
    return tf.argmax(matches)

def decode_img(img):
    """Decode encoded image bytes to a 3-channel uint8 tensor resized to IMG_SIZE."""
    pixels = tf.io.decode_jpeg(img, channels=3)
    resized = tf.image.resize(pixels, IMG_SIZE)
    # Cast the resized image back to uint8 before returning it.
    return tf.cast(resized, tf.uint8)

def process_path(file_path):
    """Turn an image path into an ``(image, one-hot label)`` pair.

    The label comes from the parent folder name (see ``get_label``); the image
    is read from disk and decoded by ``decode_img``.
    """
    class_index = get_label(file_path)
    one_hot_label = tf.one_hot(class_index, NUM_CLASSES)
    raw_bytes = tf.io.read_file(file_path)
    return decode_img(raw_bytes), one_hot_label

# Build dataset
# list_files(shuffle=False) returns the paths in a deterministic order.
list_ds = tf.data.Dataset.list_files(str(DATASET_DIR + '/*/*'), shuffle=False)
image_count = len(list_ds)

# Shuffle ONCE, with a buffer that covers every file:
# - reshuffle_each_iteration=False keeps the order identical on every pass.
#   With the default (True) the train and validation pipelines each iterate a
#   differently shuffled list, so skip()/take() select overlapping files and
#   validation images leak into training.
# - a buffer smaller than the file list would only shuffle locally and leave
#   the split biased by the listing order.
list_ds = list_ds.shuffle(image_count, seed=42, reshuffle_each_iteration=False)

# 80 / 20 split: the first val_size paths are validation, the rest training.
val_size = int(image_count * 0.2)
train_ds = list_ds.skip(val_size)
val_ds = list_ds.take(val_size)

def augment_and_scale(image, label):
    """Convert a training image to float32 and apply random augmentation.

    Args:
        image: Image tensor as produced by ``process_path`` (uint8).
        label: Label tensor; passed through unchanged.

    Returns:
        ``(image, label)`` with the image as float32, randomly flipped
        left/right, randomly brightness-shifted and clipped to [0, 1].
    """
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, 0.2)
    # The brightness shift can push pixels below 0 or above 1, a range the
    # validation images (scale_only) never contain; clip to keep both
    # pipelines in the same value range.
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image, label

def scale_only(image, label):
    """Convert an image to float32 without any augmentation; the label is unchanged."""
    scaled = tf.image.convert_image_dtype(image, tf.float32)
    return scaled, label

# Training pipeline: decode -> cache decoded images -> shuffle -> augment -> batch.
# cache() sits before the augmentation so the random transforms are re-drawn
# on every pass instead of being cached.
train_ds = (
    train_ds
    .map(process_path, num_parallel_calls=AUTOTUNE)
    .cache()
    .shuffle(buffer_size=2000)
    .map(augment_and_scale, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation pipeline: same decoding and scaling, no shuffling or augmentation.
val_ds = (
    val_ds
    .map(process_path, num_parallel_calls=AUTOTUNE)
    .cache()
    .map(scale_only, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

def build_model():
    """Build and compile the DenseNet121-based classifier.

    An ImageNet-initialised DenseNet121 (without its top) is followed by
    global average pooling, a 512-unit dense layer with batch normalisation
    and dropout, and a softmax layer with NUM_CLASSES outputs. All backbone
    layers except the last 40 are frozen.

    Returns:
        The model, compiled with Adam (learning rate 1e-4), categorical
        cross-entropy loss and the accuracy metric.
    """
    input_shape = IMG_SIZE + (3,)

    backbone = DenseNet121(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape,
    )

    # Unfreeze the backbone, then re-freeze everything except its last 40 layers.
    backbone.trainable = True
    for frozen_layer in backbone.layers[:-40]:
        frozen_layer.trainable = False

    inputs = tf.keras.Input(shape=input_shape)

    # Backbone features -> pooled vector
    features = backbone(inputs)
    features = layers.GlobalAveragePooling2D()(features)
    features = layers.BatchNormalization()(features)
    features = layers.Dropout(0.5)(features)

    # Classification head
    hidden = layers.Dense(512, activation='relu')(features)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.Dropout(0.3)(hidden)
    outputs = layers.Dense(NUM_CLASSES, activation='softmax')(hidden)  # one unit per class

    classifier = tf.keras.Model(inputs, outputs)
    classifier.compile(
        optimizer=optimizers.Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

model = build_model()
model.summary()

# File that always holds the epoch with the highest validation accuracy.
BEST_CHECKPOINT_PATH = 'best_model.h5'

# Callbacks
# - checkpoint: writes the model whenever val_accuracy improves
# - early_stop: stops after 8 epochs without val_accuracy improvement
# - reduce_lr:  multiplies the learning rate by 0.2 after 3 epochs without
#               val_loss improvement, down to 1e-6
checkpoint = ModelCheckpoint(
    BEST_CHECKPOINT_PATH,
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)
early_stop = EarlyStopping(monitor='val_accuracy', patience=8, verbose=1, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1)

print("🚀 Starting 6-Class Training...")
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    callbacks=[checkpoint, early_stop, reduce_lr]
)

# NOTE(review): with legacy Keras, restore_best_weights appears to take effect
# only when EarlyStopping actually stops the run. If all EPOCHS complete, the
# in-memory weights are those of the last epoch, which may be worse than the
# best one. Reloading the best checkpoint makes the exported model match the
# best validation accuracy in either case.
if os.path.exists(BEST_CHECKPOINT_PATH):
    model.load_weights(BEST_CHECKPOINT_PATH)

# Save Final Model
model.save('model.h5')
print("✅ Model Saved as 'model.h5'")


### 📊 Step 4: Evaluate & Download

In [None]:
# Summarise the run recorded in `history` (created by the training cell),
# then send the exported model to the browser.
print("\n\n=== FINAL RESULTS ===")

best_val_accuracy = max(history.history['val_accuracy'])
print(f"Best Val Accuracy: {best_val_accuracy:.4f}")

final_train_accuracy = history.history['accuracy'][-1]
print(f"Final Train Accuracy: {final_train_accuracy:.4f}")

print(f"Classes: {CLASSES}")
print("\nDownloading model.h5...")

# Imported here so this cell also works when run on its own in Colab.
from google.colab import files
files.download('model.h5')