In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import seaborn as sns

# -------------------------------
# PARAMETERS
# -------------------------------
# Square input resolution, mini-batch size, number of output classes and
# the maximum number of training epochs.
img_height = img_width = 224
batch_size = 32
num_classes = 2  # Binary classification
epochs = 20

# -------------------------------
# DEFINE DIRECTORIES FOR ALL CANCER TYPES
# -------------------------------
# All class folders sit below one dataset root. The Windows separator is
# written out explicitly so the resulting strings are the same on any host.
_data_root = r"E:\LY Project\Multi Cancer\Data"

_malignant_folders = (
    "all_early",
    "all_pre",
    "all_pro",
    "breast_malignant",
    "colon_aca",
    "lung_aca",
    "lung_scc",
    "oral_scc",
)

_benign_folders = (
    "all_benign",
    "breast_benign",
    "colon_bnt",
    "lung_bnt",
    "oral_normal",
)

malignant_dirs = [
    "\\".join((_data_root, "Malignant", folder)) for folder in _malignant_folders
]

benign_dirs = [
    "\\".join((_data_root, "Benign", folder)) for folder in _benign_folders
]

# -------------------------------
# LOAD DATA FROM MULTIPLE DIRECTORIES
# -------------------------------
print("Loading data from multiple directories...")
print("=" * 60)

def load_images_from_directories(directories, label):
    """Collect image file paths from several directories under one label.

    Each entry of ``directories`` is scanned non-recursively. Entries that
    are not existing directories are reported with a warning and skipped.
    For every scanned directory the folder name and image count are printed.

    Args:
        directories: Iterable of directory paths to scan.
        label: Value assigned to every image found (repeated once per file).

    Returns:
        A ``(image_paths, labels)`` tuple of two equally long lists. Paths
        are ordered by directory (in the given order) and, within each
        directory, sorted by file name.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif')
    image_paths = []
    labels = []

    for directory in directories:
        # isdir (not exists): a plain file would make os.listdir raise.
        if not os.path.isdir(directory):
            print(f"⚠ Warning: Directory not found - {directory}")
            continue

        # os.listdir order is arbitrary; sorting keeps the file order (and
        # therefore any seeded shuffle/split built on it) reproducible.
        files = [
            os.path.join(directory, entry)
            for entry in sorted(os.listdir(directory))
            if entry.lower().endswith(image_extensions)
            and os.path.isfile(os.path.join(directory, entry))
        ]

        # normpath drops a trailing separator, which would otherwise make
        # basename return an empty string.
        folder_name = os.path.basename(os.path.normpath(directory))
        print(f"  {folder_name}: {len(files)} images")

        image_paths.extend(files)
        labels.extend([label] * len(files))

    return image_paths, labels

# Load malignant images (label = 1)
print("\nMALIGNANT Cancer Types:")
malignant_paths, malignant_labels = load_images_from_directories(malignant_dirs, label=1)

# Load benign images (label = 0)
print("\nBENIGN Cancer Types:")
benign_paths, benign_labels = load_images_from_directories(benign_dirs, label=0)

# Combine all data (malignant first, then benign; labels stay aligned with paths)
all_image_paths = malignant_paths + benign_paths
all_labels = malignant_labels + benign_labels

# Fail early with an actionable message: with zero images the percentage
# computation below would otherwise die with an opaque ZeroDivisionError.
if not all_image_paths:
    raise FileNotFoundError(
        "No images were found in any of the configured malignant/benign "
        "directories; check the paths in malignant_dirs and benign_dirs."
    )

print(f"\n{'='*60}")
print("DATASET SUMMARY")
print(f"{'='*60}")
print(f"Total images: {len(all_image_paths)}")
print(f"  Malignant: {len(malignant_paths)} ({100*len(malignant_paths)/len(all_image_paths):.1f}%)")
print(f"  Benign: {len(benign_paths)} ({100*len(benign_paths)/len(all_image_paths):.1f}%)")
print(f"{'='*60}\n")

# -------------------------------
# CREATE TF.DATA.DATASET
# -------------------------------
def load_and_preprocess_image(path, label):
    """Read one image file and turn it into a model-ready (image, label) pair.

    Args:
        path: Scalar string tensor holding the image file path.
        label: Integer class index (0 = benign, 1 = malignant in this script).

    Returns:
        A tuple ``(image, label)`` where ``image`` is resized to
        ``(img_height, img_width)`` with 3 channels and scaled to [0, 1],
        and ``label`` is one-hot encoded with ``num_classes`` entries.
    """
    image_bytes = tf.io.read_file(path)
    # decode_image detects the format itself (BMP, GIF, JPEG, PNG), whereas
    # decode_jpeg rejects the .bmp files that the directory scan accepts.
    # expand_animations=False keeps the result 3-D so resize() can be applied.
    # NOTE(review): TensorFlow's built-in decoders do not appear to cover
    # TIFF, so .tif/.tiff inputs will presumably still fail here - confirm.
    image = tf.io.decode_image(image_bytes, channels=3, expand_animations=False)
    image = tf.image.resize(image, [img_height, img_width])
    # NOTE(review): plain [0, 1] scaling; the ImageNet-pretrained DenseNet
    # presumably expects keras.applications.densenet.preprocess_input - confirm.
    image = image / 255.0  # Normalize to [0, 1]
    label = tf.one_hot(label, depth=num_classes)
    return image, label

# Create dataset of (path, label) pairs
AUTOTUNE = tf.data.AUTOTUNE
path_ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_labels))
# Shuffle exactly once. By default shuffle() reshuffles on every iteration,
# so take()/skip() below would carve out a DIFFERENT train/validation split
# each epoch (and again at evaluation time), leaking training images into
# the validation metrics. Freezing the order makes the split stable.
path_ds = path_ds.shuffle(
    buffer_size=len(all_image_paths),
    seed=42,
    reshuffle_each_iteration=False,
)

# Split into train and validation (80-20 split)
train_size = int(0.8 * len(all_image_paths))
val_size = len(all_image_paths) - train_size

train_ds = path_ds.take(train_size)
val_ds = path_ds.skip(train_size)

print(f"Training samples: {train_size}")
print(f"Validation samples: {val_size}\n")

# Re-shuffle only the training paths every epoch so batches still vary
# between epochs; this happens before decoding, so the buffer holds strings.
# max(..., 1) because shuffle() requires a positive buffer size.
train_ds = train_ds.shuffle(buffer_size=max(train_size, 1))

# Map preprocessing and batch
train_ds = train_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
train_ds = train_ds.batch(batch_size)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

val_ds = val_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.batch(batch_size)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

# -------------------------------
# LOAD DENSENET201 BASE MODEL AND FREEZE LAYERS
# -------------------------------
print("Loading DenseNet201 pre-trained base model...")
base_model = DenseNet201(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
)

# Mark every layer of the pre-trained backbone as non-trainable so that
# only the classifier added below receives gradient updates.
frozen_count = 0
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False
    frozen_count += 1
print(f"Frozen {frozen_count} layers of DenseNet201 base.")

# -------------------------------
# ADD CLASSIFIER ON TOP
# -------------------------------
# Hidden part of the head: pooling followed by two Dense+Dropout stages.
# Layers are instantiated in the order they are applied.
hidden_head = (
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.3),
)

x = base_model.output
for head_layer in hidden_head:
    x = head_layer(x)

# Softmax output with one unit per class.
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

print("\nModel Architecture Summary:")
model.summary()

# -------------------------------
# COMPILE MODEL
# -------------------------------
print("\nCompiling model...")
optimizer = Adam(learning_rate=1e-4)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# -------------------------------
# CALLBACKS
# -------------------------------
checkpoint_filepath = 'best_densenet201_cancer_model.weights.h5'

# Keep only the weights of the epoch with the best validation accuracy.
checkpoint = keras.callbacks.ModelCheckpoint(
    checkpoint_filepath,
    monitor='val_accuracy',
    save_weights_only=True,
    save_best_only=True,
    verbose=1,
)

# Stop after 5 epochs without validation-loss improvement and roll the
# model back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    restore_best_weights=True,
)

# Halve the learning rate after 3 stagnant epochs, never below 1e-7.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.5,
    min_lr=1e-7,
    verbose=1,
)

# -------------------------------
# TRAIN MODEL
# -------------------------------
print(f"\nTraining for {epochs} epochs...")
print("=" * 60)

# Fit on the training dataset, validating after each epoch; the callbacks
# handle checkpointing, early stopping and learning-rate reduction.
training_callbacks = [checkpoint, early_stopping, reduce_lr]
history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=val_ds,
    callbacks=training_callbacks,
    verbose=1,
)

for message in (
    "\n" + "=" * 60,
    "Training completed.",
    f"Best model saved at {checkpoint_filepath}",
    "=" * 60,
):
    print(message)

# -------------------------------
# PLOT TRAINING HISTORY
# -------------------------------
print("\nPlotting training history...")
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# One panel per metric: accuracy on the left, loss on the right. Each panel
# shows the training curve and the matching "val_"-prefixed validation curve.
for axis, metric_key, metric_title in zip(axes, ('accuracy', 'loss'), ('Accuracy', 'Loss')):
    axis.plot(history.history[metric_key], label=f'Training {metric_title}', marker='o')
    axis.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_title}', marker='s')
    axis.set_title(f'Model {metric_title} Over Epochs')
    axis.set_xlabel('Epoch')
    axis.set_ylabel(metric_title)
    axis.legend()
    axis.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('densenet201_training_history.png', dpi=300, bbox_inches='tight')
plt.show()

# -------------------------------
# LOAD BEST MODEL FROM CHECKPOINT
# -------------------------------
print("\nLoading best model for evaluation...")
# Restore the weights written by the ModelCheckpoint callback.
model.load_weights(checkpoint_filepath)

# -------------------------------
# EVALUATE ON VALIDATION SET
# -------------------------------
print("\nEvaluating model on validation set...")
# evaluate() returns the loss followed by the compiled metric (accuracy).
val_loss, val_accuracy = model.evaluate(val_ds)
for metric_name, metric_value in (("Loss", val_loss), ("Accuracy", val_accuracy)):
    print(f"Validation {metric_name}: {metric_value:.4f}")

# -------------------------------
# PREDICTIONS AND CONFUSION MATRIX
# -------------------------------
print("\nPredicting classes on validation data...")

# Collect predictions and true labels batch by batch, so probabilities and
# labels always come from the same pass over the dataset and stay aligned.
batch_probs = []
batch_true = []

for images, labels in val_ds:
    # predict_on_batch runs a single forward pass on the given batch;
    # calling predict() inside a Python loop sets up a fresh prediction
    # pipeline on every iteration and is much slower.
    batch_probs.append(model.predict_on_batch(images))
    batch_true.append(np.argmax(labels.numpy(), axis=1))

# Handle an empty validation set without crashing in argmax(axis=1).
if batch_probs:
    y_pred_probs = np.concatenate(batch_probs, axis=0)
    y_true = np.concatenate(batch_true, axis=0)
else:
    y_pred_probs = np.empty((0, num_classes), dtype=np.float32)
    y_true = np.empty((0,), dtype=np.int64)
y_pred = np.argmax(y_pred_probs, axis=1)

# Class labels (index 0 = Benign, index 1 = Malignant)
class_labels = ['Benign', 'Malignant']

# Compute confusion matrix. labels=[0, 1] pins the matrix to 2x2 even when
# one class never occurs in y_true/y_pred, so the rows/columns always match
# class_labels and later code can unpack tn, fp, fn, tp.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

# Display confusion matrix as heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    cbar_kws={'label': 'Count'}
)
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.title('Confusion Matrix - DenseNet201 Binary Classification\n(Benign vs Malignant)',
          fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('densenet201_confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

# -------------------------------
# CLASSIFICATION REPORT
# -------------------------------
print("\n" + "="*60)
print("CLASSIFICATION REPORT - DENSENET201")
print("="*60)
# labels=[0, 1] keeps the report aligned with class_labels; without it the
# call raises when only one class shows up in y_true/y_pred, because the
# number of detected classes no longer matches target_names.
print(classification_report(
    y_true, y_pred, labels=[0, 1], target_names=class_labels, digits=4
))

# -------------------------------
# ADDITIONAL METRICS
# -------------------------------
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support, accuracy_score

# Column 1 of the softmax output is the score for the positive class (Malignant)
y_pred_probs_positive = y_pred_probs[:, 1]

# ROC operating points and the area under the resulting curve
fpr, tpr, thresholds = roc_curve(y_true, y_pred_probs_positive)
roc_auc = auc(fpr, tpr)

# Draw the ROC curve against the diagonal of a random classifier
roc_fig = plt.figure(figsize=(10, 8))
roc_ax = roc_fig.add_subplot(111)
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.4f})')
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Classifier')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate', fontsize=12)
roc_ax.set_ylabel('True Positive Rate', fontsize=12)
roc_ax.set_title(
    'Receiver Operating Characteristic (ROC) Curve - DenseNet201',
    fontsize=14,
    fontweight='bold',
)
roc_ax.legend(loc="lower right")
roc_ax.grid(alpha=0.3)
roc_fig.tight_layout()
roc_fig.savefig('densenet201_roc_curve.png', dpi=300, bbox_inches='tight')
plt.show()

# -------------------------------
# DETAILED METRICS SUMMARY
# -------------------------------
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1, support = precision_recall_fscore_support(
    y_true, y_pred, average=None, labels=[0, 1]
)

# Calculate clinical metrics.
# The 2x2 matrix is recomputed with explicit labels so the four-way unpack
# cannot fail when a class is missing from y_true/y_pred (a matrix built
# without labels would then be 1x1). Row/column 0 = Benign, 1 = Malignant.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
# Guard the ratios: with no malignant (or no benign) samples the denominator
# is zero and the rate is undefined, reported here as NaN.
sensitivity = tp / (tp + fn) if (tp + fn) else float('nan')  # Recall for Malignant
specificity = tn / (tn + fp) if (tn + fp) else float('nan')  # Recall for Benign

print("\n" + "="*60)
print("DETAILED PERFORMANCE METRICS")
print("="*60)
print(f"\nOverall Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
print(f"ROC-AUC Score: {roc_auc:.4f}")

print("\nPer-Class Metrics:")
print("  Benign:")
print(f"    Precision: {precision[0]:.4f}")
print(f"    Recall (Specificity): {recall[0]:.4f}")
print(f"    F1-Score: {f1[0]:.4f}")
print(f"    Support: {support[0]}")

print("\n  Malignant:")
print(f"    Precision: {precision[1]:.4f}")
print(f"    Recall (Sensitivity): {recall[1]:.4f}")
print(f"    F1-Score: {f1[1]:.4f}")
print(f"    Support: {support[1]}")

print("\nClinical Metrics:")
print(f"  Sensitivity (Malignant Detection Rate): {sensitivity:.4f} ({sensitivity*100:.2f}%)")
print(f"  Specificity (Benign Detection Rate): {specificity:.4f} ({specificity*100:.2f}%)")

print("\nConfusion Matrix Breakdown:")
print(f"  True Negatives (Benign correctly identified): {tn}")
print(f"  False Positives (Benign misclassified as Malignant): {fp}")
print(f"  False Negatives (Malignant misclassified as Benign): {fn}")
print(f"  True Positives (Malignant correctly identified): {tp}")

print("\n" + "="*60)
print("DENSENET201 MODEL EVALUATION COMPLETED!")
print("="*60)

# Save results to file.
# The report is built first so a failure there does not leave a half-written
# file behind. labels=[0, 1] keeps it aligned with class_labels even when
# only one class is present in y_true/y_pred (otherwise the call raises).
report_text = classification_report(
    y_true, y_pred, labels=[0, 1], target_names=class_labels, digits=4
)

# Explicit encoding so the output does not depend on the platform default.
with open('densenet201_evaluation_results.txt', 'w', encoding='utf-8') as f:
    f.write("="*60 + "\n")
    f.write("DENSENET201 BINARY CANCER CLASSIFICATION RESULTS\n")
    f.write("="*60 + "\n\n")
    f.write(f"Total Training Images: {train_size}\n")
    f.write(f"Total Validation Images: {val_size}\n")
    f.write(f"Malignant Images: {len(malignant_paths)}\n")
    f.write(f"Benign Images: {len(benign_paths)}\n\n")
    f.write(f"Overall Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)\n")
    f.write(f"ROC-AUC Score: {roc_auc:.4f}\n")
    f.write(f"Sensitivity: {sensitivity:.4f}\n")
    f.write(f"Specificity: {specificity:.4f}\n\n")
    f.write("="*60 + "\n")
    f.write("CLASSIFICATION REPORT\n")
    f.write("="*60 + "\n")
    f.write(report_text)

print("\nResults saved to 'densenet201_evaluation_results.txt'")


Loading training data without augmentation (only rescaling)...
Found 52002 images belonging to 2 classes.
Found 13000 images belonging to 2 classes.
Loading DenseNet201 pre-trained base model...
Frozen 707 layers of DenseNet201 base.

Model Architecture Summary:



Compiling model...

Training for 20 epochs...
Epoch 1/20
391/1626 ━━━━━━━━━━━━━━━━━━━━ 18:19 890ms/step - accuracy: 0.7696 - loss: 0.4575

KeyboardInterrupt: 