In [None]:
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras import layers, models
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import ResNet50, EfficientNetB3, InceptionV3, DenseNet121, VGG16
import matplotlib.pyplot as plt

# === Step 1: Dataset ===
train_dir = r"C:\Users\vikra\Lungs_cancer_detection_using_deeplearning\Dataset"
class_mapping = {"normal": 0, "benign": 1, "malignant": 2}

image_paths, labels = [], []
for class_name, label in class_mapping.items():
    folder = os.path.join(train_dir, class_name)
    for fname in os.listdir(folder):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_paths.append(os.path.join(folder, fname))
            labels.append(label)

image_paths, labels = np.array(image_paths), np.array(labels)
print(f"Total Images: {len(image_paths)}")

# === Step 2: Split into Train (85%) and Test (15%) ===
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, labels, test_size=0.15, stratify=labels, random_state=42)

print(f"\nTrain Set: {len(train_paths)} images")
print(f"Test Set: {len(test_paths)} images")

# === Step 3: Image preprocessing ===
IMG_SIZE, BATCH_SIZE = 256, 16
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.05),
    tf.keras.layers.RandomZoom(0.05),
    tf.keras.layers.RandomContrast(0.05),
])

def parse_image(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])
    return image / 255.0, label

def create_dataset(paths, labels, shuffle=False, augment=False):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(paths))
    ds = ds.map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    if augment:
        ds = ds.map(lambda x, y: (data_augmentation(x, training=True), y), num_parallel_calls=tf.data.AUTOTUNE)
    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# === Step 4: Class Weights ===
class_weights = {
    label: len(train_labels) / (3 * np.sum(train_labels == label))
    for label in class_mapping.values()
}

# === Step 5: Callbacks ===
early_stopper = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7)

# === Step 6: Architectures and K-Fold ===
model_architectures = {
    "ResNet50": ResNet50,
    "EfficientNetB3": EfficientNetB3,
    "InceptionV3": InceptionV3,
    "DenseNet121": DenseNet121,
    "VGG16": VGG16
}

k = 3
results = {}

for model_name, ModelClass in model_architectures.items():
    print(f"\n=== {model_name}: 3-Fold Cross-Validation ===")
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    fold_accuracies = []

    best_model = None
    best_val_acc = 0.0

    for fold, (train_idx, val_idx) in enumerate(kf.split(train_paths)):
        print(f"--- Fold {fold+1}/{k} ---")

        tr_paths, tr_labels = train_paths[train_idx], train_labels[train_idx]
        vl_paths, vl_labels = train_paths[val_idx], train_labels[val_idx]

        ds_train = create_dataset(tr_paths, tr_labels, shuffle=True, augment=True)
        ds_val = create_dataset(vl_paths, vl_labels)

        base_model = ModelClass(include_top=False, weights='imagenet', input_shape=(IMG_SIZE, IMG_SIZE, 3))
        base_model.trainable = False
        x = layers.GlobalAveragePooling2D()(base_model.output)
        x = layers.Dense(128, activation='relu')(x)
        x = layers.Dropout(0.3)(x)
        output = layers.Dense(3, activation='softmax')(x)
        model = Model(inputs=base_model.input, outputs=output)

        model.compile(optimizer=tf.keras.optimizers.Adam(0.0003),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        history = model.fit(ds_train, validation_data=ds_val, epochs=30,
                            class_weight=class_weights,
                            callbacks=[early_stopper, lr_reducer],
                            verbose=0)

        val_preds = np.argmax(model.predict(ds_val), axis=1)
        acc = accuracy_score(vl_labels, val_preds)
        fold_accuracies.append(acc)
        print(f"Fold {fold+1} Accuracy: {acc:.4f}")

        if acc > best_val_acc:
            best_model = model
            best_val_acc = acc

    avg_acc = np.mean(fold_accuracies)
    results[model_name] = {
        "fold_accuracies": fold_accuracies,
        "avg_accuracy": avg_acc,
        "best_model": best_model
    }
    print(f"Average CV Accuracy for {model_name}: {avg_acc:.4f}")

# === Step 7: Final Evaluation on Test Set ===
ds_test = create_dataset(test_paths, test_labels)
print("\n=== Final Evaluation on Hold-Out Test Set ===")

for model_name, data in results.items():
    print(f"\n--- {model_name} ---")
    model = data["best_model"]

    test_preds = np.argmax(model.predict(ds_test), axis=1)
    acc = accuracy_score(test_labels, test_preds)
    print(f"Test Accuracy: {acc:.4f}")
    print("Classification Report:")
    print(classification_report(test_labels, test_preds, target_names=class_mapping.keys()))
    print("Confusion Matrix:")
    print(confusion_matrix(test_labels, test_preds))


Total Images: 800

Train Set: 680 images
Test Set: 120 images

=== ResNet50: 3-Fold Cross-Validation ===
--- Fold 1/3 ---
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 2s/step
Fold 1 Accuracy: 0.4802
--- Fold 2/3 ---
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 2s/step
Fold 2 Accuracy: 0.6035
--- Fold 3/3 ---
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 2s/step
Fold 3 Accuracy: 0.6504
Average CV Accuracy for ResNet50: 0.5780

=== EfficientNetB3: 3-Fold Cross-Validation ===
--- Fold 1/3 ---
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 847ms/step
Fold 1 Accuracy: 0.4537
--- Fold 2/3 ---
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 966ms/step
Fold 2 Accuracy: 0.3965
--- Fold 3/3 ---
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 870ms/step
Fold 3 Accuracy: 0.5177
Average CV Accuracy for EfficientNetB3: 0.4560

=== InceptionV3: 3-Fold Cross-Validation ===
--- Fold 1/3 ---
[

In [2]:
# === Step 7: Final Evaluation on Test Set ===
ds_test = create_dataset(test_paths, test_labels)
print("\n=== Final Evaluation on Hold-Out Test Set ===")

for model_name, data in results.items():
    print(f"\n--- {model_name} ---")
    model = data["best_model"]

    test_preds = np.argmax(model.predict(ds_test), axis=1)
    acc = accuracy_score(test_labels, test_preds)
    print(f"Test Accuracy: {acc:.4f}")
    print("Classification Report:")
    print(classification_report(test_labels, test_preds, target_names=class_mapping.keys()))
    print("Confusion Matrix:")
    print(confusion_matrix(test_labels, test_preds))


=== Final Evaluation on Hold-Out Test Set ===

--- ResNet50 ---
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2s/step
Test Accuracy: 0.7083
Classification Report:
              precision    recall  f1-score   support

      normal       0.68      0.76      0.72        45
      benign       0.00      0.00      0.00        15
   malignant       0.73      0.85      0.78        60

    accuracy                           0.71       120
   macro avg       0.47      0.54      0.50       120
weighted avg       0.62      0.71      0.66       120

Confusion Matrix:
[[34  0 11]
 [ 7  0  8]
 [ 9  0 51]]

--- EfficientNetB3 ---


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 955ms/step
Test Accuracy: 0.5000
Classification Report:
              precision    recall  f1-score   support

      normal       0.00      0.00      0.00        45
      benign       0.00      0.00      0.00        15
   malignant       0.50      1.00      0.67        60

    accuracy                           0.50       120
   macro avg       0.17      0.33      0.22       120
weighted avg       0.25      0.50      0.33       120

Confusion Matrix:
[[ 0  0 45]
 [ 0  0 15]
 [ 0  0 60]]

--- InceptionV3 ---


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step
Test Accuracy: 0.8750
Classification Report:
              precision    recall  f1-score   support

      normal       0.79      1.00      0.88        45
      benign       0.75      0.20      0.32        15
   malignant       0.97      0.95      0.96        60

    accuracy                           0.88       120
   macro avg       0.84      0.72      0.72       120
weighted avg       0.87      0.88      0.85       120

Confusion Matrix:
[[45  0  0]
 [10  3  2]
 [ 2  1 57]]

--- DenseNet121 ---
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2s/step
Test Accuracy: 0.8167
Classification Report:
              precision    recall  f1-score   support

      normal       0.78      0.93      0.85        45
      benign       0.31      0.27      0.29        15
   malignant       0.98      0.87      0.92        60

    accuracy                           0.82       120
   macro avg       0.69      0.69      0