In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.utils import class_weight
from tensorflow.keras.applications.efficientnet import preprocess_input
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, precision_recall_curve, recall_score


In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Paths of the original (unsplit) dataset directories
data_dir = r'Dataset'
tb_dir = os.path.join(data_dir, 'Tuberculosis')
normal_dir = os.path.join(data_dir, 'Normal')

# Extensions treated as images; matched case-insensitively so that files such as
# "scan.JPG" or "scan.PNG" are not silently left out of the dataset.
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')


def list_images(directory):
    """Return the paths of the image files directly inside ``directory``, sorted by name.

    ``os.listdir`` returns entries in arbitrary order and the split below depends
    on the order of its input, so the listing is sorted to make the
    ``random_state``-seeded split repeatable across runs and machines.
    """
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith(IMAGE_EXTENSIONS)
    ]


# Get all file paths
tb_files = list_images(tb_dir)
normal_files = list_images(normal_dir)

# Create labels: 1 = Tuberculosis, 0 = Normal
tb_labels = [1] * len(tb_files)
normal_labels = [0] * len(normal_files)

# Combine
all_files = tb_files + normal_files
all_labels = tb_labels + normal_labels

# Split into train (70%), validation (15%), test (15%), stratified by label
train_files, temp_files, train_labels, temp_labels = train_test_split(
    all_files, all_labels, test_size=0.3, random_state=42, stratify=all_labels
)

# The held-out 30% is halved into the validation and test sets
val_files, test_files, val_labels, test_labels = train_test_split(
    temp_files, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels
)

# Create folder structure
# The three split roots live inside the dataset directory itself
train_dir, val_dir, test_dir = (
    os.path.join(data_dir, split_name)
    for split_name in ('Train', 'Validation', 'Test')
)

# Every split gets one sub-folder per class; folders that already exist are reused
for split_dir in (train_dir, val_dir, test_dir):
    for class_folder in ('Tuberculosis', 'Normal'):
        os.makedirs(os.path.join(split_dir, class_folder), exist_ok=True)

# Copy files to respective folders
def copy_files(file_list, label_list, destination_dir):
    """Copy each file into the class sub-folder of ``destination_dir``.

    Files and labels are paired positionally. A label equal to ``1`` selects the
    ``Tuberculosis`` sub-folder; any other label selects ``Normal``. Each file
    keeps its base name and is copied with ``shutil.copy2``; the class
    sub-folders are not created here.
    """
    for source_path, label in zip(file_list, label_list):
        if label == 1:
            target_folder = 'Tuberculosis'
        else:
            target_folder = 'Normal'
        file_name = os.path.basename(source_path)
        shutil.copy2(source_path, os.path.join(destination_dir, target_folder, file_name))

# Fill the three split folders in train -> validation -> test order,
# then report how many images were assigned to each split.
for split_files, split_labels, split_root in (
    (train_files, train_labels, train_dir),
    (val_files, val_labels, val_dir),
    (test_files, test_labels, test_dir),
):
    copy_files(split_files, split_labels, split_root)

print(f"✓ Created Train folder with {len(train_files)} images")
print(f"✓ Created Validation folder with {len(val_files)} images")
print(f"✓ Created Test folder with {len(test_files)} images")

✓ Created Train folder with 980 images
✓ Created Validation folder with 210 images
✓ Created Test folder with 210 images


In [None]:
# Data set for model
# Locations of the pre-split image folders read by the directory iterators
train_dir, val_dir, test_dir = (
    r'Dataset/Train',
    r'Dataset/Validation',
    r'Dataset/Test',
)

In [123]:
# Image settings
# Number of images per mini-batch produced by each generator
batch_size = 32
# Size every image is resized to when loaded (matches the 224x224x3 backbone input)
image_size = (224, 224)

In [124]:
# Data generators
# Random augmentations applied to training images only
augmentation_options = dict(
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.3,
    horizontal_flip=True,
    brightness_range=[0.7, 1.3],
    fill_mode='nearest',
)

# Both generators run the EfficientNet preprocessing function; only the
# training generator adds augmentation on top of it.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    **augmentation_options,
)
val_test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [125]:
# Options shared by all three directory iterators
flow_options = dict(
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary',
)

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

val_generator = val_test_datagen.flow_from_directory(val_dir, **flow_options)

# Shuffling is switched off for the test split: later cells compare the
# predictions against test_generator.classes position by position.
test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    **flow_options,
)

Found 980 images belonging to 2 classes.
Found 210 images belonging to 2 classes.
Found 210 images belonging to 2 classes.


In [126]:
# Compute class weights
# 'balanced' weights are derived from the training labels; each weight is then
# keyed by its position in the sorted array of unique labels.
train_label_array = train_generator.classes
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_label_array),
    y=train_label_array,
)
class_weights = {index: weight for index, weight in enumerate(balanced_weights)}

In [127]:
# Build model: EfficientNetB0
# ImageNet-initialised backbone without its original classification top
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
# Freeze base
base_model.trainable = False

In [128]:
# NOTE(review): the heading of this cell used to read "After initial fit and
# before fine-tuning", but in the notebook it sits before the model is assembled,
# compiled and fitted for the first time. Run top to bottom, it therefore undoes
# the freeze applied right after the backbone was created, and the first training
# run already updates every backbone layer from index 100 onwards.
# Confirm whether that is intended or whether this cell belongs after the first fit.
base_model.trainable = True
# Keep only the first 100 backbone layers frozen (100 is an experimental choice)
for layer in base_model.layers[:100]:
    layer.trainable = False

In [129]:
# Classification head stacked on the backbone's output feature map
backbone_features = base_model.output
pooled = GlobalAveragePooling2D()(backbone_features)
hidden = Dense(128, activation='relu')(pooled)
regularized = Dropout(0.5)(hidden)
# A single sigmoid unit: one score in [0, 1] per image
output = Dense(1, activation='sigmoid')(regularized)

In [130]:
# Assemble the end-to-end network: backbone input tensor -> sigmoid output of the head
model = Model(inputs=base_model.input, outputs=output)


In [131]:
# First-stage training setup: Adam at 1e-4, binary cross-entropy, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(1e-4),
    metrics=['accuracy'],
)


In [136]:
# Callbacks
# Both callbacks watch the validation loss: the learning rate is halved after
# 5 epochs without improvement (never below 1e-8), and training stops after 15,
# restoring the best weights seen.
monitored_metric = 'val_loss'
early_stopping = EarlyStopping(
    monitor=monitored_metric,
    patience=15,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor=monitored_metric,
    factor=0.5,
    patience=5,
    min_lr=1e-8,
)

In [137]:
# Training
# Step counts are taken from the generators' lengths, with a floor of one step
steps_per_epoch = max(len(train_generator), 1)
validation_steps = max(len(val_generator), 1)

# First training stage: up to 50 epochs, class-weighted, with both callbacks active
history = model.fit(
    train_generator,
    epochs=50,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator,
    validation_steps=validation_steps,
    class_weight=class_weights,
    callbacks=[early_stopping, reduce_lr],
)

Epoch 1/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 4s/step - accuracy: 0.7786 - loss: 0.4888 - val_accuracy: 0.7286 - val_loss: 0.5048 - learning_rate: 1.0000e-04
Epoch 2/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 3s/step - accuracy: 0.9316 - loss: 0.2277 - val_accuracy: 0.8476 - val_loss: 0.3025 - learning_rate: 1.0000e-04
Epoch 3/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 3s/step - accuracy: 0.9643 - loss: 0.1202 - val_accuracy: 0.9095 - val_loss: 0.2027 - learning_rate: 1.0000e-04
Epoch 4/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 3s/step - accuracy: 0.9673 - loss: 0.0943 - val_accuracy: 0.9524 - val_loss: 0.1299 - learning_rate: 1.0000e-04
Epoch 5/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 3s/step - accuracy: 0.9806 - loss: 0.0669 - val_accuracy: 0.9238 - val_loss: 0.2245 - learning_rate: 1.0000e-04
Epoch 6/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [138]:
# Fine-tuning
# Only the last 20 backbone layers are unfrozen; everything before them stays fixed.
base_model.trainable = True
for layer in base_model.layers[:-20]:
    layer.trainable = False

# BatchNormalization layers are kept frozen even inside the unfrozen tail.
# The Keras EfficientNet fine-tuning guide recommends this because making them
# trainable disturbs the statistics the pretrained weights rely on.
for layer in base_model.layers[-20:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

# Re-compile after changing the trainable flags, with a 10x lower learning rate
# than the first stage.
model.compile(optimizer=Adam(1e-5), loss='binary_crossentropy', metrics=['accuracy'])

In [139]:
# Second training stage: same generators, step counts, callbacks and class
# weights as the first fit, capped at 30 epochs.
fine_tune_history = model.fit(
    train_generator,
    epochs=30,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator,
    validation_steps=validation_steps,
    class_weight=class_weights,
    callbacks=[early_stopping, reduce_lr],
)

Epoch 1/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 651ms/step - accuracy: 0.9939 - loss: 0.0209 - val_accuracy: 0.9810 - val_loss: 0.0484 - learning_rate: 1.0000e-05
Epoch 2/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 578ms/step - accuracy: 0.9959 - loss: 0.0189 - val_accuracy: 0.9810 - val_loss: 0.0486 - learning_rate: 1.0000e-05
Epoch 3/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 614ms/step - accuracy: 0.9949 - loss: 0.0195 - val_accuracy: 0.9810 - val_loss: 0.0483 - learning_rate: 1.0000e-05
Epoch 4/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 618ms/step - accuracy: 0.9939 - loss: 0.0173 - val_accuracy: 0.9810 - val_loss: 0.0497 - learning_rate: 1.0000e-05
Epoch 5/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 619ms/step - accuracy: 0.9959 - loss: 0.0184 - val_accuracy: 0.9762 - val_loss: 0.0519 - learning_rate: 1.0000e-05
Epoch 6/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━

In [140]:
# Save
# The model is written to one file in the current working directory
saved_model_path = "new_efficientnet_model_fixed.h5"
model.save(saved_model_path)



In [141]:
# Evaluate
# Run over every test batch; the result unpacks into the loss and the accuracy metric
evaluation = model.evaluate(test_generator, steps=len(test_generator))
test_loss, test_acc = evaluation
print(f"Test Accuracy: {test_acc:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 698ms/step - accuracy: 0.9714 - loss: 0.0721
Test Accuracy: 0.9714


In [None]:
# Predictions
# Ground-truth labels as stored on the (unshuffled) test generator
y_true = test_generator.classes
# Model scores for the whole test generator
y_pred_prob = model.predict(test_generator)
# Hard labels: 1 where the score exceeds 0.5, otherwise 0
above_half = y_pred_prob > 0.5
y_pred = above_half.astype("int32")

In [None]:
def tta_predict(model, img, tta_steps=5, datagen=None):
    """Average the model's score over randomly augmented copies of one image.

    Args:
        model: Object with a Keras-style ``predict(batch, verbose=0)`` method whose
            result has one row per input image, with the score in column 0.
        img: A single image array without a batch axis.
        tta_steps: Number of augmented copies to average; must be at least 1.
        datagen: Object providing ``random_transform(img)``. When omitted, the
            notebook-level ``train_datagen`` is used, as before.

    Returns:
        The mean of the ``tta_steps`` scores.

    Raises:
        ValueError: If ``tta_steps`` is smaller than 1 (averaging zero scores
            would otherwise yield NaN).
    """
    if tta_steps < 1:
        raise ValueError("tta_steps must be at least 1")
    if datagen is None:
        datagen = train_datagen

    # Build all augmented copies first so the model is invoked once per image
    # instead of once per copy; verbose=0 keeps the per-call progress bar from
    # flooding the cell output.
    augmented_batch = np.stack(
        [datagen.random_transform(img) for _ in range(tta_steps)]
    )
    scores = np.asarray(model.predict(augmented_batch, verbose=0))
    return np.mean(scores[:, 0])

# Run TTA predictions
# Walk the test batches in order and score every image individually with TTA
tta_preds = [
    tta_predict(model, image)
    for batch_index in range(len(test_generator))
    for image in test_generator[batch_index][0]
]

# From here on y_pred_prob holds the TTA-averaged scores (it replaces the
# array produced by the plain predict call).
y_pred_prob = np.array(tta_preds)
y_pred_classes = (y_pred_prob > 0.5).astype("int32")

In [None]:
def focal_loss(alpha=0.25, gamma=2.0):
    """Build a binary focal-loss function suitable for ``model.compile``.

    Args:
        alpha: Weight applied where the target is 1; targets of 0 get ``1 - alpha``.
        gamma: Exponent of the ``(1 - p_t)`` modulating factor.

    Returns:
        A ``loss(y_true, y_pred)`` callable that returns the mean of the
        alpha-weighted, modulated binary cross-entropy.
    """
    K = tf.keras.backend

    def loss(y_true, y_pred):
        targets = tf.cast(y_true, tf.float32)
        negatives = 1 - targets
        # Score the model assigns to the true class of each sample
        prob_true_class = targets * y_pred + negatives * (1 - y_pred)
        class_weighting = targets * alpha + negatives * (1 - alpha)
        focusing = K.pow((1 - prob_true_class), gamma)
        cross_entropy = K.binary_crossentropy(targets, y_pred)
        return K.mean(class_weighting * focusing * cross_entropy)

    return loss
# NOTE(review): this recompiles the already-trained model with focal loss, but no
# fit() call follows in the visible notebook, so the numbers reported below still
# come from the weights trained with binary cross-entropy.
model.compile(
    optimizer=Adam(1e-5),          # lower LR for fine-tuning
    loss=focal_loss(alpha=0.25, gamma=2.0),
    metrics=['accuracy']
)

# Derive the hard labels from the same scores that feed the AUC, so all three
# metrics describe one set of predictions. Previously the report and confusion
# matrix used `y_pred` from the plain forward pass while the AUC used
# `y_pred_prob`, which the TTA cell overwrites.
y_prob_report = np.asarray(y_pred_prob).ravel()
y_pred_report = (y_prob_report > 0.5).astype("int32")
print("\nClassification Report:\n", classification_report(y_true, y_pred_report))
print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred_report))
print("AUC Score:", roc_auc_score(y_true, y_prob_report))

In [None]:
# Class names in label-index order, taken from the generator's own
# folder-name -> index mapping so the tick labels match this dataset.
# (The labels hard-coded here before, 'Benign'/'Malignant', did not correspond
# to the Normal/Tuberculosis folders this notebook trains on.)
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
# Passing the label list explicitly keeps one matrix row/column per class name,
# even if a class never shows up in the predictions.
cm = confusion_matrix(y_true, y_pred_classes, labels=list(range(len(class_names))))
# Plot heatmap
plt.figure(figsize=(6, 5))
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix Heatmap')
plt.colorbar()

tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)

# Annotate each cell; white text on dark cells, black on light ones
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, format(cm[i, j], 'd'),
                 ha='center', va='center',
                 color='white' if cm[i, j] > thresh else 'black')

plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.show()

In [None]:
# Sweep decision thresholds and keep the one giving the best recall.
# NOTE(review): the sweep runs on the test labels, so the chosen threshold is
# tuned on the data used for the final report - confirm this is acceptable.
best_thr, best_rec = 0.5, 0
sweep_scores = np.asarray(y_pred_prob).ravel()
# Integer steps avoid the float-step arange pitfall, where rounding can add an
# extra element at the stop value: this yields exactly 0.20, 0.25, ..., 0.75.
for t in np.arange(20, 80, 5) / 100.0:
    preds = (sweep_scores > t).astype(int)
    rec = recall_score(test_generator.classes, preds)
    # Recall cannot increase as the threshold rises, so a strict ">" would always
    # keep the lowest threshold. ">=" keeps the highest threshold that still
    # reaches the best recall, i.e. the one making the fewest positive calls.
    if rec >= best_rec:
        best_rec, best_thr = rec, t
print(f"\nBest recall {best_rec:.3f} at threshold {best_thr:.2f}")

In [None]:
# Print the training generator's class_indices mapping (class folder name ->
# integer label) to check which class is encoded as 1.
print(train_generator.class_indices)


In [None]:
# ROC Curve
# The third value returned by roc_curve (the thresholds) is not used
fpr, tpr, _ = roc_curve(y_true, y_pred_prob)
plt.figure(figsize=(6, 4))
# The legend entry carries the AUC rounded to two decimals
plt.plot(fpr, tpr, label=f"AUC = {roc_auc_score(y_true, y_pred_prob):.2f}")
plt.title("ROC Curve")
plt.ylabel("True Positive Rate")
plt.xlabel("False Positive Rate")
plt.legend()
plt.show()

In [None]:
# Optional: Threshold tuning
# Recall is computed for every threshold first, then printed as a small table
print("\nRecall at different thresholds:")
thresholds = np.arange(0.3, 0.7, 0.05)
recalls = [
    recall_score(y_true, (y_pred_prob > t).astype("int32"))
    for t in thresholds
]
for t, recall in zip(thresholds, recalls):
    print(f"Threshold={t:.2f} => Recall: {recall:.3f}")

In [None]:
import matplotlib.pyplot as plt

# Combine accuracy and loss values
def _both_stages(metric):
    """Return one metric's per-epoch values from the first fit followed by the fine-tuning fit."""
    return history.history[metric] + fine_tune_history.history[metric]


acc = _both_stages('accuracy')
val_acc = _both_stages('val_accuracy')
loss = _both_stages('loss')
val_loss = _both_stages('val_loss')

# Plot Accuracy
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
for series, series_label in ((acc, 'Train Accuracy'), (val_acc, 'Val Accuracy')):
    plt.plot(series, label=series_label)
# Dashed line at the last epoch of the first training stage
plt.axvline(x=len(history.history['accuracy']) - 1, color='gray', linestyle='--', label='Fine-tuning Start')
plt.title('Training and Validation Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend()

In [None]:
# Plot Loss
# This cell opens its own figure instead of addressing panel 2 of a 1x2 grid.
# With Jupyter's inline backend (default settings) a figure is rendered and
# closed when its cell finishes, so the grid started in the accuracy cell is no
# longer current here and the loss curves would be squeezed into the right half
# of a new, otherwise empty figure.
plt.figure(figsize=(6, 6))
plt.plot(loss, label='Train Loss')
plt.plot(val_loss, label='Val Loss')
# Dashed line at the last epoch of the first training stage
plt.axvline(x=len(history.history['loss']) - 1, color='gray', linestyle='--', label='Fine-tuning Start')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()