In [3]:
import os
import re
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report

# Set image size and batch size
IMG_SIZE = (150, 150)  # Reduced for faster training
BATCH_SIZE = 32
EPOCHS = 10

# Directories
train_base_dir = r"D:\01 STUDY MATERIAL\ai project\mkfold\fold1\train"
test_base_dir = r"D:\01 STUDY MATERIAL\ai project\mkfold\fold1\test"

# Function to extract label (B/M) from filename
def extract_label_from_filename(filename):
    match = re.match(r"SOB_([MB])_.*-(\d+)-(\d+)-(\d+)-(\d+).png", filename)
    if match:
        return 1 if match.group(1) == "M" else 0  # 1 = Malignant, 0 = Benign
    return None

# Load dataset based on magnification
def load_data_for_magnification(mag_factor, dataset_dir):
    image_paths, labels = [], []
    mag_folder = os.path.join(dataset_dir, f"{mag_factor}X")

    for file in os.listdir(mag_folder):
        if file.endswith(".png"):
            full_path = os.path.join(mag_folder, file)
            label = extract_label_from_filename(file)
            if label is not None:
                image_paths.append(full_path)
                labels.append(label)

    return np.array(image_paths), np.array(labels)

# Organize data per magnification
datasets = {}
for mag in [40, 100, 200, 400]:
    train_paths, train_labels = load_data_for_magnification(mag, train_base_dir)
    test_paths, test_labels = load_data_for_magnification(mag, test_base_dir)

    # Compute Class Weights (to handle imbalance)
    class_weights = compute_class_weight('balanced', classes=np.unique(train_labels), y=train_labels)
    class_weight_dict = {i: weight for i, weight in enumerate(class_weights)}
    print(f"Class Weights for {mag}X: {class_weight_dict}")

    datasets[mag] = {
        "train_paths": train_paths, "train_labels": train_labels,
        "test_paths": test_paths, "test_labels": test_labels,
        "class_weights": class_weight_dict
    }

Class Weights for 40X: {0: 4.171171171171171, 1: 0.5680981595092025}
Class Weights for 100X: {0: 4.226495726495727, 1: 0.5670871559633027}
Class Weights for 200X: {0: 3.6654135338345863, 1: 0.578978622327791}
Class Weights for 400X: {0: 3.560483870967742, 1: 0.5816864295125165}


In [4]:
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Custom Data Generator
class ImageDataLoader(Sequence):
    def __init__(self, image_paths, labels, batch_size, augment=False):
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.augment = augment
        self.datagen = ImageDataGenerator(
            rescale=1./255,
            rotation_range=30 if augment else 0,
            width_shift_range=0.2 if augment else 0,
            height_shift_range=0.2 if augment else 0,
            shear_range=0.2 if augment else 0,
            zoom_range=0.2 if augment else 0,
            horizontal_flip=True if augment else False
        )

    def __len__(self):
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, idx):
        batch_paths = self.image_paths[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_labels = self.labels[idx * self.batch_size:(idx + 1) * self.batch_size]

        batch_images = np.array([img_to_array(load_img(path, target_size=IMG_SIZE)) for path in batch_paths]) / 255.0
        return batch_images, np.array(batch_labels)

# Create train and test loaders
def create_generators(mag):
    train_loader = ImageDataLoader(datasets[mag]["train_paths"], datasets[mag]["train_labels"], BATCH_SIZE, augment=True)
    test_loader = ImageDataLoader(datasets[mag]["test_paths"], datasets[mag]["test_labels"], BATCH_SIZE, augment=False)
    return train_loader, test_loader

In [6]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

results = {}

# Enable Mixed Precision for Faster Training
tf.keras.mixed_precision.set_global_policy("mixed_float16")

for mag in [40, 100, 200, 400]:
    print(f"\nTraining & Evaluating for {mag}X magnification...\n")
    
    train_loader, test_loader = create_generators(mag)
    class_weights = datasets[mag]["class_weights"]

    # Load Pretrained Model (ResNet50)
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(150, 150, 3))
    base_model.trainable = False  # Freeze layers

    # Define Model
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(1, activation='sigmoid', dtype='float32')(x)  # Keep output in float32 for stability

    model = Model(inputs=base_model.input, outputs=x)

    # Compile Model
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Train Model
    history = model.fit(train_loader, epochs=EPOCHS, validation_data=test_loader, class_weight=class_weights)

    # Evaluate Model
    y_true = test_loader.labels
    y_pred = model.predict(test_loader)
    y_pred = (y_pred > 0.5).astype(int)

    # Compute Performance Metrics
    report = classification_report(y_true, y_pred, target_names=["Benign", "Malignant"], output_dict=True)
    results[mag] = {
        "accuracy": report["accuracy"],
        "precision": report["Malignant"]["precision"],
        "recall": report["Malignant"]["recall"],
        "f1-score": report["Malignant"]["f1-score"]
    }

    # Save Model
    model.save(f"resnet_model_{mag}X.h5")

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 4060 Laptop GPU, compute capability 8.9

Training & Evaluating for 40X magnification...

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Training & Evaluating for 100X magnification...

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Training & Evaluating for 200X magnification...

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Training & Evaluating for 400X magnification...

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
import pandas as pd

# Convert results to DataFrame
df_results = pd.DataFrame.from_dict(results, orient='index')
print("\nEvaluation Results for Each Magnification:\n")
print(df_results)

# Find the best magnification based on highest accuracy
best_mag = df_results["accuracy"].idxmax()
print(f"\nBest magnification for classification: {best_mag}X")


Evaluation Results for Each Magnification:

     accuracy  precision    recall  f1-score
40   0.837524   0.850394  0.981818  0.911392
100  0.156425   0.000000  0.000000  0.000000
200  0.160985   0.000000  0.000000  0.000000
400  0.851259   0.851259  1.000000  0.919654

Best magnification for classification: 400X
