D:\BreastCancer\preprocessed_BUSI_data

In [2]:
# Import necessary libraries
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50  # Replace VGG16 with ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split


ultrasound_dir = r'D:\BreastCancer\preprocessed_BUSI_data'

# Image size for ResNet50
IMG_HEIGHT, IMG_WIDTH = 224, 224  # ResNet50 expects 224x224 images

# Helper function to load images from a directory
def load_images_from_folder(folder):
    dataset = []
    labels = []
    for label_value, label_name in enumerate(['no', 'yes']):
        label_folder = os.path.join(folder, label_name)
        for image_name in os.listdir(label_folder):
            if image_name.endswith('.png') or image_name.endswith('.jpg'):
                image_path = os.path.join(label_folder, image_name)
                image = tf.keras.preprocessing.image.load_img(image_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
                image_array = tf.keras.preprocessing.image.img_to_array(image)
                dataset.append(image_array)
                labels.append(label_value)
    return np.array(dataset), np.array(labels)


In [4]:

ultrasound_data, ultrasound_labels = load_images_from_folder(ultrasound_dir)
# Split dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(ultrasound_data, ultrasound_labels, test_size=0.2, random_state=0)

# Normalize images
x_train = x_train.astype('float32') / 255.0

x_test = x_test.astype('float32') / 255.0

# One-hot encode labels
y_train = tf.keras.utils.to_categorical(y_train, num_classes=2)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=2)
# Data augmentation
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load pre-trained SqueezeNet
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
squeezenet = models.squeezenet1_1(pretrained=True)

# Freeze all layers (Optional: If doing Transfer Learning)
for param in squeezenet.features.parameters():
    param.requires_grad = False

# Modify the last classifier layer for 2 classes (No Cancer, Cancer)
squeezenet.classifier[1] = nn.Conv2d(512, 2, kernel_size=(1, 1), stride=(1, 1))
squeezenet.num_classes = 2
squeezenet = squeezenet.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(squeezenet.parameters(), lr=0.0001)



In [8]:
# Custom Dataset Class (if using Image Dataset)
class CancerDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label
# Define image transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


In [10]:
train_dataset = CancerDataset(x_train, y_train, transform=transform)
test_dataset = CancerDataset(x_test, y_test, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
num_epochs = 20
train_losses, val_losses = [], []
train_acc, val_acc = [], []

for epoch in range(num_epochs):
    squeezenet.train()
    running_loss, correct, total = 0.0, 0, 0

    # Training Phase
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = squeezenet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        # Compute Training Accuracy
        _, predicted = torch.max(outputs, 1)
        _, true_labels = torch.max(labels, 1)  # Convert one-hot to class index
        correct += (predicted == true_labels).sum().item()
        total += labels.size(0)

    train_loss = running_loss / len(train_loader)
    train_accuracy = correct / total
    train_losses.append(train_loss)
    train_acc.append(train_accuracy)

    # Validation Phase
    squeezenet.eval()
    val_running_loss, val_correct, val_total = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = squeezenet(images)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item()

            # Compute Validation Accuracy
            _, predicted = torch.max(outputs, 1)
            _, true_labels = torch.max(labels, 1)
            val_correct += (predicted == true_labels).sum().item()
            val_total += labels.size(0)

    val_loss = val_running_loss / len(test_loader)
    val_accuracy = val_correct / val_total
    val_losses.append(val_loss)
    val_acc.append(val_accuracy)

    print(f"Epoch {epoch+1}/{num_epochs} | "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f} | "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")
print("Training Done")

Epoch 1/20 | Train Loss: 0.5899, Train Acc: 0.6759 | Val Loss: 0.5403, Val Acc: 0.7110
Epoch 2/20 | Train Loss: 0.4855, Train Acc: 0.7760 | Val Loss: 0.4537, Val Acc: 0.7871
Epoch 3/20 | Train Loss: 0.4242, Train Acc: 0.8055 | Val Loss: 0.4010, Val Acc: 0.8213
Epoch 4/20 | Train Loss: 0.3863, Train Acc: 0.8275 | Val Loss: 0.3689, Val Acc: 0.8289
Epoch 5/20 | Train Loss: 0.3531, Train Acc: 0.8465 | Val Loss: 0.3471, Val Acc: 0.8441
Epoch 6/20 | Train Loss: 0.3333, Train Acc: 0.8551 | Val Loss: 0.3298, Val Acc: 0.8631
Epoch 7/20 | Train Loss: 0.3198, Train Acc: 0.8732 | Val Loss: 0.3170, Val Acc: 0.8631
Epoch 8/20 | Train Loss: 0.3007, Train Acc: 0.8856 | Val Loss: 0.3085, Val Acc: 0.8631
Epoch 9/20 | Train Loss: 0.2948, Train Acc: 0.8732 | Val Loss: 0.2988, Val Acc: 0.8745
Epoch 10/20 | Train Loss: 0.2897, Train Acc: 0.8866 | Val Loss: 0.2921, Val Acc: 0.8707
Epoch 11/20 | Train Loss: 0.2796, Train Acc: 0.8875 | Val Loss: 0.2858, Val Acc: 0.8783
Epoch 12/20 | Train Loss: 0.2677, Train A

In [11]:
import random
from copy import deepcopy

class DingoOptimizer:
    def __init__(self, model_class, param_space, population_size=5, iterations=10, device='cpu'):
        self.model_class = model_class
        self.param_space = param_space
        self.population_size = population_size
        self.iterations = iterations
        self.device = device
        self.best_params = None
        self.best_model = None

    def sample_params(self):
        return {k: random.choice(v) for k, v in self.param_space.items()}

    def optimize(self, train_loader, val_loader):
        best_loss = float('inf')

        for i in range(self.iterations):
            print(f"\n--- Iteration {i+1}/{self.iterations} ---")
            candidates = [self.sample_params() for _ in range(self.population_size)]

            for params in candidates:
                model = self.model_class().to(self.device)
                model.classifier[1] = nn.Conv2d(512, 2, kernel_size=(1, 1), stride=(1, 1))
                model.num_classes = 2

                optimizer = optim.Adam(model.parameters(), lr=params['lr'])
                criterion = nn.CrossEntropyLoss()

                val_loss = self.train_and_evaluate(model, optimizer, criterion, train_loader, val_loader, params['epochs'])

                print(f"Params: {params}, Val Loss: {val_loss:.4f}")

                if val_loss < best_loss:
                    best_loss = val_loss
                    self.best_params = params
                    self.best_model = deepcopy(model)

        return self.best_model, self.best_params

    def train_and_evaluate(self, model, optimizer, criterion, train_loader, val_loader, epochs):
        for epoch in range(epochs):
            model.train()
            for images, labels in train_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, torch.argmax(labels, dim=1))
                loss.backward()
                optimizer.step()

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = model(images)
                loss = criterion(outputs, torch.argmax(labels, dim=1))
                val_loss += loss.item()
        return val_loss / len(val_loader)

In [12]:
param_space = {
    'lr': [0.01, 0.001, 0.0001],
    'epochs': [5, 10, 15]
}


In [13]:
# Function to return a new SqueezeNet model
def build_squeezenet():
    model = models.squeezenet1_1(pretrained=True)
    for param in model.features.parameters():
        param.requires_grad = False
    return model

# Instantiate and run Dingo
dingo = DingoOptimizer(
    model_class=build_squeezenet,
    param_space=param_space,
    population_size=4,
    iterations=3,
    device=device
)

best_model, best_params = dingo.optimize(train_loader, test_loader)
print(f"\nBest Hyperparameters: {best_params}")



--- Iteration 1/3 ---
Params: {'lr': 0.0001, 'epochs': 10}, Val Loss: 0.3104
Params: {'lr': 0.001, 'epochs': 15}, Val Loss: 0.2248
Params: {'lr': 0.0001, 'epochs': 5}, Val Loss: 0.3760
Params: {'lr': 0.01, 'epochs': 15}, Val Loss: 0.2193

--- Iteration 2/3 ---
Params: {'lr': 0.001, 'epochs': 15}, Val Loss: 0.2432
Params: {'lr': 0.001, 'epochs': 15}, Val Loss: 0.2222
Params: {'lr': 0.01, 'epochs': 10}, Val Loss: 0.2117
Params: {'lr': 0.0001, 'epochs': 10}, Val Loss: 0.3040

--- Iteration 3/3 ---
Params: {'lr': 0.0001, 'epochs': 10}, Val Loss: 0.2881
Params: {'lr': 0.0001, 'epochs': 10}, Val Loss: 0.2970
Params: {'lr': 0.0001, 'epochs': 15}, Val Loss: 0.2537
Params: {'lr': 0.01, 'epochs': 15}, Val Loss: 0.2125

Best Hyperparameters: {'lr': 0.01, 'epochs': 10}


In [14]:
def build_final_model():
    model = models.squeezenet1_1(pretrained=True)
    for param in model.features.parameters():
        param.requires_grad = False
    model.classifier[1] = nn.Conv2d(512, 2, kernel_size=(1, 1), stride=(1, 1))
    model.num_classes = 2
    return model.to(device)

# Use best parameters
lr = best_params['lr']
epochs = best_params['epochs']


In [15]:
model = build_final_model()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)


In [16]:
train_losses, val_losses = [], []
train_acc, val_acc = [], []

for epoch in range(epochs):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, torch.argmax(labels, dim=1))
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        _, true_labels = torch.max(labels, 1)
        correct += (predicted == true_labels).sum().item()
        total += labels.size(0)

    train_loss = running_loss / len(train_loader)
    train_accuracy = correct / total
    train_losses.append(train_loss)
    train_acc.append(train_accuracy)

    # Validation
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, torch.argmax(labels, dim=1))
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            _, true_labels = torch.max(labels, 1)
            val_correct += (predicted == true_labels).sum().item()
            val_total += labels.size(0)

    val_loss /= len(test_loader)
    val_accuracy = val_correct / val_total
    val_losses.append(val_loss)
    val_acc.append(val_accuracy)

    print(f"Epoch {epoch+1}/{epochs} | "
          f"Train Loss: {train_loss:.4f}, Acc: {train_accuracy:.4f} | "
          f"Val Loss: {val_loss:.4f}, Acc: {val_accuracy:.4f}")


Epoch 1/10 | Train Loss: 0.4667, Acc: 0.6702 | Val Loss: 0.4258, Acc: 0.6654
Epoch 2/10 | Train Loss: 0.3946, Acc: 0.6826 | Val Loss: 0.3971, Acc: 0.6654
Epoch 3/10 | Train Loss: 0.3776, Acc: 0.6835 | Val Loss: 0.3886, Acc: 0.6654
Epoch 4/10 | Train Loss: 0.3777, Acc: 0.6826 | Val Loss: 0.4106, Acc: 0.6654
Epoch 5/10 | Train Loss: 0.3658, Acc: 0.6864 | Val Loss: 0.3927, Acc: 0.6654
Epoch 6/10 | Train Loss: 0.3629, Acc: 0.6845 | Val Loss: 0.3822, Acc: 0.6654
Epoch 7/10 | Train Loss: 0.3619, Acc: 0.6873 | Val Loss: 0.3681, Acc: 0.6654
Epoch 8/10 | Train Loss: 0.3298, Acc: 0.7550 | Val Loss: 0.2759, Acc: 0.8175
Epoch 9/10 | Train Loss: 0.2417, Acc: 0.8684 | Val Loss: 0.2565, Acc: 0.8859
Epoch 10/10 | Train Loss: 0.2263, Acc: 0.9056 | Val Loss: 0.2371, Acc: 0.9011


In [17]:
from sklearn.metrics import classification_report, confusion_matrix

model.eval()
all_preds = []
all_targets = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        _, targets = torch.max(labels, 1)
        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())

print("Classification Report:\n", classification_report(all_targets, all_preds))
print("Confusion Matrix:\n", confusion_matrix(all_targets, all_preds))


Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.98      0.93       175
           1       0.96      0.74      0.83        88

    accuracy                           0.90       263
   macro avg       0.92      0.86      0.88       263
weighted avg       0.91      0.90      0.90       263

Confusion Matrix:
 [[172   3]
 [ 23  65]]


In [18]:
def train_and_evaluate_model(model, train_loader, test_loader, epochs=10, lr=0.0001):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, torch.argmax(labels, dim=1))
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            _, true_labels = torch.max(labels, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(true_labels.cpu().numpy())

    return {
        'accuracy': accuracy_score(all_labels, all_preds),
        'precision': precision_score(all_labels, all_preds),
        'recall': recall_score(all_labels, all_preds),
        'f1': f1_score(all_labels, all_preds),
        'mcc': matthews_corrcoef(all_labels, all_preds)
    }


In [19]:
# Define all models
def get_models():
    return {
        "SqueezeNet": models.squeezenet1_1(pretrained=True),
        "ResNet50": models.resnet50(pretrained=True),
        "VGG16": models.vgg16(pretrained=True),
        "MobileNetV2": models.mobilenet_v2(pretrained=True),
        "DenseNet121": models.densenet121(pretrained=True)
    }


In [20]:
def train_and_evaluate_model(model, train_loader, test_loader, epochs=10, lr=0.0001):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_acc_list, val_acc_list = [], []
    train_loss_list, val_loss_list = [], []

    for epoch in range(epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, torch.argmax(labels, dim=1))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            _, targets = torch.max(labels, 1)
            correct += (predicted == targets).sum().item()
            total += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct / total
        train_loss_list.append(train_loss)
        train_acc_list.append(train_accuracy)

        # Validation phase
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, torch.argmax(labels, dim=1))
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                _, targets = torch.max(labels, 1)
                val_correct += (predicted == targets).sum().item()
                val_total += labels.size(0)

        val_loss /= len(test_loader)
        val_accuracy = val_correct / val_total
        val_loss_list.append(val_loss)
        val_acc_list.append(val_accuracy)

        print(f"Epoch {epoch+1}/{epochs} | "
              f"Train Loss: {train_loss:.4f}, Acc: {train_accuracy:.4f} | "
              f"Val Loss: {val_loss:.4f}, Acc: {val_accuracy:.4f}")

    # Final evaluation metrics
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            _, targets = torch.max(labels, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())

    return {
        'accuracy': accuracy_score(all_labels, all_preds),
        'precision': precision_score(all_labels, all_preds),
        'recall': recall_score(all_labels, all_preds),
        'f1': f1_score(all_labels, all_preds),
        'mcc': matthews_corrcoef(all_labels, all_preds),
        'train_acc': train_acc_list,
        'val_acc': val_acc_list,
        'train_loss': train_loss_list,
        'val_loss': val_loss_list
    }


In [21]:
def modify_model_for_binary(model, model_name):
    if model_name == "SqueezeNet":
        # Replacing classifier with 2-class head
        model.classifier[1] = nn.Conv2d(512, 2, kernel_size=(1, 1), stride=(1, 1))
        model.num_classes = 2

    elif model_name == "ResNet50":
        # ResNet uses `fc` as final classifier
        in_features = model.fc.in_features
        model.fc = nn.Linear(in_features, 2)

    elif model_name == "DenseNet121":
        # DenseNet uses `classifier`
        in_features = model.classifier.in_features
        model.classifier = nn.Linear(in_features, 2)

    elif model_name == "VGG16":
        # VGG classifier is a sequential with [0-6]
        in_features = model.classifier[6].in_features
        model.classifier[6] = nn.Linear(in_features, 2)

    elif model_name == "MobileNetV2":
        # MobileNetV2 classifier is sequential with 2 layers
        in_features = model.classifier[1].in_features
        model.classifier[1] = nn.Linear(in_features, 2)

    else:
        raise ValueError(f"Model {model_name} not supported")

    return model


In [None]:
results = {}
models_dict = get_models()

for name, model in models_dict.items():
    print(f"\nTraining {name}...")
    model = modify_model_for_binary(model, name)
    metrics = train_and_evaluate_model(model, train_loader, test_loader, epochs=10, lr=0.0001)
    results[name] = metrics





Training SqueezeNet...
Epoch 1/10 | Train Loss: 0.4601, Acc: 0.7712 | Val Loss: 0.3427, Acc: 0.8441
Epoch 2/10 | Train Loss: 0.2790, Acc: 0.8742 | Val Loss: 0.2587, Acc: 0.8935
Epoch 3/10 | Train Loss: 0.2323, Acc: 0.9037 | Val Loss: 0.2409, Acc: 0.8783
Epoch 4/10 | Train Loss: 0.1786, Acc: 0.9199 | Val Loss: 0.2378, Acc: 0.9125
Epoch 5/10 | Train Loss: 0.2216, Acc: 0.8951 | Val Loss: 0.2468, Acc: 0.8935
Epoch 6/10 | Train Loss: 0.1488, Acc: 0.9418 | Val Loss: 0.2324, Acc: 0.9278
Epoch 7/10 | Train Loss: 0.1165, Acc: 0.9523 | Val Loss: 0.1715, Acc: 0.9316
Epoch 8/10 | Train Loss: 0.1423, Acc: 0.9390 | Val Loss: 0.2281, Acc: 0.9011
Epoch 9/10 | Train Loss: 0.1172, Acc: 0.9523 | Val Loss: 0.2020, Acc: 0.9163
Epoch 10/10 | Train Loss: 0.0865, Acc: 0.9647 | Val Loss: 0.1642, Acc: 0.9430

Training ResNet50...
Epoch 1/10 | Train Loss: 0.3557, Acc: 0.8246 | Val Loss: 0.3384, Acc: 0.8175
Epoch 2/10 | Train Loss: 0.1794, Acc: 0.9361 | Val Loss: 0.2742, Acc: 0.8783
Epoch 3/10 | Train Loss: 0.13

In [None]:
import matplotlib.pyplot as plt

def plot_metrics(results, metric_name):
    plt.figure(figsize=(10, 6))
    for model_name, metrics in results.items():
        plt.plot(metrics[metric_name], label=model_name)
    plt.xlabel('Epoch')
    plt.ylabel(metric_name.replace('_', ' ').title())
    plt.title(f"{metric_name.replace('_', ' ').title()} per Epoch")
    plt.legend()
    plt.grid(True)
    plt.show()

plot_metrics(results, 'train_acc')
plot_metrics(results, 'val_acc')
plot_metrics(results, 'train_loss')
plot_metrics(results, 'val_loss')
