In [None]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
import pandas as pd
import torchvision.transforms as transforms
from PIL import Image

# Hyperparameters and runtime configuration shared by the rest of the script
image_size = 128        # images are resized to image_size x image_size pixels
batch_size = 25         # samples per DataLoader batch
epochs = 100            # training epochs per cross-validation fold
learning_rate = 1e-3    # SGD step size
momentum = 0.9          # SGD momentum
weight_decay = 1e-4     # weight decay passed to SGD

# Use the GPU when PyTorch reports CUDA as available; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Load image paths and labels
def load_data(data_dir, label_file):
    """Resolve every row of the label CSV to an image file on disk.

    The CSV must provide a ``filename`` and a ``label`` column. For each row
    the file is looked up in ``data_dir`` by appending each supported
    extension in turn; the first existing file wins. If none exists and the
    name itself already ends in a supported extension, the name is tried
    as-is. Rows that cannot be resolved are reported on stdout and skipped.

    Args:
        data_dir: Directory that contains the image files.
        label_file: Path of the CSV file with ``filename``/``label`` columns.

    Returns:
        A ``(image_paths, labels)`` tuple of two equally long lists, in CSV
        row order, containing only the rows whose image was found.

    Raises:
        KeyError: If the CSV lacks a ``filename`` or ``label`` column.
    """
    # Read filenames as text so purely numeric names (e.g. "001") neither lose
    # their leading zeros nor break the string concatenation below.
    df = pd.read_csv(label_file, dtype={'filename': str})
    valid_exts = ['.pgm', '.tif', '.jpg', '.png']
    image_paths, labels = [], []

    # Pair the columns positionally instead of looking labels up by index label.
    for fname, label in zip(df['filename'], df['label'].to_numpy()):
        name = str(fname)  # an empty cell is read as NaN; stringify it so it is reported
        candidates = [name + ext for ext in valid_exts]
        # Fallback (tried last so the original lookup order keeps priority):
        # the CSV entry may already carry one of the supported extensions.
        if os.path.splitext(name)[1].lower() in valid_exts:
            candidates.append(name)

        full_path = next(
            (
                path
                for path in (os.path.join(data_dir, c) for c in candidates)
                if os.path.isfile(path)
            ),
            None,
        )
        if full_path is None:
            print(f" Could not find image for: {fname} (tried {valid_exts})")
            continue

        image_paths.append(full_path)
        labels.append(label)
    return image_paths, labels

# Custom dataset with augmentation
class BreastCancerDataset(Dataset):
    """Dataset that loads grayscale images from disk and pairs them with labels.

    Each item is read with OpenCV, histogram-equalized, resized to
    ``image_size`` x ``image_size`` (module-level setting) and returned as a
    float32 tensor scaled to [0, 1] with a leading channel axis, together
    with its label as an integer tensor.
    """

    def __init__(self, image_paths, labels, augment=False):
        """Store the samples and build the augmentation pipeline.

        Args:
            image_paths: Sequence of image file paths.
            labels: Sequence of labels aligned with ``image_paths``; each
                entry must be convertible with ``int()``.
            augment: When true, random flip / rotation / colour jitter is
                applied to every item that is fetched.
        """
        self.image_paths = image_paths
        self.labels = labels
        self.augment = augment

        # The pipeline is always built; it is only applied when augment is set.
        augmentations = [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(degrees=10),
            transforms.ColorJitter(contrast=0.1, brightness=0.1),
        ]
        self.aug_transform = transforms.Compose(augmentations)

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, preprocess and return the ``(image, label)`` pair at ``idx``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_path = self.image_paths[idx]
        gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f" Failed to load image at: {img_path}")

        # Contrast normalisation first, then resize to the network input size.
        gray = cv2.resize(cv2.equalizeHist(gray), (image_size, image_size))

        # The augmentation pipeline is applied to a PIL image.
        pil_img = Image.fromarray(gray)
        if self.augment:
            pil_img = self.aug_transform(pil_img)

        # (H, W) pixel values -> (1, H, W) float32 in [0, 1]
        pixels = np.array(pil_img).astype(np.float32)[np.newaxis, ...] / 255.0
        label = int(self.labels[idx])
        return torch.tensor(pixels), torch.tensor(label)

# CNN Model (Table 1)
class CustomBreastCancerCNN(nn.Module):
    """Four-stage convolutional classifier for single-channel images.

    Every stage is Conv2d (no padding) -> BatchNorm2d -> ReLU -> 2x2 max-pool,
    with kernel sizes 9, 7, 5, 3 and channel widths 16, 32, 64, 128. The
    flattened features pass through one hidden fully connected layer and a
    2-way output layer that returns raw logits.

    For a 128x128 input the spatial size evolves as
    128 -> 120 -> 60 -> 54 -> 27 -> 23 -> 11 -> 9 -> 4, which is where the
    ``128 * 4 * 4`` input size of the hidden layer comes from.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 16 channels, 9x9 kernel
        self.conv1 = nn.Conv2d(1, 16, kernel_size=9, stride=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Stage 2: 16 -> 32 channels, 7x7 kernel
        self.conv2 = nn.Conv2d(16, 32, kernel_size=7, stride=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Stage 3: 32 -> 64 channels, 5x5 kernel
        self.conv3 = nn.Conv2d(32, 64, kernel_size=5, stride=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Stage 4: 64 -> 128 channels, 3x3 kernel
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=1)
        self.bn4 = nn.BatchNorm2d(128)
        self.pool4 = nn.MaxPool2d(2, 2)

        # Classifier head.
        # NOTE(review): a hidden width of 4098 is unusual (4096 is the common
        # choice) - confirm against the reference table before changing it.
        self.fc = nn.Linear(128 * 4 * 4, 4098)
        self.output = nn.Linear(4098, 2)

    def forward(self, x):
        """Return class logits of shape ``(batch, 2)`` for the image batch ``x``."""
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
            (self.conv4, self.bn4, self.pool4),
        )
        for conv, norm, pool in stages:
            x = pool(F.relu(norm(conv(x))))

        # Collapse (channels, height, width) into one feature vector per sample.
        features = x.view(x.shape[0], -1)
        hidden = F.relu(self.fc(features))
        return self.output(hidden)

# Training & Evaluation
def train_and_evaluate(image_paths, labels):
    """Train a fresh CNN on each of 5 stratified folds and print accuracies.

    For every fold a new model is trained with SGD and cross-entropy loss for
    ``epochs`` epochs. After each epoch the mean training loss, the training
    accuracy (accumulated during the training pass, on augmented samples) and
    the accuracy on the held-out split are printed. After the last epoch the
    fold's combined accuracy is printed, and at the end the mean of those
    combined accuracies over all folds.

    NOTE(review): the fold metric pools train and test predictions of the
    final epoch, so it is not a pure held-out score - confirm this is the
    number that is meant to be reported.

    Args:
        image_paths: List of image file paths.
        labels: List of class labels aligned with ``image_paths``.

    Returns:
        None. All results are written to stdout.
    """
    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_accuracies = []  # combined (train + test) accuracy, one entry per fold

    for fold, (train_idx, test_idx) in enumerate(splitter.split(image_paths, labels)):
        print(f"\n--- Fold {fold+1} ---")

        # Only the training split is augmented.
        train_dataset = BreastCancerDataset(
            [image_paths[i] for i in train_idx],
            [labels[i] for i in train_idx],
            augment=True,
        )
        test_dataset = BreastCancerDataset(
            [image_paths[i] for i in test_idx],
            [labels[i] for i in test_idx],
            augment=False,
        )
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        # A new model and optimizer per fold, so folds do not share weights.
        model = CustomBreastCancerCNN().to(device)
        optimizer = optim.SGD(
            model.parameters(),
            lr=learning_rate,
            momentum=momentum,
            weight_decay=weight_decay,
        )
        criterion = nn.CrossEntropyLoss()

        for epoch in range(epochs):
            # ---- training pass ----
            model.train()
            running_loss = 0.0
            correct_train = total_train = 0

            for images, targets in train_loader:
                images = images.to(device)
                targets = targets.to(device)

                optimizer.zero_grad()
                logits = model(images)
                loss = criterion(logits, targets)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

                # Predicted class = index of the largest logit.
                preds = torch.max(logits, 1).indices
                correct_train += (preds == targets).sum().item()
                total_train += targets.size(0)

            # ---- evaluation on the held-out split ----
            model.eval()
            correct_test = total_test = 0
            with torch.no_grad():
                for images, targets in test_loader:
                    images = images.to(device)
                    targets = targets.to(device)
                    preds = torch.max(model(images), 1).indices
                    correct_test += (preds == targets).sum().item()
                    total_test += targets.size(0)

            train_acc = 100 * correct_train / total_train
            test_acc = 100 * correct_test / total_test
            print(f"Epoch {epoch+1}/{epochs} → Loss: {running_loss/len(train_loader):.4f}, "
                  f"Train Acc: {train_acc:.2f}%, Test Acc: {test_acc:.2f}%")

        # Fold summary, built from the counters of the final epoch only.
        total_correct_all = correct_train + correct_test
        total_samples_all = total_train + total_test
        total_accuracy = 100 * total_correct_all / total_samples_all
        fold_accuracies.append(total_accuracy)

        print(f"\n Total Combined Accuracy (Train + Test) for Fold {fold+1}: {total_accuracy:.2f}%")

    # Mean of the per-fold combined accuracies.
    avg_accuracy = sum(fold_accuracies) / len(fold_accuracies)
    print(f"\n Average Combined Accuracy over all 5 folds: {avg_accuracy:.2f}%")

# Entry point: index the images listed in the label CSV, then run the
# cross-validated training. Arguments are (data_dir, label_file).
image_paths, labels = load_data(
    'C:/Users/masud/Personallaptob12Jan2025/Freelancer/Parscoders/11_Breast Cancer/allmias',
    'C:/Users/masud/Personallaptob12Jan2025/Freelancer/Parscoders/11_Breast Cancer/allmias/labels.csv',
)

train_and_evaluate(image_paths, labels)
