In [101]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import numpy as np
import cv2
from sklearn.model_selection import train_test_split

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- 1. N/C Ratio Feature Extraction ---
def extract_nc_ratio(image):
    img_gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    _, nucleus_mask = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    cytoplasm_mask = cv2.bitwise_not(nucleus_mask)

    nucleus_area = np.sum(nucleus_mask > 0)
    cytoplasm_area = np.sum(cytoplasm_mask > 0)
    
    nc_ratio = nucleus_area / (cytoplasm_area + 1e-5)
    return np.array([nc_ratio], dtype=np.float32)

# --- 2. Custom Dataset ---
class CervicalDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx]).convert("RGB")
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        nc_ratio = extract_nc_ratio(img)

        if self.transform:
            img = self.transform(img)

        return img, torch.tensor(nc_ratio), label

# --- 3. Preprocessing & Load Data ---
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

def load_image_paths_labels(data_dir):
    paths = []
    labels = []
    class_names = sorted(os.listdir(data_dir))
    class_to_idx = {cls_name: idx for idx, cls_name in enumerate(class_names)}

    for label in class_names:
        label_path = os.path.join(data_dir, label)
        if os.path.isdir(label_path):
            for img in os.listdir(label_path):
                paths.append(os.path.join(label_path, img))
                labels.append(class_to_idx[label])

    return paths, labels


In [103]:
# Update this to your dataset path
data_dir = r'C:\Users\CVR\Desktop\DataSet\Herlev Dataset\augmented_train'
image_paths, labels = load_image_paths_labels(data_dir)

X_train, X_val, y_train, y_val = train_test_split(image_paths, labels, test_size=0.2, stratify=labels)

train_dataset = CervicalDataset(X_train, y_train, transform)
val_dataset = CervicalDataset(X_val, y_val, transform)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

In [115]:
class SqueezeNetWithNC(nn.Module):
    def __init__(self, num_classes):
        super(SqueezeNetWithNC, self).__init__()
        self.squeezenet = models.squeezenet1_1(weights=models.SqueezeNet1_1_Weights.IMAGENET1K_V1)
        
        # Replace final conv layer to match num_classes
        self.squeezenet.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=1)
        self.squeezenet.num_classes = num_classes
        
        # N/C feature processing (optional, if using handcrafted features)
        self.nc_fc = nn.Linear(1, 32)
        self.final_fc = nn.Linear(num_classes + 32, num_classes)

    def forward(self, x, nc_ratio):
        x = self.squeezenet.features(x)
        x = nn.functional.relu(x)
        x = nn.functional.adaptive_avg_pool2d(x, (1, 1))
        x = torch.flatten(x, 1)
        
        nc_feat = nn.functional.relu(self.nc_fc(nc_ratio.view(-1, 1)))
        
        x = torch.cat((x, nc_feat), dim=1)
        x = self.final_fc(x)
        return x


In [117]:
# --- 5. Custom Gazelle Optimizer ---
class GazelleOptimizer(torch.optim.Optimizer):
    def __init__(self, params, lr=0.01, alpha=0.1, beta=0.5):
        defaults = dict(lr=lr, alpha=alpha, beta=beta)
        super(GazelleOptimizer, self).__init__(params, defaults)

    def step(self, closure=None):
        loss = closure() if closure else None
        for group in self.param_groups:
            lr = group['lr']
            alpha = group['alpha']
            beta = group['beta']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data

                noise = torch.randn_like(grad) * beta
                motion = torch.sign(grad + noise)
                update = -lr * motion + alpha * torch.randn_like(p.data)

                p.data.add_(update)
        return loss

In [118]:
# --- 6. Training & Evaluation ---
def train_model(model, train_loader, val_loader, num_epochs=10):
    model = model.to(device)
    optimizer = GazelleOptimizer(model.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        model.train()
        running_loss, correct, total = 0, 0, 0
        for images, nc_features, labels in train_loader:
            images, labels, nc_features = images.to(device), labels.to(device), nc_features.to(device)

            def closure():
                optimizer.zero_grad()
                outputs = model(images, nc_features)
                loss = criterion(outputs, labels)
                loss.backward()
                return loss

            loss = optimizer.step(closure)
            running_loss += loss.item()
            preds = model(images, nc_features).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        acc = correct / total
        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {running_loss:.4f} | Train Acc: {acc:.4f}")

        # Validation
        model.eval()
        with torch.no_grad():
            correct_val, total_val = 0, 0
            for images, nc_features, labels in val_loader:
                images, labels, nc_features = images.to(device), labels.to(device), nc_features.to(device)
                outputs = model(images, nc_features)
                preds = outputs.argmax(dim=1)
                correct_val += (preds == labels).sum().item()
                total_val += labels.size(0)
            val_acc = correct_val / total_val
            print(f"Validation Accuracy: {val_acc:.4f}\n")


In [119]:
class_names = sorted(os.listdir(data_dir))
num_classes = len(class_names)


In [125]:
model = SqueezeNetWithNC(num_classes=num_classes)

train_model(model, train_loader, val_loader, num_epochs=10)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x544 and 39x7)

In [87]:
# ------------------------- Model -------------------------
num_classes=7
class CombinedModel(nn.Module):
    def __init__(self):
        super(CombinedModel, self).__init__()
        self.base = models.squeezenet1_1(pretrained=True)
        self.base.classifier = nn.Identity()
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Linear(512 + 1, 128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )

    def forward(self, x, nc):
        x = self.base.features(x)
        x = self.gap(x).view(x.size(0), -1)
        x = torch.cat((x, nc.unsqueeze(1)), dim=1)
        out = self.fc(x)
        return out


In [89]:
# Dummy Gazelle Optimizer (Replace with your real optimizer)
class GazelleOptimizer(torch.optim.Optimizer):
    def __init__(self, params, lr=0.001):
        defaults = dict(lr=lr)
        super(GazelleOptimizer, self).__init__(params, defaults)

    def step(self, closure=None):
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is not None:
                    p.data -= group['lr'] * p.grad.data
        return loss

In [91]:
# Training and Validation

def train_validate(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10):
    train_accs, val_accs, train_losses, val_losses = [], [], [], []
    for epoch in range(num_epochs):
        model.train()
        train_loss, train_correct = 0.0, 0
        for imgs, nc_ratios, labels in train_loader:
            imgs, nc_ratios, labels = imgs.to(device), nc_ratios.to(device), labels.to(device)

            def closure():
                optimizer.zero_grad()
                outputs = model(imgs, nc_ratios)
                loss = criterion(outputs, labels)
                loss.backward()
                return loss

            loss = optimizer.step(closure)

            with torch.no_grad():
                preds = model(imgs, nc_ratios).argmax(dim=1)
                train_correct += (preds == labels).sum().item()
                train_loss += loss.item()

        train_acc = train_correct / len(train_loader.dataset)

        model.eval()
        val_loss, val_correct = 0.0, 0
        with torch.no_grad():
            for imgs, nc_ratios, labels in val_loader:
                imgs, nc_ratios, labels = imgs.to(device), nc_ratios.to(device), labels.to(device)
                outputs = model(imgs, nc_ratios)
                loss = criterion(outputs, labels)
                preds = outputs.argmax(dim=1)
                val_correct += (preds == labels).sum().item()
                val_loss += loss.item()

        val_acc = val_correct / len(val_loader.dataset)

        train_accs.append(train_acc)
        val_accs.append(val_acc)
        train_losses.append(train_loss)
        val_losses.append(val_loss)

        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc*100:.2f}% - Val Loss: {val_loss:.4f}, Val Acc: {val_acc*100:.2f}%")

    return train_accs, val_accs, train_losses, val_losses



In [93]:
# Plotting

def plot_metrics(train_accs, val_accs, train_losses, val_losses):
    epochs = range(1, len(train_accs) + 1)
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_accs, label='Train Accuracy')
    plt.plot(epochs, val_accs, label='Val Accuracy')
    plt.title('Accuracy over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_losses, label='Train Loss')
    plt.plot(epochs, val_losses, label='Val Loss')
    plt.title('Loss over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()



In [95]:
# Run Training
num_classes=7
data_dir = r"C:\Users\CVR\Desktop\DataSet\Herlev Dataset\augmented_train"
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

dataset = CervicalCancerDataset(data_dir, transform=transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

num_classes = len(os.listdir(data_dir))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CombinedModel(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = GazelleOptimizer(model.parameters(), lr=0.001)

train_accs, val_accs, train_losses, val_losses = train_validate(
    model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10
)

plot_metrics(train_accs, val_accs, train_losses, val_losses)

TypeError: CombinedModel.__init__() got an unexpected keyword argument 'num_classes'

In [62]:
# ------------------------- Gazelle Optimizer -------------------------
class GazelleOptimizer(torch.optim.Optimizer):
    def __init__(self, params, lr=1e-3):
        defaults = dict(lr=lr)
        super(GazelleOptimizer, self).__init__(params, defaults)

    def step(self, closure=None):
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                noise = torch.randn_like(grad)
                levy = torch.randn_like(grad) * torch.sign(torch.randn_like(grad)) / (torch.abs(torch.randn_like(grad)) + 1e-6)
                update = -group['lr'] * grad + 0.05 * noise + 0.01 * levy
                p.data.add_(update)
        return loss


In [64]:
# ------------------------- Training -------------------------
def train_validate(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10):
    train_accs, val_accs, train_losses, val_losses = [], [], [], []

    for epoch in range(num_epochs):
        model.train()
        train_loss, train_correct = 0.0, 0
        for imgs, nc_ratios, labels in train_loader:
            imgs, nc_ratios, labels = imgs.to(device), nc_ratios.to(device), labels.to(device)

            def closure():
                optimizer.zero_grad()
                outputs = model(imgs, nc_ratios)
                loss = criterion(outputs, labels)
                loss.backward()
                return loss

            loss = optimizer.step(closure)

            with torch.no_grad():
                preds = model(imgs, nc_ratios).argmax(dim=1)
                train_correct += (preds == labels).sum().item()
                train_loss += loss.item()

        train_acc = train_correct / len(train_loader.dataset)
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        # Validation
        model.eval()
        val_loss, val_correct = 0.0, 0
        with torch.no_grad():
            for imgs, nc_ratios, labels in val_loader:
                imgs, nc_ratios, labels = imgs.to(device), nc_ratios.to(device), labels.to(device)
                outputs = model(imgs, nc_ratios)
                loss = criterion(outputs, labels)
                preds = outputs.argmax(dim=1)
                val_correct += (preds == labels).sum().item()
                val_loss += loss.item()

        val_acc = val_correct / len(val_loader.dataset)
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc*100:.2f}% - Val Loss: {val_loss:.4f}, Val Acc: {val_acc*100:.2f}%")

    return train_accs, val_accs, train_losses, val_losses

In [83]:
# ------------------------- Main -------------------------
if __name__ == "__main__":
    data_dir = r"C:\Users\CVR\Desktop\DataSet\Herlev Dataset\augmented_train"
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])

    dataset = CervicalCancerDataset(data_dir, transform=transform)
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_set, val_set = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=16, shuffle=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CombinedModel().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = GazelleOptimizer(model.parameters(), lr=0.001)

    train_accs, val_accs, train_losses, val_losses = train_validate(
        model, loader, loader, criterion, optimizer, device, num_epochs=10
    )



NameError: name 'loader' is not defined

In [68]:
# Plotting
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_accs, label='Train Acc')
    plt.plot(val_accs, label='Val Acc')
    plt.title("Accuracy")
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.title("Loss")
    plt.legend()
    plt.tight_layout()
    plt.show()


IndentationError: unexpected indent (1387400865.py, line 2)