## Model 1 (ResNet-18)

Evaluation metrics

Classification Report:
                     precision    recall  f1-score   support     

           adenosis       0.78      0.85      0.82        68     
   ductal_carcinoma       0.83      0.90      0.86       519     
       fibroadenoma       0.62      0.80      0.70       153     
  lobular_carcinoma       0.66      0.59      0.62        95     
 mucinous_carcinoma       0.88      0.43      0.58       120     
papillary_carcinoma       0.56      0.64      0.60        84     
    phyllodes_tumor       0.75      0.35      0.48        69     
    tubular_adenoma       0.72      0.76      0.74        86     

           accuracy                           0.75      1194     
          macro avg       0.73      0.67      0.67      1194     
       weighted avg       0.76      0.75      0.74      1194 

Training code

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import time
import copy
from tqdm import tqdm  # <-- added

if __name__ == "__main__":
    # --- Run configuration ---------------------------------------------------
    data_dir = "dataset_split"
    batch_size = 32
    num_epochs = 10
    learning_rate = 0.001
    num_classes = 8

    # --- Preprocessing -------------------------------------------------------
    # Training images get a random resized crop and a random horizontal flip.
    # The "val" and "test" splits use a deterministic resize + centre crop.
    # Every split is converted to a tensor and normalised with the same
    # per-channel mean/std (the standard ImageNet values, presumably chosen to
    # match the pretrained weights loaded below).
    data_transforms = {}
    data_transforms["train"] = transforms.Compose(
        [
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    for split in ("val", "test"):
        data_transforms[split] = transforms.Compose(
            [
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]
        )

    # --- Data ----------------------------------------------------------------
    # One ImageFolder per split, read from "<data_dir>/<split>".
    print("Loading datasets...")
    image_datasets = {}
    for split in ("train", "val", "test"):
        image_datasets[split] = datasets.ImageFolder(
            root=f"{data_dir}/{split}", transform=data_transforms[split]
        )

    # Only the training loader shuffles; val/test keep a fixed order.
    dataloaders = {}
    for split, dataset in image_datasets.items():
        dataloaders[split] = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=(split == "train"),
            num_workers=4,
        )

    dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}
    class_names = image_datasets["train"].classes

    print(f"Classes: {class_names}")
    print(f"Train size: {dataset_sizes['train']}")
    print(f"Val size: {dataset_sizes['val']}")
    print(f"Test size: {dataset_sizes['test']}")

    # --- Device --------------------------------------------------------------
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    print(f"Using device: {device}")

    # --- Model ---------------------------------------------------------------
    # Pretrained ResNet-18 whose final fully connected layer is replaced by a
    # fresh linear layer with `num_classes` outputs.
    model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    head_inputs = model.fc.in_features
    model.fc = nn.Linear(head_inputs, num_classes)
    model = model.to(device)

    # --- Objective -----------------------------------------------------------
    # Cross-entropy loss; Adam updates every parameter of the network.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    def train_model(model, criterion, optimizer, num_epochs=10):
        """Train ``model`` and return it loaded with its best validation weights.

        Every epoch runs a training pass followed by a validation pass over the
        enclosing script's ``dataloaders``. Batches are moved to ``device`` and
        the epoch loss/accuracy are averaged over ``dataset_sizes[phase]``.
        The weights of the epoch with the highest validation accuracy are kept
        and restored into ``model`` before it is returned.

        Args:
            model: Network to optimise (modified in place).
            criterion: Loss callable taking ``(outputs, labels)``.
            optimizer: Optimizer stepped once per training batch.
            num_epochs: Number of train+val epochs to run.

        Returns:
            The same ``model`` object, carrying the best validation weights.
        """
        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch + 1}/{num_epochs}")
            print("-" * 10)

            for phase in ["train", "val"]:
                is_train = phase == "train"
                # train(True) enables training behaviour, train(False) == eval().
                model.train(is_train)

                running_loss = 0.0
                running_corrects = 0
                samples_seen = 0

                # Wrap the dataloader with tqdm for a per-phase progress bar.
                loop = tqdm(dataloaders[phase], desc=f"{phase} batches", leave=False)

                for inputs, labels in loop:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # Gradients are only tracked during the training pass.
                    with torch.set_grad_enabled(is_train):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        if is_train:
                            # Clearing gradients is only needed when we are
                            # about to back-propagate.
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                    n_batch = inputs.size(0)
                    # criterion returns the batch mean, so weight it by the
                    # batch size to accumulate a per-sample sum.
                    running_loss += loss.item() * n_batch
                    # Reduce once and keep a plain int; reused for the bar below.
                    running_corrects += (preds == labels).sum().item()
                    samples_seen += n_batch

                    # Show the running (epoch-so-far) loss and accuracy rather
                    # than the noisy values of the last batch only.
                    loop.set_postfix(
                        loss=running_loss / samples_seen,
                        acc=running_corrects / samples_seen,
                    )

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects / dataset_sizes[phase]

                print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

                # Snapshot the weights whenever validation accuracy improves.
                if phase == "val" and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

        print(f"\nBest val Acc: {best_acc:.4f}")

        model.load_state_dict(best_model_wts)
        return model

    # Run training; the returned model already holds its best validation weights.
    trained_model = train_model(model, criterion, optimizer, num_epochs=num_epochs)

    # Persist only the weights (state_dict), not the whole module object.
    best_weights = trained_model.state_dict()
    torch.save(best_weights, "model1.pth")
    print("Training complete.model saved")


## Model 2 (ResNet-50)

Evaluation metrics

Classification Report:
                     precision    recall  f1-score   support

           adenosis       0.94      0.96      0.95        68     
   ductal_carcinoma       0.92      0.92      0.92       519     
       fibroadenoma       0.88      0.93      0.90       153     
  lobular_carcinoma       0.65      0.63      0.64        95     
 mucinous_carcinoma       0.92      0.92      0.92       120     
papillary_carcinoma       0.91      0.92      0.91        84     
    phyllodes_tumor       0.91      0.90      0.91        69     
    tubular_adenoma       1.00      0.90      0.94        86     

           accuracy                           0.90      1194     
          macro avg       0.89      0.88      0.89      1194     
       weighted avg       0.90      0.90      0.90      1194 

Training code

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import copy

if __name__ == "__main__":
    # --- Run configuration ---------------------------------------------------
    data_dir = "dataset_split"
    batch_size = 32
    num_epochs = 25
    learning_rate = 0.001
    num_classes = 8

    # --- Device --------------------------------------------------------------
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    print(f"Using device: {device}")

    # --- Preprocessing -------------------------------------------------------
    # Training adds random crop, flip, rotation (up to 15 degrees) and colour
    # jitter; "val" and "test" use a deterministic resize + centre crop.
    # Every split is converted to a tensor and normalised with the same
    # per-channel mean/std (the standard ImageNet values, presumably chosen to
    # match the pretrained weights loaded below).
    data_transforms = {}
    data_transforms["train"] = transforms.Compose(
        [
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    for split in ("val", "test"):
        data_transforms[split] = transforms.Compose(
            [
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]
        )

    # --- Data ----------------------------------------------------------------
    # One ImageFolder per split, read from "<data_dir>/<split>".
    print("Loading datasets...")
    image_datasets = {}
    for split in ("train", "val", "test"):
        image_datasets[split] = datasets.ImageFolder(
            root=f"{data_dir}/{split}", transform=data_transforms[split]
        )

    # Only the training loader shuffles; val/test keep a fixed order.
    dataloaders = {}
    for split, dataset in image_datasets.items():
        dataloaders[split] = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=(split == "train"),
            num_workers=4,
        )

    dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}
    class_names = image_datasets["train"].classes

    print(f"Classes: {class_names}")
    print(f"Train size: {dataset_sizes['train']}")
    print(f"Val size: {dataset_sizes['val']}")
    print(f"Test size: {dataset_sizes['test']}")

    # --- Model ---------------------------------------------------------------
    # Pretrained ResNet-50 whose final fully connected layer is replaced by a
    # fresh linear layer with `num_classes` outputs.
    model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
    head_inputs = model.fc.in_features
    model.fc = nn.Linear(head_inputs, num_classes)
    model = model.to(device)

    # Stage 1: freeze everything, then re-enable gradients for the new head
    # only, so that initially just `model.fc` is trained.
    model.requires_grad_(False)
    model.fc.requires_grad_(True)

    # --- Objective -----------------------------------------------------------
    criterion = nn.CrossEntropyLoss()
    # Hand Adam only the parameters that are currently trainable.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(trainable_params, lr=learning_rate)

    # Multiply the learning rate by 0.1 every 7 scheduler steps.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    def train_model(
        model,
        criterion,
        optimizer,
        scheduler,
        num_epochs=25,
        *,
        unfreeze_epoch=10,
        finetune_lr=None,
        loaders=None,
    ):
        """Train in two stages and return ``model`` with its best validation weights.

        Stage 1 trains with the optimizer/scheduler passed in (the caller
        decides which parameters are trainable). After ``unfreeze_epoch``
        completed epochs every parameter is made trainable and a fresh Adam
        optimizer plus ``StepLR(step_size=7, gamma=0.1)`` scheduler take over.
        Each epoch runs a training pass and then a validation pass; the weights
        of the epoch with the highest validation accuracy are restored into
        ``model`` before returning.

        Args:
            model: Network to optimise (modified in place). Batches are moved
                to the device of its first parameter.
            criterion: Loss callable taking ``(outputs, labels)``.
            optimizer: Optimizer used until the unfreeze point.
            scheduler: LR scheduler stepped once per epoch.
            num_epochs: Total number of train+val epochs.
            unfreeze_epoch: Number of completed epochs after which all layers
                are unfrozen (default 10, i.e. after the 10th epoch). ``None``
                disables the unfreeze stage.
            finetune_lr: Learning rate of the fine-tuning optimizer. ``None``
                means one tenth of the learning rate ``optimizer`` was
                constructed with.
            loaders: Mapping with ``"train"`` and ``"val"`` iterables yielding
                ``(inputs, labels)`` batches. ``None`` falls back to the
                enclosing script's ``dataloaders``.

        Returns:
            The same ``model`` object, carrying the best validation weights.
        """
        if loaders is None:
            loaders = dataloaders  # script-level loaders built before this def

        # Follow the model's own placement instead of a script-level global.
        run_device = next(model.parameters()).device

        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch + 1}/{num_epochs}")
            print("-" * 10)

            for phase in ["train", "val"]:
                is_train = phase == "train"
                # NOTE(review): train mode also applies to layers whose weights
                # are frozen; if the model has BatchNorm layers their running
                # statistics keep updating during stage 1 - confirm intended.
                model.train(is_train)

                running_loss = 0.0
                running_corrects = 0
                samples_seen = 0

                for inputs, labels in loaders[phase]:
                    inputs = inputs.to(run_device)
                    labels = labels.to(run_device)

                    # Gradients are only tracked during the training pass.
                    with torch.set_grad_enabled(is_train):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        if is_train:
                            # Clearing gradients is only needed when we are
                            # about to back-propagate.
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                    n_batch = inputs.size(0)
                    # criterion returns the batch mean, so weight it by the
                    # batch size to accumulate a per-sample sum.
                    running_loss += loss.item() * n_batch
                    running_corrects += (preds == labels).sum().item()
                    samples_seen += n_batch

                # Average over the samples actually processed (equal to the
                # dataset size for a loader that drops nothing); guard the
                # empty-loader case against division by zero.
                denom = max(samples_seen, 1)
                epoch_loss = running_loss / denom
                epoch_acc = running_corrects / denom

                print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

                # Snapshot the weights whenever validation accuracy improves.
                if phase == "val" and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

            scheduler.step()

            # Switch to stage 2: unfreeze all layers and fine-tune with a
            # fresh optimizer/scheduler pair at a reduced learning rate.
            if unfreeze_epoch is not None and epoch + 1 == unfreeze_epoch:
                print("Unfreezing all layers for fine-tuning...")
                for param in model.parameters():
                    param.requires_grad = True
                if finetune_lr is None:
                    # `defaults["lr"]` is the constructor learning rate and is
                    # not altered by the scheduler.
                    stage2_lr = optimizer.defaults["lr"] / 10
                else:
                    stage2_lr = finetune_lr
                optimizer = optim.Adam(model.parameters(), lr=stage2_lr)
                scheduler = torch.optim.lr_scheduler.StepLR(
                    optimizer, step_size=7, gamma=0.1
                )

        print(f"\nBest val Acc: {best_acc:.4f}")
        model.load_state_dict(best_model_wts)
        return model

    # Run training; the returned model already holds its best validation weights.
    trained_model = train_model(model, criterion, optimizer, scheduler, num_epochs=num_epochs)

    # Persist only the weights (state_dict), not the whole module object.
    best_weights = trained_model.state_dict()
    torch.save(best_weights, "model2.pth")
    print("Training complete")
