Imports

In [None]:
%matplotlib inline 

In [None]:
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [None]:
pip install scipy 

In [None]:
pip install pandas

In [None]:
pip install ultralytics

In [None]:
pip install seaborn

In [None]:
import torch
import torch.nn as nn
import numpy as np
from torchvision import models
from torchvision.datasets import Flowers102
from torch.utils.data import Subset, ConcatDataset
from collections import defaultdict, Counter
import random
from collections import defaultdict
from torch.utils.data import Dataset, Subset, DataLoader
from torchvision import transforms
from typing import Tuple, Dict, Optional, Callable
import copy
import json
import matplotlib.pyplot as plt
import os
from pathlib import Path
import time

Checking what device is available

In [None]:
# Report the installed PyTorch version and choose the compute device:
# the GPU when CUDA is available, otherwise the CPU.
print("Torch:", torch.__version__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
torch.backends.cudnn.benchmark = True # Accelerating training for fixed input size

Constants

In [None]:
# Directory the Flowers102 files are downloaded to / read from.
DATASET_ROOT: str = "./data" 
# Default train / validation / test fractions of the stratified split.
TRAIN_RELATIVE_SIZE: float = 0.5
VALIDATION_RELATIVE_SIZE: float = 0.25
TEST_RELATIVE_SIZE: float = 0.25
# Model name used to build the YOLOv5 classification checkpoint file name ("<name>-cls.pt").
YOLOV5_MODEL: str = 'yolov5s'
# One complete train/evaluate run of every model is performed per data-split seed.
DATA_SPLIT_SEED_LIST: list[int] = [42, 43, 44]
# Image size produced by both transform pipelines; despite the name it is used for
# every model trained in this notebook, not only for YOLO.
EXPECTED_IMAGE_SIZE_YOLO: tuple[int, int] = (224, 224)
# Per-channel statistics passed to transforms.Normalize.
IMAGENET_STD: list[float] = [0.229, 0.224, 0.225]
IMAGENET_MEAN: list[float] = [0.485, 0.456, 0.406]
# Early stopping: stop after this many consecutive epochs in which the validation loss
# did not drop by more than IMPROVEMENT_DELTA below the best value seen so far.
PATIENCE_EPOCHS: int = 10
IMPROVEMENT_DELTA: float = 0.01
# Upper bound on the number of training epochs per run.
MAX_EPOCHS: int = 100
BATCH_SIZE: int = 32
NUM_WORKERS_FOR_DATALOADER: int = 0  # Set to 0 for Windows compatibility
FRESH_LOCAL_VGG_WEIGHTS: str = "vgg19_fresh.pt"  # Path to save fresh weights

Fetching the dataset

In [None]:
# Load the entire dataset to be processed later.
# The three official Flowers102 splits are merged into a single dataset here;
# the train/validation/test splits actually used are re-drawn from it further down.
full_dataset = torch.utils.data.ConcatDataset([
    Flowers102(root=DATASET_ROOT, split="train", download=True),
    Flowers102(root=DATASET_ROOT, split="val",   download=True),
    Flowers102(root=DATASET_ROOT, split="test",  download=True),
]) 

Dataset information

In [None]:
# Collect the label of every sample across the merged splits (no images are loaded).
all_labels = []

for single_dataset in full_dataset.datasets:  # full_dataset is ConcatDataset
    all_labels.extend(single_dataset._labels)  # Flowers102 stores labels here
# Number of samples per class label.
counts = Counter(all_labels)

num_classes = len(counts)  # reused later to size the classifier heads
total = len(all_labels)
min_c = min(counts.values())
max_c = max(counts.values())

# Summary of dataset size and class imbalance.
print(f"Total samples: {total}")
print(f"Classes: {num_classes}")
print(f"Min per class: {min_c}")
print(f"Max per class: {max_c}")
print(f"Imbalance ratio (max/min): {max_c/min_c:.2f}")

Data Preparation

Utility functions that split the data into stratified subsets.

In [None]:
def extract_all_labels_from_concat_dataset(concat_dataset: ConcatDataset):
    """Return the labels of every sample in a ConcatDataset as one integer array.

    The labels are read per underlying dataset, in the order the datasets are
    stored, so position ``i`` of the result is the label of global index ``i``.
    Label containers are used when present (``_labels`` first, then ``targets``)
    so that no image has to be loaded; only datasets exposing neither attribute
    are iterated sample by sample.
    """
    def _labels_of(dataset):
        # Fast path: a ready-made label container (Flowers102 -> _labels,
        # ImageFolder / CIFAR style datasets -> targets).
        for attribute_name in ("_labels", "targets"):
            if hasattr(dataset, attribute_name):
                return np.asarray(getattr(dataset, attribute_name), dtype=int)
        # Slow path: fetch each sample and keep only its label.
        return np.array(
            [dataset[position][1] for position in range(len(dataset))],
            dtype=int
        )

    return np.concatenate([_labels_of(dataset) for dataset in concat_dataset.datasets])

In [None]:
def stratified_split_concat_dataset(
    concat_dataset: ConcatDataset,
    split_fractions=(TRAIN_RELATIVE_SIZE, VALIDATION_RELATIVE_SIZE, TEST_RELATIVE_SIZE),
    random_seed=42
):
    """Split ConcatDataset into stratified Subsets with same class proportions.

    Args:
        concat_dataset: Dataset to split; its labels are obtained through
            ``extract_all_labels_from_concat_dataset``.
        split_fractions: Relative sizes of the splits. They are normalised to sum
            to 1 before use. The result is unpacked into exactly three subsets,
            so three fractions are expected.
        random_seed: Seed of the NumPy generator that drives every shuffle and the
            assignment of leftover samples, making the split reproducible.

    Returns:
        ``(train_subset, val_subset, test_subset)`` as ``Subset`` views over
        ``concat_dataset``, in the order of ``split_fractions``.
    """

    normalized_fractions = np.array(split_fractions, dtype=float)
    normalized_fractions = normalized_fractions / normalized_fractions.sum()

    all_labels = extract_all_labels_from_concat_dataset(concat_dataset)
    random_generator = np.random.default_rng(random_seed)

    # group global indices by class
    class_to_indices = defaultdict(list)
    for global_index, class_label in enumerate(all_labels):
        class_to_indices[int(class_label)].append(global_index)

    # One growing list of global indices per requested split.
    split_indices_per_subset = [[] for _ in range(len(normalized_fractions))]

    # Each class is split independently so every subset keeps the class proportions.
    for class_indices in class_to_indices.values():
        class_indices = np.array(class_indices, dtype=int)
        random_generator.shuffle(class_indices)

        total_class_samples = len(class_indices)
        # Rounding down first guarantees the counts never exceed the class size.
        samples_per_split = np.floor(normalized_fractions * total_class_samples).astype(int)

        # distribute leftover samples
        # (each leftover sample goes to a different, randomly chosen split)
        remainder = total_class_samples - samples_per_split.sum()
        for split_id in random_generator.permutation(len(samples_per_split))[:remainder]:
            samples_per_split[split_id] += 1

        # Hand out consecutive slices of the shuffled class indices to the splits.
        start_pointer = 0
        for split_id, count in enumerate(samples_per_split):
            split_indices_per_subset[split_id].extend(
                class_indices[start_pointer:start_pointer + count].tolist()
            )
            start_pointer += count

    # shuffle each split's indices
    # (they were appended class by class, so without this they would be grouped by class)
    for split_list in split_indices_per_subset:
        random_generator.shuffle(split_list)

    # NOTE: this unpacking raises ValueError unless exactly three fractions were given.
    train_subset, val_subset, test_subset = [
        Subset(concat_dataset, indices) for indices in split_indices_per_subset
    ]

    return train_subset, val_subset, test_subset

Data augmentation and required transformations

In [None]:
# Training pipeline: random augmentations first, then conversion to a tensor and
# normalisation with the IMAGENET_MEAN / IMAGENET_STD constants.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(EXPECTED_IMAGE_SIZE_YOLO, scale=(0.8, 1.0)),  # random crop
    transforms.RandomHorizontalFlip(p=0.5), # Random horizontal flip
    transforms.RandomAffine( # small random rotations, translations, scaling
        degrees=10, 
        translate=(0.03, 0.03),  
        scale=(0.97, 1.03)       
    ),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN,
                         IMAGENET_STD),
])

A transformation that will be used for testing, just resizing and normalizing.

In [None]:
# Evaluation pipeline: no random augmentation, only a resize to the size the
# training pipeline produces, followed by the same tensor conversion and normalisation.
val_test_transform = transforms.Compose([
    transforms.Resize(EXPECTED_IMAGE_SIZE_YOLO),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN,
                         IMAGENET_STD),
])

Dataset wrapper that applies the appropriate transformations to each split

In [None]:
class FlowersDataset(Dataset):
    """
    Thin wrapper that applies a transform to the images of an existing dataset.

    The wrapped dataset must return ``(image, label)`` pairs:
      - image: PIL.Image
      - label: int (0..num_classes-1)

    Wrapping lets several views of the same underlying data (e.g. the train and
    validation subsets) use different transform pipelines.

    Args:
        base_dataset: Dataset providing the ``(image, label)`` pairs.
        transform: Callable applied to every image, or ``None`` to return the
            images unchanged.
    """
    def __init__(self, base_dataset: Dataset, transform: Optional[Callable]):
        self.base_dataset = base_dataset
        self.transform = transform

    def __len__(self) -> int:
        """Number of samples in the wrapped dataset."""
        return len(self.base_dataset)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for sample ``idx``."""
        image, label = self.base_dataset[idx]
        # The callers annotate their transforms as Optional, so None must be a
        # valid "no transform" value rather than a crash at first access.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

Splitting data while preserving class ratios

In [None]:
def create_dataset_splits(
    full_dataset: ConcatDataset,
    random_seed: int,
    transform_for_train: Optional[Callable],
    transform_for_val_test: Optional[Callable]
) -> Tuple[Dataset, Dataset, Dataset]:
    """Create the train / validation / test datasets for one data-split seed.

    The full dataset is split with ``stratified_split_concat_dataset`` (default
    fractions) and every part is wrapped in a ``FlowersDataset``: the training
    part with ``transform_for_train``, the other two with ``transform_for_val_test``.

    Returns:
        ``(train_dataset, val_dataset, test_dataset)``.
    """
    train_part, val_part, test_part = stratified_split_concat_dataset(
        concat_dataset=full_dataset,
        random_seed=random_seed
    )
    return (
        FlowersDataset(train_part, transform=transform_for_train),
        FlowersDataset(val_part, transform=transform_for_val_test),
        FlowersDataset(test_part, transform=transform_for_val_test),
    )

Creating dataloaders    

In [None]:
def create_dataloaders(
    train_dataset: Dataset,
    val_dataset: Dataset,
    test_dataset: Dataset,
    batch_size: int = BATCH_SIZE,
    num_workers: int = NUM_WORKERS_FOR_DATALOADER
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    """Build the train / validation / test dataloaders.

    Args:
        train_dataset: Dataset iterated in a new random order every epoch.
        val_dataset: Dataset iterated in its stored order.
        test_dataset: Dataset iterated in its stored order.
        batch_size: Number of samples per batch, for all three loaders.
        num_workers: Number of loader worker processes, for all three loaders.

    Returns:
        ``(train_dataloader, val_dataloader, test_dataloader)``.
    """
    # Options shared by the three loaders. Pinned host memory is only useful for
    # host-to-GPU copies, and requesting it on a machine without CUDA only makes
    # PyTorch emit a warning, so it is enabled only when a GPU is present.
    shared_options = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=torch.cuda.is_available(),
    )
    # Only the training data is shuffled; evaluation order stays fixed.
    train_dataloader = DataLoader(train_dataset, shuffle=True, **shared_options)
    val_dataloader = DataLoader(val_dataset, shuffle=False, **shared_options)
    test_dataloader = DataLoader(test_dataset, shuffle=False, **shared_options)
    return train_dataloader, val_dataloader, test_dataloader

Model training code

Early stopping logic

In [None]:
class EarlyStopper:
    """Tracks a metric that should decrease and signals when training should stop.

    A value counts as an improvement only when it is more than ``min_delta`` below
    the best value seen so far. On every improvement (and on the very first value)
    a deep copy of the model's ``state_dict`` is stored in ``best_state_dict``.
    """
    def __init__(self, patience_epochs: int = PATIENCE_EPOCHS, min_delta: float = IMPROVEMENT_DELTA):
        self.patience_epochs = patience_epochs
        self.min_delta = min_delta
        # Nothing has been observed yet.
        self.best_value: Optional[float] = None
        self.epochs_without_improvement: int = 0
        self.best_state_dict: Optional[Dict[str, torch.Tensor]] = None

    def step(self, current_value: float, model: nn.Module) -> tuple[bool, bool]:
        """
        Record the metric of one epoch.

        Returns should_stop, improved
        """
        first_observation = self.best_value is None
        improved = first_observation or current_value < (self.best_value - self.min_delta)

        if not improved:
            # One more stale epoch; stop once the patience budget is used up.
            self.epochs_without_improvement += 1
            return self.epochs_without_improvement >= self.patience_epochs, improved

        # New best value (the first observation is always the best so far).
        self.best_value = current_value
        if not first_observation:
            self.epochs_without_improvement = 0
        self.best_state_dict = copy.deepcopy(model.state_dict())
        return False, True

Running single training loop

In [None]:
def run_one_epoch_train(
    model: nn.Module,
    train_dataloader: DataLoader,
    loss_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    optimizer: torch.optim.Optimizer,
    device: torch.device,
) -> Tuple[float, float]:
    """Train ``model`` for one pass over ``train_dataloader``.

    Returns:
        ``(average_loss, average_accuracy)`` over all samples seen; both are
        measured on the forward pass made before each optimizer step.
    """
    model.train()

    loss_sum, hits, seen = 0.0, 0, 0

    for images, targets in train_dataloader:
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True).long()

        optimizer.zero_grad(set_to_none=True)

        outputs = model(images)
        # Models returning several outputs are expected to put the logits first.
        logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs

        loss = loss_function(logits, targets)
        loss.backward()
        optimizer.step()

        samples_in_batch = targets.size(0)
        # Weight the batch loss by the batch size so a smaller last batch
        # does not distort the per-sample average.
        loss_sum += loss.item() * samples_in_batch
        hits += (logits.argmax(dim=1) == targets).sum().item()
        seen += samples_in_batch

    # max(1, ...) keeps an empty dataloader from dividing by zero.
    denominator = max(1, seen)
    return loss_sum / denominator, hits / denominator

Running single validation loop

In [None]:
@torch.no_grad()
def run_one_epoch_no_grad(
    model: nn.Module,
    val_dataloader: DataLoader,
    loss_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    device: torch.device,
) -> Tuple[float, float]:
    """Evaluate ``model`` on one pass over ``val_dataloader`` without tracking gradients.

    The model is switched to evaluation mode and is left in that mode.

    Returns:
        ``(average_loss, average_accuracy)`` over all samples seen.
    """
    model.eval()

    loss_sum, hits, seen = 0.0, 0, 0

    for images, targets in val_dataloader:
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True).long()

        outputs = model(images)
        # Models returning several outputs are expected to put the logits first.
        logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs

        samples_in_batch = targets.size(0)
        # Weight by batch size so the result is a true per-sample average.
        loss_sum += loss_function(logits, targets).item() * samples_in_batch
        hits += (logits.argmax(dim=1) == targets).sum().item()
        seen += samples_in_batch

    # max(1, ...) keeps an empty dataloader from dividing by zero.
    denominator = max(1, seen)
    return loss_sum / denominator, hits / denominator

Training loop with early stopping that returns the model that achieved the best validation results instead of the latest one.

In [None]:
def train_with_early_stopping(
    model: nn.Module,
    train_dataloader: DataLoader,
    val_dataloader: DataLoader,
    test_dataloader: DataLoader,
    loss_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    optimizer_factory: Callable[[nn.Module], torch.optim.Optimizer],
    device: torch.device,
    max_epochs: int = MAX_EPOCHS,
    improvement_delta: float = IMPROVEMENT_DELTA,
    patience_epochs: int = PATIENCE_EPOCHS
) -> Tuple[nn.Module, list[float], list[float], list[float], list[float], list[float], list[float]]:
    """Train ``model`` until the validation loss stops improving.

    Every epoch the model is trained on ``train_dataloader`` and then evaluated on
    the validation and test dataloaders. Only the validation loss drives early
    stopping and the choice of the returned weights; the test metrics are recorded
    for later plotting and never influence training.

    Args:
        model: Model to train (modified in place).
        train_dataloader: Batches used for the optimisation steps.
        val_dataloader: Batches used for early stopping / model selection.
        test_dataloader: Batches evaluated each epoch for monitoring only.
        loss_function: Maps ``(logits, targets)`` to a scalar loss.
        optimizer_factory: Builds the optimizer from the model.
        device: Device the batches are moved to.
        max_epochs: Maximum number of epochs to run.
        improvement_delta: Minimum drop in validation loss that counts as an improvement.
        patience_epochs: Number of consecutive epochs without improvement before stopping.

    Returns:
        ``(model, epoch_loss, epoch_acc, epoch_val_loss, epoch_val_acc,
        epoch_test_loss, epoch_test_acc)``: the model in eval mode carrying the
        weights with the best validation loss, followed by the per-epoch training,
        validation and test loss / accuracy histories.
    """
    epoch_loss: list[float] = list()
    epoch_acc: list[float] = list()
    epoch_val_loss: list[float] = list()
    epoch_val_acc: list[float] = list()
    epoch_test_loss: list[float] = list()
    epoch_test_acc: list[float] = list()
    optimizer = optimizer_factory(model)
    early_stopper = EarlyStopper(patience_epochs=patience_epochs, min_delta=improvement_delta)
    for epoch_index in range(1, max_epochs + 1):
        start = time.time()
        train_loss, train_accuracy = run_one_epoch_train(
            model=model,
            train_dataloader=train_dataloader,
            loss_function=loss_function,
            optimizer=optimizer,
            device=device,
        )
        val_loss, val_accuracy = run_one_epoch_no_grad(
            model=model,
            val_dataloader=val_dataloader,
            loss_function=loss_function,
            device=device,
        )
        # Monitoring only: the test metrics are stored but never used for decisions.
        test_loss, test_accuracy = run_one_epoch_no_grad(
            model=model,
            val_dataloader=test_dataloader,
            loss_function=loss_function,
            device=device,
        )
        epoch_loss.append(train_loss)
        epoch_acc.append(train_accuracy)
        epoch_val_loss.append(val_loss)
        epoch_val_acc.append(val_accuracy)
        epoch_test_loss.append(test_loss)
        epoch_test_acc.append(test_accuracy)
        print(
            f"Epoch {epoch_index:03d} | "
            f"train_loss={train_loss:.6f}, train_acc={train_accuracy:.4f} | "
            f"val_loss={val_loss:.6f}, val_acc={val_accuracy:.4f} | Epoch time: {time.time() - start:.2f} seconds"
        )

        should_stop, improved = early_stopper.step(current_value=val_loss, model=model)
        if not improved:
            print(
                f"No improvement in val_loss for {early_stopper.epochs_without_improvement} "
                f"out of {patience_epochs} allowed epochs."
            )
        if should_stop:
            print(
                f"Early stopping: no val_loss improvement >= {improvement_delta} "
                f"for {patience_epochs} epochs."
            )
            break
        print()
    # No snapshot exists when no epoch ran (max_epochs <= 0); keep the model as is
    # instead of failing inside load_state_dict.
    if early_stopper.best_state_dict is not None:
        model.load_state_dict(early_stopper.best_state_dict)
        print("Loaded best model weights (by validation loss).")
    model.eval()
    return model, epoch_loss, epoch_acc, epoch_val_loss, epoch_val_acc, epoch_test_loss, epoch_test_acc

Evaluate model's performance

In [None]:
def evaluate_model(
    model: torch.nn.Module,
    dataloader: torch.utils.data.DataLoader,
    loss_function: torch.nn.Module,
    device: torch.device
) -> Tuple[float, float]:
    """Compute the average loss and accuracy of ``model`` over ``dataloader``.

    The model is switched to evaluation mode and no gradients are tracked.

    Args:
        model: Model to evaluate.
        dataloader: Yields ``(images, targets)`` batches.
        loss_function: Maps ``(logits, targets)`` to a scalar loss.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)`` per sample; ``(0.0, 0.0)`` when the dataloader
        yields no samples.
    """
    model.eval()

    total_loss = 0.0
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        for images, targets in dataloader:
            images = images.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True).long()

            logits = model(images)
            if isinstance(logits, (tuple, list)):  # in case model returns extra outputs
                logits = logits[0]

            loss = loss_function(logits, targets)

            # Weight by batch size so the result is a true per-sample average.
            total_loss += loss.item() * images.size(0)
            preds = logits.argmax(dim=1)
            total_correct += (preds == targets).sum().item()
            total_samples += images.size(0)

    # Same guard as the per-epoch loops: an empty dataloader must not divide by zero.
    denominator = max(1, total_samples)
    avg_loss = total_loss / denominator
    accuracy = total_correct / denominator

    return avg_loss, accuracy

Replacing the classifier head with a new one, because the target dataset has different categories and a different number of categories


In [None]:
def replace_last_linear_layer(model: nn.Module, num_classes: int) -> nn.Module:
    """Replace the last ``nn.Linear`` of ``model`` with a fresh ``num_classes``-way layer.

    "Last" means last in ``model.named_modules()`` order. The new layer keeps the
    input width, device and dtype of the layer it replaces, so the model remains
    usable even if it was already moved to a GPU or cast to another precision.

    Args:
        model: Model whose final linear layer should be swapped (modified in place).
        num_classes: Number of output features of the new layer.

    Returns:
        The same ``model`` object with the layer replaced. If ``model`` itself is
        the only ``nn.Linear``, there is no parent to attach to and the new layer
        is returned instead.

    Raises:
        RuntimeError: If the model contains no ``nn.Linear`` layer.
    """
    last_linear_name = None
    last_linear_module = None

    # Keep overwriting, so the last match in traversal order wins.
    for module_name, module in model.named_modules():
        if isinstance(module, nn.Linear):
            last_linear_name = module_name
            last_linear_module = module

    if last_linear_module is None:
        raise RuntimeError("No nn.Linear layer found to replace.")

    new_linear_layer = nn.Linear(
        last_linear_module.in_features,
        num_classes,
        device=last_linear_module.weight.device,
        dtype=last_linear_module.weight.dtype,
    )

    print(f"Replaced: {last_linear_name} -> Linear({last_linear_module.in_features}, {num_classes})")

    # named_modules() reports the root module under the empty name: the model
    # itself is the linear layer, so the replacement becomes the model.
    if last_linear_name == "":
        return new_linear_layer

    # set by walking to the parent module
    parent = model
    name_parts = last_linear_name.split(".")
    for part in name_parts[:-1]:
        parent = getattr(parent, part)
    setattr(parent, name_parts[-1], new_linear_layer)

    return model

Prepare fresh yolo5 model

In [None]:
def get_fresh_yolo5_model(
        num_classes: int,
        device: torch.device) -> nn.Module:
    """Load the YOLOv5 classification checkpoint and adapt it to ``num_classes``.

    The checkpoint ``"<YOLOV5_MODEL>-cls.pt"`` is loaded through ``torch.hub``,
    its last linear layer is replaced by a new ``num_classes``-way layer, and the
    model is moved to ``device`` and put in training mode.
    """
    checkpoint_name = f"{YOLOV5_MODEL}-cls.pt"   # classification checkpoint
    hub_model = torch.hub.load(
        "ultralytics/yolov5",
        "custom",
        path=checkpoint_name,
        autoshape=False,        # important for training
        verbose=False,
    )
    adapted_model = replace_last_linear_layer(hub_model, num_classes).to(device)
    adapted_model.train()
    return adapted_model

Prepare fresh vgg19

In [None]:
def get_fresh_vgg19_model(
        num_classes: int,
        device: torch.device,
        weight_location: str = FRESH_LOCAL_VGG_WEIGHTS
        ) -> nn.Module:
    """Return a VGG19 with a ``num_classes``-way head, partially frozen for fine-tuning.

    The starting weights come from a local checkpoint at ``weight_location``. On
    the first call the file does not exist yet, so the ImageNet-pretrained VGG19
    is fetched, given a new final layer, and saved there. Later calls load that
    file, which means they all start from the same weights, including the same
    randomly initialised final layer.

    NOTE(review): the checkpoint stores the final layer sized for the
    ``num_classes`` of the first call; loading it with a different ``num_classes``
    presumably fails in ``load_state_dict`` -- delete the file when changing it.

    Args:
        num_classes: Output size of the replaced final classifier layer.
        device: Device the returned model is moved to.
        weight_location: Path of the local checkpoint to load or create.

    Returns:
        The model on ``device`` with only ``features[28:]`` and the final
        classifier layer trainable.
    """
    ckpt_path = Path(weight_location)
    ckpt_path.parent.mkdir(parents=True, exist_ok=True)

    # Build architecture (must match the saved state_dict)
    model = models.vgg19(weights=None)
    model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, num_classes)


    # Load if exists, otherwise "download" pretrained VGG19 and save
    if ckpt_path.exists():
        state = torch.load(ckpt_path, map_location="cpu")
        model.load_state_dict(state)
    else:
        base = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)  # downloads if needed
        base.classifier[-1] = nn.Linear(base.classifier[-1].in_features, num_classes)
        model.load_state_dict(base.state_dict())
        torch.save(model.state_dict(), ckpt_path)
    
    # Freeze ALL parameters
    for param in model.parameters():
        param.requires_grad = False

    # Unfreeze the feature layers from index 28 onwards
    # (presumably the last convolutional block of VGG19 -- TODO confirm)
    for param in model.features[28:].parameters(): 
        param.requires_grad = True

    # Unfreeze the new last layer as well
    for param in model.classifier[-1].parameters():
        param.requires_grad = True
    return model.to(device)

Training code

Optimizer factories are used because an optimizer cannot be created before its model is fully set up.

In [None]:
def yolo5_optimizer_factory(model):
    """Build the AdamW optimizer for the YOLOv5 classifier over its trainable parameters."""
    # Parameters with requires_grad=False are left out of the optimizer.
    trainable_parameters = list(filter(lambda parameter: parameter.requires_grad, model.parameters()))
    return torch.optim.AdamW(trainable_parameters, lr=3e-4, weight_decay=1e-2)

def vgg19_optimizer_factory(model):
    """Build the SGD-with-momentum optimizer for VGG19 over its trainable parameters."""
    # Parameters with requires_grad=False are left out of the optimizer.
    trainable_parameters = list(filter(lambda parameter: parameter.requires_grad, model.parameters()))
    return torch.optim.SGD(trainable_parameters, lr=1e-3, momentum=0.9, weight_decay=1e-4)

In [None]:
# Single cross-entropy criterion shared by every training and evaluation call below.
loss_function = nn.CrossEntropyLoss()

In [None]:
# One record per (model, seed) run; see the field order of `run_data` below.
run_info: list[tuple] = list()
# (display name, model factory, optimizer factory) of every architecture compared.
# Defined once, outside the loop: it does not depend on the seed, and the
# result-loading cell further down reads it as well.
models_to_train = [('VGG19', get_fresh_vgg19_model, vgg19_optimizer_factory), ('Yolo5', get_fresh_yolo5_model, yolo5_optimizer_factory)]
for seed in DATA_SPLIT_SEED_LIST:
    # A different stratified split for every seed.
    train_dataset, val_dataset, test_dataset = create_dataset_splits(
        full_dataset, 
        random_seed=seed, 
        transform_for_train=train_transform, 
        transform_for_val_test=val_test_transform
        )
    
    train_dataloader, val_dataloader, test_dataloader = create_dataloaders(train_dataset, val_dataset, test_dataset)
    for model_name, model_factory, optimizer_factory in models_to_train:
        print(f"\n--- Training {model_name} with data seed {seed} ---")
        # Seed every RNG right before each run so that weight initialisation,
        # batch shuffling and augmentation of a (model, seed) pair can be repeated
        # independently of what was trained before it.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        # Creating a new model each time.
        classifier_model, epoch_loss, epoch_acc, epoch_val_loss, epoch_val_acc, epoch_test_loss, epoch_test_acc = train_with_early_stopping(
            model=model_factory(num_classes=num_classes, device=device),
            train_dataloader=train_dataloader,
            val_dataloader=val_dataloader,
            test_dataloader=test_dataloader,
            loss_function=loss_function,
            optimizer_factory=optimizer_factory,
            device=device,
            max_epochs=MAX_EPOCHS,
        )
        # Test metrics of the restored best-validation weights (not of the last epoch).
        test_loss, test_accuracy = evaluate_model(
            model=classifier_model,
            dataloader=test_dataloader,
            loss_function=loss_function,
            device=device
        )
        run_data = (model_name, seed, epoch_loss, epoch_acc, epoch_val_loss, epoch_val_acc, epoch_test_loss, epoch_test_acc, test_loss, test_accuracy)
        run_info.append(run_data)
        # Persist each run immediately so finished runs survive a later crash.
        with open(f"{model_name}_run_seed_{seed}.json", "w") as f:
            json.dump(run_data, f, indent=2)

Evaluating Models

Extracting data from files. Done as a failsafe in case the run crashes unexpectedly during a long training session.

In [None]:
# Rebuild run_info from the per-run JSON files written during training.
run_info: list[tuple] = list()
# This cell exists so results can be analysed after a crash or kernel restart, so it
# must not require the training cell to have run: fall back to the model names used
# in the saved file names when `models_to_train` is not defined in this kernel.
try:
    saved_model_names = [name for name, _, _ in models_to_train]
except NameError:
    saved_model_names = ["VGG19", "Yolo5"]
for seed in DATA_SPLIT_SEED_LIST:
    for model_name in saved_model_names:
        seed_run_location = f"{model_name}_run_seed_{seed}.json"
        # Runs that never finished have no file and are skipped.
        if os.path.exists(seed_run_location):
            with open(seed_run_location) as f:
                run_data = json.load(f)
                # JSON stores the record as a list; restore the tuple form used above.
                run_info.append(tuple(run_data))

In [None]:
# Re-index the flat run records in two ways for the reports and plots below:
#   model_data[model_name][metric]   -> list with one entry per run of that model
#   seed_run_data[seed][model_name]  -> dict with the per-epoch curves of that one run
model_data: dict = defaultdict(lambda: defaultdict(list))
seed_run_data: dict = defaultdict(lambda: defaultdict(list))

# The last two fields of each record (final test loss / accuracy) are not needed here.
for model_name, seed, epoch_loss, epoch_acc, epoch_val_loss, epoch_val_acc, epoch_test_loss, epoch_test_acc, _, _ in run_info:
    model_data[model_name]['seed'].append(seed)
    # Number of epochs actually trained in this run.
    model_data[model_name]["epochs"].append(len(epoch_loss))
    model_data[model_name]["epoch_loss"].append(epoch_loss)
    model_data[model_name]["epoch_acc"].append(epoch_acc)
    model_data[model_name]["epoch_val_loss"].append(epoch_val_loss)
    model_data[model_name]["epoch_val_acc"].append(epoch_val_acc)
    model_data[model_name]["epoch_test_loss"].append(epoch_test_loss)
    model_data[model_name]["epoch_test_acc"].append(epoch_test_acc)
    seed_run_data[seed][model_name] = {
        "epoch_loss": epoch_loss,
        "epoch_acc": epoch_acc,
        "epoch_val_loss": epoch_val_loss,
        "epoch_val_acc": epoch_val_acc,
        "epoch_test_loss": epoch_test_loss,
        "epoch_test_acc": epoch_test_acc
    }

Organizing Data

In [None]:
# Per-model summary, averaged over all runs (seeds) of that model.
# "Final" means the value of the last epoch that was trained, which is not
# necessarily the epoch whose weights early stopping restored.
for model_name, data in model_data.items():
    print(f"{model_name}:")
    print(f"  Epochs mean: {np.mean(data['epochs']):.2f}")
    final_train_accs = [accs[-1] for accs in data["epoch_acc"]]
    print(f"  Final training accuracy mean: {np.mean(final_train_accs):.4f}")
    final_val_accs = [accs[-1] for accs in data["epoch_val_acc"]]
    print(f"  Final validation accuracy mean: {np.mean(final_val_accs):.4f}")
    final_train_losses = [losses[-1] for losses in data["epoch_loss"]]
    print(f"  Final training loss mean: {np.mean(final_train_losses):.4f}")
    final_val_losses = [losses[-1] for losses in data["epoch_val_loss"]]
    print(f"  Final validation loss mean: {np.mean(final_val_losses):.4f}")
    # Averaged over this model's own runs, like every other line, instead of being
    # read from whichever seed a previous cell's loop variable happened to leave behind.
    final_test_losses = [losses[-1] for losses in data["epoch_test_loss"]]
    print(f"  Final test loss mean: {np.mean(final_test_losses):.4f}")
    final_test_accs = [accs[-1] for accs in data["epoch_test_acc"]]
    print(f"  Final test accuracy mean: {np.mean(final_test_accs):.4f}")
    print("")
    # 1-based epoch at which accuracy first reaches 70%; a run that never reaches
    # it contributes its total number of epochs.
    first_epoch_over_70_train = []
    for accs in data["epoch_acc"]:
        epoch_idx = next((i+1 for i, acc in enumerate(accs) if acc >= 0.7), len(accs))
        first_epoch_over_70_train.append(epoch_idx)
    print(f"  First epoch to reach over 70% acc on training mean: {np.mean(first_epoch_over_70_train):.2f}")
    first_epoch_over_70_val = []
    for accs in data["epoch_val_acc"]:
        epoch_idx = next((i+1 for i, acc in enumerate(accs) if acc >= 0.7), len(accs))
        first_epoch_over_70_val.append(epoch_idx)
    print(f"  First epoch to reach over 70% acc on validation mean: {np.mean(first_epoch_over_70_val):.2f}")
    print("")

Graphing loss

In [None]:
# One figure per model: train / validation / test loss curves of every seed.
for model_name, data in model_data.items():
    # A new figure for each model, matching the plt.show() at the end of the iteration.
    plt.figure()
    # Runs may stop at different epochs; build the longest axis and slice it per curve.
    max_epochs = max(len(epoch_loss) for epoch_loss in data["epoch_loss"])
    epochs = range(1, max_epochs + 1)
    for run_seed, epoch_loss, epoch_val_loss, epoch_test_loss in zip(
        data["seed"], data["epoch_loss"], data["epoch_val_loss"], data["epoch_test_loss"]
    ):
        plt.plot(epochs[:len(epoch_loss)], epoch_loss, label=f"{model_name} loss_{run_seed}")
        plt.plot(epochs[:len(epoch_val_loss)], epoch_val_loss, label=f"{model_name} val loss_{run_seed}")
        plt.plot(epochs[:len(epoch_test_loss)], epoch_test_loss, label=f"{model_name} test loss_{run_seed}")
    plt.xlabel("Epochs")
    plt.ylabel("CE Loss")
    plt.title(f"{model_name} Loss /w seeds vs Epochs")
    plt.legend()
    plt.grid(True)
    plt.show()

Graphing accuracy

In [None]:
# One figure per model: train / validation / test accuracy curves of every seed.
for model_name, data in model_data.items():
    # A new figure for each model, matching the plt.show() at the end of the iteration.
    plt.figure()
    # Runs may stop at different epochs; build the longest axis and slice it per curve.
    max_epochs = max(len(epoch_acc) for epoch_acc in data["epoch_acc"])
    epochs = range(1, max_epochs + 1)
    for run_seed, epoch_acc, epoch_val_acc, epoch_test_acc in zip(
        data["seed"], data["epoch_acc"], data["epoch_val_acc"], data["epoch_test_acc"]
    ):
        plt.plot(epochs[:len(epoch_acc)], epoch_acc, label=f"{model_name} acc_{run_seed}")
        plt.plot(epochs[:len(epoch_val_acc)], epoch_val_acc, label=f"{model_name} val acc_{run_seed}")
        plt.plot(epochs[:len(epoch_test_acc)], epoch_test_acc, label=f"{model_name} test acc_{run_seed}")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.title(f"{model_name} Accuracy /w seeds vs Epochs")
    plt.legend()
    plt.grid(True)
    plt.show()

Graphing data per seed

In [None]:
# One figure per (seed, model) run with its train / validation / test loss curves.
for curr_seed in seed_run_data.keys():
    for model_name, run_data in seed_run_data[curr_seed].items():
        # The title and plt.show() are per model, so the figure has to be too.
        plt.figure()
        epoch_loss = run_data['epoch_loss']
        epoch_val_loss = run_data['epoch_val_loss']
        epoch_test_loss = run_data['epoch_test_loss']
        # All three curves of a run have one value per trained epoch.
        epochs = range(1, len(epoch_loss) + 1)
        plt.plot(epochs, epoch_loss, label=f"{model_name} loss")
        plt.plot(epochs, epoch_val_loss, label=f"{model_name} val loss")
        plt.plot(epochs, epoch_test_loss, label=f"{model_name} test loss")
        plt.xlabel("Epochs")
        plt.ylabel("CE Loss")
        plt.title(f"Model: {model_name} Seed {curr_seed} Loss vs Epochs")
        plt.legend()
        plt.grid(True)
        plt.show()



In [None]:
# One figure per seed comparing the test-loss curves of all models on that split.
for curr_seed in seed_run_data.keys():
    plt.figure()
    for model_name, run_data in seed_run_data[curr_seed].items():
        epoch_test_loss = run_data['epoch_test_loss']
        # The x axis must match the curve being drawn; models stop at different
        # epochs, so a length taken from any other run makes plt.plot fail.
        epochs = range(1, len(epoch_test_loss) + 1)
        plt.plot(epochs, epoch_test_loss, label=f"{model_name} test loss")
    # Axis labels and title belong to the figure, not to the individual curves.
    plt.xlabel("Epochs")
    plt.ylabel("CE Loss")
    plt.title(f"Yolo5 and VGG19, Seed {curr_seed} Test loss vs Epochs")
    plt.legend()
    plt.grid(True)
    plt.show()


Test accuracy & loss

Extract data

In [None]:
# Flatten the final test metrics of every run into parallel lists for the bar charts.
# `seeds` holds the bar labels "<model name>_<seed>", i.e. strings, not integers.
seeds: list[str] = list()
test_acc_list: list[float] = list()
test_loss_list: list[float] = list()
# Only the model name, the seed and the last two fields (final test loss / accuracy) are used.
for model_name, seed, _, _, _, _, _, _, test_loss, test_accuracy in run_info:
    seeds.append(f"{model_name}_{seed}")
    test_acc_list.append(test_accuracy)
    test_loss_list.append(test_loss)

Loss

In [None]:
# Bar chart of the final test loss, one bar per "<model>_<seed>" run.
plt.figure()
bars = plt.bar(seeds, test_loss_list, color="skyblue")

plt.xlabel("Model & Data Seed")
plt.ylabel("Test Loss")
plt.title("Test Loss per Seed")
plt.grid(True, axis="y")

# Add values on top of bars
for bar, loss in zip(bars, test_loss_list):
    plt.text(
        bar.get_x() + bar.get_width() / 2,  # horizontally centred on the bar
        bar.get_height(),                   # at the top edge of the bar
        f"{loss:.3f}",
        ha='center',
        va='bottom'
    )

Accuracy

In [None]:
# Bar chart of the final test accuracy, one bar per "<model>_<seed>" run.
plt.figure()
bars = plt.bar(seeds, test_acc_list, color="skyblue")

plt.xlabel("Model & Data Seed")
plt.ylabel("Test Accuracy")
plt.title("Test Accuracy per Seed")
plt.grid(True, axis="y")

# Add values on top of bars
for bar, acc in zip(bars, test_acc_list):
    plt.text(
        bar.get_x() + bar.get_width() / 2,  # horizontally centred on the bar
        bar.get_height(),                   # at the top edge of the bar
        f"{acc:.3f}",
        ha='center',
        va='bottom'
    )

In [None]:
# Textual version of the two bar charts: final test accuracy and loss per run.
print('Final accuracies and losses per model and seed:')
print('Accuracy:')
for run_label, run_accuracy in zip(seeds, test_acc_list):
    print(f"{run_label}: {run_accuracy:.4f}")
# The header announces losses as well, so report them too.
print('Loss:')
for run_label, run_loss in zip(seeds, test_loss_list):
    print(f"{run_label}: {run_loss:.4f}")