In [1]:
import os, sys
import torch
import torch.nn as nn
import torch.optim as optim
from utils.data_utils import get_loaders
from utils.trainer import train, validate
from models.backbones import get_model

In [2]:
# Make the project root the working directory and put it on the import path.
#
# The root is resolved only once per kernel: ".." is relative to the current
# working directory, and this cell changes that directory, so recomputing it
# on a re-run would climb one level higher every time the cell is executed.
#
# NOTE(review): the project imports in the first cell run before this path
# setup, so on a fresh kernel they only resolve if the project is already
# importable from the start-up directory -- confirm.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("..")
os.chdir(PROJECT_ROOT)

# Drop duplicate entries (first occurrence wins, order preserved), then
# register the root only if it is not already present.
sys.path = list(dict.fromkeys(sys.path))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [3]:
# Location of the dataset splits (relative to the current working directory).
DATA_DIR = "data/splits"

# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f'torch running on {device}')

torch running on cuda


In [4]:
def count_trainable_params(model):
    """Return the total number of scalar entries in the trainable parameters.

    A parameter counts as trainable when its ``requires_grad`` flag is set;
    all other parameters yielded by ``model.parameters()`` are skipped.
    """
    total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        total += param.numel()
    return total


In [5]:
# Smoke test: run every backbone through the training and validation loops for
# a short schedule to check that get_model, the dataloaders and the loops work
# together, and that each model fits on the selected device.

# experiment setup
model_list = ["resnet50", "vit_base", "dinov3"]
num_epochs = 1
results = {}  # name -> {"status": "SUCCESS" | "OOM", "val_acc": float | None}

# Neither the loaders nor the loss depend on the model, so build them once
# instead of once per backbone.
dataloaders = get_loaders(DATA_DIR, batch_size=32)
criterion = nn.CrossEntropyLoss()

for name in model_list:
    print(f"\n>>> Testing {name}")

    # Release the previous candidate before allocating the next one. While the
    # old model/optimizer are still referenced, their weights and AdamW state
    # stay on the device and would distort the "does it fit" check.
    # The last model tested stays bound to `model` after the loop.
    model = optimizer = None
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Per the original note, the flag has no effect for DINOv3, which is only
    # linear-probed.
    fine_tune = (name != "dinov3")

    # Reset per model so a zero-epoch run cannot report the previous model's accuracy.
    val_acc = None

    try:
        # Model creation and the transfer to the device are inside the try so
        # that an out-of-memory error during allocation is reported as well.
        model = get_model(name, num_classes=10, fine_tune=fine_tune).to(device)

        # count trainable parameters
        trainable_params = count_trainable_params(model)
        print(f"trainable parameters: {trainable_params:,}")

        optimizer = optim.AdamW(model.parameters(), lr=1e-3)

        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch + 1}/{num_epochs}")

            train_loss, train_acc = train(model, dataloaders["train"], criterion, optimizer, device, leave=True)

            val_loss, val_acc = validate(model, dataloaders["val"], criterion, device)

            print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% || "
                  f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

        results[name] = {"status": "SUCCESS", "val_acc": val_acc}

    except RuntimeError as e:
        # Only out-of-memory failures are expected here; anything else is a
        # real bug and is re-raised with its original traceback.
        if "out of memory" not in str(e).lower():
            raise
        print(f"OOM on {name}")
        # Record the failure so the summary covers every model that was tried.
        # The memory itself is released at the top of the next iteration, once
        # the failed model is no longer referenced.
        results[name] = {"status": "OOM", "val_acc": None}



>>> Testing resnet50
trainable parameters: 23,528,522

Epoch 1/1


Training: 100%|██████████| 675/675 [03:46<00:00,  2.98it/s]
                                                           

Train Loss: 0.4090 | Train Acc: 87.05% || Val Loss: 0.2085 | Val Acc: 92.74%

>>> Testing vit_base
trainable parameters: 85,806,346

Epoch 1/1


Training: 100%|██████████| 675/675 [11:54<00:00,  1.06s/it]
                                                           

Train Loss: 1.4263 | Train Acc: 45.91% || Val Loss: 0.9930 | Val Acc: 62.44%

>>> Testing dinov3
trainable parameters: 7,690

Epoch 1/1


Training: 100%|██████████| 675/675 [05:58<00:00,  1.89it/s]
                                                           

Train Loss: 0.3453 | Train Acc: 91.51% || Val Loss: 0.2389 | Val Acc: 92.37%


