In [5]:
# Check directory
import os
print("Current directory:", os.getcwd())
print("Files:", os.listdir())


Current directory: /Users/berkcalisir/fed_talos_project/experiment_notebooks
Files: ['federated_baseline.ipynb', 'centralized_baseline.ipynb', 'model_editing.ipynb']


In [6]:
import sys
from pathlib import Path

# Go one level up to project root
project_root = Path().resolve().parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [7]:
import torch
import torch.nn.functional as F
import wandb
from torch.optim.lr_scheduler import CosineAnnealingLR
import torch.utils
from models.dino_vits16 import DINO_ViT
from project_utils.data_split import load_cifar100

# Setup wandb & hyperparameters

In [8]:
import project_utils.wandb_logger
print("Imported from:", project_utils.wandb_logger.__file__)


Imported from: /Users/berkcalisir/fed_talos_project/project_utils/wandb_logger.py


In [9]:
from project_utils.wandb_logger import load_config, init_wandb

raw_config = load_config("../config.yaml")
config = init_wandb(raw_config)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mahmetberk2000[0m ([33mahmetberk2000-politecnico-di-torino[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Core training loop

In [10]:
def train_one_epoch(model, loader, optimizer, device):
    model.train()
    total_loss, correct, total = 0, 0, 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = model(x)
        loss = F.cross_entropy(outputs, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * x.size(0)
        _, predicted = outputs.max(1)
        total += y.size(0)
        correct += predicted.eq(y).sum().item()

    return total_loss / total, correct / total


# Evaluation

In [11]:
def evaluate(model, loader, device):
    model.eval()
    loss, correct, total = 0, 0, 0

    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            loss += F.cross_entropy(outputs, y).item() * x.size(0)
            _, predicted = outputs.max(1)
            total += y.size(0)
            correct += predicted.eq(y).sum().item()

    return loss / total, correct / total


# Main Script

In [13]:
import torch
from models.dino_vits16 import DINO_ViT
from project_utils.data_split import load_cifar100

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load data
train_set, val_set, test_set = load_cifar100()
train_loader = DataLoader(train_set, batch_size=config.batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=config.batch_size)
test_loader = DataLoader(test_set, batch_size=config.batch_size)

# Load model
model = DINO_ViT(num_classes=100).to(device)

# Optimizer & Scheduler
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=config.lr,
    momentum=config.momentum,
    weight_decay=config.weight_decay
)
scheduler = CosineAnnealingLR(optimizer, T_max=config.epochs)

# Training loop
for epoch in range(config.epochs):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, device)
    val_loss, val_acc = evaluate(model, val_loader, device)

    wandb.log({
        "epoch": epoch,
        "train_loss": train_loss,
        "train_acc": train_acc,
        "val_loss": val_loss,
        "val_acc": val_acc,
        "lr": scheduler.get_last_lr()[0]
    })

    scheduler.step()

# Final test evaluation
test_loss, test_acc = evaluate(model, test_loader, device)
wandb.log({"test_loss": test_loss, "test_acc": test_acc})
wandb.finish()

Using cache found in /Users/berkcalisir/.cache/torch/hub/facebookresearch_dino_main


TypeError: '<' not supported between instances of 'str' and 'float'