In [1]:
import sys
sys.path.append("../../")

In [2]:
import torch
from torch import nn, optim
from torch.amp import autocast, GradScaler

from deepfaune.deepfaune_model import Deepfaune
from utils.dataloaders import get_dataloaders
from utils.class_names import class_names

#### Settings

In [3]:
# Mini-batch size; handed to get_dataloaders() as its first argument.
BATCH_SIZE = 70

In [4]:
# Decide where the network will live before building it.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the Deepfaune wrapper from the pretrained weights and keep only the
# underlying backbone (`base_model`) for fine-tuning.
weights_path = '../../deepfaune/models/deepfaune-vit_large_patch14_dinov2.lvd142m.v4.pt'
model_wrapper = Deepfaune(weights_path)
model = model_wrapper.model.base_model

# Last expression of the cell: moves the model and displays its architecture.
model.to(device)

Using model in resolution 480x480
CUDA available


VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1024, out_features=3072, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (proj): Linear(in_features=1024, out_features=1024, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=1024, out_features=4096, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inpl

#### Dataloaders

In [5]:
# NOTE(review): 476 is presumably the input image side length (34 * 14, i.e. a
# multiple of the 14-pixel patch size) -- confirm against get_dataloaders().
dataloaders = get_dataloaders(BATCH_SIZE, 476)

# Pull out both loaders in one go; .get() yields None for a missing key.
train_dataloader, test_dataloader = (
    dataloaders.get(key) for key in ('train_dataloader', 'test_dataloader')
)

Number of train classes:  26
Number of train images:  39027
Number of test images:  3983


#### Training

In [6]:
# Disable gradients for every parameter currently in the model.
model.requires_grad_(False)

# Swap in a new classifier sized for our label set. This happens after the
# freeze, so the new head is presumably created with trainable parameters
# (TODO confirm against reset_classifier); it is the only part handed to the
# optimizer below.
model.reset_classifier(num_classes=len(class_names))
model = model.to(device)  # the freshly created head must be moved as well

# Loss, mixed-precision gradient scaler, and an optimizer over the head only.
scaler = GradScaler()
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.get_classifier().parameters(), lr=5e-4)

In [None]:
def train_epoch(epoch: int) -> tuple[float, dict]:
    """Run one optimisation pass over the train set, then score the test set.

    Works on the notebook-level ``model``, ``optimizer``, ``criterion``,
    ``scaler``, ``device``, ``train_dataloader`` and ``test_dataloader``.
    Progress and per-epoch summaries are printed.

    Args:
        epoch: Epoch index; only used in the printed summary lines.

    Returns:
        ``(test_loss, state_dict)`` where ``test_loss`` is the sum of the
        per-batch losses over ``test_dataloader`` and ``state_dict`` is
        ``model.state_dict()`` taken after evaluation (no copy is made here).
    """
    # ---------- optimisation pass ----------
    model.train()
    train_loss, correct_train, total_train = 0, 0, 0
    for step, (imgs, labels) in enumerate(train_dataloader, start=1):
        print(f'Batch {step} / {len(train_dataloader)}', end='\r')
        imgs = imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        with autocast('cuda'):
            logits = model(imgs)
            loss = criterion(logits, labels)

        # Mixed-precision update: scaled backward, optimizer step, scale refresh.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()
        hits = logits.argmax(1) == labels
        correct_train += hits.sum().item()
        total_train += labels.size(0)

    train_acc = correct_train / total_train
    print(f"Epoch {epoch} train_acc: {train_acc:.3f} train_loss: {train_loss:.3f}")

    # ---------- evaluation pass ----------
    model.eval()
    test_loss, correct_test, total_test = 0, 0, 0
    with torch.no_grad():
        for step, (imgs, labels) in enumerate(test_dataloader, start=1):
            print(f'Batch {step} / {len(test_dataloader)}', end='\r')
            imgs = imgs.to(device)
            labels = labels.to(device)

            with autocast('cuda'):
                logits = model(imgs)
                loss = criterion(logits, labels)

            test_loss += loss.item()
            hits = logits.argmax(1) == labels
            correct_test += hits.sum().item()
            total_test += labels.size(0)

    test_acc = correct_test / total_test
    print(f"Epoch {epoch} test_acc: {test_acc:.3f} test_loss: {test_loss:.3f}")

    return test_loss, model.state_dict()


In [8]:
# Track the lowest test loss seen so far and the weights that produced it.
loss_best = float('inf')
best_state = None  # stays None until at least one epoch has completed

for epoch in range(4):
    loss, state_dict = train_epoch(epoch)
    if loss < loss_best:
        # state_dict() returns references to the model's live tensors, so
        # keeping it as-is would let later optimizer steps overwrite the
        # "best" weights in place. Copy every tensor to CPU to keep a real
        # snapshot of this epoch (and to avoid holding a second copy on GPU).
        best_state = {
            key: value.detach().to('cpu', copy=True)
            for key, value in state_dict.items()
        }
        loss_best = loss

Epoch 0 train_acc: 0.935 train_loss: 129.762
Epoch 0 test_acc: 0.839 test_loss: 37.506
Epoch 1 train_acc: 0.964 train_loss: 64.851
Epoch 1 test_acc: 0.840 test_loss: 39.644
Epoch 2 train_acc: 0.972 train_loss: 50.255
Epoch 2 test_acc: 0.834 test_loss: 42.516
Batch 18 / 558

KeyboardInterrupt: 

**TODO:** Move the mapping into the fine-tuning method.

In [9]:
name = "deepfaune_polish_lr4"

# Bundle the tracked weights together with the label metadata needed to
# rebuild the classifier head, then write everything to a single file.
checkpoint = dict(
    state_dict=best_state,
    class_names=class_names,
    num_classes=len(class_names),
)
torch.save(checkpoint, f"{name}_checkpoint.pt")

In [10]:
# Pickle the whole module object (not just its weights). This captures the
# model as it is when the cell runs, which is not necessarily `best_state`.
model_file = f"{name}_model.pt"
torch.save(model, model_file)