In [1]:
import os.path
import wandb
import torch.nn as nn
import torch
import pandas as pd
from torch.utils.data import DataLoader
from models.transformer import SwinTransformer
from utils.dataset import EmotionalDataSet
from utils.generals import load_checkpoint, save_checkpoint, get_transform, get_lr_scheduler, save_strategy, increment_path
from trainer.engine import train_one_epoch, evaluate
from timm.loss import LabelSmoothingCrossEntropy
from torch.optim import AdamW
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

In [2]:
# Emotion label -> integer class index.
class_mapping = dict(
    angry=0,
    disgust=1,
    fear=2,
    happy=3,
    neutral=4,
    sad=5,
    surprise=6,
)

# Input geometry: a single size shared by width and height.
imgsz = 224
w = imgsz
h = imgsz
in_channel = 1
num_classes = len(class_mapping)

# Architecture settings forwarded to SwinTransformer.
embed_dim = 96
depths = [2, 2, 6, 2]
num_heads = [3, 6, 12, 24]

# Optimisation settings.
learning_rate = 1e-3
decay = 5e-3  # passed to AdamW as weight_decay
epochs = 200

# Run directory and the checkpoint file inside it.
save_root = increment_path("runs/exp", exist_ok=False)
save_ckpt_path = "/".join((save_root, "checkpoint.pth"))

# Prefer the GPU when PyTorch can see one.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

save_epoch = 10  # a checkpoint is only considered once epoch > save_epoch
use_tensorboard = False
use_wandb = False

In [3]:
# Read the test and train annotation tables (in that order) in one pass.
df_test, df_train = map(pd.read_csv, ("data/test.csv", "data/train.csv"))

In [4]:
# Options shared by both splits.
# NOTE(review): cache_train=False with an empty cache_path presumably disables
# caching — confirm against EmotionalDataSet.
_shared_dataset_options = {"cache_train": False, "cache_path": ""}

# Each split gets its own transform instance built from the configured size.
trainset = EmotionalDataSet(
    df_train,
    root="data/train",
    **_shared_dataset_options,
    transformer=get_transform(w, h),
)

testset = EmotionalDataSet(
    df_test,
    root="data/test",
    **_shared_dataset_options,
    transformer=get_transform(w, h),
)

In [5]:
# Mini-batch size shared by both loaders (kept at the original value of 2).
batch_size = 2

# Training batches are reshuffled every epoch.
trainloader = DataLoader(trainset, shuffle=True, batch_size=batch_size)
# Evaluation does not benefit from shuffling; a fixed order keeps every
# evaluation pass over the test set deterministic and comparable.
testloader = DataLoader(testset, shuffle=False, batch_size=batch_size)

In [8]:
# Collect the network hyper-parameters in one place before instantiating.
swin_kwargs = {
    "img_size": imgsz,
    "in_chans": in_channel,
    "num_classes": num_classes,
    "embed_dim": embed_dim,
    "depths": depths,
    "num_heads": num_heads,
    "drop_rate": 0.1,
    "drop_path_rate": 0.2,
}
model = SwinTransformer(**swin_kwargs)

# Loss, optimizer and learning-rate schedule used by the training loop.
criterion = LabelSmoothingCrossEntropy()
optimizer = AdamW(
    model.parameters(),
    lr=learning_rate,
    betas=(0.937, 0.999),
    weight_decay=decay,
)
lr_scheduler = get_lr_scheduler(optimizer, epochs=epochs)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [13]:
# Wrap the model for data-parallel execution and move it to the target device.
# The isinstance guard makes this cell safe to re-run: without it every
# re-execution nests another DataParallel wrapper around the model, which adds
# one more "module." prefix to the state-dict keys each time.
if not isinstance(model, nn.DataParallel):
    model = nn.DataParallel(model)
model = model.to(device)
criterion.to(device)

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 4.00 GiB total capacity; 2.84 GiB already allocated; 0 bytes free; 2.92 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Resume from an existing checkpoint file, otherwise start a fresh run.
# NOTE(review): save_ckpt_path lives under save_root, which comes from
# increment_path(..., exist_ok=False); whether a checkpoint can already exist
# there depends on that helper — confirm.
checkpoint_available = save_ckpt_path.endswith(".pth") and os.path.isfile(save_ckpt_path)

if not checkpoint_available:
    # Fresh run: first epoch, no best accuracies recorded yet.
    start_epoch = 1
    best_acc1, best_acc2 = 0, 0
else:
    state_dict, start_epoch, optim, lr_, loss, acc = load_checkpoint(save_ckpt_path)
    # Restore model, optimizer and scheduler state in that order.
    model.load_state_dict(state_dict)
    optimizer.load_state_dict(optim)
    lr_scheduler.load_state_dict(lr_)
    # The stored accuracy pair becomes the current "best" reference.
    best_acc1, best_acc2 = acc

In [14]:
# TensorBoard writer: only created when enabled, otherwise None.
if use_tensorboard:
    writer = SummaryWriter(save_root)
else:
    writer = None

# Weights & Biases run: only created when enabled, otherwise None.
if use_wandb:
    wandb_run = wandb.init(
        project="Emotional Face",
        # The run is named after the last component of the save directory.
        name=save_root.rsplit("/", 1)[-1],
        resume="allow",
        config={
            "img size": imgsz,
            "lr": learning_rate,
            "epochs": epochs,
            "embed_dim": embed_dim,
            "depths": depths,
            "num_heads": num_heads,
            "decay": decay,
        },
    )
else:
    wandb_run = None

In [15]:
# Main loop: train for one epoch, evaluate on the test loader, log the metrics
# to the enabled backends, and checkpoint when the save strategy approves.
for epoch in range(start_epoch, epochs + 1):
    train_pdar = tqdm(trainloader, desc=f"Training Epoch {epoch}/{epochs}")
    train_loss, train_acc1, train_acc2 = train_one_epoch(model, train_pdar, optimizer, criterion, device)

    test_pdar = tqdm(testloader, desc="Evaluating")
    test_loss, test_acc1, test_acc2 = evaluate(model, test_pdar, criterion, device)

    # The scheduler is not handed to train_one_epoch, so it has to be advanced
    # here; otherwise the learning rate never changes.
    # NOTE(review): assumes get_lr_scheduler builds an epoch-based schedule
    # (it is configured with epochs=epochs) — confirm.
    lr_scheduler.step()

    # Build the metric payload once so both backends log identical values.
    # 'test/acc2' previously logged test_acc1 by mistake.
    metrics = {
        'train/loss': train_loss,
        'train/acc1': train_acc1,
        'train/acc2': train_acc2,
        'test/loss': test_loss,
        'test/acc1': test_acc1,
        'test/acc2': test_acc2,
    }

    if use_tensorboard:
        writer.add_scalars("EmotionalExp", metrics, epoch)
    if use_wandb:
        wandb_run.log(metrics)

    # best_acc1/best_acc2 are test accuracies (that is what checkpoints store
    # and restore), so the candidate pair must also be the test accuracies.
    # NOTE(review): the original passed train_acc2 as the second argument —
    # confirm against save_strategy's signature.
    if (epoch > save_epoch) and save_strategy(test_acc1, test_acc2, best_acc1, best_acc2):
        save_checkpoint(epoch, optimizer, lr_scheduler, test_loss, model, [test_acc1, test_acc2], save_ckpt_path)
        # Track what was just saved so later epochs compare against it,
        # mirroring what a resume from this checkpoint would load.
        best_acc1, best_acc2 = test_acc1, test_acc2

# Flush and release the logging backends once training is done.
if use_tensorboard:
    writer.close()
if use_wandb:
    wandb.finish()

Training Epoch 1/200:   0%|          | 0/14355 [00:00<?, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 4.00 GiB total capacity; 2.84 GiB already allocated; 0 bytes free; 2.92 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF