In [1]:
!pip install kaggle --upgrade





In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
!kaggle datasets download -d thienkhonghoc/affectnet -p /content

Dataset URL: https://www.kaggle.com/datasets/thienkhonghoc/affectnet
License(s): unknown


In [4]:
!unzip -q /content/affectnet.zip -d /content/affectnet > /dev/null 2>&1

In [5]:
!pip install torch torchvision timm matplotlib tqdm


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler
from collections import Counter
from PIL import Image
import os

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Transformations
transform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert("RGB") if isinstance(img, Image.Image) else img),
    transforms.ToTensor(),
    transforms.Resize((224, 224)),  # Stable size
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Dataset paths
train_path = "/content/affectnet/AffectNet/train"
val_path = "/content/affectnet/AffectNet/val"
train_dataset = datasets.ImageFolder(root=train_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_path, transform=transform)

# Filter out corrupt images
def filter_images(dataset):
    valid = []
    for path, label in dataset.samples:
        try:
            Image.open(path).convert("RGB")
            valid.append((path, label))
        except:
            continue
    dataset.samples = valid

filter_images(train_dataset)
filter_images(val_dataset)

# Class weights
class_counts = Counter(train_dataset.targets)
total = sum(class_counts.values())
weights = torch.tensor([total / class_counts[i] for i in range(len(class_counts))], dtype=torch.float).to(device)

# Loaders
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

# Load model
model = models.convnext_large(weights=models.ConvNeXt_Large_Weights.IMAGENET1K_V1)
model.classifier = nn.Sequential(
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.LayerNorm(model.classifier[2].in_features),
    nn.Dropout(0.5),
    nn.Linear(model.classifier[2].in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(512, 8)
)

# Load checkpoint
checkpoint = torch.load("/content/affectnet_convnext_large_epoch40.pt", map_location=device)
cleaned = {k.replace("_orig_mod.", ""): v for k, v in checkpoint.items()}
model.load_state_dict(cleaned, strict=False)
model = model.to(device)

# Setup
criterion = nn.CrossEntropyLoss(weight=weights, label_smoothing=0.05)
optimizer = optim.AdamW(model.parameters(), lr=2e-6, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20, eta_min=1e-7)
scaler = GradScaler()

# Training config
best_val_acc = 0
patience = 5
no_improve = 0

print("Checkpoint loaded from epoch 40.")
print("Training from Epoch 41 to 60...\n")

for epoch in range(41, 61):
    model.train()
    correct, total, loss_sum = 0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        loss_sum += loss.item()
        correct += (outputs.argmax(1) == labels).sum().item()
        total += labels.size(0)

    train_acc = 100 * correct / total

    # Validation
    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            correct_val += (outputs.argmax(1) == labels).sum().item()
            total_val += labels.size(0)

    val_acc = 100 * correct_val / total_val
    scheduler.step()

    print(f"Epoch [{epoch}/60] | Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        no_improve = 0
    else:
        no_improve += 1

    if no_improve >= patience:
        print(f"Early stopping at epoch {epoch}. Best Val Acc: {best_val_acc:.2f}%")
        break

    if epoch % 5 == 0:
        torch.save(model.state_dict(), f"affectnet_convnext_large_epoch{epoch}.pt")
        print(f"Model saved: affectnet_convnext_large_epoch{epoch}.pt")

torch.save(model.state_dict(), "affectnet_convnext_large_final.pt")
print("Training complete. Final model saved.")


Checkpoint loaded from epoch 40.
Training from Epoch 41 to 60...



  scaler = GradScaler()
  with autocast():


Epoch [41/60] | Train Acc: 83.19% | Val Acc: 60.25%
Epoch [42/60] | Train Acc: 83.97% | Val Acc: 60.75%
Epoch [43/60] | Train Acc: 85.06% | Val Acc: 61.50%
Epoch [44/60] | Train Acc: 85.58% | Val Acc: 61.00%
Epoch [45/60] | Train Acc: 86.15% | Val Acc: 60.88%
Model saved: affectnet_convnext_large_epoch45.pt


KeyboardInterrupt: 