In [1]:
!pip install kaggle --upgrade



Collecting kaggle
  Downloading kaggle-1.7.4.2-py3-none-any.whl.metadata (16 kB)
Downloading kaggle-1.7.4.2-py3-none-any.whl (173 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.2/173.2 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.6.17
    Uninstalling kaggle-1.6.17:
      Successfully uninstalled kaggle-1.6.17
Successfully installed kaggle-1.7.4.2


In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
!kaggle datasets download -d thienkhonghoc/affectnet -p /content

Dataset URL: https://www.kaggle.com/datasets/thienkhonghoc/affectnet
License(s): unknown


In [4]:
!unzip -q /content/affectnet.zip -d /content/affectnet > /dev/null 2>&1

In [5]:
!pip install torch torchvision timm matplotlib tqdm


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from collections import Counter
from torch.cuda.amp import autocast, GradScaler
import numpy as np
from PIL import Image, ImageOps

#  Set Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#  Clear CUDA Cache
torch.cuda.empty_cache()

#  Enable Optimized Computation
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False

#  Validate & Filter Images
def is_valid_image(path):
    try:
        img = Image.open(path).convert("RGB")
        img = ImageOps.exif_transpose(img)
        return img
    except Exception:
        return None

class VerifiedImageFolder(datasets.ImageFolder):
    def __getitem__(self, index):
        path, target = self.samples[index]
        image = is_valid_image(path)
        if image is None:
            return None, None
        return self.transform(image), target

#  Data Augmentation (Lower Res to Save Memory)
transform = transforms.Compose([
    transforms.Lambda(lambda img: img if isinstance(img, torch.Tensor) else transforms.ToTensor()(img)),
    transforms.RandomResizedCrop(192, scale=(0.75, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomErasing(p=0.4),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

#  Load Dataset (Remove Invalid Images)
train_data_path = "/content/affectnet/AffectNet/train"
val_data_path = "/content/affectnet/AffectNet/val"

train_dataset = VerifiedImageFolder(root=train_data_path, transform=transform)
val_dataset = VerifiedImageFolder(root=val_data_path, transform=transform)

train_dataset.samples = [(s[0], s[1]) for s in train_dataset.samples if is_valid_image(s[0]) is not None]
val_dataset.samples = [(s[0], s[1]) for s in val_dataset.samples if is_valid_image(s[0]) is not None]

print(f" Total Valid Training Images: {len(train_dataset.samples)}")
print(f" Total Valid Validation Images: {len(val_dataset.samples)}")

#  Compute Class Weights
class_counts = Counter(train_dataset.targets)
num_samples = sum(class_counts.values())
weights = [num_samples/class_counts[i] for i in range(len(class_counts))]
weights = torch.tensor(weights, dtype=torch.float).to(device)

#  Reduce Batch Size & Enable Gradient Accumulation
batch_size = 16
accumulation_steps = 2

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True, drop_last=True)

#  Load ConvNeXt-Large Model
model = models.convnext_large(weights=models.ConvNeXt_Large_Weights.IMAGENET1K_V1)

#  Modify Classifier (Fix LayerNorm Shape)
model.classifier = nn.Sequential(
    nn.AdaptiveAvgPool2d(1),  #  Convert (B, 1536, 7, 7) → (B, 1536, 1, 1)
    nn.Flatten(),  #  Convert (B, 1536, 1, 1) → (B, 1536)
    nn.LayerNorm(1536),  #  Now works correctly
    nn.Dropout(0.5),
    nn.Linear(1536, 512),
    nn.ReLU(),
    nn.BatchNorm1d(512),
    nn.Dropout(0.4),
    nn.Linear(512, 8)
)

#  Optimize Model Memory Usage
model = torch.compile(model)
model = model.to(device)

#  Define Loss, Optimizer & Scheduler
criterion = nn.CrossEntropyLoss(weight=weights, label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-6)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15, eta_min=1e-7)

#  Mixed Precision Training
scaler = GradScaler()

#  Train for 30 Epochs
print("\n Training ConvNeXt-Large with Fixes...\n")

for epoch in range(1, 31):
    model.train()
    running_loss, correct_train, total_train = 0.0, 0, 0

    optimizer.zero_grad()

    for i, (images, labels) in enumerate(train_loader):
        if images is None or labels is None:
            continue

        images, labels = images.to(device), labels.to(device)

        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels) / accumulation_steps

        scaler.scale(loss).backward()

        if (i + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = 100 * correct_train / total_train
    scheduler.step()

    # Validation Phase
    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            if images is None or labels is None:
                continue

            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)

    val_accuracy = 100 * correct_val / total_val

    print(f"Epoch [{epoch}/30], Loss: {running_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%")

    if epoch % 5 == 0:
        torch.save(model.state_dict(), f"affectnet_convnext_large_epoch{epoch}.pt")

torch.save(model.state_dict(), "affectnet_convnext_large_final.pt")
print("\n Training complete! Final model saved.")


✅ Total Valid Training Images: 37553
✅ Total Valid Validation Images: 800

🚀 Training ConvNeXt-Large with Fixes...



  scaler = GradScaler()
  with autocast():
  with autocast():


Epoch [1/30], Loss: 2193.4017, Train Acc: 32.34%, Val Acc: 47.50%


  with autocast():


Epoch [2/30], Loss: 1873.9873, Train Acc: 46.10%, Val Acc: 52.75%
Epoch [3/30], Loss: 1764.3671, Train Acc: 50.96%, Val Acc: 55.50%
Epoch [4/30], Loss: 1702.3823, Train Acc: 53.40%, Val Acc: 57.12%
Epoch [5/30], Loss: 1647.8567, Train Acc: 55.83%, Val Acc: 55.25%
Epoch [6/30], Loss: 1608.9441, Train Acc: 57.33%, Val Acc: 56.75%
Epoch [7/30], Loss: 1567.0674, Train Acc: 59.31%, Val Acc: 56.75%
Epoch [8/30], Loss: 1532.4867, Train Acc: 60.87%, Val Acc: 58.88%
Epoch [9/30], Loss: 1496.6272, Train Acc: 62.44%, Val Acc: 56.12%
Epoch [10/30], Loss: 1460.9200, Train Acc: 63.73%, Val Acc: 58.25%
Epoch [11/30], Loss: 1434.9340, Train Acc: 65.06%, Val Acc: 57.88%
Epoch [12/30], Loss: 1418.5782, Train Acc: 65.76%, Val Acc: 58.88%
Epoch [13/30], Loss: 1402.5688, Train Acc: 66.59%, Val Acc: 57.88%
Epoch [14/30], Loss: 1388.3524, Train Acc: 67.05%, Val Acc: 58.75%
