In [1]:
!pip install kaggle --upgrade



In [2]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"sibbir","key":"c9b1f65daec00b23325e5e866738f3b6"}'}

In [3]:
import os
import zipfile

# Create a Kaggle directory
os.makedirs("/root/.kaggle", exist_ok=True)

# Move kaggle.json to the correct directory
!mv kaggle.json /root/.kaggle/

# Set correct permissions
!chmod 600 /root/.kaggle/kaggle.json

In [4]:
!kaggle datasets download -d thienkhonghoc/affectnet -p /content

Dataset URL: https://www.kaggle.com/datasets/thienkhonghoc/affectnet
License(s): unknown


In [5]:
!kaggle datasets download -d thienkhonghoc/affectnet

Dataset URL: https://www.kaggle.com/datasets/thienkhonghoc/affectnet
License(s): unknown
affectnet.zip: Skipping, found more recently modified local copy (use --force to force download)


In [6]:
!unzip -q /content/affectnet.zip -d /content/affectnet > /dev/null 2>&1

In [7]:
!pip install torch torchvision timm matplotlib tqdm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from collections import Counter
from PIL import Image
import os
import numpy as np

# Set device (CPU/GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\nUsing device: {device}")

# Optimize CUDA performance
torch.backends.cudnn.benchmark = True

# Efficient Data Augmentation
transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.AutoAugment(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load AffectNet dataset (Ensure paths are correct)
train_data_path = "/content/affectnet/AffectNet/train"
val_data_path = "/content/affectnet/AffectNet/val"

# Function to filter out corrupt images
def filter_corrupt_images(dataset):
    dataset.samples = [(p, l) for p, l in dataset.samples if Image.open(p).convert("RGB")]

# Load datasets
train_dataset = datasets.ImageFolder(root=train_data_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_data_path, transform=transform)

# Filter corrupt images
filter_corrupt_images(train_dataset)
filter_corrupt_images(val_dataset)

# Compute class weights efficiently
class_counts = np.bincount(train_dataset.targets)
weights = torch.tensor(len(train_dataset) / (len(class_counts) * class_counts), dtype=torch.float).to(device)

print(f"Class Weights: {weights}")

# Create data loaders
num_workers = min(4, os.cpu_count() // 2)  # Dynamically assign workers
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=num_workers, pin_memory=True, persistent_workers=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=num_workers, pin_memory=True, persistent_workers=True)

# Load ConvNeXt-Small model
model = models.convnext_small(weights=models.ConvNeXt_Small_Weights.IMAGENET1K_V1)
model.classifier[2] = nn.Linear(model.classifier[2].in_features, 8)
model = model.to(device)

print("Model initialized!")

# Define loss function & optimizer
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4)
scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=5e-4, steps_per_epoch=len(train_loader), epochs=20)

# Early Stopping & Mixed Precision
scaler = torch.cuda.amp.GradScaler()
best_val_acc = 0.0
early_stopping_patience = 5
epochs_without_improvement = 0

# Training loop
num_epochs = 20
save_interval = 5
accumulation_steps = 2  # Gradient accumulation

print("\nStarting Training...\n")
for epoch in range(1, num_epochs + 1):
    model.train()
    running_loss = 0.0
    correct_train, total_train = 0, 0

    # Training phase
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        with torch.cuda.amp.autocast():
            outputs = model(images)

            # Apply MixUp Augmentation
            alpha = 0.2
            lam = np.random.beta(alpha, alpha)
            index = torch.randperm(images.size(0)).to(device)
            mixed_images = lam * images + (1 - lam) * images[index]
            mixed_labels = lam * criterion(outputs, labels) + (1 - lam) * criterion(outputs, labels[index])

            loss = mixed_labels.mean()

        scaler.scale(loss).backward()

        # Gradient accumulation
        if (i + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = 100 * correct_train / total_train

    # Validation phase
    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)

    val_accuracy = 100 * correct_val / total_val
    scheduler.step()

    # Print epoch summary
    print(f"Epoch [{epoch}/{num_epochs}]")
    print(f"   Loss: {running_loss:.4f}")
    print(f"   Train Accuracy: {train_accuracy:.2f}%")
    print(f"   Validation Accuracy: {val_accuracy:.2f}%\n")

    # Save model every 5 epochs
    if epoch % save_interval == 0:
        save_path = f"affectnet_convnext_epoch{epoch}.pt"
        torch.save(model.state_dict(), save_path)
        print(f"Model saved: {save_path}\n")

    # Early stopping with checkpointing
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        epochs_without_improvement = 0
        torch.save(model.state_dict(), "affectnet_best.pt")
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= early_stopping_patience:
        print(f"Early stopping triggered. Best validation accuracy: {best_val_acc:.2f}%")
        break

# Save final model
torch.save(model.state_dict(), "affectnet_final.pt")
print("\nTraining complete! Final model saved.")



Using device: cuda
Class Weights: tensor([0.9388, 1.2343, 0.9388, 0.9388, 0.9388, 0.9388, 0.9388, 1.2518],
       device='cuda:0')
Model initialized!

Starting Training...



  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch [1/20]
   Loss: 1203.5327
   Train Accuracy: 24.31%
   Validation Accuracy: 37.62%

Epoch [2/20]
   Loss: 1160.1177
   Train Accuracy: 37.05%
   Validation Accuracy: 41.88%

Epoch [3/20]
   Loss: 1140.3326
   Train Accuracy: 42.67%
   Validation Accuracy: 44.38%

Epoch [4/20]
   Loss: 1132.5473
   Train Accuracy: 45.96%
   Validation Accuracy: 47.38%

Epoch [5/20]
   Loss: 1118.2333
   Train Accuracy: 48.50%
   Validation Accuracy: 47.38%

Model saved: affectnet_convnext_epoch5.pt

Epoch [6/20]
   Loss: 1112.8137
   Train Accuracy: 50.36%
   Validation Accuracy: 48.50%

Epoch [7/20]
   Loss: 1103.3352
   Train Accuracy: 51.26%
   Validation Accuracy: 51.50%

Epoch [8/20]
   Loss: 1094.6571
   Train Accuracy: 53.20%
   Validation Accuracy: 53.75%

Epoch [9/20]
   Loss: 1098.1338
   Train Accuracy: 53.67%
   Validation Accuracy: 54.62%

Epoch [10/20]
   Loss: 1099.5441
   Train Accuracy: 54.79%
   Validation Accuracy: 51.50%

Model saved: affectnet_convnext_epoch10.pt

Epoch [11/20