In [1]:
!pip install kaggle --upgrade



Collecting kaggle
  Downloading kaggle-1.7.4.2-py3-none-any.whl.metadata (16 kB)
Downloading kaggle-1.7.4.2-py3-none-any.whl (173 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.2/173.2 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.6.17
    Uninstalling kaggle-1.6.17:
      Successfully uninstalled kaggle-1.6.17
Successfully installed kaggle-1.7.4.2


In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
!kaggle datasets download -d thienkhonghoc/affectnet -p /content

Dataset URL: https://www.kaggle.com/datasets/thienkhonghoc/affectnet
License(s): unknown


In [4]:
!unzip -q /content/affectnet.zip -d /content/affectnet > /dev/null 2>&1

In [5]:
!pip install torch torchvision timm matplotlib tqdm


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from collections import Counter
from PIL import Image
import os
import numpy as np

# Set device (CPU/GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\nUsing device: {device}")

# Optimize CUDA performance
torch.backends.cudnn.benchmark = True

# Data Augmentation
transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.2, 0.2), shear=10),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=3)], p=0.3),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load AffectNet dataset (Ensure paths are correct)
train_data_path = "/content/affectnet/AffectNet/train"
val_data_path = "/content/affectnet/AffectNet/val"

# Load datasets
train_dataset = datasets.ImageFolder(root=train_data_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_data_path, transform=transform)

# Compute class weights
class_counts = Counter(train_dataset.targets)
num_samples = sum(class_counts.values())
weights = [num_samples/class_counts[i] for i in range(len(class_counts))]
weights = torch.tensor(weights, dtype=torch.float).to(device)

print(f"Class Weights: {weights}")

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2, pin_memory=True)

# Load ConvNeXt-Small model (MODIFY CLASSIFIER BEFORE LOADING CHECKPOINT)
model = models.convnext_small(weights=models.ConvNeXt_Small_Weights.IMAGENET1K_V1)

# **Modify the classifier before loading the checkpoint**
model.classifier[2] = nn.Linear(model.classifier[2].in_features, 8)

# Move model to device
model = model.to(device)

# Load the previously trained model checkpoint
checkpoint_path = "/content/affectnet_convnext_epoch10.pt"
checkpoint = torch.load(checkpoint_path)

# Now we can safely load the state dict
model.load_state_dict(checkpoint)

print("Checkpoint successfully loaded!")

# Define loss function & optimizer
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = optim.AdamW(model.parameters(), lr=3e-5, weight_decay=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3, verbose=True)

# Training Parameters
best_val_acc = 0.0
early_stopping_patience = 5
epochs_without_improvement = 0

# Continue training for 10 more epochs
start_epoch = 10
num_epochs = 20
save_interval = 5

print("\nContinuing Training...\n")
for epoch in range(start_epoch + 1, num_epochs + 1):
    model.train()
    running_loss = 0.0
    correct_train, total_train = 0, 0

    # Training phase
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)

        loss = criterion(outputs, labels)
        loss.backward()

        # Gradient Clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = 100 * correct_train / total_train

    # Validation phase
    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)

    val_accuracy = 100 * correct_val / total_val
    scheduler.step(val_accuracy)

    # Print epoch summary
    print(f"Epoch [{epoch}/{num_epochs}]")
    print(f"   Loss: {running_loss:.4f}")
    print(f"   Train Accuracy: {train_accuracy:.2f}%")
    print(f"   Validation Accuracy: {val_accuracy:.2f}%\n")

    # Save model every 5 epochs
    if (epoch) % save_interval == 0:
        save_path = f"affectnet_convnext_epoch{epoch}.pt"
        torch.save(model.state_dict(), save_path)
        print(f"Model saved: {save_path}\n")

    # Early stopping condition
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= early_stopping_patience:
        print(f"Early stopping triggered. Best validation accuracy: {best_val_acc:.2f}%")
        break

# Save final model
torch.save(model.state_dict(), "affectnet_convnext_20.pt")
print("\nTraining complete! Final model saved.")



Using device: cuda
Class Weights: tensor([ 7.5106,  9.8746,  7.5106,  7.5106,  7.5106,  7.5106,  7.5106, 10.0141],
       device='cuda:0')
Checkpoint successfully loaded!

Continuing Training...





Epoch [11/20]
   Loss: 578.5042
   Train Accuracy: 63.18%
   Validation Accuracy: 57.25%

Epoch [12/20]
   Loss: 565.4945
   Train Accuracy: 64.01%
   Validation Accuracy: 57.50%

Epoch [13/20]
   Loss: 554.8590
   Train Accuracy: 64.72%
   Validation Accuracy: 58.00%

Epoch [14/20]
   Loss: 542.9779
   Train Accuracy: 65.50%
   Validation Accuracy: 58.75%

Epoch [15/20]
   Loss: 533.7010
   Train Accuracy: 66.05%
   Validation Accuracy: 57.38%

Model saved: affectnet_convnext_epoch15.pt

Epoch [16/20]
   Loss: 519.2197
   Train Accuracy: 66.68%
   Validation Accuracy: 56.62%

Epoch [17/20]
   Loss: 506.7439
   Train Accuracy: 67.70%
   Validation Accuracy: 58.25%

Epoch [18/20]
   Loss: 497.0590
   Train Accuracy: 68.29%
   Validation Accuracy: 55.25%

Epoch [19/20]
   Loss: 466.8468
   Train Accuracy: 70.22%
   Validation Accuracy: 58.25%

Early stopping triggered. Best validation accuracy: 58.75%

Training complete! Final model saved.


In [None]:
# Train accuracy is still increasing, meaning the model is learning patterns in the training data.
# Validation accuracy has not improved beyond 58.75%, and even dropped at some epochs.
# Early stopping triggered at 19 epochs, meaning further training may not significantly improve validation performance.