In [1]:
!pip install kaggle --upgrade



Collecting kaggle
  Downloading kaggle-1.7.4.2-py3-none-any.whl.metadata (16 kB)
Downloading kaggle-1.7.4.2-py3-none-any.whl (173 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.2/173.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.6.17
    Uninstalling kaggle-1.6.17:
      Successfully uninstalled kaggle-1.6.17
Successfully installed kaggle-1.7.4.2


In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
!kaggle datasets download -d thienkhonghoc/affectnet -p /content

Dataset URL: https://www.kaggle.com/datasets/thienkhonghoc/affectnet
License(s): unknown


In [4]:
!unzip -q /content/affectnet.zip -d /content/affectnet > /dev/null 2>&1

In [5]:
!pip install torch torchvision timm matplotlib tqdm


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
#Resumes training from epoch 20
#Reduces learning rate for fine-tuning
#Applies Test-Time Augmentation (TTA)
#Improves data augmentation to balance learning
#Uses early stopping to prevent overfitting

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from collections import Counter
from PIL import Image
import os
import numpy as np

# Set device (CPU/GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\nUsing device: {device}")

# Optimize CUDA performance
torch.backends.cudnn.benchmark = True

# Data Augmentation
transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.75, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=3)], p=0.3),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load AffectNet dataset
train_data_path = "/content/affectnet/AffectNet/train"
val_data_path = "/content/affectnet/AffectNet/val"

# Load datasets
train_dataset = datasets.ImageFolder(root=train_data_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_data_path, transform=transform)

# Compute class weights
class_counts = Counter(train_dataset.targets)
num_samples = sum(class_counts.values())
weights = [num_samples/class_counts[i] for i in range(len(class_counts))]
weights = torch.tensor(weights, dtype=torch.float).to(device)

print(f"Class Weights: {weights}")

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2, pin_memory=True)

# Load ConvNeXt-Small model
model = models.convnext_small(weights=models.ConvNeXt_Small_Weights.IMAGENET1K_V1)

# **Ensure correct classifier replacement**
# ConvNeXt outputs `[batch_size, 768]`, so we need to ensure correct feature mapping
in_features = model.classifier[2].in_features

model.classifier = nn.Sequential(
    nn.Flatten(),  # Ensure correct tensor shape
    nn.Dropout(p=0.4),  # Dropout for regularization
    nn.Linear(in_features, 8)  # Adjust for 8 emotion classes
)

# Move model to device
model = model.to(device)

# Load the previously trained model checkpoint (Resume from epoch 25)
checkpoint_path = "/content/affectnet_convnext_epoch25.pt"
checkpoint = torch.load(checkpoint_path)

# **Load checkpoint with `strict=False` to ignore classifier mismatches**
model.load_state_dict(checkpoint, strict=False)

print("Checkpoint successfully loaded!")

# Define loss function & optimizer
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = optim.AdamW(model.parameters(), lr=5e-6, weight_decay=5e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-6)

# Early Stopping Parameters
best_val_acc = 0.0
early_stopping_patience = 4
epochs_without_improvement = 0

# Continue training for 5 more epochs
start_epoch = 25
num_epochs = 30  # Only train for 5 more epochs
save_interval = 5

print("\nContinuing Training with Improved Regularization...\n")
for epoch in range(start_epoch + 1, num_epochs + 1):
    model.train()
    running_loss = 0.0
    correct_train, total_train = 0, 0

    # Training phase
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()

        # Gradient Clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = 100 * correct_train / total_train

    # Validation phase
    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)

    val_accuracy = 100 * correct_val / total_val
    scheduler.step()

    # Print epoch summary
    print(f"Epoch [{epoch}/{num_epochs}]")
    print(f"   Loss: {running_loss:.4f}")
    print(f"   Train Accuracy: {train_accuracy:.2f}%")
    print(f"   Validation Accuracy: {val_accuracy:.2f}%\n")

    # Save model every 5 epochs
    if (epoch) % save_interval == 0:
        save_path = f"affectnet_convnext_epoch{epoch}.pt"
        torch.save(model.state_dict(), save_path)
        print(f"Model saved: {save_path}\n")

    # Early stopping condition
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= early_stopping_patience:
        print(f"Early stopping triggered. Best validation accuracy: {best_val_acc:.2f}%")
        break

# Save final model
torch.save(model.state_dict(), "affectnet_convnext_final.pt")
print("\nTraining complete! Final model saved.")



Using device: cuda
Class Weights: tensor([ 7.5106,  9.8746,  7.5106,  7.5106,  7.5106,  7.5106,  7.5106, 10.0141],
       device='cuda:0')


Downloading: "https://download.pytorch.org/models/convnext_small-0c510722.pth" to /root/.cache/torch/hub/checkpoints/convnext_small-0c510722.pth
100%|██████████| 192M/192M [00:01<00:00, 160MB/s]


Checkpoint successfully loaded!

Continuing Training with Improved Regularization...

Epoch [26/30]
   Loss: 570.8982
   Train Accuracy: 70.79%
   Validation Accuracy: 58.88%

Epoch [27/30]
   Loss: 474.4691
   Train Accuracy: 71.84%
   Validation Accuracy: 58.75%

Epoch [28/30]
   Loss: 451.3524
   Train Accuracy: 73.29%
   Validation Accuracy: 58.75%

Epoch [29/30]
   Loss: 434.6409
   Train Accuracy: 73.96%
   Validation Accuracy: 59.62%

Epoch [30/30]
   Loss: 423.7413
   Train Accuracy: 74.71%
   Validation Accuracy: 58.75%

Model saved: affectnet_convnext_epoch30.pt


Training complete! Final model saved.


In [7]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# Set device (CPU/GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define standard normalization
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load AffectNet Validation Set
val_data_path = "/content/affectnet/AffectNet/val"
val_dataset = datasets.ImageFolder(root=val_data_path, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2, pin_memory=True)

# Load Trained Model
model = models.convnext_small()
model.classifier = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Dropout(p=0.4),
    torch.nn.Linear(768, 8)
)

checkpoint_path = "/content/affectnet_convnext_epoch30.pt"
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint)
model = model.to(device)
model.eval()

# Evaluation
y_true, y_pred = [], []
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

# Print Metrics
print("\nClassification Report:\n", classification_report(y_true, y_pred, target_names=val_dataset.classes))
print("\nConfusion Matrix:\n", confusion_matrix(y_true, y_pred))



Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.52      0.54       100
           1       0.60      0.56      0.58       100
           2       0.72      0.57      0.64       100
           3       0.67      0.75      0.71       100
           4       0.67      0.68      0.67       100
           5       0.56      0.53      0.55       100
           6       0.44      0.47      0.46       100
           7       0.50      0.61      0.55       100

    accuracy                           0.59       800
   macro avg       0.59      0.59      0.59       800
weighted avg       0.59      0.59      0.59       800


Confusion Matrix:
 [[52 11  4  1  6  5 11 10]
 [14 56  2  2 10  2  9  5]
 [ 0  6 57  4  6 20  6  1]
 [ 0  3  0 75  0  2  3 17]
 [ 7  7  1  2 68  1 13  1]
 [ 4  4 13  5  4 53 10  7]
 [ 7  5  1  3  8  9 47 20]
 [ 7  2  1 20  0  2  7 61]]


Class 3 has the best performance (F1-score = 0.71), indicating the model identifies it well.

Class 6 has the worst performance (F1-score = 0.46), showing it is frequently misclassified.

Precision is better than recall for some classes, meaning the model makes good predictions when it's confident, but it struggles to detect some classes.

Confusion matrix shows common misclassifications:
Class 0 → Misclassified as Class 1 & 6
Class 6 → Frequently confused with 4 & 7
Class 7 → Confused with 3 & 6


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from collections import Counter
from PIL import Image
import os

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Improved Data Augmentation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.2, 0.2), shear=10),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=3)], p=0.3),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load dataset
train_data_path = "/content/affectnet/AffectNet/train"
val_data_path = "/content/affectnet/AffectNet/val"
train_dataset = datasets.ImageFolder(root=train_data_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_data_path, transform=transform)

# Compute class weights
class_counts = Counter(train_dataset.targets)
num_samples = sum(class_counts.values())
weights = [num_samples/class_counts[i] for i in range(len(class_counts))]

# Increase weight for weak classes
weights[6] *= 1.5
weights[7] *= 1.3
weights = torch.tensor(weights, dtype=torch.float).to(device)

# Load data
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2, pin_memory=True)

# Load ConvNeXt-Small
model = models.convnext_small(weights=models.ConvNeXt_Small_Weights.IMAGENET1K_V1)
model.classifier[2] = nn.Linear(model.classifier[2].in_features, 8)
model = model.to(device)

# Load Checkpoint


# Load the previously trained model checkpoint (Resume from epoch 25)
checkpoint_path = "/content/affectnet_convnext_epoch30.pt"
checkpoint = torch.load(checkpoint_path)

# **Load checkpoint with `strict=False` to ignore classifier mismatches**
model.load_state_dict(checkpoint, strict=False)

print("Checkpoint successfully loaded!")

# Define optimizer & scheduler
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = optim.AdamW(model.parameters(), lr=1e-6, weight_decay=1e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-7)

# Fine-tune for 10 more epochs
for epoch in range(31, 41):
    model.train()
    running_loss, correct_train, total_train = 0.0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = 100 * correct_train / total_train
    scheduler.step()

    print(f"Epoch [{epoch}/40], Loss: {running_loss:.4f}, Train Acc: {train_accuracy:.2f}%")

    if epoch % 5 == 0:
        torch.save(model.state_dict(), f"affectnet_convnext_epoch{epoch}.pt")

torch.save(model.state_dict(), "affectnet_convnext_final.pt")


Checkpoint successfully loaded!
