In [1]:
!pip install kaggle --upgrade



Collecting kaggle
  Downloading kaggle-1.7.4.2-py3-none-any.whl.metadata (16 kB)
Downloading kaggle-1.7.4.2-py3-none-any.whl (173 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.2/173.2 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.6.17
    Uninstalling kaggle-1.6.17:
      Successfully uninstalled kaggle-1.6.17
Successfully installed kaggle-1.7.4.2


In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
!kaggle datasets download -d thienkhonghoc/affectnet -p /content

Dataset URL: https://www.kaggle.com/datasets/thienkhonghoc/affectnet
License(s): unknown


In [4]:
!unzip -q /content/affectnet.zip -d /content/affectnet > /dev/null 2>&1

In [5]:
!pip install torch torchvision timm matplotlib tqdm


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
#Lower Learning Rate (1e-6)
#Better Regularization (dropout=0.5, label_smoothing=0.3)
#More Controlled Data Augmentation
#Stochastic Weight Averaging

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from collections import Counter
from torch.cuda.amp import autocast, GradScaler
import numpy as np

# Set Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Optimized Data Augmentation (Less Aggressive)
transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomGrayscale(p=0.05),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load Dataset
train_data_path = "/content/affectnet/AffectNet/train"
val_data_path = "/content/affectnet/AffectNet/val"
train_dataset = datasets.ImageFolder(root=train_data_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_data_path, transform=transform)

# Compute Class Weights
class_counts = Counter(train_dataset.targets)
num_samples = sum(class_counts.values())
weights = [num_samples/class_counts[i] for i in range(len(class_counts))]
weights[6] *= 1.5
weights[7] *= 1.3
weights = torch.tensor(weights, dtype=torch.float).to(device)

# Load Data (Batch Size Adjusted)
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

# Load Model (ConvNeXt-Small)
model = models.convnext_small(weights=models.ConvNeXt_Small_Weights.IMAGENET1K_V1)

# Load Checkpoint
checkpoint_path = "/content/affectnet_convnext_epoch50.pt"
checkpoint = torch.load(checkpoint_path, map_location=device)

# Remove classifier keys from checkpoint (to prevent mismatches)
for key in list(checkpoint.keys()):
    if "classifier" in key:
        del checkpoint[key]

# Load checkpoint into model
model.load_state_dict(checkpoint, strict=False)

# Modify Classifier AFTER Loading Checkpoint
model.classifier[2] = nn.Sequential(
    nn.Dropout(0.5),  # 🔹 Increased Dropout
    nn.Linear(model.classifier[2].in_features, 8)
)

print("Checkpoint successfully loaded!")

# Move Model to Device
model = model.to(device)

# Define Loss, Optimizer & Scheduler
criterion = nn.CrossEntropyLoss(weight=weights, label_smoothing=0.3)
optimizer = optim.AdamW(model.parameters(), lr=1e-6, weight_decay=1e-6)

# Dynamic Learning Rate (ReduceLROnPlateau)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3, verbose=True)

# Stochastic Weight Averaging (SWA)
swa_model = optim.swa_utils.AveragedModel(model)
swa_scheduler = optim.swa_utils.SWALR(optimizer, swa_lr=5e-7, anneal_strategy="cos", anneal_epochs=5)

# Mixed Precision Training
scaler = GradScaler()

# Fine-tune for 10 More Epochs (51-60)
best_val_acc = 0.0
early_stopping_patience = 3
epochs_without_improvement = 0

print("\nFine-tuning with Stronger Regularization and Stability...\n")

for epoch in range(51, 61):
    model.train()
    running_loss, correct_train, total_train = 0.0, 0, 0

    optimizer.zero_grad()

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward Pass with Mixed Precision
        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backpropagation with Mixed Precision
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = 100 * correct_train / total_train

    # Validation Phase
    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)

    val_accuracy = 100 * correct_val / total_val

    # Fix: Move `scheduler.step(val_accuracy)` Below Validation Phase
    scheduler.step(val_accuracy)

    print(f"Epoch [{epoch}/60], Loss: {running_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%")

    # Save Model Every 5 Epochs
    if epoch % 5 == 0:
        torch.save(model.state_dict(), f"affectnet_convnext_epoch{epoch}.pt")
        print(f"Model saved: affectnet_convnext_epoch{epoch}.pt")

    #  Early Stopping
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= early_stopping_patience:
        print(f"Early stopping triggered. Best validation accuracy: {best_val_acc:.2f}%")
        break


Checkpoint successfully loaded!

Fine-tuning with Stronger Regularization and Stability...



  scaler = GradScaler()
  with autocast():


Epoch [51/60], Loss: 586.3003, Train Acc: 29.00%, Val Acc: 54.62%
Epoch [52/60], Loss: 508.5014, Train Acc: 52.75%, Val Acc: 57.50%
Epoch [53/60], Loss: 473.3962, Train Acc: 63.02%, Val Acc: 58.38%
Epoch [54/60], Loss: 457.1307, Train Acc: 67.58%, Val Acc: 58.75%
Epoch [55/60], Loss: 448.0179, Train Acc: 69.71%, Val Acc: 58.88%
Model saved: affectnet_convnext_epoch55.pt
Epoch [56/60], Loss: 442.8233, Train Acc: 71.13%, Val Acc: 58.50%
Epoch [57/60], Loss: 438.4933, Train Acc: 72.11%, Val Acc: 56.62%
Epoch [58/60], Loss: 435.3070, Train Acc: 73.06%, Val Acc: 58.38%
Early stopping triggered. Best validation accuracy: 58.88%


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from collections import Counter
from torch.cuda.amp import autocast, GradScaler
import numpy as np

# Set Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Optimized Data Augmentation (Prevent Overfitting)
transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.85, 1.0)),  # Avoid aggressive cropping
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(5),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.RandomGrayscale(p=0.05),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load Dataset
train_data_path = "/content/affectnet/AffectNet/train"
val_data_path = "/content/affectnet/AffectNet/val"
train_dataset = datasets.ImageFolder(root=train_data_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_data_path, transform=transform)

# Compute Class Weights
class_counts = Counter(train_dataset.targets)
num_samples = sum(class_counts.values())
weights = [num_samples/class_counts[i] for i in range(len(class_counts))]
weights[6] *= 1.5
weights[7] *= 1.3
weights = torch.tensor(weights, dtype=torch.float).to(device)

# Load Data (Batch Size Adjusted)
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

# Load Model (ConvNeXt-Small)
model = models.convnext_small(weights=models.ConvNeXt_Small_Weights.IMAGENET1K_V1)

# Load Checkpoint
checkpoint_path = "/content/affectnet_convnext_epoch55.pt"  # Resuming from last checkpoint
checkpoint = torch.load(checkpoint_path, map_location=device)

# Remove classifier keys from checkpoint (to prevent mismatches)
for key in list(checkpoint.keys()):
    if "classifier" in key:
        del checkpoint[key]

# Load checkpoint into model
model.load_state_dict(checkpoint, strict=False)

# Modify Classifier AFTER Loading Checkpoint
model.classifier[2] = nn.Sequential(
    nn.Dropout(0.6),  # 🔹 Increased Dropout
    nn.Linear(model.classifier[2].in_features, 8)
)

print("Checkpoint successfully loaded!")

# Move Model to Device
model = model.to(device)

# Define Loss, Optimizer & Scheduler
criterion = nn.CrossEntropyLoss(weight=weights, label_smoothing=0.3)
optimizer = optim.AdamW(model.parameters(), lr=1e-7, weight_decay=1e-5)  # 🔹 Lower LR, Higher weight decay

# Learning Rate Scheduler (Cosine Annealing with Warm Restarts)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=5, T_mult=2)

# Mixed Precision Training
scaler = GradScaler()

# Fine-tune for 10 More Epochs (56-65)
best_val_acc = 0.0
early_stopping_patience = 5  # 🔹 Increased patience
epochs_without_improvement = 0

print("\nFine-tuning with Better Regularization...\n")

for epoch in range(56, 66):
    model.train()
    running_loss, correct_train, total_train = 0.0, 0, 0

    optimizer.zero_grad()

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward Pass with Mixed Precision
        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backpropagation with Mixed Precision
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = 100 * correct_train / total_train

    # Validation Phase
    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)

    val_accuracy = 100 * correct_val / total_val

    # Apply Scheduler Step After Validation
    scheduler.step()

    print(f"Epoch [{epoch}/65], Loss: {running_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%")

    # Save Model Every 5 Epochs
    if epoch % 5 == 0:
        torch.save(model.state_dict(), f"affectnet_convnext_epoch{epoch}.pt")
        print(f"Model saved: affectnet_convnext_epoch{epoch}.pt")

    # Early Stopping
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= early_stopping_patience:
        print(f"Early stopping triggered. Best validation accuracy: {best_val_acc:.2f}%")
        break

# Save Final Model
torch.save(model.state_dict(), "affectnet_convnext_final.pt")
print("\nTraining complete! Final model saved.")


Checkpoint successfully loaded!

Fine-tuning with Better Regularization...



  scaler = GradScaler()
  with autocast():


Epoch [56/65], Loss: 673.9786, Train Acc: 10.41%, Val Acc: 10.12%
Epoch [57/65], Loss: 655.7998, Train Acc: 13.08%, Val Acc: 17.38%
Epoch [58/65], Loss: 643.4017, Train Acc: 15.10%, Val Acc: 23.12%
Epoch [59/65], Loss: 633.9894, Train Acc: 17.13%, Val Acc: 26.50%
Epoch [60/65], Loss: 631.8518, Train Acc: 17.66%, Val Acc: 26.75%
Model saved: affectnet_convnext_epoch60.pt
Epoch [61/65], Loss: 622.4510, Train Acc: 19.74%, Val Acc: 34.50%
Epoch [62/65], Loss: 608.9430, Train Acc: 23.02%, Val Acc: 39.50%
Epoch [63/65], Loss: 596.3580, Train Acc: 26.29%, Val Acc: 42.38%
Epoch [64/65], Loss: 589.6599, Train Acc: 28.05%, Val Acc: 45.38%
Epoch [65/65], Loss: 581.9471, Train Acc: 30.44%, Val Acc: 47.00%
Model saved: affectnet_convnext_epoch65.pt

Training complete! Final model saved.


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from collections import Counter
from torch.cuda.amp import autocast, GradScaler
import numpy as np

# Set Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Augmentation
transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(8),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.RandomGrayscale(p=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load Dataset
train_data_path = "/content/affectnet/AffectNet/train"
val_data_path = "/content/affectnet/AffectNet/val"
train_dataset = datasets.ImageFolder(root=train_data_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_data_path, transform=transform)

# Compute Class Weights
class_counts = Counter(train_dataset.targets)
num_samples = sum(class_counts.values())
weights = [num_samples/class_counts[i] for i in range(len(class_counts))]
weights[6] *= 1.5
weights[7] *= 1.3
weights = torch.tensor(weights, dtype=torch.float).to(device)

# Load Data
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

# Load Model (ConvNeXt-Small)
model = models.convnext_small(weights=models.ConvNeXt_Small_Weights.IMAGENET1K_V1)

# Load Checkpoint from Epoch 65
checkpoint_path = "/content/affectnet_convnext_epoch65.pt"
checkpoint = torch.load(checkpoint_path, map_location=device)
for key in list(checkpoint.keys()):
    if "classifier" in key:
        del checkpoint[key]
model.load_state_dict(checkpoint, strict=False)

# Modify Classifier AFTER Loading Checkpoint
model.classifier[2] = nn.Sequential(
    nn.Dropout(0.5),  # Increased dropout for better generalization
    nn.Linear(model.classifier[2].in_features, 8)
)

print("Checkpoint successfully loaded!")

# Move Model to Device
model = model.to(device)

# Define Loss, Optimizer & Scheduler
criterion = nn.CrossEntropyLoss(weight=weights, label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=2e-5, weight_decay=5e-6)

# OneCycleLR Scheduler
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=5e-4,
                                                steps_per_epoch=len(train_loader),
                                                epochs=15)

# Mixed Precision Training
scaler = GradScaler()

# Fine-tune for 15 More Epochs (66-80)
best_val_acc = 0.0
early_stopping_patience = 3
epochs_without_improvement = 0

print("\nFine-tuning from Epoch 66-80...\n")

for epoch in range(66, 81):
    model.train()
    running_loss, correct_train, total_train = 0.0, 0, 0

    optimizer.zero_grad()

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = 100 * correct_train / total_train
    scheduler.step()

    # Validation Phase
    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)

    val_accuracy = 100 * correct_val / total_val

    print(f"Epoch [{epoch}/80], Loss: {running_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%")

    # Save Model Every 5 Epochs
    if epoch % 5 == 0:
        torch.save(model.state_dict(), f"affectnet_convnext_epoch{epoch}.pt")
        print(f"Model saved: affectnet_convnext_epoch{epoch}.pt")

    # Early Stopping
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= early_stopping_patience:
        print(f"Early stopping triggered. Best validation accuracy: {best_val_acc:.2f}%")
        break

# Save Final Model
torch.save(model.state_dict(), "affectnet_convnext_final.pt")
print("\nTraining complete! Final model saved.")


Checkpoint successfully loaded!

Fine-tuning from Epoch 66-80...



  scaler = GradScaler()
  with autocast():


Epoch [66/80], Loss: 345.5571, Train Acc: 67.06%, Val Acc: 58.88%
Epoch [67/80], Loss: 294.6591, Train Acc: 75.61%, Val Acc: 56.50%
Epoch [68/80], Loss: 282.5745, Train Acc: 77.54%, Val Acc: 57.12%
Epoch [69/80], Loss: 274.6962, Train Acc: 78.81%, Val Acc: 58.50%
Early stopping triggered. Best validation accuracy: 58.88%

Training complete! Final model saved.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from collections import Counter
from torch.cuda.amp import autocast, GradScaler
import numpy as np

# Set Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Augmentation (Slightly Stronger)
transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.75, 1.0)),  # Less cropping
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(6),  # Lower rotation
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomGrayscale(p=0.05),  # Less grayscale conversion
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load Dataset
train_data_path = "/content/affectnet/AffectNet/train"
val_data_path = "/content/affectnet/AffectNet/val"
train_dataset = datasets.ImageFolder(root=train_data_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_data_path, transform=transform)

# Compute Class Weights
class_counts = Counter(train_dataset.targets)
num_samples = sum(class_counts.values())
weights = [num_samples/class_counts[i] for i in range(len(class_counts))]
weights = torch.tensor(weights, dtype=torch.float).to(device)

# Load Data
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

# Load Model (ConvNeXt-Small)
model = models.convnext_small(weights=models.ConvNeXt_Small_Weights.IMAGENET1K_V1)

#  Load Checkpoint from Epoch 69
checkpoint_path = "/content/affectnet_convnext_epoch65.pt"
checkpoint = torch.load(checkpoint_path, map_location=device)
for key in list(checkpoint.keys()):
    if "classifier" in key:
        del checkpoint[key]
model.load_state_dict(checkpoint, strict=False)

#  Modify Classifier AFTER Loading Checkpoint
model.classifier[2] = nn.Sequential(
    nn.Dropout(0.3),  # 🔹 Reduced dropout
    nn.Linear(model.classifier[2].in_features, 8)
)

print("Checkpoint successfully loaded!")

#  Move Model to Device
model = model.to(device)

#  Define Loss, Optimizer & Scheduler
criterion = nn.CrossEntropyLoss(weight=weights, label_smoothing=0.05)
optimizer = optim.AdamW(model.parameters(), lr=3e-5, weight_decay=1e-6)

# OneCycleLR Scheduler
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=5e-4,
                                                steps_per_epoch=len(train_loader),
                                                epochs=10)

#  Mixed Precision Training
scaler = GradScaler()

#  Fine-tune for 10 More Epochs (70-80)
best_val_acc = 58.88  # Start from previous best
early_stopping_patience = 3
epochs_without_improvement = 0

print("\nFine-tuning from Epoch 70-80...\n")

for epoch in range(70, 81):
    model.train()
    running_loss, correct_train, total_train = 0.0, 0, 0

    optimizer.zero_grad()

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = 100 * correct_train / total_train
    scheduler.step()

    # Validation Phase
    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)

    val_accuracy = 100 * correct_val / total_val

    print(f"Epoch [{epoch}/80], Loss: {running_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%")

    #  Save Model Every 5 Epochs
    if epoch % 5 == 0:
        torch.save(model.state_dict(), f"affectnet_convnext_epoch{epoch}.pt")
        print(f"Model saved: affectnet_convnext_epoch{epoch}.pt")

    #  Early Stopping
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= early_stopping_patience:
        print(f"Early stopping triggered. Best validation accuracy: {best_val_acc:.2f}%")
        break

#  Save Final Model
torch.save(model.state_dict(), "affectnet_convnext_final.pt")
print("\nTraining complete! Final model saved.")


Checkpoint successfully loaded!

Fine-tuning from Epoch 70-80...



  scaler = GradScaler()
  with autocast():


Epoch [70/80], Loss: 297.4104, Train Acc: 70.18%, Val Acc: 58.38%
Model saved: affectnet_convnext_epoch70.pt
Epoch [71/80], Loss: 238.0796, Train Acc: 77.74%, Val Acc: 58.00%
