In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from torchvision import models, transforms
from torchvision.datasets import ImageFolder

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [3]:
# Shared preprocessing constants. The mean/std values are the standard ImageNet
# channel statistics that torchvision's pre-trained models are normalised with.
INPUT_SIZE = (224, 224)
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, light augmentation (random horizontal flip and a
# random rotation within +/-10 degrees), then tensor conversion + normalisation.
train_transform = transforms.Compose(
    [
        transforms.Resize(INPUT_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]
)

# Evaluation pipeline: same resize/normalisation, but no random augmentation.
test_transform = transforms.Compose(
    [
        transforms.Resize(INPUT_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]
)


In [5]:
# ImageFolder treats every sub-directory of the root as one class and assigns
# integer labels from the sorted directory names.
train_dataset = ImageFolder("train", transform=train_transform)
test_dataset = ImageFolder("test", transform=test_transform)

# Each split derives its label indices independently from its own folders. If
# the two splits do not contain exactly the same class folders, index i would
# mean different classes in train and test and every metric would be silently
# wrong, so fail loudly instead.
if train_dataset.class_to_idx != test_dataset.class_to_idx:
    raise ValueError(
        "train/ and test/ contain different class folders, so their label "
        f"indices do not line up: {train_dataset.classes} vs {test_dataset.classes}"
    )

# Shuffle only the training data; keep evaluation order fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

num_classes = len(train_dataset.classes)
print("Classes:", train_dataset.classes)


Classes: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


In [6]:
def train_one_epoch(model, loader, optimizer, criterion, device=None):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches. It does not
            need to support ``len()``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not depend on a notebook-level
            global.

    Returns:
        The arithmetic mean of the per-batch loss values (a Python float).

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    running_loss = 0.0
    num_batches = 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Counting batches (instead of len(loader)) also works for loaders that
    # have no length, and lets an empty loader be reported clearly.
    if num_batches == 0:
        raise ValueError("loader yielded no batches; cannot compute an epoch loss")

    return running_loss / num_batches


def evaluate(model, loader, device=None):
    """Collect ground-truth labels and argmax predictions over ``loader``.

    Args:
        model: Network to evaluate; it is switched to eval mode and left there.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not depend on a notebook-level
            global.

    Returns:
        ``(all_labels, all_preds)``: two flat lists with one entry per sample,
        in loader order. Note the order: labels first, predictions second.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    all_preds, all_labels = [], []

    # Gradients are not needed for inference.
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest score along the class axis.
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return all_labels, all_preds


In [7]:
def get_resnet18(num_classes, partial=True):
    """Build a pre-trained ResNet-18 with a fresh ``num_classes``-way head.

    Args:
        num_classes: Number of output units of the replacement ``fc`` layer.
        partial: If True, every pre-trained parameter is frozen
            (``requires_grad = False``) so only the new head is trainable.
            If False, the whole network stays trainable.

    Returns:
        The model, moved to the notebook-level ``device``.
    """
    # `pretrained=True` is deprecated since torchvision 0.13 in favour of the
    # weights enum; IMAGENET1K_V1 is the checkpoint `pretrained=True` loaded.
    # Fall back to the legacy flag on older torchvision releases.
    weights_enum = getattr(models, "ResNet18_Weights", None)
    if weights_enum is not None:
        model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet18(pretrained=True)

    if partial:
        for param in model.parameters():
            param.requires_grad = False

    # The head is created after the freeze, so it is trainable in both modes.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model.to(device)


In [8]:
# Hyper-parameters shared by both fine-tuning modes.
SEED = 42
NUM_EPOCHS = 5
LEARNING_RATE = 0.001

# Maps fine-tuning mode -> dict of test-set metrics (plus the confusion matrix).
results = {}

for mode in ["partial", "full"]:
    print(f"\nTraining with {mode} fine-tuning")

    # Re-seed before each run so head initialisation, shuffling and random
    # augmentation start from the same RNG state in both modes; this makes the
    # comparison repeatable (up to any non-deterministic GPU kernels).
    torch.manual_seed(SEED)

    model = get_resnet18(num_classes, partial=(mode == "partial"))
    criterion = nn.CrossEntropyLoss()
    # Only hand the optimizer the parameters that are still trainable.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(trainable_params, lr=LEARNING_RATE)

    for epoch in range(NUM_EPOCHS):
        loss = train_one_epoch(model, train_loader, optimizer, criterion)
        print(f"Epoch {epoch+1}: Loss = {loss:.4f}")

    y_true, y_pred = evaluate(model, test_loader)

    # Precision/recall/F1 are averaged over classes weighted by class support.
    results[mode] = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average="weighted"),
        "recall": recall_score(y_true, y_pred, average="weighted"),
        "f1": f1_score(y_true, y_pred, average="weighted"),
        "cm": confusion_matrix(y_true, y_pred)
    }



Training with partial fine-tuning




Epoch 1: Loss = 1.6268
Epoch 2: Loss = 1.5382
Epoch 3: Loss = 1.5147
Epoch 4: Loss = 1.5133
Epoch 5: Loss = 1.5008

Training with full fine-tuning




Epoch 1: Loss = 1.2890
Epoch 2: Loss = 1.1076
Epoch 3: Loss = 1.0368
Epoch 4: Loss = 0.9753
Epoch 5: Loss = 0.9369


In [9]:
# Print the scalar test metrics of each fine-tuning mode to four decimals.
for mode, metrics in results.items():
    print(f"\n{mode.upper()} FINE-TUNING")
    for k, v in metrics.items():
        # The confusion matrix is an array, not a scalar, so it is skipped here.
        if k == "cm":
            continue
        print(f"{k}: {v:.4f}")



PARTIAL FINE-TUNING
accuracy: 0.4287
precision: 0.4235
recall: 0.4287
f1: 0.4078

FULL FINE-TUNING
accuracy: 0.6364
precision: 0.6500
recall: 0.6364
f1: 0.6384


In [10]:
# NOTE(review): this cell prints exactly the same scalar metrics as the cell
# before it, and the confusion matrices stored under "cm" are never displayed.
# Presumably this cell was meant to show them; confirm and replace or delete.
for mode in results:
    metrics = results[mode]
    print(f"\n{mode.upper()} FINE-TUNING")
    for k in metrics:
        v = metrics[k]
        if k != "cm":
            print(f"{k}: {v:.4f}")



PARTIAL FINE-TUNING
accuracy: 0.4287
precision: 0.4235
recall: 0.4287
f1: 0.4078

FULL FINE-TUNING
accuracy: 0.6364
precision: 0.6500
recall: 0.6364
f1: 0.6384


### Model Choice
ResNet-18 was chosen for the facial expression recognition task because expressions are distinguished by small, subtle cues such as eye shape, mouth movement, and changes in the facial muscles. ResNet-18's residual (skip) connections let the network learn these features while avoiding the degradation problem, in which accuracy gets worse as plain networks are made deeper. Its moderate depth suits facial datasets: it is deep enough to capture the important patterns while limiting the risk of overfitting. Starting from a pre-trained ResNet-18 also gives the model useful general-purpose visual features learned from a large image dataset (ImageNet).