In [1]:
from torchvision import transforms

# Training-time preprocessing. Random geometric and colour perturbations are
# applied to the PIL image first; the image is then converted to a tensor,
# normalised, and finally has a random patch erased.
train_transform = transforms.Compose([
    # --- Geometry ---------------------------------------------------------
    # Crop 80-100% of the image area at a near-square aspect ratio and
    # resize the crop to 224x224.
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0), ratio=(0.9, 1.1)),
    transforms.RandomHorizontalFlip(p=0.5),
    # Rotation angle is sampled from [-8, +8] degrees.
    transforms.RandomRotation(degrees=8),

    # --- Colour -----------------------------------------------------------
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.05),

    # --- Blur (applied to roughly one image in five) ------------------------
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=3)], p=0.2),

    # PIL image -> float tensor. Must come before Normalize and
    # RandomErasing, both of which operate on tensors.
    transforms.ToTensor(),

    # Per-channel standardisation with the usual ImageNet statistics.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),

    # --- Regularisation -----------------------------------------------------
    # With probability 0.25, overwrite a rectangle covering 2-12% of the
    # image with random values.
    transforms.RandomErasing(p=0.25, scale=(0.02, 0.12), ratio=(0.3, 3.3), value='random'),
])


In [2]:
# Deterministic preprocessing for validation and test images: no random
# augmentation, only a fixed resize, tensor conversion and normalisation.
val_test_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),  # force an exact 224x224 output
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [3]:
import os

import torch
from torchvision import datasets
from torch.utils.data import DataLoader

# Root folder that contains the "train", "val" and "test" sub-directories.
# Set the DDD_DATA_PATH environment variable to run the notebook on another
# machine; the original author's location is kept as the default.
DATA_PATH = os.environ.get(
    "DDD_DATA_PATH",
    r"C:\Users\Noureen\OneDrive\Documents\DL\Vision-Based-Driver-Fatigue-Monitoring-and-Alert-System\DATA\archive\Driver Drowsiness Dataset (DDD)\DATA_PREPROCESSED",
)

# Pinned host memory only speeds up host -> CUDA copies. Requesting it on a
# machine without CUDA does nothing useful and makes PyTorch emit a warning.
PIN_MEMORY = torch.cuda.is_available()

train_ds = datasets.ImageFolder(
    root=os.path.join(DATA_PATH, "train"),
    transform=train_transform
)

val_ds = datasets.ImageFolder(
    root=os.path.join(DATA_PATH, "val"),
    transform=val_test_transform
)

# ImageFolder derives label indices from the sub-folder names of each split
# independently. If the splits disagree, validation labels would silently
# mean something different from training labels.
if train_ds.class_to_idx != val_ds.class_to_idx:
    raise ValueError(
        "train/val class folders differ: "
        f"{train_ds.class_to_idx} vs {val_ds.class_to_idx}"
    )

train_loader = DataLoader(
    train_ds,
    batch_size=32,
    shuffle=True,
    num_workers=4,
    pin_memory=PIN_MEMORY,
    # The classifier head built later in this notebook contains BatchNorm1d,
    # which raises in training mode when a batch holds a single sample.
    # Dropping the incomplete final batch rules that out.
    drop_last=True
)

val_loader = DataLoader(
    val_ds,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    pin_memory=PIN_MEMORY
)


In [4]:
import torch
import torch.nn as nn
from torchvision import models
from torchvision.models import ResNet50_Weights

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# ImageNet-pretrained ResNet-50 (torchvision "weights=" API).
model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)

# Disable gradients for every pretrained parameter. The new head assigned
# below is created afterwards, so its parameters remain trainable.
model.requires_grad_(False)

# Swap the original fully connected layer for a small two-class head:
# Linear -> BatchNorm -> ReLU -> Dropout -> Linear.
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, 2),
)

model = model.to(device)


In [5]:
# Cross-entropy over the logits produced by the classifier head.
criterion = nn.CrossEntropyLoss()

# Only the head's parameters are handed to the optimizer; AdamW applies
# decoupled weight decay to them.
optimizer = torch.optim.AdamW(params=model.fc.parameters(), lr=1e-4, weight_decay=1e-4)


In [8]:
# Multiply the learning rate by 0.3 once the monitored metric (stepped with
# the validation loss in the training loop) has failed to decrease for more
# than 3 consecutive epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.3, patience=3)


In [None]:
from tqdm import tqdm

EPOCHS = 20
best_val_loss = float("inf")

# Train the classifier head for EPOCHS epochs. After every epoch the model is
# evaluated on the validation set, the LR scheduler is stepped with the
# validation loss, and the weights are saved whenever that loss improves.
#
# Losses are accumulated weighted by batch size: `criterion` returns the mean
# over one batch, so averaging those means per batch would over-weight a
# short final batch. Weighting gives the true per-sample mean, which is the
# value the scheduler and the checkpoint decision should see.
for epoch in range(EPOCHS):
    # NOTE(review): model.train() also puts the frozen backbone's BatchNorm
    # layers in training mode, so their running statistics keep updating even
    # though their weights are frozen - confirm this is intended.
    model.train()
    train_loss_sum = 0.0
    train_seen = 0

    for imgs, labels in tqdm(train_loader):
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        batch_n = labels.size(0)
        train_loss_sum += loss.item() * batch_n
        train_seen += batch_n

    train_loss = train_loss_sum / train_seen

    # Validation
    model.eval()
    val_loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)

            batch_n = labels.size(0)
            val_loss_sum += loss.item() * batch_n
            preds = outputs.argmax(1)
            correct += (preds == labels).sum().item()
            total += batch_n

    val_loss = val_loss_sum / total
    val_acc = correct / total

    scheduler.step(val_loss)

    print(f"""
    Epoch {epoch+1}/{EPOCHS}
    Train Loss: {train_loss:.4f}
    Val Loss:   {val_loss:.4f}
    Val Acc:    {val_acc:.4f}
    """)

    # Keep the weights of the best epoch so far (lowest validation loss).
    # Training itself always runs for the full EPOCHS; nothing stops it early.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_model.pth")


  super().__init__(loader)
 19%|█▊        | 170/915 [07:35<29:40,  2.39s/it]

In [None]:
from torchvision import datasets
from torch.utils.data import DataLoader
import os

import torch

# Held-out test split, stored next to the train/val folders under DATA_PATH.
TEST_PATH = os.path.join(DATA_PATH, "test")

test_ds = datasets.ImageFolder(
    root=TEST_PATH,
    transform=val_test_transform   # NO augmentation on test
)

test_loader = DataLoader(
    test_ds,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    # Pinned memory only benefits host -> CUDA copies; requesting it without
    # CUDA has no effect and makes PyTorch emit a warning.
    pin_memory=torch.cuda.is_available()
)

print("Test samples:", len(test_ds))


In [None]:
# Restore the checkpoint written by the training loop, remapping its tensors
# onto the current device, then switch the model to inference mode.
best_state = torch.load("best_model.pth", map_location=device)
model.load_state_dict(best_state)
model.to(device)
model.eval()


In [None]:
import torch

# Evaluate the restored model on the test split.
#
# `criterion` returns the mean loss of one batch, so each batch loss is
# weighted by its size before averaging. Averaging the batch means directly
# would over-weight a short final batch.
test_loss_sum = 0.0
correct = 0
total = 0

with torch.no_grad():
    for imgs, labels in test_loader:
        imgs, labels = imgs.to(device), labels.to(device)

        outputs = model(imgs)
        loss = criterion(outputs, labels)

        batch_n = labels.size(0)
        test_loss_sum += loss.item() * batch_n

        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += batch_n

test_loss = test_loss_sum / total
test_acc = correct / total

print(f"""
========================
TEST RESULTS
========================
Test Loss: {test_loss:.4f}
Test Acc:  {test_acc:.4f}
""")
