<a href="https://colab.research.google.com/github/RiyanMak/AIM25/blob/Training_pipeline/week5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.model_selection import KFold
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score
import seaborn as sns
from tqdm.notebook import tqdm
import torchvision.transforms as transforms
import kagglehub


# Import from Week 4 file
import week4
from week4 import FacialExpressionCNN

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class FERDataset(Dataset):

    def __init__(self, csv_file, transform=None):
        """
        FER2013 Dataset class
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.test_data = pd.read_csv("/content/test_labels.csv")
        self.train_data = pd.read_csv("/content/train_labels.csv")
        self.fer_data = pd.concat([self.test_data, self.train_data])
        self.transform = transform

        # FER2013 has 7 emotion classes
        self.class_names = {
            0: 'Angry',
            1: 'Disgust',
            2: 'Fear',
            3: 'Happy',
            4: 'Sad',
            5: 'Surprise',
            6: 'Neutral'
        }

    def __len__(self):
        return len(self.fer_data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # FER dataset has pixels stored as space-separated values in a string
        pixels = self.fer_data.iloc[idx, 1].split()
        pixels = np.array(pixels, dtype='float32').reshape(48, 48)
        image = Image.fromarray(pixels.astype('uint8'), 'L')  # 'L' for grayscale

        # Apply transformations
        if self.transform:
            image = self.transform(image)

        # Get label
        label = self.fer_data.iloc[idx, 0]

        return image, label

Using device: cpu


In [10]:
def load_fer_dataset(csv_path):
    # Define transformations
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])

    val_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])

    # dataset
    full_dataset = FERDataset(csv_file=csv_path, transform=train_transform)
    train_size = int(0.8 * len(full_dataset)) # Split dataset
    val_size = len(full_dataset) - train_size

    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
    # Update validation transforms
    val_dataset.dataset.transform = val_transform
    # make data loaders
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)

    return train_loader, val_loader

In [11]:
def train_model(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    progress_bar = tqdm(train_loader, desc='Training')

    for images, labels in progress_bar:
        images, labels = images.to(device), labels.to(device)

        # set gradient to zero
        optimizer.zero_grad()

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # backward pass and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # done by gpt: progress bar
        progress_bar.set_postfix({
            'loss': running_loss / total,
            'acc': 100 * correct / total
        })

    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total

    return epoch_loss, epoch_acc

def validate_model(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc='Validation'):
            images, labels = images.to(device), labels.to(device)

            # forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            # stat/prob
            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Store predictions for metrics
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # metrics
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return epoch_loss, epoch_acc, f1, all_preds, all_labels

In [None]:
def cross_validation(model_class, dataset, fold_count=5, num_epochs=10, device=device):
    kf = KFold(n_splits=fold_count, shuffle=True, random_state=42)
    fold_results = []

    dataset_size = len(dataset)
    indices = list(range(dataset_size))

    for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
        print(f"Fold {fold+1}/{fold_count}")

        # Create data samplers
        train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
        val_sampler = torch.utils.data.SubsetRandomSampler(val_idx)

        # Create data loaders
        train_loader = DataLoader(dataset, batch_size=64, sampler=train_sampler)
        val_loader = DataLoader(dataset, batch_size=64, sampler=val_sampler)

        model = model_class(num_classes=7).to(device)
        model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1).to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)

        best_val_f1 = 0.0

        for epoch in range(num_epochs):
            # Train
            train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)

            # Validate
            val_loss, val_acc, val_f1, _, _ = validate_model(model, val_loader, criterion, device)

            # Scheduler step
            scheduler.step(val_loss)

            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, "
                  f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%, Val F1: {val_f1:.4f}")

            # Save best model
            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
                torch.save(model.state_dict(), f"model_fold{fold+1}.pt")

        # Final validation with best model
        model.load_state_dict(torch.load(f"model_fold{fold+1}.pt"))
        _, val_acc, val_f1, preds, labels = validate_model(model, val_loader, criterion, device)

        # Save fold results
        fold_results.append({
            'fold': fold+1,
            'accuracy': val_acc,
            'f1': val_f1,
            'confusion_matrix': confusion_matrix(labels, preds)
        })

        print(f"Fold {fold+1} Final - Accuracy: {val_acc:.2f}%, F1: {val_f1:.4f}")

    return fold_results

In [12]:
def train_fer_model(csv_path, num_epochs=20):
    """Train facial expression recognition model on FER dataset"""
    train_loader, val_loader = load_fer_dataset(csv_path)

    # Initialize model, loss function, and optimizer using the imported model from Week 4, but adjust for grayscale input
    model = FacialExpressionCNN(num_classes=7).to(device)
    model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1).to(device)
    model.flat_features = 512 * 3 * 3

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

    # Training loop
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'val_f1': []
    }

    best_val_f1 = 0.0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")

        # Train for one epoch
        train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)

        # Validate
        val_loss, val_acc, val_f1, _, _ = validate_model(model, val_loader, criterion, device)

        # Update learning rate
        scheduler.step(val_loss)

        # Save history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['val_f1'].append(val_f1)

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%, Val F1: {val_f1:.4f}")

        # Save best model
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_f1': val_f1,
                'val_acc': val_acc,
            }, 'best_fer_model.pt')
            print(f"Model saved with F1 score: {val_f1:.4f}")

    # Final evaluation
    model.load_state_dict(torch.load('best_fer_model.pt')['model_state_dict'])
    _, val_acc, val_f1, all_preds, all_labels = validate_model(model, val_loader, criterion, device)

    # Generate classification report
    report = classification_report(all_labels, all_preds, target_names=[
        'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'
    ])

    print("\nFinal Model Performance:")
    print(f"Accuracy: {val_acc:.2f}%, F1 Score: {val_f1:.4f}")
    print("\nClassification Report:")
    print(report)

    # Plot confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'],
                yticklabels=['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'])
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()

    return model, history

In [13]:
def save_model(model, path='facial_expression_model.pt'):
    """Save model to disk"""
    torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")

def load_model(path='facial_expression_model.pt'):
    """Load model from disk"""
    model = FacialExpressionCNN(num_classes=7)
    model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
    model.flat_features = 512 * 3 * 3

    model.load_state_dict(torch.load(path))
    model.eval()  # Set to evaluation mode
    return model

In [14]:
def evaluate_model(model, val_loader, device):
    """Evaluate model performance on validation set"""
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc='Evaluating'):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # metrics
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')

    #report
    report = classification_report(all_labels, all_preds, target_names=[
        'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'
    ])

    # Confusion matrix, not needed at this time
    # cm = confusion_matrix(all_labels, all_preds)

    return {
        'accuracy': accuracy,
        'f1': f1,
        'report': report,
        'confusion_matrix': cm
    }

In [29]:
import kagglehub

if __name__ == "__main__":
    # Download FER dataset from Kaggle via kagglehub
    path = kagglehub.dataset_download("nicolejyt/facialexpressionrecognition")
    print("Path to dataset files:", path)

    # Use the CSV from the downloaded path
    csv_file_path = path + "/fer2013.csv"

    # Define transforms
    transform = transforms.Compose([
        transforms.ToTensor()
    ])

    # Load dataset
    dataset = FERDataset(csv_file_path, transform=transform)

    # Split into train and validation
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

    # Initialize model, optimizer, loss
    model = FacialExpressionCNN().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Training loop
    num_epochs = 5
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

    # Evaluation
    results = evaluate_model(model, val_loader, device)
    print("Validation Accuracy:", results['accuracy'])
    print("F1 Score:", results['f1'])
    print("Classification Report:")
    print(results['report'])


Path to dataset files: /kaggle/input/facialexpressionrecognition


FileNotFoundError: [Errno 2] No such file or directory: '/content/test_labels.csv'

FileNotFoundError: [Errno 2] No such file or directory: 'test_labels.csv'