## Deep Learning Facial Expression Model Building
Ethan Assefa, Thomas Burrell, Tatev Gomstyan

## Environment Set-Up

Begin by loading packages needed and creating pipeline to bring in data:

In [2]:
# necessary packages
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import numpy as np
#import matplotlib.pyplot as plt
from torchvision import transforms, datasets
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import os
from torch.utils.data import Dataset
from PIL import Image
from torch.utils.data import DataLoader
from tqdm import tqdm  # Import tqdm for the progress bar

In [3]:
# Check if GPU is available and set the default device to GPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f'GPU is available: {torch.cuda.get_device_name(0)}')
else:
    device = torch.device("cpu")
    print('GPU is not available, using CPU instead.')

GPU is available: NVIDIA GeForce RTX 4090


In [None]:
TRAIN_DIR = ' DS6050 Final Project/face_expression_archive/images/train'
TEST_DIR = ' DS6050 Final Project/face_expression_archive/images/validation'

In [5]:
# collects image file and organizes for processing
def createdataframe(dir):
    image_paths = []
    labels = []
    for label in os.listdir(dir):
        for imagename in os.listdir(os.path.join(dir, label)):
            image_paths.append(os.path.join(dir, label, imagename))
            labels.append(label)
        print(label, "completed")
    return image_paths, labels

In [6]:
# Brings in training data
train = pd.DataFrame()
train['image'], train['label'] = createdataframe(TRAIN_DIR)
print(train.head(3))

angry completed
disgust completed
fear completed
happy completed
neutral completed
sad completed
surprise completed
                                               image  label
0  C:/Users/affes/OneDrive/Documents/UVA SDS Spri...  angry
1  C:/Users/affes/OneDrive/Documents/UVA SDS Spri...  angry
2  C:/Users/affes/OneDrive/Documents/UVA SDS Spri...  angry


In [7]:
# Brings in testing data
test = pd.DataFrame()
test['image'], test['label'] = createdataframe(TEST_DIR)
print(test.head(3))

angry completed
disgust completed
fear completed
happy completed
neutral completed
sad completed
surprise completed
                                               image  label
0  C:/Users/affes/OneDrive/Documents/UVA SDS Spri...  angry
1  C:/Users/affes/OneDrive/Documents/UVA SDS Spri...  angry
2  C:/Users/affes/OneDrive/Documents/UVA SDS Spri...  angry


In [None]:
# Path to check
path_to_check = ' DS6050 Final Project/face_expression_archive/images/train'

# Iterate over files and try to open them
for dirpath, dirnames, filenames in os.walk(path_to_check):
    for filename in filenames:
        filepath = os.path.join(dirpath, filename)
        try:
            with open(filepath, 'rb') as f:
                pass  # File can be opened
        except Exception as e:
            print(f"Cannot open file {filepath}: {e}")

## Preprocessing Data

Two key methods for preprocessing:

1. Data augmentation
  - This helps to create a more robust model by artificially expanding the dataset using variations of the training data through transformations like rotation, zoom, flips, etc. It's particularly effective in scenarios like facial expression recognition, where expressions can vary widely in different lighting, angles, and facial positions.
2. Feature Standardization
  - Standardizing images by scaling pixel values to have mean 0 and variance 1 helps in speeding up the training process and leads to faster convergence.


In [9]:
# Define transformations to the training data for data augmentation
data_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(), # Randomly flip images horizontally
    transforms.RandomRotation(10), # Randomly rotate images by up to 10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), # Randomly jitter color
    transforms.RandomResizedCrop(224), # Randomly crop and resize images to 224x224
    transforms.ToTensor(), # Convert images to tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # Standardize images
])

# Function to create specific emotion dataset:
def prepare_emotion_dataset(dataframe, target_emotion):
    # Create hard copy of dataframe
    ouputs = dataframe.copy()
    # target_emotion should be the encoded label for the desired emotion
    # Add a new binary column for the target emotion
    ouputs['target'] = (ouputs['label'] == target_emotion).astype(int)
    return ouputs

# Class for DataLoader setup, use the 'target' column for labels
class FacialExpressionDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx, 0]  # Where first column is image path
        image = Image.open(img_path).convert('RGB')
        label = self.dataframe.iloc[idx, 2]  # Where 'target' is in the third column

        if self.transform:
            image = self.transform(image)

        return image, label

In [10]:
# Before having the emotion-specific models, would need to have the dataset with all labels numerically coded
# Initialize label encoder
label_encoder = LabelEncoder()
# Properly encode the labels
train['label'] = label_encoder.fit_transform(train['label'])
test['label'] = label_encoder.transform(test['label'])

In [11]:
# Check encoded labels
print("Encoded Train Labels:", label_encoder.classes_)
print("Test Data after Encoding:", test['label'].unique())

# Create list of emotions
emotions = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

# Further checks before preparing datasets
for emotion in emotions:
    emotion_label = label_encoder.transform([emotion])[0]
    print(f"Encoding for {emotion}: {emotion_label}")
    # Prepare datasets

Encoded Train Labels: ['angry' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']
Test Data after Encoding: [0 1 2 3 4 5 6]
Encoding for angry: 0
Encoding for disgust: 1
Encoding for fear: 2
Encoding for happy: 3
Encoding for neutral: 4
Encoding for sad: 5
Encoding for surprise: 6


In [12]:
# Create function for piecemeal training and test sets (one for each emotion)
def piecemeal_train_test(emotion, batch_num):
    # Determine emotion coding
    emotion_lbl = label_encoder.transform([emotion])[0]

    # Train data
    train_dataset = prepare_emotion_dataset(train, emotion_lbl)
    train_dataset = FacialExpressionDataset(dataframe=train_dataset, transform=data_transforms)
    train_loader = DataLoader(train_dataset, batch_size=batch_num, shuffle=True)

    # Test data
    test_dataset = prepare_emotion_dataset(test, emotion_lbl)
    test_dataset = FacialExpressionDataset(dataframe=test_dataset, transform=transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]))
    test_loader = DataLoader(test_dataset, batch_size=batch_num, shuffle=False)
    
    return train_loader, test_loader

## Model Generation
We create individual emotion models to focus on learning specific emotions, then we create a multi-class model for all emotions to focus on general learning. These will all be combined in an ensemble model later on. Three methods incorporated in the model architecture:

1. Dropout layer
  - Dropout is a form of regularization technique that helps prevent overfitting in neural networks. It does this by randomly setting a fraction of input units to 0 at each update during training time, which helps to make the model more robust as it cannot rely on any single set of features.
2. Regularization Techniques
  - Besides dropout, other regularization techniques include L1 and L2 regularization, which add a penalty on the size of coefficients. L2 regularization (weight decay in deep learning) is especially common.
3. Batch Normalization
  - This technique normalizes the input layer by adjusting and scaling activations. It allows each layer of a network to learn by itself a little bit more independently of other layers, which can speed up training and improve the final performance of the network.

### Individual Emotion Models
We begin by creating, training, and testing seven seperate models - one for each emotion class.

In [13]:
# Class for the emotion model assuming a batch size of 32
class EmotionModel(nn.Module):
    def __init__(self):
        super(EmotionModel, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32), # Batch normalization
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(0.25), # 25% dropout
            
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(0.25) # 25% dropout
        )
        self.classifier = nn.Sequential(
            nn.Linear(64 * 56 * 56, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5), # 50% dropout
            nn.Linear(256, 1)  # Binary classification for specific emotion vs not
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.classifier(x)
        return x

# Function to evaluate the model on the test set
def evaluate_model(model, test_loader, device):
    model.eval()  # Set the model to evaluation mode
    total = 0
    correct = 0

    with torch.no_grad():  # Disable gradient calculation for evaluation, which saves memory and computations
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device).float()
            outputs = model(images)
            predicted = torch.sigmoid(outputs).round()  # Apply sigmoid and round to get binary predictions
            total += labels.size(0)
            correct += (predicted == labels.unsqueeze(1)).sum().item()

    accuracy = 100 * correct / total
    return accuracy

In [59]:
# Set device as GPU if avaliable
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Define parameters
num_epochs = 30
batch_size = 32
learn_rate = 0.00001
weight_decay = 1e-5

# Define the 7 emotion categories
emotions = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

# Create dictionary of models
models = {}
# Dictionary to store accuracies
accuracies = {}

# Automate training for each emotion
for emotion in emotions:
    train_loader, test_loader = piecemeal_train_test(emotion, batch_size)
    model = EmotionModel().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate, weight_decay=weight_decay)
    criterion = nn.BCEWithLogitsLoss()

    print(f"Training for emotion: {emotion}")
    for epoch in range(num_epochs):
        epoch_loss = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)  # Progress bar for each epoch
        for images, labels in progress_bar:
            images = images.to(device)
            labels = labels.to(device).float()
            outputs = model(images)
            labels = labels.unsqueeze(1)
            loss = criterion(outputs, labels)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            progress_bar.set_postfix(loss=epoch_loss/len(train_loader))  # Update progress bar with average loss

    # After training, evaluate on the test set
    accuracy = evaluate_model(model, test_loader, device)
    accuracies[emotion] = accuracy
    print(f"Testing for emotion: {emotion} - Accuracy: {accuracy:.2f}%")
    
    # Add emotion model to dictionary for all the emotions
    models[emotion] = model

Training for emotion: angry


                                                                           

Testing for emotion: angry - Accuracy: 86.41%
Training for emotion: disgust


                                                                            

Testing for emotion: disgust - Accuracy: 98.43%
Training for emotion: fear


                                                                           

Testing for emotion: fear - Accuracy: 85.59%
Training for emotion: happy


                                                                           

Testing for emotion: happy - Accuracy: 74.17%
Training for emotion: neutral


                                                                           

Testing for emotion: neutral - Accuracy: 82.79%
Training for emotion: sad


                                                                           

Testing for emotion: sad - Accuracy: 83.88%
Training for emotion: surprise


                                                                           

Testing for emotion: surprise - Accuracy: 88.72%


### Multi-class General Model
We then create a single model for classifying all seven emotions for general learning.

In [14]:
class MultiClassCNN(nn.Module):
    def __init__(self, num_classes=7):
        super(MultiClassCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),  # Batch normalization layer after convolution
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(p=0.3),  # Dropout layer after pooling

            nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(128),  # Batch normalization layer after convolution
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(p=0.4),  # Increased dropout rate for deeper layers

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),  # Batch normalization layer after convolution
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(p=0.5)  # Further increased dropout rate for even deeper layers
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 28 * 28, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),  # Dropout before the final layer
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.classifier(x)
        return x

In [15]:
class FullTestDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx, 0]  # Assuming the first column is images
        image = Image.open(img_path).convert('RGB')  # Convert the image file path to a PIL image
        label = self.dataframe.iloc[idx, 1]  # Assuming the second column is labels
        if self.transform:
            image = self.transform(image)
        return image, label

# Transformations
test_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# Create the DataLoader for the full test set
full_test_dataset = FullTestDataset(test, transform=test_transform)
full_test_loader = DataLoader(full_test_dataset, batch_size=32, shuffle=False)

# Create the DataLoader for the full train set
full_train_dataset = FullTestDataset(train, transform=data_transforms)
full_train_loader = DataLoader(full_train_dataset, batch_size=32, shuffle=True)

In [19]:
# Set device as GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Define training parameters
num_epochs = 50
batch_size = 64
learn_rate = 0.001
weight_decay = 1e-4

# Prepare your data loaders
#full_train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
#full_test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Instantiate and set up the model
multimodel = MultiClassCNN(num_classes=7).to(device)
optimizer = torch.optim.Adam(multimodel.parameters(), lr=learn_rate, weight_decay=weight_decay) # Added weight decay for L2 regularization
criterion = nn.CrossEntropyLoss()

# Train the model
print("Training the multi-class model")
for epoch in range(num_epochs):
    multimodel.train()
    epoch_loss = 0
    progress_bar = tqdm(full_train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
    for images, labels in progress_bar:
        images = images.to(device)
        labels = labels.to(device).long()  # Convert labels to long

        optimizer.zero_grad()
        outputs = multimodel(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        progress_bar.set_postfix(loss=epoch_loss / len(full_train_loader))

# Evaluate the model
def evaluate_model(model, full_test_loader):
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for images, labels in full_test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total
    print(f"Testing accuracy: {accuracy * 100:.2f}%")

evaluate_model(multimodel, full_test_loader)

Training the multi-class model


                                                                          

Testing accuracy: 42.15%


In [23]:
def evaluate_on_full_test(model, test_loader, device):
    model.to(device)
    model.eval()
    true_labels = []
    predicted_labels = []
    all_probabilities = []

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs = model(images)  # Getting outputs from the model

            # Assuming outputs are logits, compute probabilities and indices
            probabilities = torch.softmax(outputs, dim=1)
            predicted_indices = torch.argmax(probabilities, dim=1)

            # Storing results
            predicted_labels.extend(predicted_indices.cpu().tolist())
            true_labels.extend(labels.tolist())
            all_probabilities.extend(probabilities.cpu().numpy())  # Collecting probabilities for analysis

    # Creating a DataFrame for probabilities
    probabilities_df = pd.DataFrame(all_probabilities, columns=[f"Class_{i}" for i in range(probabilities.shape[1])])
    probabilities_df['True Label'] = true_labels
    probabilities_df['Predicted Label'] = predicted_labels

    accuracy = accuracy_score(true_labels, predicted_labels)
    print(f"Accuracy of the model on the full test set: {accuracy * 100:.2f}%")
    return accuracy, probabilities_df

# Usage
accuracy, probabilities_df = evaluate_on_full_test(multimodel, full_test_loader, device)
#print(probabilities_df.head())

probabilities_df

Accuracy of the model on the full test set: 42.15%


Unnamed: 0,Class_0,Class_1,Class_2,Class_3,Class_4,Class_5,Class_6,True Label,Predicted Label
0,0.143050,0.013495,0.131041,0.231398,0.186566,0.234237,0.060213,0,5
1,0.123385,0.016752,0.149738,0.101247,0.265359,0.259672,0.083847,0,4
2,0.127057,0.020709,0.158809,0.264156,0.183652,0.164566,0.081050,0,3
3,0.194167,0.033447,0.170624,0.029215,0.287957,0.244955,0.039636,0,4
4,0.288995,0.039059,0.170677,0.046442,0.189329,0.209056,0.056441,0,0
...,...,...,...,...,...,...,...,...,...
7061,0.010065,0.001867,0.108806,0.005673,0.007977,0.007648,0.857963,6,6
7062,0.135805,0.014832,0.134449,0.315266,0.158279,0.195255,0.046114,6,3
7063,0.140139,0.015285,0.214504,0.116600,0.132359,0.152732,0.228381,6,6
7064,0.141508,0.014625,0.143188,0.201710,0.208087,0.221334,0.069548,6,5


In [None]:
# Save the multiclass model
torch.save(multimodel.state_dict(), f' DS6050 Final Project/multiclass_model_v2.pth')
# Full model details
torch.save({
        'model_state_dict': multimodel.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }, f' DS6050 Final Project/multiclass_model_v2_full.pth')


## Ensemble Methods for Models
In ensemble learning, the goal is to combine different models to improve the overall performance and reliability of predictions. Two common methods for combining models in an ensemble are voting mechanisms and averaging approaches. Each has its own benefits and drawbacks, and the choice of which to use can depend on the specifics of the problem and the models involved.

Average Mechanism: This involves averaging the outputs of the models, typically their predicted probabilities (after applying a sigmoid function). The emotion corresponding to the highest average probability is chosen. This method can provide a more nuanced view as it considers the confidence levels of each model's predictions. 

### Ensemble Model (Single-Emotion and Multi-Class Models)
This hybrid ensemble model can potentially improve performance by balancing the broad learning capability of a multi-class model with the focused insights of binary classifiers.

In [105]:
class MultiSingleEmotionEnsemble(nn.Module):
    def __init__(self, emotion_models, multi_class_model, weights):
        super(MultiSingleEmotionEnsemble, self).__init__()
        self.emotion_models = emotion_models  # Dictionary of binary models
        self.multi_class_model = multi_class_model  # Multi-class model
        self.weights = weights  # Weights for combining probabilities
        self.emotions = list(emotion_models.keys())  # List of emotion names

    def forward(self, x):
        # Multi-class model probabilities
        self.multi_class_model.eval()
        with torch.no_grad():
            mc_probs = torch.softmax(self.multi_class_model(x), dim=1) * self.weights['multi_class']

        # Binary model probabilities
        combined_probs = mc_probs
        for emotion, model in self.emotion_models.items():
            model.eval()
            with torch.no_grad():
                output = torch.sigmoid(model(x)).unsqueeze(1)  # Ensure it's the right shape
                combined_probs[:, self.emotions.index(emotion)] += output.squeeze() * self.weights['binary'][emotion]

        # Normalize probabilities to ensure they sum to 1 across classes
        combined_probs /= combined_probs.sum(dim=1, keepdim=True)

        # Find the index of the highest probability for each image/sample
        predicted_indices = torch.argmax(combined_probs, dim=1)
        probabilities_df = pd.DataFrame(combined_probs.detach().cpu().numpy(), columns=self.emotions)

        return predicted_indices, probabilities_df

# Weights configuration
weights = {
    'multi_class': 0.2,
    'binary': {emotion: 0.8 / len(models) for emotion in models}  # Evenly distribute the remaining weight
}

# Create ensemble model including the multi-class model
mult_sing_ensemble_model = MultiSingleEmotionEnsemble(models, model, weights)

In [106]:
def evaluate_ensemble_on_full_test(ensemble_model, test_loader, device):
    ensemble_model.to(device)
    ensemble_model.eval()
    true_labels = []
    predicted_labels = []
    all_probabilities_df = pd.DataFrame()  # Initialize an empty DataFrame to store all probabilities

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            predicted_indices, probabilities_df = ensemble_model(images)  # Get predictions and probabilities
            predicted_indices = predicted_indices.cpu().tolist()
            predicted_labels.extend(predicted_indices)
            true_labels.extend(labels.tolist())

            # Add true and predicted labels to the probabilities DataFrame
            probabilities_df['True Label'] = labels.tolist()
            probabilities_df['Predicted Label'] = predicted_indices
            
            # Concatenate this batch's results with the main DataFrame
            all_probabilities_df = pd.concat([all_probabilities_df, probabilities_df], ignore_index=True)

    accuracy = accuracy_score(true_labels, predicted_labels)
    return accuracy, all_probabilities_df

# Evaluation
accuracy, probabilities_df = evaluate_ensemble_on_full_test(mult_sing_ensemble_model, full_test_loader, device)
print(f"Accuracy of the ensemble model on the full test set: {accuracy*100:.2f}%")
probabilities_df

Accuracy of the ensemble model on the full test set: 45.08%


Unnamed: 0,angry,disgust,fear,happy,neutral,sad,surprise,True Label,Predicted Label
0,0.160657,0.014053,0.114196,0.148429,0.261302,0.209226,0.092137,0,4
1,0.144125,0.018937,0.253209,0.038171,0.157364,0.173942,0.214251,0,2
2,0.150291,0.043025,0.172330,0.173178,0.152052,0.135016,0.174108,0,6
3,0.284856,0.041949,0.201904,0.052841,0.139574,0.222749,0.056127,0,0
4,0.263945,0.028938,0.166002,0.051797,0.162665,0.172206,0.154447,0,0
...,...,...,...,...,...,...,...,...,...
7061,0.069161,0.005777,0.143585,0.033606,0.062796,0.030331,0.654744,6,6
7062,0.123276,0.016987,0.082290,0.407092,0.140708,0.137811,0.091835,6,3
7063,0.138383,0.007934,0.184557,0.075307,0.090566,0.118299,0.384953,6,6
7064,0.120304,0.012295,0.153919,0.120829,0.251153,0.171304,0.170194,6,4


In [None]:
# Save the probabilities_df as csv
probabilities_df.to_csv(' DS6050 Final Project/multi_sing_ensemble_probabilities.csv')

In [None]:
# Save the multiclass model
torch.save(model.state_dict(), f' DS6050 Final Project/multiclass_model.pth')
# Full model details
torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }, f' DS6050 Final Project/multiclass_model_full.pth')


### Ensemble Model (Single-Emotion Models Only)
Given that each model is trained specifically to recognize the presence of one emotion, the average mechanism might be more appropriate. It leverages the confidence levels of each model in predicting the presence of its respective emotion, allowing for a more balanced decision-making process when multiple models might be slightly confident.

In [68]:
class EmotionEnsemble(nn.Module):
    def __init__(self, emotion_models):
        super(EmotionEnsemble, self).__init__()
        self.emotion_models = emotion_models  # A dictionary of models, each trained for a specific emotion
        self.emotions = list(emotion_models.keys())  # Keep a list of emotion names

    def forward(self, x):
        # Store each model's output (probabilities)
        outputs = []
        for emotion, model in self.emotion_models.items():
            model.eval()  # Ensure the model is in eval mode
            with torch.no_grad():
                output = torch.sigmoid(model(x))  # Apply sigmoid to convert logits to probabilities
                outputs.append(output)

        # Stack all model outputs vertically and squeeze to remove extra dimensions
        outputs = torch.stack(outputs, dim=1).squeeze(2)  # Remove the extra dimension
        
        # Create DataFrame for outputs to visualize probabilities easily
        probabilities_df = pd.DataFrame(outputs.detach().cpu().numpy(), columns=self.emotions)

        # Find the index of the highest probability for each image/sample
        predicted_indices = torch.argmax(outputs, dim=1)

        return predicted_indices, probabilities_df

In [69]:
# Assuming 'models' is a dictionary with each emotion as a key and its respective trained model as the value
ensemble_model = EmotionEnsemble(models)

In [76]:
# Create method to evalaute ensemble on the full test set
def evaluate_ensemble_on_full_test(ensemble_model, test_loader, device):
    ensemble_model.to(device)
    ensemble_model.eval()
    true_labels = []
    predicted_labels = []
    all_probabilities_df = pd.DataFrame()  # Initialize an empty DataFrame to store all probabilities

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            predicted_indices, probabilities_df = ensemble_model(images)  # Get predictions and probabilities
            predicted_indices = predicted_indices.cpu().tolist()
            predicted_labels.extend(predicted_indices)
            true_labels.extend(labels.tolist())

            # Add true and predicted labels to the probabilities DataFrame
            probabilities_df['True Label'] = labels.tolist()
            probabilities_df['Predicted Label'] = predicted_indices
            
            # Concatenate this batch's results with the main DataFrame
            all_probabilities_df = pd.concat([all_probabilities_df, probabilities_df], ignore_index=True)

    accuracy = accuracy_score(true_labels, predicted_labels)
    return accuracy, all_probabilities_df

# Assuming 'ensemble_model' is your ensemble model and 'device' is your computation device
accuracy, probabilities_df = evaluate_ensemble_on_full_test(ensemble_model, full_test_loader, device)
print(f"Accuracy of the ensemble model on the full test set: {accuracy*100:.2f}%")

Accuracy of the ensemble model on the full test set: 40.16%


In [73]:
probabilities_df

Unnamed: 0,angry,disgust,fear,happy,neutral,sad,surprise,True Label,Predicted Label
0,0.280022,0.037069,0.181467,0.300229,0.306570,0.287575,0.185023,0,4
1,0.282884,0.049432,0.364739,0.048894,0.186987,0.227994,0.417891,0,6
2,0.247936,0.063992,0.302449,0.296723,0.225163,0.202377,0.424013,0,6
3,0.297718,0.049174,0.234457,0.102684,0.205566,0.245725,0.112290,0,0
4,0.316794,0.034164,0.214052,0.089469,0.230756,0.187097,0.399400,0,6
...,...,...,...,...,...,...,...,...,...
7061,0.217757,0.018629,0.360000,0.099596,0.193861,0.093551,0.496610,6,6
7062,0.265075,0.042124,0.161999,0.264774,0.330143,0.321558,0.254835,6,4
7063,0.259494,0.021643,0.219369,0.148707,0.147564,0.220532,0.358364,6,6
7064,0.205053,0.035980,0.279591,0.182811,0.285887,0.196963,0.341128,6,6


In [None]:
# Save the probabilities_df as csv
probabilities_df.to_csv(' DS6050 Final Project/sing_only_ensemble_probabilities.csv')

In [None]:
# Save individual models
for emotion, model in models.items():
    torch.save(model.state_dict(), f' DS6050 Final Project/{emotion}_model.pth')

# Optionally save other training parameters
for emotion, model in models.items():
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }, f' DS6050 Final Project/{emotion}_model_full.pth')

# Save the ensemble model
# Note: Ensure that the ensemble model is initialized and used in a way that it can be reconstructed from individual models
torch.save({emotion: model.state_dict() for emotion, model in models.items()}, ' DS6050 Final Project/ensemble_model.pth')

## Stacking
Simple averaging for the ensemble model hasn't worked too well for our accuracy. We now try model stacking, where we use a second-level model (the stacker) to combine the predictions of multiple base models. Stacking might yield better performance, as it can robustly integrate learnings from multiple diverse models.
- We implement cross-validation to better utilize our limited data.

In [81]:
def create_stacking_features(models, loader, device):
    stack_x = []
    stack_y = []
    for images, labels in loader:
        images = images.to(device)
        features = []
        for model in models.values():
            model.eval()
            with torch.no_grad():
                output = torch.sigmoid(model(images)).cpu().numpy()  # Ensure output is in [0,1]
                # Ensure the output is always reshaped to (batch_size, 1)
                output = output.reshape(-1, 1)  # Reshape to avoid dimension mismatch
                features.append(output)
        # Stack features horizontally
        features = np.hstack(features)  # This should now be safe
        stack_x.append(features)
        stack_y.append(labels.numpy())
    return np.vstack(stack_x), np.hstack(stack_y)

# Create the DataLoader for the full train set
full_train_dataset = FullTestDataset(train, transform=data_transforms)
full_train_loader = DataLoader(full_train_dataset, batch_size=32, shuffle=False)

# Assuming `full_train_loader` is your DataLoader for the training data
stack_x, stack_y = create_stacking_features(models, full_train_loader, device)

In [None]:
# Assuming `model_accuracies` is a dict with model names as keys and their validation accuracies as values
weights = np.array([model_accuracies[model_name] for model_name in sorted(model_accuracies)])
weighted_features = features * weights / weights.sum()  # Normalize weights to sum to 1

stack_x.append(weighted_features)

In [91]:
from sklearn.ensemble import GradientBoostingClassifier

stacker = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
cv_scores = cross_val_score(stacker, stack_x, stack_y, cv=6)  # Using 6-fold cross-validation
print(f"Cross-validated accuracy scores: {cv_scores}")
print(f"Mean CV accuracy: {np.mean(cv_scores) * 100:.2f}%")


Cross-validated accuracy scores: [0.29746045 0.30391341 0.30745212 0.30918176 0.2989798  0.30314387]
Mean CV accuracy: 30.34%


In [87]:
# Initialize the logistic regression stacker
stacker = LogisticRegression(max_iter=1000)

# Perform cross-validation
cv_scores = cross_val_score(stacker, stack_x, stack_y, cv=6)  # Using 5-fold cross-validation
print(f"Cross-validated accuracy scores: {cv_scores}")
print(f"Mean CV accuracy: {np.mean(cv_scores) * 100:.2f}%")

Cross-validated accuracy scores: [0.30412157 0.30974188 0.3143214  0.31355403 0.30897356 0.30439309]
Mean CV accuracy: 30.92%


In [88]:
# Split the stacking features into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(stack_x, stack_y, test_size=0.2, random_state=42)

In [89]:
stacker = LogisticRegression(max_iter=1000)
stacker.fit(X_train, y_train)

# Validate the stacker model
stacker_score = stacker.score(X_val, y_val)
print(f"Validation accuracy of stacker model: {stacker_score * 100:.2f}%")


Validation accuracy of stacker model: 31.64%


In [90]:
# Prepare test set features for the stacker
test_stack_x, test_stack_y = create_stacking_features(models, full_test_loader, device)

# Predict and evaluate
test_predictions = stacker.predict(test_stack_x)
test_accuracy = accuracy_score(test_stack_y, test_predictions)
print(f"Test accuracy of the stacked model: {test_accuracy * 100:.2f}%")

Test accuracy of the stacked model: 40.83%


## Save Models
Saving and loading models in PyTorch is straightforward using torch.save and torch.load. This is especially useful for large models or when training takes a significant amount of time. You can save not just the model weights but also the optimizer state, epoch details, and any other metadata you might find useful for resuming training or for inference later.

Here we save our individual models as well as the ensemble model:

In [None]:
# Save individual models
for emotion, model in models.items():
    torch.save(model.state_dict(), f' DS6050 Final Project/{emotion}_model.pth')

# Optionally save other training parameters
for emotion, model in models.items():
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }, f' DS6050 Final Project/{emotion}_model_full.pth')

# Save the ensemble model
# Note: Ensure that the ensemble model is initialized and used in a way that it can be reconstructed from individual models
torch.save({emotion: model.state_dict() for emotion, model in models.items()}, ' DS6050 Final Project/ensemble_model.pth')

## Load Models (if needed)
If needed later, we can now load the model objects from our environment:

In [None]:
# Load individual models
loaded_models = {}
for emotion in emotions:
    model = EmotionModel()  # Ensure the model architecture is defined or imported
    model.load_state_dict(torch.load(f' DS6050 Final Project/{emotion}_model.pth'))
    model.eval()
    loaded_models[emotion] = model

# Load the ensemble model
ensemble_state_dicts = torch.load(' DS6050 Final Project/ensemble_model.pth')
ensemble_models = {emotion: EmotionModel().load_state_dict(state) for emotion, state in ensemble_state_dicts.items()}
ensemble_model = EmotionEnsemble(ensemble_models)
ensemble_model.eval()  # Set to evaluation mode if only for inference