In [1]:
from zipfile import ZipFile
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler

from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.optim as optim
import numpy as np
from torch.utils.data import Subset
import matplotlib.pyplot as plt
import os
import cv2
import shutil
from pathlib import Path

from PIL import Image
import matplotlib.pyplot as plt
use_cuda = True
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Define class EmotionDataset

class EmotionDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.transform = transform

        # Map each emotion to indices from 0 ~ 6
        self.classes = ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
        self.images = []
        self.labels = []

        for label in self.classes:
            class_dir = os.path.join(self.root_dir, label)
            for img_name in os.listdir(class_dir):
                img_path = os.path.join(class_dir, img_name)
                self.images.append(img_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path)
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        # Map label to index
        label_idx = self.class_to_idx[label]
        return image, label_idx

In [3]:
# Get indicies of the targeted classes
def get_relevant_indices(dataset, classes, target_classes):
    # dataset = [(img1, 0), (img2, 1), (img3, 2), (img4, 0), (img5, 1)]

    """ Return the indices for datapoints in the dataset that belongs to the
    desired target classes, a subset of all possible classes.

    Args:
        dataset: Dataset object
        classes: A list of strings denoting the name of each class
        target_classes: A list of strings denoting the name of desired classes
                        Should be a subset of the 'classes'
    Returns:
        indices: list of indices that have labels corresponding to one of the
                 target classes
    """
    indices = []
    for i in range(len(dataset)):
        # Check if the label is in the target classes
        label_index = dataset[i][1] # ex: 3
        label_class = classes[label_index] # ex: 'A'
        if label_class in target_classes:
            indices.append(i)

    # if target class being class0 & class1, then indices = [0, 1, 3, 4]
    return indices

In [4]:
# Load and split data
def get_data_loader(data_directory, target_classes, batch_size):
    """ Loads images of facial emotions, splits the data into training, validation
    and testing datasets. Returns data loaders for the three preprocessed datasets.

    Args:
        target_classes: A list of strings denoting the name of the desired
                        classes. Should be a subset of the argument 'classes'
        batch_size: A int representing the number of samples per batch

    Returns:
        train_loader: iterable training dataset organized according to batch size
        val_loader: iterable validation dataset organized according to batch size
        test_loader: iterable testing dataset organized according to batch size
        classes: A list of strings denoting the name of each class
    """
    # Letters for classification
    classes = ('angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised')

    ########################################################################
    # The output of torchvision datasets are PILImage images of range [0, 1].
    # We transform them to Tensors of normalized range [-1, 1].
    transform = transforms.Compose(
         [transforms.Grayscale(1),
          transforms.Resize((48, 48)),
         transforms.ToTensor(),
         transforms.Normalize((0.5), (0.5))])

    ########################################################################
    # Load Gesture training data using the defined gesture dataset class above
    dataset = EmotionDataset(root_dir=data_directory, transform=transform)

    # Get the list of indices to sample from
    relevant_indices = get_relevant_indices(dataset, classes, target_classes)

    # Split into 60% train, 20% validation and 20% testing (since folder contains all data available)
    np.random.seed(1000) # Fixed numpy random seed for reproducible shuffling
    np.random.shuffle(relevant_indices)

    total_size = len(relevant_indices)

    train_split = int(total_size * 0.7)  # 70% for training
    val_split = int(total_size * 0.15)    # 15% for validation
    test_split = total_size - train_split - val_split  # Remaining 15% for testing

    # split into training and validation indices
    relevant_train_indices = relevant_indices[:train_split]
    relevant_val_indices = relevant_indices[train_split:train_split + val_split]
    relevant_test_indices = relevant_indices[train_split + val_split:]

    ########################################################################
    train_sampler = SubsetRandomSampler(relevant_train_indices)
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                               num_workers=1, sampler=train_sampler)

    val_sampler = SubsetRandomSampler(relevant_val_indices)
    val_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                              num_workers=1, sampler=val_sampler)

    test_sampler = SubsetRandomSampler(relevant_test_indices)
    test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                              num_workers=1, sampler=test_sampler)

    return train_loader, val_loader, test_loader, classes

In [5]:
# Load the data in
train_loader, val_loader, test_loader, classes = get_data_loader(
    data_directory = '/content/drive/My Drive/APS360 Tut & Labs/Processed_Dataset',
    target_classes=['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised'],
    batch_size=1) # 1 image per batch

print("Dataset loaded successfully.")

Dataset loaded successfully.


In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class EmotionRecognition(nn.Module):
    def __init__(self, num_classes=6):
        super(EmotionRecognition, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)

        # Pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Fully connected layers
        self.fc1 = nn.Linear(128 * 6 * 6, 512)
        self.fc2 = nn.Linear(512, num_classes)

        # Dropout layer
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = x.view(-1, 128 * 6 * 6)  # Flatten the tensor

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x


model = EmotionRecognition(num_classes=6)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [7]:
#Get model name for checkpointing
def get_model_name(name, batch_size, learning_rate, epoch):
   path = "model_{0}_bs{1}_lr{2}_epoch{3}".format(name, batch_size,
                                                  learning_rate, epoch)
   return path

In [9]:
def get_accuracy(model, data_loader):
    #initialize counters
    correct = 0
    total = 0
    for imgs, labels in data_loader:
        #Enable GPU if available
        if use_cuda and torch.cuda.is_available():
          imgs = imgs.cuda()
          labels = labels.cuda()

        output = model(imgs)
        #select index with maximum prediction score
        pred = output.max(1, keepdim=True)[1]
        correct += pred.eq(labels.view_as(pred)).sum().item()
        total += imgs.shape[0]
    #Return accuracy
    return correct / total

In [10]:
def plot_training_curve(path, num_epochs, train_acc, val_acc):
    plt.title("Training Curve")
    n = len(num_epochs) # number of epochs
    plt.plot(range(1,n+1), train_acc, label="Train")
    plt.plot(range(1,n+1), val_acc, label="Validation")
    plt.xlabel("Epoch")
    plt.ylabel("Error")
    plt.legend(loc='best')
    plt.show()

In [21]:
def train(model, train_loader, val_loader, batch_size=64, num_epochs=30,
          learn_rate=0.001, model_name='EmotionRecognition'):
    #Set seed
    torch.manual_seed(1700)

    #Set loss function and optimizer
    #I chose to use cross entropy loss for the loss function because
    #we have been taught that this is generally the best performing
    #loss function for multiclass classification problems.
    #I chose to use Adam as the optimizer because according to my
    #research, Adam generally provides very good results without
    #significant fine tuning.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learn_rate)
    emotion_tracker = np.zeros(7)
    percentage_tracker = np.zeros(7)
    emotions = ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']

    #populate accuracy arrays with zeroes
    train_acc = np.zeros(num_epochs)
    val_acc = np.zeros(num_epochs)

    #initialize iteration counter
    n = 0
    for epoch in range(num_epochs):
        for images, labels in iter(train_loader):
            #Enable GPU if available
            if use_cuda and torch.cuda.is_available():
              images = images.cuda()
              labels = labels.cuda()

            #perform forwards pass
            out = model(images)
            #Compute loss using Cross Entropy loss function
            loss = criterion(out, labels)
            tracker_index = torch.argmax(out)
            emotion_tracker[tracker_index] += 1

            #backward pass to calculate and update parameters.
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            #increment iteration counter
            n += 1

        # track training and validation accuracy
        train_acc[epoch] = get_accuracy(model, train_loader)
        val_acc[epoch] = get_accuracy(model, val_loader)

        #output accuracy
        print(("Epoch: {} | Training acc: {} |" + "Validation acc: {}"
        ).format(epoch, train_acc[epoch], val_acc[epoch]))

        #checkpoint model
        model_path = get_model_name(model_name, batch_size, learn_rate,
                                    epoch)
        torch.save(model.state_dict(), model_path)

    epochs = np.arange(1, num_epochs + 1)


    plot_training_curve(model_path, epochs, train_acc, val_acc)

    # output percentages:
    total = torch.sum(emotion_tracker)
    for e in range(len(emotion_tracker)):
      percentage_tracker[e] = float(emotion_tracker[e]/total)
      print(emotions[e], ":", f"{percentage_tracker[e]:.2%}")

    print("Prediction:", emotions[np.argmax(percentage_tracker)], "with", f"{(percentage_tracker[np.argmax(percentage_tracker)]):.2%}", "confidence.")

In [17]:
# test
import torch
import numpy as np

emotion_tracker = torch.tensor([6., 2., 5., 5., 3., 1., 0.])
emotions = ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']
percentage_tracker = np.zeros(7)

total = torch.sum(emotion_tracker)
for e in range(len(emotion_tracker)):
      percentage_tracker[e] = float(emotion_tracker[e]/total)
      print(emotions[e], ":", f"{percentage_tracker[e]:.2%}")

print("Prediction:", emotions[np.argmax(percentage_tracker)], "with", f"{(percentage_tracker[np.argmax(percentage_tracker)]):.2%}", "confidence.")

angry : 27.27%
disgusted : 9.09%
fearful : 22.73%
happy : 22.73%
neutral : 13.64%
sad : 4.55%
surprised : 0.00%
Prediction: angry with 27.27% confidence.


In [22]:
if use_cuda and torch.cuda.is_available():
  model.cuda()

train(model, train_loader, val_loader, num_epochs = 30)


[1. 0. 0. 0. 0. 0. 0.]
[1. 0. 0. 1. 0. 0. 0.]
[1. 0. 1. 1. 0. 0. 0.]
[1. 0. 1. 1. 1. 0. 0.]
[1. 0. 1. 2. 1. 0. 0.]
[1. 1. 1. 2. 1. 0. 0.]
[1. 1. 1. 3. 1. 0. 0.]
[2. 1. 1. 3. 1. 0. 0.]
[3. 1. 1. 3. 1. 0. 0.]
[3. 1. 2. 3. 1. 0. 0.]
[3. 2. 2. 3. 1. 0. 0.]
[4. 2. 2. 3. 1. 0. 0.]
[4. 2. 2. 4. 1. 0. 0.]
[4. 2. 2. 4. 2. 0. 0.]
[4. 2. 2. 4. 2. 1. 0.]
[5. 2. 2. 4. 2. 1. 0.]
[5. 2. 2. 4. 3. 1. 0.]
[5. 2. 3. 4. 3. 1. 0.]
[5. 2. 3. 5. 3. 1. 0.]
[6. 2. 3. 5. 3. 1. 0.]
[6. 2. 4. 5. 3. 1. 0.]
[6. 2. 5. 5. 3. 1. 0.]


IndexError: Target 6 is out of bounds.