## !! The final result should be only a runnable .py file !!

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from matplotlib import pyplot as plt
import pickle
import numpy as np

# 0. Data Pre-processing

In [3]:
data_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # turn the graph to single color channel
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485], std=[0.229])  # normalize
])

train_dataset = datasets.ImageFolder(
    '../dataset/train', transform=data_transforms)
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True, num_workers=8)

test_dataset = datasets.ImageFolder(
    '../dataset/test', transform=data_transforms)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False, num_workers=8)

print(train_dataset.classes)

['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


# 1. Model

In [4]:
# reference: AlexNet
class EmotionCNN(nn.Module):
    def __init__(self, num_classes):
        super(EmotionCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(inplace=True), # inplace: override
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
    

# initialize model, loss-function and optimizer
model = EmotionCNN(num_classes=7)  # FER-2013 has 7 emotion class
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00025)

In [5]:
# select device
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("using cuda")
#elif torch.backends.mps.is_available():
#    device = torch.device("mps")
#    print("using mac mps")
else:
    device = torch.device("cpu")
    print("using cpu")

using cuda


In [6]:
# training model
num_epochs = 500
model.to(device)

loss_history = []

process = tqdm(range(num_epochs), bar_format='{l_bar}{bar:25}{r_bar}{bar:-25b}', colour='green', ascii='░▒█', unit='epoch')
for epoch in process:
    model.train()
    running_loss = 0.0
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    avg_loss = f"{(running_loss / len(train_loader)):5f}"
    loss_history.append(avg_loss)
    process.set_description(f"loss[-5:] = {loss_history[-5:]}")
    # print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")
    

loss[-5:] = ['0.019451', '0.020594', '0.015557', '0.030212', '0.022792']: 100%|[32m█████████████████████████[0m| 500/500 [2:31:19<00:00, 18.16s/it][32m[0m  


In [10]:
# save the pth file
torch.save(model.state_dict(), '8FC 00025LR emotion_cnn.pth')

with open('train_loss_history_8FC.pkl', 'wb') as f:
    pickle.dump(loss_history, f)

In [15]:
print(loss_history)

['1.834306', '1.810011', '1.794861', '1.719273', '1.654261', '1.605301', '1.517140', '1.442505', '1.410916', '1.370598', '1.309154', '1.282325', '1.263506', '1.237558', '1.196351', '1.145569', '1.109914', '1.094332', '1.062397', '1.013903', '1.032368', '0.952754', '0.908776', '0.897559', '0.846438', '0.788651', '0.761550', '0.773488', '0.803451', '0.731214', '0.736486', '0.610556', '0.587876', '0.611627', '0.524746', '0.560807', '0.492130', '0.461310', '0.408537', '0.389605', '0.352084', '0.385674', '0.302500', '0.263386', '0.250489', '0.263304', '0.225821', '0.304147', '0.239438', '0.191229', '0.192291', '0.179039', '0.162417', '0.156715', '0.203015', '0.141666', '0.168112', '0.169700', '0.137356', '0.121497', '0.188677', '0.140741', '0.107780', '0.133143', '0.156949', '0.159587', '0.123442', '0.096552', '0.087671', '0.073089', '0.061042', '0.061153', '0.070395', '0.064030', '0.128174', '0.092593', '0.102060', '0.147427', '0.082250', '0.077471', '0.060944', '0.054862', '0.048410', '0.

In [16]:
# evaluate model

model = EmotionCNN(num_classes=7)
model.load_state_dict(torch.load('emotion_cnn.pth'))
model.to(device)
model.eval()

correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1) # predicted is the emotion index
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy}%")


Test Accuracy: 62.98411813875732%
