# Imports

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
import PIL
from PIL import Image
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
from tqdm import tqdm
import cv2

# Architecture

In [None]:
# Deep Emotion Architecture
class DeepEmotion(nn.Module):
    """CNN emotion classifier whose features are modulated by a spatial transformer.

    The layer sizes are hard-wired for single-channel 48x48 inputs: the
    feature extractor ends in a 10x9x9 map (810 values, the input width of
    ``fc1``) and the localization network ends in a 10x8x8 map (640 values,
    the input width of ``local_fc``).

    Args:
        num_classes: Number of output logits produced by ``fc2``.
        regularization_lambda: Weight meant for the L2 penalty returned by
            ``calculate_regularization_loss``. It is stored on the module but
            ``compute_loss`` does not currently apply it.
    """

    def __init__(self, num_classes, regularization_lambda=0.001):
        super().__init__()

        self.regularization_lambda = regularization_lambda

        # Feature extractor: two conv-conv-pool stages.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=3)
        self.conv2 = nn.Conv2d(10, 10, kernel_size=3)
        self.conv3 = nn.Conv2d(10, 10, kernel_size=3)
        self.conv4 = nn.Conv2d(10, 10, kernel_size=3)

        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.maxpool4 = nn.MaxPool2d(2, 2)

        # Classifier head. 810 = 10 channels * 9 * 9.
        self.fc1 = nn.Linear(810, 50)
        self.fc2 = nn.Linear(50, num_classes)

        # Localization network of the spatial transformer.
        self.local_net = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.MaxPool2d(2, 2),
            nn.ReLU(True),

            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, 2),
            nn.ReLU(True),
        )

        # Regressor for the 2x3 affine matrix. 640 = 10 channels * 8 * 8.
        self.local_fc = nn.Sequential(
            nn.Linear(640, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2),
        )

        # Start from the identity transform: zero weights, identity-matrix bias.
        self.local_fc[2].weight.data.zero_()
        self.local_fc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def stn(self, x):
        """Warp ``x`` with an affine transform predicted from ``x`` itself.

        Args:
            x: Input batch; the layer sizes assume shape ``(N, 1, 48, 48)``.

        Returns:
            The resampled input, with the same shape as ``x``.
        """
        xs = self.local_net(x)
        # flatten(1) keeps the batch dimension fixed, so a wrong input size
        # fails in the Linear layer instead of being folded into the batch.
        xs = xs.flatten(1)       # 10 * 8 * 8 = 640 for 48x48 inputs
        theta = self.local_fc(xs).view(-1, 2, 3)

        # align_corners=False is the library default; it is spelled out so the
        # grid generation and the sampling are guaranteed to agree.
        grid = F.affine_grid(theta, x.size(), align_corners=False)
        return F.grid_sample(x, grid, align_corners=False)

    def calculate_regularization_loss(self):
        """Return the sum of the L2 norms of all ``fc1`` and ``fc2`` parameters.

        Both weights and biases are included, and the norms are not squared.
        """
        regularization_loss = 0.0
        for param in (*self.fc1.parameters(), *self.fc2.parameters()):
            regularization_loss += torch.norm(param, p=2)

        return regularization_loss

    def compute_loss(self, outputs, labels):
        """Return the cross-entropy loss between logits ``outputs`` and ``labels``.

        The L2 penalty from ``calculate_regularization_loss`` is intentionally
        not added here, so ``regularization_lambda`` has no effect on training.
        """
        return F.cross_entropy(outputs, labels)

    def forward(self, x):
        """Return class logits (no softmax applied) for a batch of images.

        Args:
            x: Input batch; the layer sizes assume shape ``(N, 1, 48, 48)``.

        Returns:
            Tensor of shape ``(N, num_classes)``.
        """
        # The warped input, shrunk to the 9x9 feature-map size, acts as a
        # single-channel mask that is broadcast over the 10 feature channels.
        warped = self.stn(x)
        localization_grid_resized = F.interpolate(
            warped, size=(9, 9), mode='bilinear', align_corners=False
        )

        x = F.relu(self.conv1(x))
        x = F.relu(self.maxpool2(self.conv2(x)))

        x = F.relu(self.conv3(x))
        x = F.relu(self.maxpool4(self.conv4(x)))

        # F.dropout defaults to training=True; tie it to the module mode so
        # model.eval() really disables dropout at inference time.
        x = F.dropout(x, training=self.training)

        x = x * localization_grid_resized

        x = x.flatten(1)
        # fc1 feeds fc2 directly, with no activation in between; kept as-is so
        # weights saved from this architecture keep producing the same outputs.
        x = self.fc1(x)
        return self.fc2(x)


# Dataset

In [None]:
class FERDataset(Dataset):
    """Dataset over a CSV file whose rows hold a space-separated 48x48 pixel string.

    With ``train=True`` the label is read from column 0 and the pixels from
    column 1, and each item is an ``(image, label)`` pair. With ``train=False``
    the pixels are read from column 0 and each item is the image alone.

    Args:
        csv_data: Anything ``pandas.read_csv`` accepts.
        transform: Callable applied to the PIL image, or a falsy value to
            return the PIL image unchanged.
        train: Selects the labelled (``True``) or unlabelled (``False``) layout.
    """

    def __init__(self, csv_data, transform, train=True):
        self.train = train
        self.transform = transform
        self.data = pd.read_csv(csv_data)

    def __len__(self):
        return self.data.shape[0]

    def _decode_pixels(self, idx, column):
        """Turn the pixel string at (``idx``, ``column``) into a 48x48 PIL image."""
        values = self.data.iloc[idx, column].split()
        return Image.fromarray(np.array(values, dtype=np.uint8).reshape(48, 48))

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        labelled = bool(self.train)
        image = self._decode_pixels(idx, 1 if labelled else 0)
        label = int(self.data.iloc[idx, 0]) if labelled else None

        if self.transform:
            image = self.transform(image)

        return (image, label) if labelled else image


In [None]:
# Preprocessing applied to every dataset image: convert to a tensor, then
# normalize with mean 0.5 and std 0.5.
_preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=0.5, std=0.5),
]
transform = transforms.Compose(_preprocessing_steps)

# DataLoader

In [None]:
def generate_TrainValTest_dataloaders(trpath='', tstpath='', batch_size=32):
    """Build the train, validation and test data loaders.

    The labelled CSV at ``trpath`` is split 80/20 into training and validation
    subsets using a generator seeded with 42. The CSV at ``tstpath`` is loaded
    as an unlabelled test set. Only the training loader shuffles.

    Args:
        trpath: Path of the labelled training CSV.
        tstpath: Path of the unlabelled test CSV.
        batch_size: Batch size shared by all three loaders.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    labelled = FERDataset(csv_data=trpath, transform=transform)

    total = len(labelled)
    n_train = int(0.8 * total)
    n_val = total - n_train

    split_rng = torch.Generator().manual_seed(42)
    train_part, val_part = random_split(labelled, [n_train, n_val], generator=split_rng)

    unlabelled = FERDataset(csv_data=tstpath, transform=transform, train=False)

    return (
        DataLoader(train_part, batch_size=batch_size, shuffle=True),
        DataLoader(val_part, batch_size=batch_size),
        DataLoader(unlabelled, batch_size=batch_size),
    )


In [None]:
# Maps a class index (as used for labels and predictions) to its emotion name.
emotion_dict = dict(enumerate((
    'Angry',
    'Disgust',
    'Fear',
    'Happy',
    'Sad',
    'Surprise',
    'Neutral',
)))

# View Images with Labels

In [None]:
# Build the loaders in this cell: the original read from a `train` variable
# that is never defined, so the cell failed with a NameError.
train_loader, val_loader, test_loader = generate_TrainValTest_dataloaders('data/train.csv', 'data/test.csv')

# Pull one batch from the training loader and display its first image.
train_features, train_labels = next(iter(train_loader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze()  # drop the channel dimension for imshow
label = train_labels[0]
plt.imshow(img, cmap="gray")
plt.show()
print(f"Label: {emotion_dict[label.item()]}")

# Training

In [None]:
def Train(epochs, trainloader, valloader, optimizer, model):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    After the last epoch the model's ``state_dict`` is written to
    ``deep_emotion-<epochs>.pt`` in the current working directory. Per-epoch
    loss and accuracy are printed; nothing is returned.

    Args:
        epochs: Number of passes over ``trainloader``.
        trainloader: Loader yielding ``(data, labels)`` training batches.
        valloader: Loader yielding ``(data, labels)`` validation batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        model: Module exposing ``compute_loss(outputs, labels)``.
    """
    print("===================================Start Training===================================")

    for epoch in range(epochs):
        train_loss = 0.0
        validation_loss = 0.0
        # Plain Python ints, so the statistics below also work for an empty loader.
        train_correct = 0
        val_correct = 0

        model.train()
        for data, labels in tqdm(trainloader, desc=f"Epoch {epoch + 1}/{epochs} (Training)"):
            optimizer.zero_grad()
            outputs = model(data)
            loss = model.compute_loss(outputs, labels)

            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_correct += (outputs.argmax(dim=1) == labels).sum().item()

        model.eval()
        # Validation needs no gradients; skipping autograd saves memory and time.
        with torch.no_grad():
            for data, labels in tqdm(valloader, desc=f"Epoch {epoch + 1}/{epochs} (Validation)"):
                val_outputs = model(data)
                val_loss = model.compute_loss(val_outputs, labels)

                validation_loss += val_loss.item()
                val_correct += (val_outputs.argmax(dim=1) == labels).sum().item()

        # Use the loaders passed in as arguments (not module-level globals);
        # max(..., 1) avoids a division by zero on an empty loader.
        train_loss = train_loss / max(len(trainloader), 1)
        train_acc = train_correct / max(len(trainloader.dataset), 1)
        validation_loss = validation_loss / max(len(valloader), 1)
        val_acc = val_correct / max(len(valloader.dataset), 1)

        print('Epoch: {} \tTraining Loss: {:.8f} \tValidation Loss {:.8f} \tTraining Accuracy {:.3f}% \tValidation Accuracy {:.3f}%'
              .format(epoch + 1, train_loss, validation_loss, train_acc * 100, val_acc * 100))

    torch.save(model.state_dict(), 'deep_emotion-{}.pt'.format(epochs))
    print("===================================Training Finished===================================")

# Settings for this training run.
num_classes = 7
epochs = 50

# Data loaders, model and optimizer.
train_loader, val_loader, test_loader = generate_TrainValTest_dataloaders(
    trpath='data/train.csv',
    tstpath='data/test.csv',
)
model = DeepEmotion(num_classes=num_classes, regularization_lambda=0.001)
optimizer = optim.SGD(params=model.parameters(), lr=0.001)

# Run the training loop; it saves the weights when it finishes.
Train(
    epochs=epochs,
    trainloader=train_loader,
    valloader=val_loader,
    optimizer=optimizer,
    model=model,
)

## Single-image inference (TODO: break this cell down line by line and show each intermediate output)

In [None]:
# Load the trained weights and switch the model to inference mode.
num_classes = 7
model = DeepEmotion(num_classes)
model.load_state_dict(torch.load('checkpoints/deep_emotion-100.pt', map_location=torch.device('cpu')))
model.eval()

frame = cv2.imread('veryangryface.jpeg')
if frame is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("Could not read image 'veryangryface.jpeg'")
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
plt.show()

faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = faceCascade.detectMultiScale(gray, 1.1, 4)

# Draw on a copy: the crops below are views into `frame`, so drawing on
# `frame` itself would paint the rectangle into the face crop.
annotated = frame.copy()
face_roi = None
for x, y, w, h in faces:
    roi_gray = gray[y:y + h, x:x + w]
    roi_color = frame[y:y + h, x:x + w]
    cv2.rectangle(annotated, (x, y), (x+w, y+h), (255, 0, 0), 2)
    # Second detection pass inside the candidate region.
    facesC = faceCascade.detectMultiScale(roi_gray)
    if len(facesC) == 0:
        print("Face not detected!")
    else:
        print("Found Face!")
        for (ex, ey, ew, eh) in facesC:
            face_roi = roi_color[ey:ey + eh, ex:ex + ew]
if face_roi is None:
    # Without this guard the code below fails with a NameError on `face_roi`.
    raise RuntimeError("No face found in 'veryangryface.jpeg'")

# Show each figure before starting the next so neither overwrites the other.
plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
plt.show()
plt.imshow(cv2.cvtColor(face_roi, cv2.COLOR_BGR2RGB))  # OpenCV images are BGR
plt.show()
print(face_roi.shape)

gray = cv2.cvtColor(face_roi, cv2.COLOR_BGR2GRAY)
print(gray.shape)
final_image = cv2.resize(gray, (48, 48))
print(final_image.shape)
final_image = np.expand_dims(final_image, axis=0)  # add the channel dimension
print(final_image.shape)
final_image = np.expand_dims(final_image, axis=0)  # add the batch dimension
print(final_image.shape)

# Match the training preprocessing: scale to [0, 1], then normalize with
# mean 0.5 and std 0.5.
final_image = (final_image / 255.0 - 0.5) / 0.5
torchTensor = torch.from_numpy(final_image)
torchTensor = torchTensor.type(torch.FloatTensor)

with torch.no_grad():
    output = model(torchTensor)
pred = F.softmax(output, dim=1)
print(pred)
index_pred = torch.argmax(pred)
print(emotion_dict[index_pred.item()])


# Live Demo

# LiveCam demo
# Load the Haar Cascade classifier for face detection
path = 'haarcascade_frontalface_default.xml'
faceCascade = cv2.CascadeClassifier(path)
if faceCascade.empty():
    # A missing XML file yields an empty classifier rather than an error;
    # fall back to the copy bundled with OpenCV.
    faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + path)
if faceCascade.empty():
    raise IOError("Cannot load Haar cascade '{}'".format(path))

# Load the trained emotion-recognition model and put it in inference mode
num_classes = 7
model = DeepEmotion(num_classes)
model.load_state_dict(torch.load('checkpoints/deep_emotion-100.pt', map_location=torch.device('cpu')))
model.eval()

# Class index -> label text; built once instead of once per detected face
emotion_label = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]

# Initialize the webcam
cap = cv2.VideoCapture(0)  # Use 0 for the default webcam, change to 1 if you have an additional camera

if not cap.isOpened():
    raise IOError("Cannot open webcam")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera unplugged or stream ended).
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Face detection
        faces = faceCascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        for x, y, w, h in faces:
            # Emotion recognition on the grayscale face crop
            roi_gray = gray[y:y + h, x:x + w]
            final_image = cv2.resize(roi_gray, (48, 48))
            final_image = np.expand_dims(final_image, axis=0)  # channel dimension
            final_image = np.expand_dims(final_image, axis=0)  # batch dimension
            # Match the training preprocessing: scale to [0, 1], then
            # normalize with mean 0.5 and std 0.5.
            final_image = (final_image / 255.0 - 0.5) / 0.5

            torchTensor = torch.from_numpy(final_image)
            torchTensor = torchTensor.type(torch.FloatTensor)

            with torch.no_grad():
                output = model(torchTensor)
            pred = F.softmax(output, dim=1)

            index_pred = torch.argmax(pred)

            # Draw bounding box around the detected face
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

            # Draw emotion label on the frame
            emotion = emotion_label[index_pred.item()]
            cv2.putText(frame, emotion, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

        # Display the frame
        cv2.imshow('Emotion Detection', frame)

        # Quit when the user presses 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()
