In [None]:
import os
import torch
import cv2 as cv
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
from sklearn.metrics import classification_report

import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader

In [None]:
# Class index -> emotion name, and the inverse lookup used when labelling folders.
label_class = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'neutral', 5: 'sad', 6: 'surprise'}
class_label = {v: k for k, v in label_class.items()}
# Run on the GPU when one is visible to torch, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
RESIZE = 48
BATCH_SIZE = 128
# Raw string: the backslashes are literal path separators, not escape sequences
# (the non-raw form triggers "invalid escape sequence" warnings for \A, \S, \C, \T, \m).
PATH = r'D:\AI Courses\Semester 3\Computer Vision\Term Project\model'

In [None]:
# Per-channel normalisation shared by every pipeline below.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])


def _build_pipelines(side):
    """Return the 'train' (augmented) and 'other' (plain) transforms for side x side images."""
    target_size = (side, side)

    augmented = transforms.Compose([
        transforms.Resize(target_size),
        transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(20),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.4),
        transforms.ToTensor(),
        normalize,
    ])

    plain = transforms.Compose([
        transforms.Resize(target_size),
        transforms.ToTensor(),
        normalize,
    ])

    return {'train': augmented, 'other': plain}


# 'Model' feeds the custom CNN at RESIZE x RESIZE; 'Others' uses 224 x 224 inputs.
data_transforms = {
    'Model': _build_pipelines(RESIZE),
    'Others': _build_pipelines(224),
}

In [None]:
train_base_path = 'images/train/'
val_base_path = 'images/validation/'


def _collect_samples(base_path):
    """Build a DataFrame with one (img_id, label) row per file under base_path/<class name>/.

    `img_id` is the file path and `label` is the integer class index from `class_label`.
    """
    records = []
    for class_name, class_idx in class_label.items():
        class_dir = base_path + class_name + '/'
        # os.listdir returns entries in arbitrary order; sort so the row order is reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            file_path = class_dir + file_name
            # Skip sub-directories: every row is later opened as an image file.
            if os.path.isfile(file_path):
                records.append({'img_id': file_path, 'label': class_idx})
    return pd.DataFrame(records, columns=['img_id', 'label'])


tr_df = _collect_samples(train_base_path)
val_df = _collect_samples(val_base_path)

In [None]:
# Preview the first five training rows (path + integer label).
tr_df.head(n=5)

In [None]:
# Preview the first five validation rows (path + integer label).
val_df.head(n=5)

In [None]:
# Count training samples per class index. value_counts() omits classes with no samples, so
# reindex over every known class (filling 0) to keep the heights aligned with the 7 labels.
class_counts = tr_df['label'].value_counts().reindex(list(label_class), fill_value=0)

plt.bar(list(label_class.values()), class_counts.tolist())
plt.xlabel("Emotion Classes")
plt.ylabel("No. of Samples per Emotion Class")
plt.title("Emotion Classes and Their Number of Samples")

In [None]:
class ImageDataset(Dataset):
    """Dataset over a DataFrame whose first column is an image path and second column a label."""

    def __init__(self, df, transform=None, shuffle_data=True):
        # When requested, keep a row-shuffled copy with a fresh 0..n-1 index instead of `df`.
        self.df = df.sample(frac=1).reset_index(drop=True) if shuffle_data else df
        self.transform = transform

    def __len__(self) -> int:
        return self.df.shape[0]

    def __getitem__(self, idx) -> tuple:
        # Columns are addressed by position: 0 is the image path, 1 is the label.
        target = self.df.iloc[idx, 1]
        picture = Image.open(self.df.iloc[idx, 0]).convert("RGB")
        picture = self.transform(picture) if self.transform else picture
        return picture, target

In [None]:
def _make_loader(dataset):
    """Wrap a dataset in a shuffling DataLoader that yields BATCH_SIZE samples per batch."""
    return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)


# One dataset per (pipeline, split); all of them wrap the same two DataFrames.
tr_model_dt = ImageDataset(tr_df, transform=data_transforms['Model']['train'])
val_model_dt = ImageDataset(val_df, transform=data_transforms['Model']['other'])
tr_other_dt = ImageDataset(tr_df, transform=data_transforms['Others']['train'])
val_other_dt = ImageDataset(val_df, transform=data_transforms['Others']['other'])

tr_model_loader = _make_loader(tr_model_dt)
val_model_loader = _make_loader(val_model_dt)
tr_other_loader = _make_loader(tr_other_dt)
val_other_loader = _make_loader(val_other_dt)

# Lookup used by the training / evaluation cells: dataloaders[pipeline][split].
dataloaders = dict(
    Model=dict(train=tr_model_loader, validation=val_model_loader),
    Others=dict(train=tr_other_loader, validation=val_other_loader),
)

In [None]:
num_samples = 5
random_indices = np.random.randint(0, len(tr_model_dt), num_samples)

# The dataset returns normalised float tensors. Casting those straight to uint8 produces
# near-black / wrapped-around pixels, so undo the normalisation before displaying.
channel_mean = torch.tensor(normalize.mean).view(1, 1, 3)
channel_std = torch.tensor(normalize.std).view(1, 1, 3)

# Plot the images
fig, axes = plt.subplots(1, num_samples, figsize=(15, 3))

for i, idx in enumerate(random_indices):
    image, label = tr_model_dt[idx]
    # (C, H, W) -> (H, W, C), then x * std + mean brings values back to the [0, 1] range.
    image = image.permute(1, 2, 0) * channel_std + channel_mean
    axes[i].imshow(image.clamp(0, 1).numpy())
    axes[i].set_title(f"Emotion: {label_class[label]}")
    axes[i].axis('off')

plt.show()

In [None]:
# Remove the module-level aliases; the loaders themselves remain referenced by `dataloaders`.
del tr_model_dt, val_model_dt, tr_other_dt, val_other_dt
del tr_model_loader, val_model_loader, tr_other_loader, val_other_loader

In [None]:
class Model(nn.Module):
    """Small CNN classifier: four conv blocks followed by a three-layer fully connected head.

    Args:
        num_features: number of channels of the input images.
        num_classes: number of output logits (defaults to 7, the size of the original head).

    The head expects a 256 x 3 x 3 feature map. Each of the four 2x2 max-pools halves the
    spatial size, so the input must be 48 x 48 (48 -> 24 -> 12 -> 6 -> 3).
    """

    def __init__(self, num_features, num_classes=7):
        super().__init__()

        self.layer1 = nn.Sequential(
            *self._conv_block(num_features, 128),
            *self._conv_block(128, 128),
        )

        self.layer2 = nn.Sequential(
            *self._conv_block(128, 256),
            *self._conv_block(256, 256),
        )

        # nn.Dropout (element-wise) is used here instead of nn.Dropout1d: Dropout1d reads a
        # 2-D (N, C) input as an unbatched (C, L) signal and would zero out whole samples.
        # NOTE(review): there is no non-linearity between the Linear layers; left unchanged
        # so the layer indices (and therefore saved weights) stay compatible.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 3 * 3, 256, True),
            nn.BatchNorm1d(256),
            nn.Dropout(0.1),
            nn.Linear(256, 128, True),
            nn.BatchNorm1d(128),
            nn.Dropout(0.1),
            nn.Linear(128, num_classes, True),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return the layers of one conv block: 3x3 conv, ReLU, batch norm, 2x2 pool, dropout."""
        return [
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
            nn.MaxPool2d(2, 2),
            nn.Dropout2d(0.1),
        ]

    def forward(self, x):
        """Map a batch of images to raw class logits of shape (batch, num_classes)."""
        x = self.layer1(x)
        x = self.layer2(x)
        return self.fc(x)

In [None]:
def train_model(data, model, criterion, optimizer, num_epochs=3, return_loss_acc=True):
    """Train `model` and validate it after every epoch.

    Args:
        data: mapping with 'train' and 'validation' entries, each an iterable of
            (images, labels) batches.
        model: network to optimise; it must already live on `device`.
        criterion: loss called as criterion(outputs, labels).
        optimizer: optimiser stepped once per training batch.
        num_epochs: number of passes over both phases.
        return_loss_acc: when True, also return the per-epoch history.

    Returns:
        `model`, or `(model, (loss_history, acc_history))` when `return_loss_acc` is True.
        Both histories are dicts with 'train' and 'validation' lists. Accuracy is the
        fraction of correctly classified samples (0..1).
    """
    tr_val_history = {'train': [], 'validation': []}
    tr_val_acc_history = {'train': [], 'validation': []}

    for epoch in range(num_epochs):
        print('-' * 50)
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        for phase in ['train', 'validation']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            with tqdm(data[phase], unit='batch', position=0, leave=True) as pbar:
                pbar.set_description(f"Epoch {epoch+1}")
                for img, lbl in pbar:
                    img = img.to(device)
                    lbl = lbl.to(device)

                    # Only build the autograd graph while training; validation needs no
                    # gradients and would otherwise waste memory.
                    with torch.set_grad_enabled(is_train):
                        outputs = model(img)
                        loss = criterion(outputs, lbl)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    _, preds = torch.max(outputs, 1)
                    batch_size = lbl.size(0)
                    batch_corrects = torch.sum(preds == lbl).item()
                    batch_loss = loss.item()

                    # Weight by the real batch size so a smaller last batch is not
                    # over-counted (assumes the criterion returns the batch mean).
                    running_loss += batch_loss * batch_size
                    running_corrects += batch_corrects
                    samples_seen += batch_size
                    pbar.set_postfix(loss=batch_loss, accuracy=batch_corrects / max(batch_size, 1))

            # Normalise by the number of samples, not batches, so accuracy stays in [0, 1].
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            tr_val_history[phase].append(epoch_loss)
            tr_val_acc_history[phase].append(epoch_acc)

            print('{} loss: {:.4f}, acc: {:.4f}'.format(phase,
                                                        epoch_loss,
                                                        epoch_acc))

    if return_loss_acc:
        return model, (tr_val_history, tr_val_acc_history)
    return model

In [None]:
EPOCH = 30

# Three input channels: the dataset converts every image to RGB.
model = Model(3).to(device)
criterion = nn.CrossEntropyLoss()
opt = optim.Adam(model.parameters(), lr=1e-3)

model_trained, history = train_model(
    dataloaders['Model'],
    model,
    criterion,
    opt,
    num_epochs=EPOCH,
)

In [None]:
def plot_loss_acc(history):
    """Plot train/validation loss (left) and accuracy (right) per epoch.

    Args:
        history: pair `(loss_history, acc_history)`; each element is a dict with
            'train' and 'validation' sequences holding one value per epoch.
    """
    loss_history, acc_history = history[0], history[1]
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(16, 8))

    loss_ax.plot(loss_history['train'], label='Train Loss')
    loss_ax.plot(loss_history['validation'], label='Validation Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()

    acc_ax.plot(acc_history['train'], label='Train Accuracy')
    acc_ax.plot(acc_history['validation'], label='Validation Accuracy')
    acc_ax.set_xlabel('Epoch')
    # The plotted values are never scaled by 100, so the axis must not claim percent.
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend()

In [None]:
plot_loss_acc(history)

In [None]:
# torch.save does not create missing folders, so make sure the target directory exists.
os.makedirs('./model', exist_ok=True)
# Serialises the whole module object (not only its state_dict).
torch.save(model_trained, './model/model.pth')

In [None]:
def renderer(model, data):
    """Run live emotion recognition on webcam 0 until 'q' is pressed.

    Args:
        model: classifier returning one logit per emotion class for a batch of images.
        data: mapping whose 'other' entry is the preprocessing transform applied to each
            detected face (a PIL image in, a tensor out).

    Raises:
        FileNotFoundError: if the Haar cascade file cannot be loaded.
    """
    face_classifier = cv.CascadeClassifier('haarcascade_frontalface_default.xml')
    if face_classifier.empty():
        raise FileNotFoundError("could not load 'haarcascade_frontalface_default.xml'")

    model.eval()
    model.to(device)

    cap = cv.VideoCapture(0)
    try:
        while True:
            grabbed, frame = cap.read()
            if not grabbed:
                # Camera unavailable or stream ended: `frame` is not usable.
                break

            faces = face_classifier.detectMultiScale(frame)
            if len(faces) == 0:
                cv.putText(frame, 'No Faces', (30, 80), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            for (x, y, w, h) in faces:
                # Crop before drawing so the rectangle does not end up inside the crop.
                roi = cv.resize(frame[y:y + h, x:x + w], (48, 48), interpolation=cv.INTER_AREA)
                cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 255), 2)

                if np.sum([roi]) == 0:
                    # Completely black crop: nothing to classify.
                    continue

                # OpenCV frames are BGR, while the training images were loaded as RGB.
                roi = cv.cvtColor(roi, cv.COLOR_BGR2RGB)
                batch = data['other'](Image.fromarray(roi)).unsqueeze(0).to(device)
                with torch.no_grad():
                    prediction = model(batch)
                label = label_class[torch.max(prediction, 1).indices.item()]
                cv.putText(frame, label, (x, y), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            cv.imshow('Emotion Detector',frame)
            if cv.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even if inference raised.
        cap.release()
        cv.destroyAllWindows()

In [None]:
PATH = './model/model.pth'
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# weights_only=False is required because the file holds a fully pickled module, not a
# state_dict; only do this for files written by this notebook (pickle can run arbitrary code).
model = torch.load(PATH, map_location=device, weights_only=False)
renderer(model, data_transforms['Model'])

In [None]:
resnet = models.resnet18(weights='DEFAULT')

# Freeze the pretrained backbone; only the new head below is trained.
for param in resnet.parameters():
    param.requires_grad = False

# The head outputs raw logits. nn.CrossEntropyLoss applies log-softmax itself, so a trailing
# nn.Softmax would apply softmax twice and flatten the gradients.
resnet.fc = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(resnet.fc.in_features, 7),
)
resnet = resnet.to(device)


EPOCH = 30
criterion = nn.CrossEntropyLoss()
# Hand the optimiser only the parameters that can actually receive gradients.
opt = optim.Adam([p for p in resnet.parameters() if p.requires_grad], lr = 0.001)

resnet_trained, history = train_model(dataloaders['Others'], resnet, criterion, opt, EPOCH)

In [None]:
# torch.save does not create missing folders, so make sure the target directory exists.
os.makedirs('./model', exist_ok=True)
# Serialises the whole module object (not only its state_dict).
torch.save(resnet_trained, './model/resnet.pth')
plot_loss_acc(history)

In [None]:
PATH = './model/resnet.pth'
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# weights_only=False is required because the file holds a fully pickled module, not a
# state_dict; only do this for files written by this notebook (pickle can run arbitrary code).
resnet = torch.load(PATH, map_location=device, weights_only=False)
renderer(resnet, data_transforms['Others'])

In [None]:
EPOCH = 10

model = models.vgg11_bn(weights='DEFAULT')

# Freeze every pretrained parameter; the classifier created below is trainable again.
for params in model.parameters():
    params.requires_grad_(False)

# Replacement head: 25088 -> 4096 -> 7 with ReLU and dropout in between.
model.classifier = nn.Sequential(
    nn.Linear(25088, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 7),
)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
opt = optim.Adam(model.parameters(), lr=1e-3)

model_trained, history = train_model(
    dataloaders['Others'],
    model,
    criterion,
    opt,
    num_epochs=EPOCH,
)

In [None]:
# torch.save does not create missing folders, so make sure the target directory exists.
os.makedirs('./model', exist_ok=True)
# Serialises the whole module object (not only its state_dict).
torch.save(model_trained, './model/vgg.pth')
plot_loss_acc(history)

In [None]:
def evaluate(model, data):
    """Print a classification report for `model` over the batches in `data`.

    Args:
        model: classifier returning one logit per class for a batch of images.
        data: iterable of (images, labels) batches.
    """
    y = []
    y_pred = []
    model.eval()

    # Inference only: disabling autograd avoids building graphs and keeps memory flat.
    with torch.no_grad(), tqdm(data, unit='batch', position=0, leave=True) as pbar:
        pbar.set_description("Evaluating")
        for img, lbl in pbar:
            outputs = model(img.to(device))
            _, preds = torch.max(outputs, 1)
            # extend() appends in place; `y = y + [...]` re-copied the whole list every batch.
            y.extend(lbl.tolist())
            y_pred.extend(preds.tolist())

    print(classification_report(y, y_pred))

In [None]:
PATH = './model/vgg.pth'
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# weights_only=False is required because the file holds a fully pickled module, not a
# state_dict; only do this for files written by this notebook (pickle can run arbitrary code).
vgg = torch.load(PATH, map_location=device, weights_only=False)
evaluate(vgg, dataloaders['Others']['validation'])

In [None]:
PATH = './model/resnet.pth'
# Bind the checkpoint to `resnet`: storing it in `vgg` silently replaced the VGG model
# loaded in the previous cell.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# weights_only=False is required because the file holds a fully pickled module, not a
# state_dict; only do this for files written by this notebook (pickle can run arbitrary code).
resnet = torch.load(PATH, map_location=device, weights_only=False)
evaluate(resnet, dataloaders['Others']['validation'])

In [None]:
PATH = './model/model.pth'
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# weights_only=False is required because the file holds a fully pickled module, not a
# state_dict; only do this for files written by this notebook (pickle can run arbitrary code).
cnn = torch.load(PATH, map_location=device, weights_only=False)
evaluate(cnn, dataloaders['Model']['validation'])

In [None]:
PATH = './model/model.pth'
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# weights_only=False is required because the file holds a fully pickled module, not a
# state_dict; only do this for files written by this notebook (pickle can run arbitrary code).
model = torch.load(PATH, map_location=device, weights_only=False)
# Render with the model that was just loaded. The cell previously passed `vgg`, a different
# network, while using the 48x48 'Model' preprocessing.
renderer(model, data_transforms['Model'])