In [4]:
import shutil
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

from emotions_utils import *

In [5]:
CORE_PATH = "../"
ORIG_PATH = f"{CORE_PATH}/emotions"
SAVE_LOGS_PATH = f"{CORE_PATH}/missclassified"

In [6]:
data = ImageFolder(ORIG_PATH)

# fervi

Source: https://github.com/GeorgiosIoannouCoder/fervi

In [30]:
!git clone https://github.com/GeorgiosIoannouCoder/fervi.git

Cloning into 'fervi'...


In [36]:
import tensorflow as tf

from tensorflow.keras import models                    
from tensorflow.keras import layers                      
from tensorflow.keras import Input                       
from tensorflow.keras.callbacks import EarlyStopping   
from tensorflow.keras.callbacks import ReduceLROnPlateau 
from tensorflow.keras.optimizers import Adam

In [37]:
def create_model():
    model = models.Sequential()

    model.add(Input((48, 48, 1)))

    model.add(layers.Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding="valid"))
    model.add(layers.BatchNormalization(axis=3))
    model.add(layers.Activation("relu"))

    model.add(layers.Conv2D(64, (3, 3), strides=(1, 1), padding="same"))
    model.add(layers.BatchNormalization(axis=3))
    model.add(layers.Activation("relu"))
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Conv2D(64, (3, 3), strides=(1, 1), padding="valid"))
    model.add(layers.BatchNormalization(axis=3))
    model.add(layers.Activation("relu"))

    model.add(layers.Conv2D(128, (3, 3), strides=(1, 1), padding="same"))
    model.add(layers.BatchNormalization(axis=3))
    model.add(layers.Activation("relu"))
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Conv2D(128, (3, 3), strides=(1, 1), padding="valid"))
    model.add(layers.BatchNormalization(axis=3))
    model.add(layers.Activation("relu"))
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Reshape((-1, 128)))

    model.add(layers.LSTM(128))

    model.add(layers.Reshape((-1, 64)))

    model.add(layers.LSTM(64))

    model.add(layers.Dense(200, activation="relu"))

    model.add(layers.Dropout(0.6))

    model.add(layers.Dense(7, activation="softmax"))

    adam_optimizer = Adam(learning_rate=0.0002)

    model.compile(optimizer = adam_optimizer, loss = "categorical_crossentropy", metrics=["accuracy"])

    return model

In [40]:
import pickle

with open("test_indices.pickle", "rb") as file:
    test_indices = pickle.load(file)
print(len(test_indices))

test_data = torch.utils.data.Subset(data, test_indices)
test_dataset = EmotionsDataset(
    test_data,
    transforms=transforms.Compose([transforms.Resize(48), transforms.Grayscale(), transforms.ToTensor()])
)

model = create_model()
model.load_weights("./fervi/model_weights.h5")
model.summary()

123


In [52]:
actual = []
pred = []
label_to_label = {
    0: 0,
    1: 2,
    2: 3,
    3: 4,
    4: 5,
    5: 6,
    6: 1,
}

for i in range(len(test_dataset)):
    img, label = test_dataset[i][0], test_dataset[i][1]
    
    numpy_image = img.permute(1, 2, 0).numpy()[None, :]
    prediction = model.predict(numpy_image)
    predicted = prediction.argmax()
    
    pred.append(label_to_label[predicted])
    actual.append(label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20

In [54]:
len(pred), len(actual)

(123, 123)

In [56]:
get_metrics_report(actual, pred)

{'Accuracy': 0.43902439024390244,
 'Precision_macro': 0.4800011446019849,
 'Precision_micro': 0.43902439024390244,
 'Recall_macro': 0.4129601976970398,
 'Recall_micro': 0.43902439024390244,
 'ROC_AUC': {0: 0.6995412844036697,
  1: 0.6573886639676112,
  2: 0.5455752212389381,
  3: 0.5972027972027972,
  4: 0.8279126213592233,
  5: 0.6140816326530613,
  6: 0.671917191719172}}

In [59]:
class_to_idx = data.class_to_idx
idx_to_class = {v: k for k, v in class_to_idx.items()}

class_to_idx

{'anger': 0,
 'contempt': 1,
 'disgust': 2,
 'fear': 3,
 'joy': 4,
 'sadness': 5,
 'wonder': 6}

In [60]:
get_classification_report(actual, pred, idx_to_class=idx_to_class)

anger emotion
Overall images: 14
Correctly predicted 7/14

contempt emotion
Overall images: 19
Correctly predicted 10/19

disgust emotion
Overall images: 10
Correctly predicted 1/10

fear emotion
Overall images: 13
Correctly predicted 3/13

joy emotion
Overall images: 20
Correctly predicted 17/20

sadness emotion
Overall images: 25
Correctly predicted 8/25

wonder emotion
Overall images: 22
Correctly predicted 8/22



In [None]:
inf_images = [data.imgs[x][0] for x in test_indices]
inf_labels = [data.imgs[x][1] for x in test_indices]

get_mistaken_images_report(inf_images, inf_labels, pred, "fervi", idx_to_class, SAVE_LOGS_PATH)

# dima806/facial_emotions_image_detection

Source: https://huggingface.co/dima806/facial_emotions_image_detection

In [21]:
import pickle

with open("test_indices.pickle", "rb") as file:
    test_indices = pickle.load(file)
print(len(test_indices))

test_data = torch.utils.data.Subset(data, test_indices)
test_dataset = EmotionsDataset(
    test_data,
    transforms=inf_transforms
)

123


In [22]:
class_to_idx = data.class_to_idx
idx_to_class = {v: k for k, v in class_to_idx.items()}

class_to_idx

{'anger': 0,
 'contempt': 1,
 'disgust': 2,
 'fear': 3,
 'joy': 4,
 'sadness': 5,
 'wonder': 6}

In [20]:
# Load model directly
from transformers import AutoModelForImageClassification

model = AutoModelForImageClassification.from_pretrained("dima806/facial_emotions_image_detection")
model

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=7

In [32]:
label_to_id = {
    0: 5,
    1: 2,
    2: 0,
    3: 1,
    4: 3,
    5: 6,
    6: 4,
}

actual = []
pred = []

for i in range(len(test_dataset)):
    img, label = test_dataset[i][0], test_dataset[i][1]
    predicted = model(img[None, :]).logits.argmax().numpy().item()
    
    actual.append(label)
    pred.append(label_to_id[predicted])

In [34]:
len(pred)

123

In [35]:
get_metrics_report(actual, pred)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'Accuracy': 0.42276422764227645,
 'Precision_macro': 0.3839304862322765,
 'Precision_micro': 0.42276422764227645,
 'Recall_macro': 0.395835442752736,
 'Recall_micro': 0.42276422764227645,
 'ROC_AUC': {0: 0.5429226736566186,
  1: 0.7578441295546559,
  2: 0.5,
  3: 0.6534965034965036,
  4: 0.8611650485436894,
  5: 0.6046938775510204,
  6: 0.6293879387938793}}

In [36]:
get_classification_report(actual, pred, idx_to_class=idx_to_class)

anger emotion
Overall images: 14
Correctly predicted 3/14

contempt emotion
Overall images: 19
Correctly predicted 14/19

disgust emotion
Overall images: 10
Correctly predicted 0/10

fear emotion
Overall images: 13
Correctly predicted 6/13

joy emotion
Overall images: 20
Correctly predicted 16/20

sadness emotion
Overall images: 25
Correctly predicted 6/25

wonder emotion
Overall images: 22
Correctly predicted 7/22



In [None]:
inf_images = [data.imgs[x][0] for x in test_indices]
inf_labels = [data.imgs[x][1] for x in test_indices]

get_mistaken_images_report(inf_images, inf_labels, pred, "dima806_facial_emotions_image_detection", idx_to_class, SAVE_LOGS_PATH)

# DDAMFN

In [7]:
!git clone https://github.com/simon20010923/DDAMFN.git

Cloning into 'DDAMFN'...


In [15]:
import os
os.chdir("DDAMFN/")

In [19]:
!python affectnet_train.py --num_class 7 --epochs 10 --batch_size 16 --aff_path D:\diploma\AffectNetDataset

Whole train set size: 23916
Validation set size: 496
[Epoch 1] Training accuracy: 0.3710. Loss: 3.799. LR 0.000100
[Epoch 1] Validation accuracy:0.6230. Loss:1.331
best_acc:0.623
[Epoch 2] Training accuracy: 0.5387. Loss: 1.864. LR 0.000060
[Epoch 2] Validation accuracy:0.6290. Loss:1.258
best_acc:0.629
[Epoch 3] Training accuracy: 0.5882. Loss: 1.573. LR 0.000036
[Epoch 3] Validation accuracy:0.6694. Loss:1.150
best_acc:0.6694
Model saved.
[Epoch 4] Training accuracy: 0.6037. Loss: 1.447. LR 0.000022
[Epoch 4] Validation accuracy:0.6593. Loss:1.134
best_acc:0.6694
[Epoch 5] Training accuracy: 0.6238. Loss: 1.353. LR 0.000013
[Epoch 5] Validation accuracy:0.6835. Loss:1.107
best_acc:0.6835
Model saved.
[Epoch 6] Training accuracy: 0.6269. Loss: 1.312. LR 0.000008
[Epoch 6] Validation accuracy:0.6673. Loss:1.134
best_acc:0.6835
Model saved.
[Epoch 7] Training accuracy: 0.6325. Loss: 1.282. LR 0.000005
[Epoch 7] Validation accuracy:0.6956. Loss:1.101
best_acc:0.6956
Model saved.
[Epoch 8


  0%|          | 0/10 [00:00<?, ?it/s]
                                      

  0%|          | 0/10 [02:38<?, ?it/s]
                                      

  0%|          | 0/10 [03:03<?, ?it/s]
                                      

  0%|          | 0/10 [03:03<?, ?it/s]
 10%|█         | 1/10 [03:03<27:33, 183.74s/it]
                                               

 10%|█         | 1/10 [05:40<27:33, 183.74s/it]
                                               

 10%|█         | 1/10 [06:05<27:33, 183.74s/it]
                                               

 10%|█         | 1/10 [06:05<27:33, 183.74s/it]
 20%|██        | 2/10 [06:05<24:18, 182.35s/it]
                                               

 20%|██        | 2/10 [08:43<24:18, 182.35s/it]
                                               

 20%|██        | 2/10 [09:08<24:18, 182.35s/it]
                                               

 20%|██        | 2/10 [09:08<24:18, 182.35s/it]
                                             

In [21]:
from DDAMFN.networks.DDAM import DDAMNet

best = DDAMNet()
checkpoint = torch.load(r"D:\diploma\notebooks\DDAMFN\checkpoints\affecnet7_epoch7_acc0.6956.pth")
best.load_state_dict(checkpoint['model_state_dict'])
best.eval()
best = best.to("cuda")

In [23]:
os.chdir("..")

In [24]:
import pickle

with open("test_indices.pickle", "rb") as file:
    test_indices = pickle.load(file)
print(len(test_indices))

test_data = torch.utils.data.Subset(data, test_indices)
orig_dataset = EmotionsDataset(
    test_data,
    transforms=transforms.Compose([
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])
)

123


In [27]:
actual = []
pred = []

for i in range(len(orig_dataset)):
    img, label = orig_dataset[i][0], orig_dataset[i][1]
    img = img.to("cuda")
    
    numpy_image = img[None, :]
    prediction, _, _ = best(numpy_image)
    predicted = prediction.argmax()
    
    pred.append(predicted.cpu().item())
    actual.append(label)

In [28]:
get_metrics_report(actual, pred)

{'Accuracy': 0.7398373983739838,
 'Precision_macro': 0.7258574236957694,
 'Precision_micro': 0.7398373983739838,
 'Recall_macro': 0.7392477447364666,
 'Recall_micro': 0.7398373983739838,
 'ROC_AUC': {0: 0.8030799475753604,
  1: 0.8491902834008097,
  2: 0.8734513274336284,
  3: 0.8143356643356644,
  4: 0.9055825242718447,
  5: 0.829795918367347,
  6: 0.8616111611161116}}