In [None]:
%pip install pandas
%pip install scikit-learn
%pip install plotly
%pip install numpy
%pip install matplotlib
%pip install nbformat

In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import plotly.express as px
from sklearn.model_selection import train_test_split
import random


In [2]:
# Emotion names listed in the order of their integer class codes (0..6);
# enumerate() pairs each name with its code to build the lookup table.
emotion_names_in_code_order = (
    "Angry",
    "Disgust",
    "Fear",
    "Happy",
    "Sad",
    "Surprise",
    "Neutral",
)
emotion_labels = dict(enumerate(emotion_names_in_code_order))

print(emotion_labels)

{0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Sad', 5: 'Surprise', 6: 'Neutral'}


In [3]:
# Read the raw CSV and discard the "Usage" column in one chained expression,
# so the frame is never mutated in place after it has been created.
raw_csv_path = "../../data/raw/fer2013.csv"
fer2013_df = pd.read_csv(raw_csv_path, index_col=False).drop(columns="Usage")

# Preview the first rows.
fer2013_df.head()

Unnamed: 0,emotion,pixels
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1,0,151 150 147 155 148 133 111 140 170 174 182 15...
2,2,231 212 156 164 174 138 161 173 182 200 106 38...
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...


In [4]:
# Target fractions of the full dataset for each split; they must add up to 1.
train_ratio = 0.80
test_ratio = 0.10
validation_ratio = 0.10
assert abs(train_ratio + test_ratio + validation_ratio - 1.0) < 1e-9, (
    "train/test/validation ratios must sum to 1"
)

# Fixed seed so that Restart & Run All reproduces exactly the same split
# (and therefore the same counts and plots further down).
split_seed = 42

# Features are the raw pixel strings; targets are the integer emotion codes.
X = fer2013_df["pixels"].values
y = fer2013_df["emotion"].values

# Step 1: carve off the test set as a fraction of the whole dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_ratio, random_state=split_seed
)

# Step 2: what remains is train + validation, so the validation share has to be
# rescaled relative to that remainder (train_ratio + validation_ratio), not to
# train + test. The two only coincide while test_ratio == validation_ratio.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=validation_ratio / (train_ratio + validation_ratio),
    random_state=split_seed,
)

# Report the resulting split sizes.
print(len(X_train))
print(len(X_test))
print(len(X_valid))

28709
3589
3589


In [5]:
# Draw ten random training samples and decode each pixel string into a
# 48x48 uint8 PIL image.
random.seed(42)
# randrange() excludes its upper bound. The previous randint(0, len(X_train))
# was inclusive on both ends and could return len(X_train), which is one past
# the last valid index and would raise IndexError.
random_numbers = [random.randrange(len(X_train)) for _ in range(10)]

img_list = []
for idx in random_numbers:
    # Each sample is a single string of space-separated pixel intensities.
    image_string = X_train[idx].split(' ')
    # Parse the values as uint8 and fold the flat vector into a 48x48 grid.
    image_data = np.asarray(image_string, dtype=np.uint8).reshape(48, 48)
    img = Image.fromarray(image_data)
    img_list.append(img)


In [6]:
# Count how many training samples carry each distinct emotion code.
label_counts = {}
for label in np.unique(y_train):
    label_counts[label] = np.sum(y_train == label)

# Re-key the counts by human-readable emotion name instead of integer code.
labels = {emotion_labels.get(code): count for code, count in label_counts.items()}

labels

{'Angry': 3977,
 'Disgust': 431,
 'Fear': 4098,
 'Happy': 7187,
 'Sad': 4870,
 'Surprise': 3197,
 'Neutral': 4949}

In [7]:
# One bar per emotion: the name drives both the x position and the bar colour,
# the count drives the bar height.
bar_names = list(labels.keys())
bar_heights = list(labels.values())
fig = px.bar(x=bar_names, y=bar_heights, color=bar_names)

# Titles, axis labels and a fixed canvas size so the figure stands on its own.
layout_options = dict(
    title="Total images for each label in training set",
    xaxis_title="Emotion",
    yaxis_title="Count",
    height=600,
    width=800,
)
fig.update_layout(**layout_options)

# Print each bar's count inside the bar itself.
fig.update_traces(textposition="inside", texttemplate="%{y}")

fig.show()