In [None]:
%pip install pandas
%pip install opencv-python

In [1]:
import pandas as pd
import cv2
import os
import numpy as np
import plotly.express as px

In [2]:
# Load the raw FER2013 table, add an empty "image_name" column, rename
# "emotion" to "expression_label" and discard the "Usage" column.
fer2013_df = (
    pd.read_csv("../../data/raw/fer2013/fer2013.csv", index_col=False)
    .assign(image_name="")
    .rename(columns={"emotion": "expression_label"})
    .drop(columns="Usage")
)

fer2013_df.head()

Unnamed: 0,expression_label,pixels,image_name
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,


In [3]:
# Integer expression code -> expression name; a name's position in the tuple
# is its code (0 = "Angry" ... 6 = "Neutral").
expression_labels = dict(
    enumerate(
        ("Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral")
    )
)

In [4]:
# Lower-case expression name -> coarse sentiment class. Built once at cell
# execution time instead of on every call.
_SENTIMENT_BY_EXPRESSION = {
    "happy": "positive",
    "neutral": "neutral",
    "surprise": "neutral",
    "angry": "negative",
    "disgust": "negative",
    "fear": "negative",
    "sad": "negative",
}


def get_sentiment(expression):
    """Map a facial-expression name to a sentiment class.

    Args:
        expression: Expression name such as "Happy" or "sad"; matching is
            case-insensitive.

    Returns:
        "positive", "neutral" or "negative" for a known expression, and
        "unknown" for anything else, including non-string values such as
        None or NaN.
    """
    # A missing label is not a string (None, or a float NaN when read from a
    # CSV); calling .lower() on it would raise AttributeError, so route it to
    # the same "unknown" fallback as an unrecognised name.
    if not isinstance(expression, str):
        return "unknown"
    return _SENTIMENT_BY_EXPRESSION.get(expression.lower(), "unknown")

In [5]:
# Translate the integer expression codes into lower-case expression names in a
# single vectorised pass instead of mutating the frame row by row.
lowercase_expression_names = {
    code: name.lower() for code, name in expression_labels.items()
}
expression_names = fer2013_df["expression_label"].map(lowercase_expression_names)

# Series.map leaves NaN for codes that are absent from expression_labels; stop
# with a clear message rather than failing later or writing "nan" file names.
unmapped_rows = expression_names.isna()
if unmapped_rows.any():
    unknown_codes = fer2013_df.loc[unmapped_rows, "expression_label"].unique().tolist()
    raise ValueError(f"Unmapped FER2013 expression labels: {unknown_codes}")

# Whole-column replacement avoids writing strings cell by cell into the integer
# "expression_label" column, which pandas flags as an incompatible-dtype
# assignment. assign() overwrites both columns in place, so the column order
# (sentiment, pixels, image_name) is unchanged.
fer2013_df = fer2013_df.rename(columns={"expression_label": "sentiment"}).assign(
    sentiment=expression_names.map(get_sentiment).astype("category"),
    image_name=[
        f"fer2013_{expression}_{row_id}.jpg"
        for row_id, expression in expression_names.items()
    ],
)


fer2013_df.head()

  fer2013_df.loc[index, "expression_label"] = sentiment


Unnamed: 0,sentiment,pixels,image_name
0,negative,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,fer2013_angry_0.jpg
1,negative,151 150 147 155 148 133 111 140 170 174 182 15...,fer2013_angry_1.jpg
2,negative,231 212 156 164 174 138 161 173 182 200 106 38...,fer2013_fear_2.jpg
3,negative,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,fer2013_sad_3.jpg
4,neutral,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,fer2013_neutral_4.jpg


In [None]:
# Disabled one-off export: decodes each row's space-separated "pixels" string
# into a 48x48 uint8 array and writes it as an image file named by the row's
# "image_name". The target folder is the same FER2013 "cropped" path that the
# final copy step reads from, so those files must already exist while this cell
# stays commented out.
# processed_folder_path = "../../data/processed/fer2013/cropped/"
# if not os.path.exists(processed_folder_path):
#     os.makedirs(processed_folder_path)
# for index, row in fer2013_df.iterrows():
#     image_string = row["pixels"].split(" ")
#     image_data = np.asarray(image_string, dtype=np.uint8).reshape(48, 48)
#     cv2.imwrite(os.path.join(processed_folder_path, row["image_name"]), image_data)

# Combine FER2013 with EXPW


In [6]:
# Read the ExpW label table (image_name / expression_label columns).
expw_label_path = "../../data/processed/expw/new_label.csv"
expw_label_df = pd.read_csv(filepath_or_buffer=expw_label_path, index_col=False)

expw_label_df.head()

Unnamed: 0,image_name,expression_label
0,angry_0.jpg,angry
1,angry_1.jpg,angry
2,fear_2.jpg,fear
3,happy_3.jpg,happy
4,angry_4.jpg,angry


In [7]:
# Series.map applies get_sentiment to every label in one pass, replacing the
# per-row iterrows/.loc loop; rename() returns a new frame rather than editing
# the loaded one in place.
expw_label_df = expw_label_df.rename(
    columns={"expression_label": "sentiment"}
).assign(
    sentiment=lambda frame: frame["sentiment"].map(get_sentiment).astype("category")
)

expw_label_df.head()

Unnamed: 0,image_name,sentiment
0,angry_0.jpg,negative
1,angry_1.jpg,negative
2,fear_2.jpg,negative
3,happy_3.jpg,positive
4,angry_4.jpg,negative


In [8]:
# Label-only view of FER2013: everything except the raw "pixels" strings.
fer2013_label_df = fer2013_df.drop("pixels", axis="columns")

fer2013_label_df.head()

Unnamed: 0,sentiment,image_name
0,negative,fer2013_angry_0.jpg
1,negative,fer2013_angry_1.jpg
2,negative,fer2013_fear_2.jpg
3,negative,fer2013_sad_3.jpg
4,neutral,fer2013_neutral_4.jpg


In [9]:
# Stack the ExpW labels on top of the FER2013 labels and renumber the rows
# 0..n-1 so the two sources do not share index values.
combined_df = pd.concat(
    objs=(expw_label_df, fer2013_label_df),
    axis="index",
    ignore_index=True,
)

combined_df.head()

Unnamed: 0,image_name,sentiment
0,angry_0.jpg,negative
1,angry_1.jpg,negative
2,fear_2.jpg,negative
3,happy_3.jpg,positive
4,angry_4.jpg,negative


In [11]:
# Number of images per sentiment class. "sentiment" is categorical, and
# grouping on a categorical key without `observed` relies on a deprecated
# default; observed=False states the current behaviour explicitly (every
# category is listed, even one with zero rows).
grouped_count = combined_df.groupby(["sentiment"], observed=False)[
    "sentiment"
].count()

grouped_count

  grouped_count = combined_df.groupby(["sentiment"])["sentiment"].count()


sentiment
negative    36011
neutral     52143
positive    39526
Name: sentiment, dtype: int64

In [12]:
# Bar chart of the per-class image counts, one colour per sentiment class.
fig = px.bar(
    x=grouped_count.index,
    y=grouped_count.values,
    color=grouped_count.index,
)

# The bars are sentiment classes (negative / neutral / positive), so the x axis
# is labelled accordingly rather than as expressions.
fig.update_layout(
    title="Total images for each label (expw & fer2013 combined)",
    xaxis_title="Sentiment",
    yaxis_title="Count",
    height=600,
    width=800,
)

# Print each bar's count inside the bar.
fig.update_traces(texttemplate="%{y}", textposition="inside")

fig.show()

In [13]:
# Pick a reproducible (seeded) random quarter of the "neutral" rows; these are
# the rows removed to shrink the over-represented class.
is_neutral = combined_df["sentiment"] == "neutral"
neutral_rows_to_drop = combined_df.loc[is_neutral].sample(frac=0.25, random_state=42)

In [14]:
# Remove the sampled neutral rows by index label; combined_df itself is left intact.
undersampled_df = combined_df.drop(index=neutral_rows_to_drop.index)

In [16]:
# Per-class image counts after undersampling. observed=False spells out the
# behaviour for the categorical "sentiment" key instead of relying on the
# deprecated implicit default.
grouped_undersampled_count = undersampled_df.groupby(["sentiment"], observed=False)[
    "sentiment"
].count()

grouped_undersampled_count





sentiment
negative    36011
neutral     39107
positive    39526
Name: sentiment, dtype: int64

In [17]:
# Bar chart of the per-class image counts after undersampling, one colour per
# sentiment class.
fig = px.bar(
    x=grouped_undersampled_count.index,
    y=grouped_undersampled_count.values,
    color=grouped_undersampled_count.index,
)

# The bars are sentiment classes (negative / neutral / positive), so the x axis
# is labelled accordingly rather than as expressions.
fig.update_layout(
    title="Total images for each label (expw & fer2013 undersampled)",
    xaxis_title="Sentiment",
    yaxis_title="Count",
    height=600,
    width=800,
)

# Print each bar's count inside the bar.
fig.update_traces(texttemplate="%{y}", textposition="inside")

fig.show()

In [18]:
label_path = "../../data/processed/sentiment/sentiment_label.csv"
combined_img_folder_path = "../../data/processed/sentiment/img/"

# exist_ok=True replaces the separate exists() check, so re-running the cell is
# harmless. Creating the img/ folder also creates its parent "sentiment"
# folder, which is where the label CSV is written.
os.makedirs(combined_img_folder_path, exist_ok=True)

undersampled_df.to_csv(label_path, index=False)

In [19]:
# Folders holding the per-dataset image files. Both strings keep their trailing
# slash so a file name can be appended directly.
expw_images_folder_path = "../../data/processed/expw/cropped/"
fer2013_images_folder_path = "../../data/processed/fer2013/cropped/"

In [20]:
import shutil


# Copy every image that survived undersampling into the combined image folder.
# Only the file name is needed, so iterate over that column directly instead of
# materialising each row with iterrows().
for image_name in undersampled_df["image_name"]:
    # FER2013 file names were generated with a "fer2013_" prefix; any other
    # name is taken from the ExpW folder. A prefix test cannot be fooled by
    # "fer2013" appearing elsewhere in a name.
    if image_name.startswith("fer2013_"):
        source_folder = fer2013_images_folder_path
    else:
        source_folder = expw_images_folder_path

    shutil.copy(
        os.path.join(source_folder, image_name),
        os.path.join(combined_img_folder_path, image_name),
    )