# In [None]:
### ========================================================================================================================================
## Name:        Saputra Rizky Johan (사푸트라)
## Institution: Seoul National University, South Korea
## Purpose:     Undergraduate Thesis
## Department:  Computer Science and Engineering
## College:     Engineering
## Advisor:     Prof. Lee Changun (이창건 교수)
## Title:       The Application of Facial Emotion Recognition (FER) in the Detection and Measurement of Burnout and Prolonged Stress Levels
## Module:      graph_analyzer.ipynb (A notebook that generates the burnout-score and negative-valence probability graphs over time for the thesis)
### ========================================================================================================================================

# =============================================================================================
# SETUP
# =============================================================================================
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# --- Input / output locations ---
# Folder that is scanned for the per-session "*_frames.csv" files
DATA_DIR = "data_sessions"
# Folder that receives the exported PNG figures; created up front if it is absent
SAVE_DIR = "figures"
os.makedirs(SAVE_DIR, exist_ok=True)

# --- Plot options ---
# Rolling-mean window measured in rows; a value of 1 turns smoothing off
SMOOTH_WINDOW = 5
# True: the x-axis is derived from the Timestamp column; False: the x-axis is FrameIndex
USE_TIME = True

# --- Emotion labels ---
# Order of the 7 comma-separated probabilities stored in the "Probs" CSV column
EMOTION_ORDER = [
    "Angry",
    "Disgust",
    "Fear",
    "Happy",
    "Neutral",
    "Sad",
    "Surprise",
]
# Emotions whose probabilities are summed into the negative-valence score
NEGATIVE_SET = {
    "Angry",
    "Disgust",
    "Fear",
    "Sad",
}

# =============================================================================================
# IMPLEMENTATION
# =============================================================================================
# Parse the "Probs" column into one float column per emotion and add the summed negative-valence probability
def parse_probs_column(df, emotion_order=None, negative_set=None):
    """Expand the comma-separated "Probs" column into per-emotion float columns.

    Args:
        df: DataFrame with a string column "Probs" whose cells hold one
            comma-separated probability per emotion.
        emotion_order: Emotion names in the order they appear inside "Probs".
            Defaults to the module-level EMOTION_ORDER.
        negative_set: Emotion names counted as negative valence. Defaults to
            the module-level NEGATIVE_SET.

    Returns:
        A new DataFrame with a fresh 0..n-1 index that contains the original
        columns, one "P_<emotion>" float column per emotion, and "P_negative"
        (the row-wise sum of the negative-valence columns).

    Raises:
        ValueError: If "Probs" does not split into exactly one value per emotion.
    """
    emotions = list(EMOTION_ORDER if emotion_order is None else emotion_order)
    negatives = NEGATIVE_SET if negative_set is None else negative_set

    # Reset the index BEFORE splitting so both frames share the same 0..n-1 index;
    # otherwise concat(axis=1) aligns on mismatched labels and scatters the rows.
    base = df.reset_index(drop=True)

    # Split the comma-separated Probs into one column per value
    probs = base["Probs"].str.split(",", expand=True)
    if probs.shape[1] != len(emotions):
        raise ValueError(
            f"Expected {len(emotions)} comma-separated values in 'Probs', "
            f"found {probs.shape[1]}"
        )
    probs = probs.astype(float)
    probs.columns = [f"P_{e}" for e in emotions]

    out = pd.concat([base, probs], axis=1)
    # Set the negative-valence probability as the sum of the negative emotions
    negative_cols = [f"P_{e}" for e in emotions if e in negatives]
    out["P_negative"] = out[negative_cols].sum(axis=1)
    return out

# Gather the per-frame session CSVs (names ending in "_frames.csv") found in DATA_DIR
frames_files = []
for entry in os.listdir(DATA_DIR):
    if entry.endswith("_frames.csv"):
        frames_files.append(entry)

# Stop early when there is nothing to plot
if not frames_files:
    raise FileNotFoundError(f"No *_frames.csv files found in {DATA_DIR}")

# Build one record per session file: participant id, processed frame table, x-axis caption
sessions = []
for fname in sorted(frames_files):
    # The participant id is the filename prefix before the first underscore (used as the legend label)
    participant = re.split(r"[_]", fname, maxsplit=1)[0]
    df = pd.read_csv(os.path.join(DATA_DIR, fname))

    # A maximum above 1.5 is treated as a 0-100 scale and divided by 100; otherwise only cast to float
    burnout = df["Burnout"]
    df["Burnout_norm"] = burnout / 100.0 if burnout.max() > 1.5 else burnout.astype(float)

    # Expand "Probs" into per-emotion columns plus the summed negative-valence probability
    df = parse_probs_column(df)

    # x-axis: seconds elapsed since the first row's timestamp when enabled and present, else the frame index
    has_time = USE_TIME and "Timestamp" in df.columns
    if not has_time:
        df["_x"] = df["FrameIndex"].astype(int)
        x_label = "Frame"
    else:
        stamps = pd.to_datetime(df["Timestamp"])
        df["_x"] = (stamps - stamps.iloc[0]).dt.total_seconds()
        x_label = "Time (s)"

    # Rolling mean over SMOOTH_WINDOW rows (min_periods=1 keeps the first rows defined);
    # with a window of 1 or less the raw series is reused as-is
    for raw_col, smooth_col in (("Burnout_norm", "Burnout_smooth"), ("P_negative", "Pneg_smooth")):
        if SMOOTH_WINDOW > 1:
            df[smooth_col] = df[raw_col].rolling(SMOOTH_WINDOW, min_periods=1).mean()
        else:
            df[smooth_col] = df[raw_col]

    sessions.append(
        {
            "participant": participant,
            "df": df,
            "x_label": x_label,
        }
    )

# Figure 3: one smoothed burnout curve per participant, written to a PNG and then displayed
fig_burnout, ax_burnout = plt.subplots(figsize=(10, 6))
for session in sessions:
    frame = session["df"]
    ax_burnout.plot(frame["_x"], frame["Burnout_smooth"], label=session["participant"])

# The x-axis caption is taken from the first session
ax_burnout.set_xlabel(sessions[0]["x_label"])
ax_burnout.set_ylabel("Burnout Score (normalized)")
ax_burnout.set_title("Figure 3. Burnout Score Trajectory by Participant")
ax_burnout.legend(loc="best", title="Participant")

fig_burnout.tight_layout()
burnout_png = os.path.join(SAVE_DIR, "burnout_trajectory.png")
fig_burnout.savefig(burnout_png, dpi=300, bbox_inches="tight")
plt.show()

# Figure 4: one smoothed negative-valence probability curve per participant, written to a PNG and then displayed
fig_neg, ax_neg = plt.subplots(figsize=(10, 6))
for session in sessions:
    frame = session["df"]
    ax_neg.plot(frame["_x"], frame["Pneg_smooth"], label=session["participant"])

# The x-axis caption is taken from the first session
ax_neg.set_xlabel(sessions[0]["x_label"])
ax_neg.set_ylabel("Negative-valence Probability")
ax_neg.set_title("Figure 4. Negative-valence Probability Over Time by Participant")
ax_neg.legend(loc="best", title="Participant")

fig_neg.tight_layout()
neg_png = os.path.join(SAVE_DIR, "neg_valence_timeseries.png")
fig_neg.savefig(neg_png, dpi=300, bbox_inches="tight")
plt.show()

### ===================================================================================================================================
## END: Add implementations if necessary
### ===================================================================================================================================