In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sys
import os

from tqdm.autonotebook import tqdm as tqdm

import neurokit2 as nk
from neurokit2.eda import eda_process


In [2]:
# Make the local `eda_helper` module importable.
#
# The notebook is expected to run from the `notebooks` directory, and the
# module is presumably located one level above it (the previous version of this
# cell changed into ".." before importing). Adding that directory to `sys.path`
# avoids both the hard-coded absolute Windows path (which contained a stray
# quote and unescaped backslashes, so it never matched a real directory) and
# the chdir round trip, which left the kernel in the wrong working directory
# whenever the import failed.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:  # keep the cell idempotent on re-runs
    sys.path.append(project_root)

from eda_helper import Processing

---

In [3]:
def classifier_4_class(val, aro, val_threshold=5, aro_threshold=6.5):
    """
    Classifies the input into one of four categories based on valence and arousal.

    The first letter of the label refers to valence and the second to arousal.
    A value counts as "high" only when it is strictly greater than its
    threshold; a value equal to the threshold is "low".

    Note: NaN compares False against any threshold, so a missing valence or
    arousal matches none of the first three branches and is labelled 'LL'.

    :param val: Valence value
    :param aro: Arousal value
    :param val_threshold: Valence cut-off (default 5)
    :param aro_threshold: Arousal cut-off (default 6.5)
    :return: A string representing the category ('HH', 'HL', 'LH', 'LL')
    """
    if val > val_threshold and aro > aro_threshold:
        return "HH"  # High Valence, High Arousal
    if val > val_threshold and aro <= aro_threshold:
        return "HL"  # High Valence, Low Arousal
    if val <= val_threshold and aro > aro_threshold:
        return "LH"  # Low Valence, High Arousal
    # Both values at or below their thresholds (or at least one of them is NaN)
    return "LL"  # Low Valence, Low Arousal


def classifier_2_class_arousal(aro, aro_threshold=6.5):
    """
    Classifies the input into one of two categories based on arousal.

    Arousal is "high" only when it is strictly greater than the threshold.
    NaN compares False against the threshold and is therefore labelled 'L'.

    :param aro: Arousal value
    :param aro_threshold: Arousal cut-off (default 6.5)
    :return: A string representing the category ('H', 'L')
    """
    return "H" if aro > aro_threshold else "L"

Flow: *Load the data* ==> *Apply the feature extraction* ==> *Prepare the DataFrame of labels* ==> *Concatenate the features DataFrame* ==> *Normalize the data* ==> *Write the data*

In [None]:
# Per-subject preprocessing: load the signals, extract the NeuroKit EDA
# features, join them with the annotations on the timestamp, add the class
# labels and write one CSV per subject.
RAW_SAMPLING_RATE = 1000  # Hz; rate the raw physiological signal is treated as having
DOWNSAMPLE_STEP = 50  # keep every 50th sample -> 1000 / 50 = 20 Hz
OUTPUT_DIR = "../preprocessed_data"
FEATURE_COLUMNS = ["EDA_Clean", "EDA_Phasic", "EDA_Tonic"]
OUTPUT_COLUMNS = ["time", "gsr", "video", "valence", "arousal"] + FEATURE_COLUMNS

# Writing fails with an opaque error if the target directory is missing.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for i in tqdm(range(1, 31), desc="Generating Neurokit Processed Data:"):
    sub_data = pd.read_csv(
        f"../CASE_full/CASE_full/data/interpolated/physiological/sub_{i}.csv"
    )[::DOWNSAMPLE_STEP]
    sub_labels = pd.read_csv(
        f"../CASE_full/CASE_full/data/interpolated/annotations/sub_{i}.csv"
    )
    sub_analysis = eda_process(
        sub_data["gsr"],
        sampling_rate=RAW_SAMPLING_RATE / DOWNSAMPLE_STEP,
        method="neurokit",
    )[0]
    # The features are attached to the signal rows by position below, so the
    # two tables must have exactly one row per down-sampled sample.
    assert len(sub_analysis) == len(sub_data), (
        f"sub_{i}: {len(sub_analysis)} feature rows for {len(sub_data)} signal rows"
    )

    # Attach the NeuroKit features to the physiological rows BEFORE merging
    # with the annotations. Concatenating them after the inner merge aligned
    # rows by position only, which silently shifts the features (and appends
    # NaN rows) as soon as the merge drops a single timestamp.
    physio = pd.DataFrame(
        {
            "time": sub_data["daqtime"].to_numpy(),
            "gsr": sub_data["gsr"].to_numpy(),
            "video": sub_data["video"].to_numpy(),
            **{col: sub_analysis[col].to_numpy() for col in FEATURE_COLUMNS},
        }
    )
    labels = pd.DataFrame(
        {
            "valence": sub_labels["valence"],
            "arousal": sub_labels["arousal"],
            "time": sub_labels["jstime"],
        }
    )

    # Inner join on the timestamp, then restore the original column order.
    final_sub_data = pd.merge(physio, labels, on="time", how="inner")[
        OUTPUT_COLUMNS
    ].copy()

    # Add the 4 class and 2 class arousal labels
    final_sub_data["class_4"] = [
        classifier_4_class(val=valence, aro=arousal)
        for valence, arousal in zip(
            final_sub_data["valence"], final_sub_data["arousal"]
        )
    ]
    final_sub_data["class_2_arousal"] = [
        classifier_2_class_arousal(aro=arousal)
        for arousal in final_sub_data["arousal"]
    ]

    # Write the Data
    final_sub_data.to_csv(f"{OUTPUT_DIR}/sub_{i}.csv", index=False)
    print(len(final_sub_data))

Generating Neurokit Processed Data::   0%|          | 0/30 [00:00<?, ?it/s]

49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032
49032


---

## Working on Windowed Data From Here

Generate the windowed data for the samples that belong to the videos with ids 5, 6, 7 and 8.

In [3]:
# Load the preprocessed table of subject 1 and keep only the rows whose
# video id is one of 5, 6, 7 or 8.
data = pd.read_csv("../preprocessed_data/sub_1.csv").loc[
    lambda frame: frame["video"].isin([5, 6, 7, 8])
]
data

Unnamed: 0,time,gsr,video,valence,arousal,EDA_Clean,EDA_Phasic,EDA_Tonic,class_4,class_2_arousal
15316,765800,4.948,5,5.0,5.001,4.949262,-0.007233,4.956495,LL,L
15317,765850,4.948,5,5.0,5.000,4.950045,-0.006510,4.956555,LL,L
15318,765900,4.956,5,5.0,5.000,4.949724,-0.006894,4.956618,LL,L
15319,765950,4.941,5,5.0,5.000,4.949175,-0.007509,4.956684,LL,L
15320,766000,4.948,5,5.0,5.000,4.949311,-0.007441,4.956752,LL,L
...,...,...,...,...,...,...,...,...,...,...
46627,2331350,6.959,8,0.5,9.500,6.945963,0.031642,6.914321,LH,H
46628,2331400,6.943,8,0.5,9.500,6.947660,0.031422,6.916238,LH,H
46629,2331450,6.943,8,0.5,9.500,6.949633,0.031482,6.918151,LH,H
46630,2331500,6.963,8,0.5,9.500,6.952043,0.031983,6.920061,LH,H


In [4]:
from eda_helper import Processing

In [7]:
def prepare_windows_with_labels(
    data,
    x_column="EDA_Phasic",
    y_columns=("arousal", "valence"),
    y_aggregator="mean",
    window_size=100,
    window_gap=1,
):
    """
    Build a table of signal windows with one aggregated label per window.

    Windows are generated by ``Processing.generate_windows_df`` for the signal
    column and for every label column (that helper is defined outside this
    notebook; it is assumed to return one DataFrame row per window). Each label
    window is then reduced to a single value with ``y_aggregator``.

    :param data: DataFrame containing ``x_column`` and all ``y_columns``
    :param x_column: Name of the signal column to window
    :param y_columns: A single label column name or an iterable of names
    :param y_aggregator: Name of a pandas aggregation (e.g. "mean") or a
        callable that reduces one window (a row) to a single value
    :param window_size: Window length, forwarded to ``generate_windows_df``
    :param window_gap: Window step, forwarded to ``generate_windows_df``
    :return: DataFrame with the windowed signal columns followed by one
        aggregated column per entry of ``y_columns``
    :raises ValueError: If ``y_columns`` is empty
    """
    # Accept a bare string (otherwise it would be iterated character by
    # character) and work on a private list so the caller's object, or a shared
    # default, is never reused as the column index.
    label_columns = [y_columns] if isinstance(y_columns, str) else list(y_columns)
    if not label_columns:
        raise ValueError("y_columns must contain at least one column name")

    # prepare the x column
    x_df = Processing.generate_windows_df(data, window_size, window_gap, x_column)

    # prepare the y columns
    label_series = []
    for label_column in tqdm(label_columns, desc="Preparing Y Columns:"):
        label_windows = Processing.generate_windows_df(
            data, window_size, window_gap, label_column
        )
        if isinstance(y_aggregator, str):
            # Named pandas reduction, e.g. "mean" or "median"
            aggregated = label_windows.agg(y_aggregator, axis=1)
        else:
            # For custom functions, use apply to avoid the FutureWarning
            aggregated = label_windows.apply(y_aggregator, axis=1)
        assert isinstance(aggregated, pd.Series), (
            "y_aggregator must reduce every window to a single value"
        )
        label_series.append(aggregated)

    y_df = pd.concat(label_series, axis=1)
    y_df.columns = label_columns

    assert x_df.shape[0] == y_df.shape[0], (
        "signal and label windows must have the same number of rows"
    )
    # merge the x and y columns
    return pd.concat([x_df, y_df], axis=1)

In [13]:
# Window the selected videos, reducing each label window to its mean through
# a callable aggregator (this exercises the custom-function branch).
test = prepare_windows_with_labels(
    data,
    y_aggregator=lambda window_row: window_row.mean(),
)
test

generating window:   0%|          | 0/12553 [00:00<?, ?it/s]

preparing dataframe:   0%|          | 0/12553 [00:00<?, ?it/s]

Preparing Y Columns::   0%|          | 0/2 [00:00<?, ?it/s]

generating window:   0%|          | 0/12553 [00:00<?, ?it/s]

preparing dataframe:   0%|          | 0/12553 [00:00<?, ?it/s]

generating window:   0%|          | 0/12553 [00:00<?, ?it/s]

preparing dataframe:   0%|          | 0/12553 [00:00<?, ?it/s]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,arousal,valence
0,-0.007233,-0.006510,-0.006894,-0.007509,-0.007441,-0.006344,-0.004470,-0.002445,-0.001062,-0.000864,...,-0.020322,-0.015677,-0.008927,-0.002985,-0.001118,-0.004397,-0.010463,-0.015135,5.00001,5.0
1,-0.006510,-0.006894,-0.007509,-0.007441,-0.006344,-0.004470,-0.002445,-0.001062,-0.000864,-0.001541,...,-0.015677,-0.008927,-0.002985,-0.001118,-0.004397,-0.010463,-0.015135,-0.015732,5.00000,5.0
2,-0.006894,-0.007509,-0.007441,-0.006344,-0.004470,-0.002445,-0.001062,-0.000864,-0.001541,-0.001984,...,-0.008927,-0.002985,-0.001118,-0.004397,-0.010463,-0.015135,-0.015732,-0.013090,5.00000,5.0
3,-0.007509,-0.007441,-0.006344,-0.004470,-0.002445,-0.001062,-0.000864,-0.001541,-0.001984,-0.001299,...,-0.002985,-0.001118,-0.004397,-0.010463,-0.015135,-0.015732,-0.013090,-0.010441,5.00000,5.0
4,-0.007441,-0.006344,-0.004470,-0.002445,-0.001062,-0.000864,-0.001541,-0.001984,-0.001299,0.000297,...,-0.001118,-0.004397,-0.010463,-0.015135,-0.015732,-0.013090,-0.010441,-0.010338,5.00000,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12548,-0.046687,-0.047574,-0.045571,-0.042656,-0.040503,-0.039939,-0.041246,-0.044278,-0.048071,-0.050984,...,0.035378,0.036627,0.036440,0.035312,0.033978,0.032879,0.032114,0.031642,9.50000,0.5
12549,-0.047574,-0.045571,-0.042656,-0.040503,-0.039939,-0.041246,-0.044278,-0.048071,-0.050984,-0.051961,...,0.036627,0.036440,0.035312,0.033978,0.032879,0.032114,0.031642,0.031422,9.50000,0.5
12550,-0.045571,-0.042656,-0.040503,-0.039939,-0.041246,-0.044278,-0.048071,-0.050984,-0.051961,-0.051706,...,0.036440,0.035312,0.033978,0.032879,0.032114,0.031642,0.031422,0.031482,9.50000,0.5
12551,-0.042656,-0.040503,-0.039939,-0.041246,-0.044278,-0.048071,-0.050984,-0.051961,-0.051706,-0.052110,...,0.035312,0.033978,0.032879,0.032114,0.031642,0.031422,0.031482,0.031983,9.50000,0.5
