In [82]:
import pandas as pd

In [83]:
# Load the CK+ table from disk; the per-emotion frames are sliced from it.
df_ckplus_dataset = pd.read_csv(
    filepath_or_buffer='dataset/ckplus_dataset.csv',
)

In [84]:
# Split the table into one frame per peak emotion.  Every frame is the
# same boolean-mask selection on `peak_emotion`, so the selection is
# written once and unpacked into the seven names.
(
    df_anger,
    df_contempt,
    df_disgust,
    df_fear,
    df_happiness,
    df_sadness,
    df_surprise,
) = (
    df_ckplus_dataset[df_ckplus_dataset['peak_emotion'] == emotion]
    for emotion in (
        'anger',
        'contempt',
        'disgust',
        'fear',
        'happiness',
        'sadness',
        'surprise',
    )
)

In [85]:
# Distinct subject ids that appear in each per-emotion frame.
(
    anger_subjects,
    contempt_subjects,
    disgust_subjects,
    fear_subjects,
    happiness_subjects,
    sadness_subjects,
    surprise_subjects,
) = map(
    lambda emotion_frame: set(emotion_frame['subject_id']),
    (
        df_anger,
        df_contempt,
        df_disgust,
        df_fear,
        df_happiness,
        df_sadness,
        df_surprise,
    ),
)

In [86]:
# Report the number of distinct subjects per emotion.  The parenthesised
# value is a hard-coded reference figure printed next to the observed one
# (presumably the published CK+ per-emotion count -- TODO confirm).
for _label, _subjects, _reference in (
    ('Anger subjects:\t\t', anger_subjects, "(45)"),
    ('Contempt subjects:\t', contempt_subjects, "(18)"),
    ('Disgust subjects:\t', disgust_subjects, "(59)"),
    ('Fear subjects:\t\t', fear_subjects, "(25)"),
    ('Happiness subjects:\t', happiness_subjects, "(69)"),
    ('Sadness subjects:\t', sadness_subjects, "(28)"),
    ('Surprise subjects:\t', surprise_subjects, "(83)"),
):
    print(_label, len(_subjects), _reference)

Anger subjects:		 58 (45)
Contempt subjects:	 26 (18)
Disgust subjects:	 72 (59)
Fear subjects:		 55 (25)
Happiness subjects:	 90 (69)
Sadness subjects:	 67 (28)
Surprise subjects:	 92 (83)


In [87]:
# df_anger.to_csv('dataset/anger_all.csv', index=False)
# df_contempt.to_csv('dataset/contempt_all.csv', index=False)
# df_disgust.to_csv('dataset/disgust_all.csv', index=False)
# df_fear.to_csv('dataset/fear_all.csv', index=False)
# df_happiness.to_csv('dataset/happiness_all.csv', index=False)
# df_sadness.to_csv('dataset/sadness_all.csv', index=False)
# df_surprise.to_csv('dataset/surprise_all.csv', index=False)

In [88]:
def filter_dataset(df):
    """Keep one session per subject and return it as a session-level table.

    The input is reduced to its distinct
    ``(subject_id, session_id, peak_emotion, ckplus_labelled)`` combinations,
    and each one is annotated with ``count``: the number of rows of ``df``
    that belong to that ``(subject_id, session_id)`` pair.

    For every subject exactly one session is then selected:

    * the first session (in order of appearance) whose ``ckplus_labelled``
      equals 1, if the subject has one;
    * otherwise the session with the largest ``count``; ties go to the
      session that appears first.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``subject_id``, ``session_id``,
        ``peak_emotion`` and ``ckplus_labelled``.  It is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``subject_id``, ``session_id``, ``peak_emotion``,
        ``ckplus_labelled``, ``count``; sorted by ``ckplus_labelled`` and
        then ``count``, both descending, with a fresh 0..n-1 index.  If a
        selected session carries more than one distinct
        ``peak_emotion``/``ckplus_labelled`` combination, every such row
        is returned.

    Raises
    ------
    KeyError
        If one of the required columns is missing from ``df``.
    """
    key_cols = ["subject_id", "session_id"]

    # One row per distinct session description, in order of first appearance.
    sessions = (
        df[key_cols + ["peak_emotion", "ckplus_labelled"]]
        .drop_duplicates()
        .reset_index(drop=True)
    )

    # Attach the per-session row count by joining on the keys.  groupby()
    # returns its groups sorted by key while drop_duplicates() keeps
    # first-appearance order, so pasting the counts on by position would
    # pair them with the wrong sessions whenever `df` is not already sorted
    # (and would fail outright when the two lengths differ).
    rows_per_session = df.groupby(key_cols).size().reset_index(name="count")
    sessions = sessions.merge(rows_per_session, on=key_cols, how="left")

    # Pick one row label per subject.
    chosen_rows = []
    for _, subject_sessions in sessions.groupby("subject_id", sort=False):
        labelled = subject_sessions[subject_sessions["ckplus_labelled"] == 1]
        if not labelled.empty:
            chosen_rows.append(labelled.index[0])
        else:
            # idxmax() returns the first label holding the maximum.
            chosen_rows.append(subject_sessions["count"].idxmax())

    # Keep every row that shares a (subject_id, session_id) pair with a
    # chosen row.
    chosen_keys = sessions.loc[chosen_rows, key_cols]
    chosen_pairs = set(zip(chosen_keys["subject_id"], chosen_keys["session_id"]))
    keep_mask = pd.Series(
        [
            pair in chosen_pairs
            for pair in zip(sessions["subject_id"], sessions["session_id"])
        ],
        index=sessions.index,
        dtype=bool,
    )

    return (
        sessions.loc[keep_mask]
        .sort_values(by=["ckplus_labelled", "count"], ascending=[False, False])
        .reset_index(drop=True)
    )

In [89]:
# Reduce each per-emotion frame with filter_dataset.
(
    df_filtered_anger,
    df_filtered_contempt,
    df_filtered_disgust,
    df_filtered_fear,
    df_filtered_happiness,
    df_filtered_sadness,
    df_filtered_surprise,
) = (
    filter_dataset(emotion_frame)
    for emotion_frame in (
        df_anger,
        df_contempt,
        df_disgust,
        df_fear,
        df_happiness,
        df_sadness,
        df_surprise,
    )
)

In [90]:
# Report the row count of each filtered frame next to the same hard-coded
# reference figure used for the unfiltered subject counts.
for _label, _filtered, _reference in (
    ("Filtered Anger subjects:\t", df_filtered_anger, "(45)"),
    ("Filtered Contempt subjects:\t", df_filtered_contempt, "(18)"),
    ("Filtered Disgust subjects:\t", df_filtered_disgust, "(59)"),
    ("Filtered Fear subjects:\t\t", df_filtered_fear, "(25)"),
    ("Filtered Happiness subjects:\t", df_filtered_happiness, "(69)"),
    ("Filtered Sadness subjects:\t", df_filtered_sadness, "(28)"),
    ("Filtered Surprise subjects:\t", df_filtered_surprise, "(83)"),
):
    print(_label, len(_filtered), _reference)


Filtered Anger subjects:	 58 (45)
Filtered Contempt subjects:	 26 (18)
Filtered Disgust subjects:	 72 (59)
Filtered Fear subjects:		 55 (25)
Filtered Happiness subjects:	 90 (69)
Filtered Sadness subjects:	 67 (28)
Filtered Surprise subjects:	 92 (83)


In [91]:
# Write each filtered frame to its own CSV, without the index column.
for _filtered, _csv_path in (
    (df_filtered_anger, "dataset/anger_filtered.csv"),
    (df_filtered_contempt, "dataset/contempt_filtered.csv"),
    (df_filtered_disgust, "dataset/disgust_filtered.csv"),
    (df_filtered_fear, "dataset/fear_filtered.csv"),
    (df_filtered_happiness, "dataset/happiness_filtered.csv"),
    (df_filtered_sadness, "dataset/sadness_filtered.csv"),
    (df_filtered_surprise, "dataset/surprise_filtered.csv"),
):
    _filtered.to_csv(_csv_path, index=False)