# Reduced Emotion labels
 Reduce the number of emotion labels in our dataset to 7.

In [1]:
import pandas as pd

df = pd.read_csv('../Datasets/full_dataset.csv', index_col=0)
df

Unnamed: 0,idiom_id,idiom,sentence,emotion
0,0,American Dream,"Sally finally achieved the American Dream, liv...",Happiness
1,0,American Dream,He felt that the American Dream was slipping t...,Frustration
2,0,American Dream,She couldn't help but feel a sense of pride wh...,Pride
3,0,American Dream,They were envious of their neighbor's seemingl...,Envy
4,0,American Dream,The immigrant family tirelessly pursued the Am...,Hope
...,...,...,...,...
10145,2029,laughing stock,They made me the laughing stock of the whole o...,Humiliation
10146,2029,laughing stock,He couldn't believe he'd become the town's lau...,Sadness
10147,2029,laughing stock,The new employee's constant mistakes turned hi...,Frustration
10148,2029,laughing stock,"As a politician, it's crucial to avoid becomin...",Fear


In [2]:
emotion_list = [
    'Anger', 'Resentment', 'Frustration', 'Hate', 'Disgust', 'Boredom',
    'Reluctance', 'Sadness', 'Pity', 'Loneliness', 'Humiliation', 'Longing',
    'Envy', 'Guilt', 'Regret', 'Shame', 'Fear', 'Anxiety', 'Doubt',
    'Desperation', 'Confusion', 'Shock', 'Pleasure', 'Serenity', 'Relief',
    'Happiness', 'Lust', 'Affection', 'Gratitude', 'Admiration', 'Pride',
    'Determination', 'Fascination', 'Surprise', 'Excitement', 'Hope'
]

print(', '.join(emotion_list))

Anger, Resentment, Frustration, Hate, Disgust, Boredom, Reluctance, Sadness, Pity, Loneliness, Humiliation, Longing, Envy, Guilt, Regret, Shame, Fear, Anxiety, Doubt, Desperation, Confusion, Shock, Pleasure, Serenity, Relief, Happiness, Lust, Affection, Gratitude, Admiration, Pride, Determination, Fascination, Surprise, Excitement, Hope


In [3]:
# drop all rows with emotion not in target 36 emotion_list
df_filtered = df[df['emotion'].isin(emotion_list)]

print(f'Dropped {len(df) - len(df_filtered)} rows')

Dropped 465 rows


In [4]:
# map emotions to 7 categories
emotion_map = {
    'Anger': ['Anger', 'Resentment', 'Frustration', 'Hate', 'Disgust'],
    'Boredom': ['Boredom', 'Reluctance'],
    'Sadness': ['Sadness', 'Pity', 'Loneliness', 'Humiliation'],
    'Desire': ['Longing', 'Envy'],
    'Remorse': ['Guilt', 'Regret', 'Shame'],
    'Fear': ['Fear', 'Anxiety', 'Doubt', 'Desperation', 'Confusion', 'Shock'],
    'Joy': [
        'Pleasure', 'Serenity', 'Relief', 'Happiness', 'Lust', 'Affection', 'Gratitude',
        'Admiration', 'Pride', 'Determination', 'Fascination', 'Surprise', 'Excitement', 'Hope'
    ]        
}

In [5]:
# validate emotion mapping
new_emotions = [item for sublist in emotion_map.values() for item in sublist]

for emotion in new_emotions:
    if emotion not in df_filtered.emotion.unique():
        print(emotion)

print()

for emotion in df_filtered.emotion.unique():
    if emotion not in new_emotions:
        print(emotion)




In [6]:
# create new column with reduced emotions
df_filtered['reduced_emotion'] = df_filtered['emotion'].map(lambda x: [k for k, v in emotion_map.items() if x in v][0])
df_filtered

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['reduced_emotion'] = df_filtered['emotion'].map(lambda x: [k for k, v in emotion_map.items() if x in v][0])


Unnamed: 0,idiom_id,idiom,sentence,emotion,reduced_emotion
0,0,American Dream,"Sally finally achieved the American Dream, liv...",Happiness,Joy
1,0,American Dream,He felt that the American Dream was slipping t...,Frustration,Anger
2,0,American Dream,She couldn't help but feel a sense of pride wh...,Pride,Joy
3,0,American Dream,They were envious of their neighbor's seemingl...,Envy,Desire
4,0,American Dream,The immigrant family tirelessly pursued the Am...,Hope,Joy
...,...,...,...,...,...
10145,2029,laughing stock,They made me the laughing stock of the whole o...,Humiliation,Sadness
10146,2029,laughing stock,He couldn't believe he'd become the town's lau...,Sadness,Sadness
10147,2029,laughing stock,The new employee's constant mistakes turned hi...,Frustration,Anger
10148,2029,laughing stock,"As a politician, it's crucial to avoid becomin...",Fear,Fear


In [7]:
df_filtered.reduced_emotion.value_counts()

reduced_emotion
Joy        5190
Anger      1408
Fear       1335
Sadness     690
Desire      531
Remorse     294
Boredom     237
Name: count, dtype: int64

In [8]:
df_filtered.to_csv('../Datasets/reduced_emotions.csv')