In [None]:
# Maps each emotion name to its numeric shorthand and builds the reverse (number -> emotion) lookup
# Used when separating out the transition paraphrasing pairs
import pandas as pd
import os

# Emotion labels listed in code order: a label's position in this tuple
# is the number used as its shorthand
_emotion_labels = (
    'neutral',
    'amusement',
    'excitement',
    'joy',
    'love',
    'optimism',
    'desire',
    'caring',
    'pride',
    'admiration',
    'gratitude',
    'relief',
    'approval',
    'realization',
    'surprise',
    'curiosity',
    'confusion',
    'fear',
    'nervousness',
    'remorse',
    'embarrassment',
    'disappointment',
    'sadness',
    'grief',
    'disgust',
    'anger',
    'annoyance',
    'disapproval',
)

# Emotion name -> shorthand code (the code is kept as a string, e.g. 'joy' -> '3')
emotion_shorthand_mapping = {
    label: str(code) for code, label in enumerate(_emotion_labels)
}

# Shorthand code -> emotion name (inverse of the mapping above)
reverse_emotion_mapping = dict(
    zip(emotion_shorthand_mapping.values(), emotion_shorthand_mapping.keys())
)

# Allows the user to specify which data set to isolate transition pairs from.
# Surrounding whitespace is stripped because the answer is interpolated
# directly into file paths, where a stray space would point at a wrong location
dataset = input('Which dataset to isolate transition pairs? ').strip()

In [None]:
# Isolates transition pairs from the chosen data set by matching pairs against the Emotion-Transition Graph
# Both splits are read from tab-separated files and every column is cast to str
training_file = f'emotion-labeled-data/{dataset}/{dataset}-training_t5.tsv'
testing_file = f'emotion-labeled-data/{dataset}/{dataset}-testing_t5.tsv'

training_df = pd.read_csv(training_file, sep='\t')
training_df = training_df.astype(str)
testing_df = pd.read_csv(testing_file, sep='\t')
testing_df = testing_df.astype(str)

# Implementation of the Emotion-Transition Graph, written as (input emotion, targets) edges:
# a paraphrasing pair counts as a transition when its target emotion
# appears in the list stored under its input emotion
_transition_edges = (
    ('anger', ['disgust', 'annoyance']),
    ('disgust', ['annoyance']),
    ('disapproval', ['annoyance']),
    ('love', ['joy', 'excitement', 'amusement']),
    ('joy', ['excitement', 'amusement']),
    ('excitement', ['amusement']),
    ('desire', ['caring', 'optimism']),
    ('caring', ['optimism']),
    ('pride', ['admiration']),
    ('gratitude', ['relief']),
    ('approval', ['realization']),
    ('surprise', ['confusion']),
    ('fear', ['nervousness']),
    ('remorse', ['embarrassment']),
    ('grief', ['sadness', 'disappointment']),
    ('sadness', ['disappointment']),
)
emotion_transition = dict(_transition_edges)


# Processing the Training Set (larger) and the Testing Set (smaller) with the same routine
def _isolate_transition_pairs(df):
    """Return the rows of ``df`` that lie on an edge of the Emotion-Transition Graph.

    The first and third whitespace-separated tokens of each row's ``prefix``
    are translated through ``reverse_emotion_mapping`` and stored in the new
    ``input_emo`` and ``target_emo`` columns. A row is kept only when its
    ``target_emo`` is listed under its ``input_emo`` in ``emotion_transition``.
    The ``'Unnamed: 0'`` column is removed from the result when present.

    Args:
        df: data frame with a string ``prefix`` column.

    Returns:
        A new, filtered data frame; ``df`` itself is not modified.

    Raises:
        KeyError: if a prefix token is not a key of ``reverse_emotion_mapping``.
        IndexError: if a prefix holds fewer than three tokens.
    """
    # Translating the numbers into emotions (tokens 0 and 2 of the prefix)
    prefix_tokens = [prefix.split() for prefix in df['prefix']]
    input_emotions = [reverse_emotion_mapping[tokens[0]] for tokens in prefix_tokens]
    target_emotions = [reverse_emotion_mapping[tokens[2]] for tokens in prefix_tokens]
    df = df.assign(input_emo=input_emotions, target_emo=target_emotions)

    # Keeps only paraphrasing pairs along emotion gradients in the graph;
    # input emotions with no outgoing edge fall back to an empty target set
    on_gradient = pd.Series(
        [
            target in emotion_transition.get(source, ())
            for source, target in zip(input_emotions, target_emotions)
        ],
        index=df.index,
        dtype=bool,  # explicit dtype so an empty frame still yields a boolean mask
    )
    df = df[on_gradient]

    # errors='ignore' so a file saved without an index column does not abort the run
    return df.drop(columns=['Unnamed: 0'], errors='ignore')


training_df = _isolate_transition_pairs(training_df)
testing_df = _isolate_transition_pairs(testing_df)


# Ensures directory exists for save location
# (exist_ok avoids the separate existence check; missing parent directories are created too)
path = f'emotion-labeled-data/{dataset}-transition'
os.makedirs(path, exist_ok=True)


# Saves the transition-only versions of the chosen data set's training and testing splits
# The data frame index is written as the first (unnamed) column of each file
training_df.to_csv(f'{path}/{dataset}-transition-training_t5.tsv', sep='\t')
testing_df.to_csv(f'{path}/{dataset}-transition-testing_t5.tsv', sep='\t')