# 3. Data Preprocessing

In [1]:
import pandas as pd
import random

In [2]:
# read the csv file we created with the gpt-4 generated sentences 
df = pd.read_csv("gpt4_sentence_generation_1Α.csv", names = ['Idiom', 'Sentences'],encoding='latin-1')
df

Unnamed: 0,Idiom,Sentences
0,American Dream,"1. Sally finally achieved the American Dream, ..."
1,Catch-22,1. Stuck in this bureaucratic mess feels like ...
2,Christmas present,"1. Every year, my aunt surprises us with unexp..."
3,Downing Street,1. The new employee quickly found his way to D...
4,Dutch courage,"1. After a few shots of Dutch courage, he fina..."
...,...,...
2317,latch onto,1. She quickly latched onto the idea of a week...
2318,late bloomer,"1. Despite being a late bloomer, she eventuall..."
2319,late model,"1. I just bought a late model car, and I can't..."
2320,laugh a minute,1. Watching the stand-up comedian was like a l...


In [3]:
# split the sentences into 5 different columns
df[['Sentence_1', 'Sentence_2', 'Sentence_3', 'Sentence_4', 'Sentence_5']] = pd.DataFrame([[None]*5]*len(df))

for j in range(len(df)):
    for i in range(5):
        df['Sentence_{}'.format(i+1)][j] = df['Sentences'][j].split('$')[i]

In [4]:
df = df.drop('Sentences', axis = 1)
df

Unnamed: 0,Idiom,Sentence_1,Sentence_2,Sentence_3,Sentence_4,Sentence_5
0,American Dream,"1. Sally finally achieved the American Dream, ...",2. He felt that the American Dream was slippin...,3. She couldn't help but feel a sense of pride...,4. They were envious of their neighbor's seemi...,5. The immigrant family tirelessly pursued the...
1,Catch-22,1. Stuck in this bureaucratic mess feels like ...,2. I can't believe I'm in this Catch-22 situat...,3. Having to choose between two equally undesi...,4. Firing someone who is underperforming at th...,5. Not having a great credit score to rent an ...
2,Christmas present,"1. Every year, my aunt surprises us with unexp...",2. The promotion at work felt like a Christmas...,3. The way she helped me through my darkest ti...,4. His love and support during my struggles we...,5. Catching a breathtaking view of the sunset ...
3,Downing Street,1. The new employee quickly found his way to D...,"2. When I landed my dream job, I couldn't beli...",3. She hesitated before entering the company's...,"4. To others, he appeared to be on Downing Str...","5. They worked so hard, hoping that one day th..."
4,Dutch courage,"1. After a few shots of Dutch courage, he fina...",2. Relying on Dutch courage to confront her fe...,3. He couldn't face his boss without a bit of ...,4. She sought Dutch courage before making the ...,5. Turning to Dutch courage before every diffi...
...,...,...,...,...,...,...
2317,latch onto,1. She quickly latched onto the idea of a week...,2. Peter couldn't shake the feeling of anxiety...,"3. Despite her skepticism, Sarah latched onto ...",4. The young boy latched onto his mother's arm...,5. He latched onto every compliment he receive...
2318,late bloomer,"1. Despite being a late bloomer, she eventuall...","2. He felt slightly anxious, realizing that he...",3. She couldn't help but feel envious when she...,"4. As a late bloomer, he worked hard in his de...",5. The excitement of finally coming into her o...
2319,late model,"1. I just bought a late model car, and I can't...",2. Seeing my neighbor with his new late model ...,"3. Despite having a late model computer, I'm f...",4. After spending a lot of money on my late mo...,5. The envy I feel when I see other people wit...
2320,laugh a minute,1. Watching the stand-up comedian was like a l...,"2. He tried to maintain a straight face, but h...","3. After a tough day at work, John needed a la...",4. The elderly couple enjoyed a laugh a minute...,5. The surprise party was filled with laugh-a-...


In [5]:
# remove the numbers from the start of the sentences
df['Sentence_1'] = df['Sentence_1'].str.replace('1.', '')
df['Sentence_2'] = df['Sentence_2'].str.replace('2.', '')
df['Sentence_3'] = df['Sentence_3'].str.replace('3.', '')
df['Sentence_4'] = df['Sentence_4'].str.replace('4.', '')
df['Sentence_5'] = df['Sentence_5'].str.replace('5.', '')

In [None]:
# some sentences do not contain the emotion label in a parenthesis in the end, but they have the word included in the sentence,
# which makes for a bad sentence so we decided to exclude those idioms from our corpus

# regex pattern that captures the parenthesis with a word inside it
pattern = r'^(.*?)(\s+\(.*\))$'

# boolean mask of strings that do not match the pattern
mask = ~df['Sentence_1'].str.contains(pattern)


In [7]:
df.loc[mask]

Unnamed: 0,Idiom,Sentence_1,Sentence_2,Sentence_3,Sentence_4,Sentence_5
5,Dutch oven,"After a long day at work, Sarah loved coming ...",When James brought his Dutch oven to the potl...,The smell of burnt food came from the Dutch o...,Eloise couldn't help but feel a twinge of env...,"""Why are they always using that Dutch oven as..."
7,Evel Knievel,Idiom Bite the bullet,"1. Despite knowing the consequences, he decide...","2. I hated the idea of facing my fear, but it ...",3. She had to bite the bullet and work overtim...,"4. He wasn't thrilled about the situation, but..."
17,I would,Idiom A piece of cake,,"1. Finishing that puzzle was a piece of cake, ...","2. I anticipated more difficulty, but this tas...",3. She thought the test was a piece of cake an...
22,Jane Doe,"Idiom: ""Bite the bullet""",,1. I knew it was going to be a tough conversat...,2. He bit the bullet and completed the maratho...,"3. I didn't know if I could do it, but I just ..."
25,John Hancock,"""His John Hancock on that document left him w...","""Being asked for her John Hancock at the bott...","""Leaving his John Hancock on that letter made...","""Seeing the famous John Hancock on the memora...","""Requesting her John Hancock on the divorce p..."
...,...,...,...,...,...,...
2281,knife-edge,Walking on a knife-edge between his two feudi...,Balancing life and career on a knife-edge bri...,Seeing her talent for walking on the metaphor...,"After hours of hiking on a knife-edge trail, ...",The startup's future hung on the knife-edge o...
2291,know no bounds,Her creativity and imagination know no bounds...,His kindness and generosity towards others kn...,"After receiving her promotion, her sense of P...",The resentment and bitterness he held inside ...,His capacity for charity and forgiveness know...
2304,land on one's feet,"Despite facing numerous hurdles, Jane always ...","After losing his job, Tom was afraid he would...","When Mary dropped her ice cream cone, she sur...",He had a streak of good fortune and always se...,"Starting over in a new city wasn't easy, but ..."
2312,last thing,When Tom learned he didn't have to complete t...,The last thing I expected was a surprise birt...,I was flooded with pleasure when our boss pra...,"As she watched the storm approach, she couldn...","After a long day, forgetting the dinner in th..."


In [8]:
df = df.drop(df.loc[mask].index)
df

Unnamed: 0,Idiom,Sentence_1,Sentence_2,Sentence_3,Sentence_4,Sentence_5
0,American Dream,"Sally finally achieved the American Dream, li...",He felt that the American Dream was slipping ...,She couldn't help but feel a sense of pride w...,They were envious of their neighbor's seeming...,The immigrant family tirelessly pursued the A...
1,Catch-22,Stuck in this bureaucratic mess feels like a ...,I can't believe I'm in this Catch- situation ...,Having to choose between two equally undesira...,Firing someone who is underperforming at thei...,Not having a great credit score to rent an ap...
2,Christmas present,"Every year, my aunt surprises us with unexpec...",The promotion at work felt like a Christmas p...,The way she helped me through my darkest time...,His love and support during my struggles were...,Catching a breathtaking view of the sunset wa...
3,Downing Street,The new employee quickly found his way to Dow...,"When I landed my dream job, I couldn't believ...",She hesitated before entering the company's D...,"To others, he appeared to be on Downing Stree...","They worked so hard, hoping that one day they..."
4,Dutch courage,"After a few shots of Dutch courage, he finall...",Relying on Dutch courage to confront her fear...,He couldn't face his boss without a bit of Du...,She sought Dutch courage before making the sp...,Turning to Dutch courage before every difficu...
...,...,...,...,...,...,...
2316,last-ditch,In a last-ditch effort to save their marriage...,The team's last-ditch attempt to score a goal...,Her last-ditch effort to ask for a raise befo...,The politician made a last-ditch effort to ra...,They tried a last-ditch recipe to salvage the...
2317,latch onto,She quickly latched onto the idea of a weeken...,Peter couldn't shake the feeling of anxiety a...,"Despite her skepticism, Sarah latched onto th...",The young boy latched onto his mother's arm d...,"He latched onto every compliment he received,..."
2318,late bloomer,"Despite being a late bloomer, she eventually ...","He felt slightly anxious, realizing that he w...",She couldn't help but feel envious when she r...,"As a late bloomer, he worked hard in his dete...",The excitement of finally coming into her own...
2319,late model,"I just bought a late model car, and I can't h...",Seeing my neighbor with his new late model sp...,"Despite having a late model computer, I'm fee...",After spending a lot of money on my late mode...,The envy I feel when I see other people with ...


In [9]:
df = df.reset_index(drop = True)
df

Unnamed: 0,Idiom,Sentence_1,Sentence_2,Sentence_3,Sentence_4,Sentence_5
0,American Dream,"Sally finally achieved the American Dream, li...",He felt that the American Dream was slipping ...,She couldn't help but feel a sense of pride w...,They were envious of their neighbor's seeming...,The immigrant family tirelessly pursued the A...
1,Catch-22,Stuck in this bureaucratic mess feels like a ...,I can't believe I'm in this Catch- situation ...,Having to choose between two equally undesira...,Firing someone who is underperforming at thei...,Not having a great credit score to rent an ap...
2,Christmas present,"Every year, my aunt surprises us with unexpec...",The promotion at work felt like a Christmas p...,The way she helped me through my darkest time...,His love and support during my struggles were...,Catching a breathtaking view of the sunset wa...
3,Downing Street,The new employee quickly found his way to Dow...,"When I landed my dream job, I couldn't believ...",She hesitated before entering the company's D...,"To others, he appeared to be on Downing Stree...","They worked so hard, hoping that one day they..."
4,Dutch courage,"After a few shots of Dutch courage, he finall...",Relying on Dutch courage to confront her fear...,He couldn't face his boss without a bit of Du...,She sought Dutch courage before making the sp...,Turning to Dutch courage before every difficu...
...,...,...,...,...,...,...
2025,last-ditch,In a last-ditch effort to save their marriage...,The team's last-ditch attempt to score a goal...,Her last-ditch effort to ask for a raise befo...,The politician made a last-ditch effort to ra...,They tried a last-ditch recipe to salvage the...
2026,latch onto,She quickly latched onto the idea of a weeken...,Peter couldn't shake the feeling of anxiety a...,"Despite her skepticism, Sarah latched onto th...",The young boy latched onto his mother's arm d...,"He latched onto every compliment he received,..."
2027,late bloomer,"Despite being a late bloomer, she eventually ...","He felt slightly anxious, realizing that he w...",She couldn't help but feel envious when she r...,"As a late bloomer, he worked hard in his dete...",The excitement of finally coming into her own...
2028,late model,"I just bought a late model car, and I can't h...",Seeing my neighbor with his new late model sp...,"Despite having a late model computer, I'm fee...",After spending a lot of money on my late mode...,The envy I feel when I see other people with ...


In [10]:
# define a regular expression pattern to extract the two parts of the sentence
pattern = r'^(.*?)(\s+\(.*\))$'

# use str.extract() to extract the two parts into separate columns
df[['Sentence_1', 'Emotion_1']] = df['Sentence_1'].str.extract(pattern)
df[['Sentence_2', 'Emotion_2']] = df['Sentence_2'].str.extract(pattern)
df[['Sentence_3', 'Emotion_3']] = df['Sentence_3'].str.extract(pattern)
df[['Sentence_4', 'Emotion_4']] = df['Sentence_4'].str.extract(pattern)
df[['Sentence_5', 'Emotion_5']] = df['Sentence_5'].str.extract(pattern)

In [11]:
# Check for null values in the dataset (that would mean that there is an irregularity somewhere)
df.isnull().sum().sum()

0

In [12]:
# find the index of the null values and aleviate the problem manually
null_values = np.where(pd.isnull(df))
for i in range(0,len(null_values[0]),4):
    print(null_values[0][i])

In [13]:
# remove the parenthesis from the labels
df['Emotion_1'] = df['Emotion_1'].str.replace(r'\(|\)', '')
df['Emotion_2'] = df['Emotion_2'].str.replace(r'\(|\)', '')
df['Emotion_3'] = df['Emotion_3'].str.replace(r'\(|\)', '')
df['Emotion_4'] = df['Emotion_4'].str.replace(r'\(|\)', '')
df['Emotion_5'] = df['Emotion_5'].str.replace(r'\(|\)', '')


In [14]:
# create the column names
df = df[['Idiom','Sentence_1', 'Emotion_1', 'Sentence_2', 'Emotion_2','Sentence_3', 'Emotion_3','Sentence_4', 'Emotion_4','Sentence_5', 'Emotion_5']]

In [70]:
# random subset of the dataset for Johannes to test idiom detection
df_sample_500 = df.sample(n=100, axis = 0)
df_sample_500

Unnamed: 0,Idiom,Sentence_1,Emotion_1,Sentence_2,Emotion_2,Sentence_3,Emotion_3,Sentence_4,Emotion_4,Sentence_5,Emotion_5
1223,fool with,He fooled with the wrong person and ended up ...,Anger,Playing the piano is not something you can ju...,Frustration,"I warned her not to fool with that guy, but s...",Regret,She spends her free time fooling with puzzles...,Determination,Fooling with new recipes always brings a sens...,Excitement
917,do one,After another failed attempt to fix the compu...,Frustration,"When she discovered her secret, she couldn't ...",Envy,"With a combination of fear and anxiety, he as...",Fear,"When the chef presented his masterpiece, the ...",Admiration,"Despite feeling unsure, she asked her friend ...",Hope
108,all that jazz,"When Ella performs, she brings energy, pizazz...",Excitement,I'm tired of hearing about their luxurious va...,Envy,"His speech went on and on, covering history, ...",Boredom,"Marissa excels in all her hobbies, including ...",Admiration,"She dismissed our concerns about her new job,...",Reluctance
1253,forked tongue,"He has a forked tongue, so you never really k...",Doubt,When she discovered her friend had a forked t...,Sadness,Every time he speaks with that forked tongue ...,Anxiety,Her forked tongue made it difficult for peopl...,Loneliness,His forked tongue only added to the chaos and...,Confusion
451,brain surgeon,Becoming a brain surgeon isn't rocket science...,Determination,When he realized his daughter grew up to be a...,Pride,Hearing about the incompetent brain surgeon f...,Disgust,"She didn't mean to brag, but casually mention...",Admiration,Learning about the life-saving procedure perf...,Fascination
...,...,...,...,...,...,...,...,...,...,...,...
74,against all odds,"Despite the numerous challenges, she built he...",Determination,"After escaping the stormy seas, our family fi...",Relief,"No one thought he'd recover, but he emerged f...",Surprise,"Ever since she conquered her fears, she began...",Hope,"When she won the race, defeating the seasoned...",Pride
51,about to,She is about to bite my head off when I bring...,Anger,"After years of hard work, he's about to throw...",Frustration,I can't believe she's about to tie the knot; ...,Surprise,They were about to paint the town red when th...,Excitement,I'm about to pull my hair out dealing with th...,Anxiety
530,bundle of joy,I can't wait to hold my cousin's new little b...,Happiness,Seeing their bundle of joy brings back memori...,Nostalgia,That family's adorable bundle of joy is const...,Admiration,I am so thrilled to hear that they're expecti...,Excitement,It seems like just yesterday when we were pre...,Serenity
347,big talk,"He was all big talk at the party, boasting ab...",Pride,Her big talk annoyed me as it was clear she h...,Frustration,"After all his big talk, it was a pleasant sur...",Surprise,I'm tired of listening to her big talk; it al...,Boredom,His big talk was nothing but a desperate atte...,Desperation


In [288]:
df_sample_500.to_csv('sentence_sample_500.csv')

In [71]:
# Random subset of the dataset for Fivos and Carly to conduct the evaluation on
df_eval_100 = df.sample(n=100, axis = 0)
df_eval_100

Unnamed: 0,Idiom,Sentence_1,Emotion_1,Sentence_2,Emotion_2,Sentence_3,Emotion_3,Sentence_4,Emotion_4,Sentence_5,Emotion_5
1123,far cry,This new apartment is a far cry from his old ...,Pleasure,The outcome of the project was a far cry from...,Frustration,His singing skills are a far cry from what th...,Admiration,The service at this restaurant is a far cry f...,Disgust,The weather on our vacation turned out to be ...,Disappointment
297,behind the eight-ball,Feeling trapped behind the eight-ball all wee...,Desperation,Finding himself behind the eight-ball in his ...,Doubt,Being constantly behind the eight-ball seems ...,Frustration,My manager left me behind the eight-ball with...,Anxiety,When she told a joke about being behind the e...,Happiness
641,change one's mind,"After listening to the impressive speech, he ...",Admiration,"Despite the tormenting thoughts, she attempte...",Anxiety,"When he saw the vast beauty of the place, he ...",Fascination,She suddenly changed her mind after learning ...,Fear,He later changed his mind after realizing he ...,Guilt
702,close ranks,"When the team faced external criticism, they ...",Determination,The family closed ranks to protect their secr...,Anxiety,The group of friends closed ranks when one me...,Admiration,She was surprised when her coworkers closed r...,Gratitude,"Despite their arguments, the siblings closed ...",Relief
620,catch a cold,I caught a cold after drenched in the heavy r...,Frustration,My sister always catches a cold during the wi...,Pity,"Every time I prepare for an important event, ...",Anxiety,"If I don't wear warmer clothes, I might catch...",Fear,"She caught a cold before her vacation, but ma...",Relief
...,...,...,...,...,...,...,...,...,...,...,...
998,drop anchor,We decided to drop anchor at this peaceful co...,Serenity,I can't believe they're dropping anchor right...,Frustration,"No matter where we travel, it always feels go...",Happiness,Their constant chatter made me wish I could j...,Longing,Dropping anchor in the uncharted waters was b...,Excitement
595,can't seem,"She can't seem to let go of the past, making ...",Regret,"I can't seem to nail this presentation, and i...",Anxiety,"He can't seem to escape his bad luck, leaving...",Desperation,"They can't seem to stay away from each other,...",Lust,"No matter how hard I try, I can't seem to bre...",Frustration
381,blank slate,Starting over with a blank slate brought her ...,Relief,He felt an unexpected joy as he began his new...,Happiness,The idea of having a blank slate in their rel...,Hope,Being given a blank slate at work after all t...,Shock,The endless possibilities of having a blank s...,Excitement
885,deliver the goods,"When she finally delivered the goods, everyon...",Relief,"I must say, your cooking has truly delivered ...",Admiration,"After months of anticipation, the band's new ...",Excitement,I've been working hard to prove myself to my ...,Pride,The look on her face when she delivered the g...,Happiness


In [85]:
df_eval_100 = df_eval_100.reset_index()
df_eval_100

Unnamed: 0,index,Idiom,Sentence_1,Emotion_1,Sentence_2,Emotion_2,Sentence_3,Emotion_3,Sentence_4,Emotion_4,Sentence_5,Emotion_5
0,1123,far cry,This new apartment is a far cry from his old ...,Pleasure,The outcome of the project was a far cry from...,Frustration,His singing skills are a far cry from what th...,Admiration,The service at this restaurant is a far cry f...,Disgust,The weather on our vacation turned out to be ...,Disappointment
1,297,behind the eight-ball,Feeling trapped behind the eight-ball all wee...,Desperation,Finding himself behind the eight-ball in his ...,Doubt,Being constantly behind the eight-ball seems ...,Frustration,My manager left me behind the eight-ball with...,Anxiety,When she told a joke about being behind the e...,Happiness
2,641,change one's mind,"After listening to the impressive speech, he ...",Admiration,"Despite the tormenting thoughts, she attempte...",Anxiety,"When he saw the vast beauty of the place, he ...",Fascination,She suddenly changed her mind after learning ...,Fear,He later changed his mind after realizing he ...,Guilt
3,702,close ranks,"When the team faced external criticism, they ...",Determination,The family closed ranks to protect their secr...,Anxiety,The group of friends closed ranks when one me...,Admiration,She was surprised when her coworkers closed r...,Gratitude,"Despite their arguments, the siblings closed ...",Relief
4,620,catch a cold,I caught a cold after drenched in the heavy r...,Frustration,My sister always catches a cold during the wi...,Pity,"Every time I prepare for an important event, ...",Anxiety,"If I don't wear warmer clothes, I might catch...",Fear,"She caught a cold before her vacation, but ma...",Relief
...,...,...,...,...,...,...,...,...,...,...,...,...
95,998,drop anchor,We decided to drop anchor at this peaceful co...,Serenity,I can't believe they're dropping anchor right...,Frustration,"No matter where we travel, it always feels go...",Happiness,Their constant chatter made me wish I could j...,Longing,Dropping anchor in the uncharted waters was b...,Excitement
96,595,can't seem,"She can't seem to let go of the past, making ...",Regret,"I can't seem to nail this presentation, and i...",Anxiety,"He can't seem to escape his bad luck, leaving...",Desperation,"They can't seem to stay away from each other,...",Lust,"No matter how hard I try, I can't seem to bre...",Frustration
97,381,blank slate,Starting over with a blank slate brought her ...,Relief,He felt an unexpected joy as he began his new...,Happiness,The idea of having a blank slate in their rel...,Hope,Being given a blank slate at work after all t...,Shock,The endless possibilities of having a blank s...,Excitement
98,885,deliver the goods,"When she finally delivered the goods, everyon...",Relief,"I must say, your cooking has truly delivered ...",Admiration,"After months of anticipation, the band's new ...",Excitement,I've been working hard to prove myself to my ...,Pride,The look on her face when she delivered the g...,Happiness


In [106]:
# Pick out one out of the five sentences for each idiom at random
sent_emot_list = []
for i in range(len(df_eval_100)): 
    
    random_sent_index = random.randint(1, 5)
    
    random_sent_label = f"Sentence_{random_sent_index}"
    emotion_label = f"Emotion_{random_sent_index}"
    
    random_sent_value = df_eval_100.loc[i, random_sent_label]
    emotion_value = df_eval_100.loc[i, emotion_label]
    
    sent_emot_list.append([random_sent_value, emotion_value])

sent_emot_list

[[' The service at this restaurant is a far cry from that of the one we visited last month.',
  ' Disgust'],
 [" Feeling trapped behind the eight-ball all week, she couldn't help but experience a heavy sense of desperation.",
  ' Desperation'],
 [' He later changed his mind after realizing he had been too harsh on his friend.',
  ' Guilt'],
 [' When the team faced external criticism, they decided to close ranks and support each other.',
  ' Determination'],
 [' I caught a cold after drenched in the heavy rain last week.',
  ' Frustration'],
 [' By the time he finally admitted his mistake, it was too little too late.',
  ' Regret'],
 [' She farmed out the childcare to a nanny and felt a sense of serenity, knowing her child was in good hands.',
  ' Serenity'],
 [' Her heart raced with anticipation as she stepped in the elevator, ready to make the pitch of her dreams.',
  ' Excitement'],
 [' She was already feeling homesick, and the weather turning bad during her trip seemed to add insult

In [107]:
df_eval_random = pd.DataFrame(data = sent_emot_list,columns = ['Sentence', 'Emotion'])
df_eval_random

Unnamed: 0,Sentence,Emotion
0,The service at this restaurant is a far cry f...,Disgust
1,Feeling trapped behind the eight-ball all wee...,Desperation
2,He later changed his mind after realizing he ...,Guilt
3,"When the team faced external criticism, they ...",Determination
4,I caught a cold after drenched in the heavy r...,Frustration
...,...,...
95,Dropping anchor in the uncharted waters was b...,Excitement
96,"No matter how hard I try, I can't seem to bre...",Frustration
97,The idea of having a blank slate in their rel...,Hope
98,"When she finally delivered the goods, everyon...",Relief


In [114]:
# also keep track of the index in the original dataset 
df_eval_random['index'] = df_eval_100['index']
df_eval_random['Idiom'] = df_eval_100['Idiom']

df_eval_random = df_eval_random[['index','Idiom','Sentence','Emotion']] 

df_eval_random

Unnamed: 0,index,Idiom,Sentence,Emotion
0,1123,far cry,The service at this restaurant is a far cry f...,Disgust
1,297,behind the eight-ball,Feeling trapped behind the eight-ball all wee...,Desperation
2,641,change one's mind,He later changed his mind after realizing he ...,Guilt
3,702,close ranks,"When the team faced external criticism, they ...",Determination
4,620,catch a cold,I caught a cold after drenched in the heavy r...,Frustration
...,...,...,...,...
95,998,drop anchor,Dropping anchor in the uncharted waters was b...,Excitement
96,595,can't seem,"No matter how hard I try, I can't seem to bre...",Frustration
97,381,blank slate,The idea of having a blank slate in their rel...,Hope
98,885,deliver the goods,"When she finally delivered the goods, everyon...",Relief


In [115]:
df_eval_random.to_csv('sentence_eval_100.csv')

In [83]:
df.to_csv('clean_generated_sentences.csv')

In [15]:
# Random subset of 1000 entries from the dataset to conduct the evaluation on
df_eval_1000 = df.sample(n=1000, axis = 0)
df_eval_1000

Unnamed: 0,Idiom,Sentence_1,Emotion_1,Sentence_2,Emotion_2,Sentence_3,Emotion_3,Sentence_4,Emotion_4,Sentence_5,Emotion_5
1954,jot down,"He quickly jotted down her number, feeling a ...",Excitement,"Unable to sleep, she jotted down her thoughts...","Anxiety, Relief","As the professor spoke, he jotted down notes,...",Determination,She hesitated and then jotted down her name o...,Pride,The detective jotted down clues with fascinat...,Fascination
1165,find it in one's heart,She couldn't find it in her heart to forgive ...,Resentment,Can you find it in your heart to give this po...,Pity,He found it in his heart to set aside his dif...,Gratitude,Julie struggled to find it in her heart to tr...,Doubt,"Despite the pain, she found it in her heart t...",Hope
540,business as usual,"Despite the storm outside, the employees cont...",Determination,"The CEO resigned, but the company kept runnin...",Relief,"Even after winning the lottery, Jane returned...",Anxiety,James thought his promotion would change thin...,Disappointment,"After months of hard work, the team was thril...",Pride
703,close to home,Her comment about my weight cut close to home...,Humiliation,When the speaker discussed the struggles of s...,Gratitude,His story about overcoming addiction hit clos...,Determination,The movie scene about long-distance relations...,Loneliness,The news about the factory closure was close ...,Anxiety
1256,fourth wall,"When she suddenly acknowledged my presence, i...",Surprise,As he shattered the fourth wall in his speech...,Admiration,The moment the actor crashed through the four...,Fascination,Whenever the main character in the novel brok...,Excitement,The way she constantly breaks the fourth wall...,Hope
...,...,...,...,...,...,...,...,...,...,...,...
1083,fact is,"Facing the music, she admitted to her mistake...",Relief,John faces the music every time he's late for...,Humiliation,She had to face the music after her team lost...,Envy,The politician had to face the music when his...,Disgust,He faced the music after his friends discover...,Regret
963,down the toilet,My hard work on the project went down the toi...,Frustration,Everything he's done for her always seems to ...,Pity,I can't believe our vacation went down the to...,Disappointment,"When they made that terrible decision, their ...",Regret,She watched her dreams of becoming a professi...,Sadness
718,come again,"I didn't quite understand your explanation, s...",Confusion,Your repeated success never ceases to amaze m...,Admiration,"Every time I think I've heard it all, you man...",Excitement,"Since your last visit brought us so much joy,...",Affection,I must have misheard your unbelievable statem...,Doubt
1419,go all out,They decided to go all out for their annivers...,Excitement,"He went all out in studying, determined not t...",Determination,I can't believe you went all out just to make...,Gratitude,The team went all out and still ended up losi...,Frustration,"Sally went all out on her presentation, but t...",Disappointment


In [16]:
df_eval_1000 = df_eval_1000.reset_index()
df_eval_1000

Unnamed: 0,index,Idiom,Sentence_1,Emotion_1,Sentence_2,Emotion_2,Sentence_3,Emotion_3,Sentence_4,Emotion_4,Sentence_5,Emotion_5
0,1954,jot down,"He quickly jotted down her number, feeling a ...",Excitement,"Unable to sleep, she jotted down her thoughts...","Anxiety, Relief","As the professor spoke, he jotted down notes,...",Determination,She hesitated and then jotted down her name o...,Pride,The detective jotted down clues with fascinat...,Fascination
1,1165,find it in one's heart,She couldn't find it in her heart to forgive ...,Resentment,Can you find it in your heart to give this po...,Pity,He found it in his heart to set aside his dif...,Gratitude,Julie struggled to find it in her heart to tr...,Doubt,"Despite the pain, she found it in her heart t...",Hope
2,540,business as usual,"Despite the storm outside, the employees cont...",Determination,"The CEO resigned, but the company kept runnin...",Relief,"Even after winning the lottery, Jane returned...",Anxiety,James thought his promotion would change thin...,Disappointment,"After months of hard work, the team was thril...",Pride
3,703,close to home,Her comment about my weight cut close to home...,Humiliation,When the speaker discussed the struggles of s...,Gratitude,His story about overcoming addiction hit clos...,Determination,The movie scene about long-distance relations...,Loneliness,The news about the factory closure was close ...,Anxiety
4,1256,fourth wall,"When she suddenly acknowledged my presence, i...",Surprise,As he shattered the fourth wall in his speech...,Admiration,The moment the actor crashed through the four...,Fascination,Whenever the main character in the novel brok...,Excitement,The way she constantly breaks the fourth wall...,Hope
...,...,...,...,...,...,...,...,...,...,...,...,...
995,1083,fact is,"Facing the music, she admitted to her mistake...",Relief,John faces the music every time he's late for...,Humiliation,She had to face the music after her team lost...,Envy,The politician had to face the music when his...,Disgust,He faced the music after his friends discover...,Regret
996,963,down the toilet,My hard work on the project went down the toi...,Frustration,Everything he's done for her always seems to ...,Pity,I can't believe our vacation went down the to...,Disappointment,"When they made that terrible decision, their ...",Regret,She watched her dreams of becoming a professi...,Sadness
997,718,come again,"I didn't quite understand your explanation, s...",Confusion,Your repeated success never ceases to amaze m...,Admiration,"Every time I think I've heard it all, you man...",Excitement,"Since your last visit brought us so much joy,...",Affection,I must have misheard your unbelievable statem...,Doubt
998,1419,go all out,They decided to go all out for their annivers...,Excitement,"He went all out in studying, determined not t...",Determination,I can't believe you went all out just to make...,Gratitude,The team went all out and still ended up losi...,Frustration,"Sally went all out on her presentation, but t...",Disappointment


In [17]:
# Pick out one out of the five sentences for each idiom at random
sent_emot_list = []
for i in range(len(df_eval_1000)): 
    
    random_sent_index = random.randint(1, 5)
    
    random_sent_label = f"Sentence_{random_sent_index}"
    emotion_label = f"Emotion_{random_sent_index}"
    
    random_sent_value = df_eval_1000.loc[i, random_sent_label]
    emotion_value = df_eval_1000.loc[i, emotion_label]
    
    sent_emot_list.append([random_sent_value, emotion_value])

sent_emot_list

[[' The detective jotted down clues with fascination, getting closer to solving the mystery.',
  ' Fascination'],
 [' Can you find it in your heart to give this poor, helpless animal a home?',
  ' Pity'],
 [' Even after winning the lottery, Jane returned to her job, keeping business as usual to avoid attention.',
  ' Anxiety'],
 [' When the speaker discussed the struggles of single parents, it struck close to home, filling me with gratitude for my own upbringing.',
  ' Gratitude'],
 [" Whenever the main character in the novel broke the fourth wall, I couldn't help but feel a mix of excitement and anticipation for what would come next.",
  ' Excitement'],
 [' When he finished his incredible performance, I was at a loss for words.',
  ' Admiration'],
 [" Can you listen to me, for goodness' sake, and stop interrupting?",
  ' Anger'],
 [" They arrived at the conclusion that he wasn't the most suitable candidate.",
  ' Disappointment'],
 [' Losing all the marbles in that game filled her hea

In [18]:
df_eval_random_1000 = pd.DataFrame(data = sent_emot_list,columns = ['Sentence', 'Emotion'])
df_eval_random_1000

Unnamed: 0,Sentence,Emotion
0,The detective jotted down clues with fascinat...,Fascination
1,Can you find it in your heart to give this po...,Pity
2,"Even after winning the lottery, Jane returned...",Anxiety
3,When the speaker discussed the struggles of s...,Gratitude
4,Whenever the main character in the novel brok...,Excitement
...,...,...
995,She had to face the music after her team lost...,Envy
996,My hard work on the project went down the toi...,Frustration
997,"Every time I think I've heard it all, you man...",Excitement
998,"Sally went all out on her presentation, but t...",Disappointment


In [21]:
# also keep track of the index in the original dataset 
df_eval_random_1000['index'] = df_eval_1000['index']
df_eval_random_1000['Idiom'] = df_eval_1000['Idiom']

df_eval_random_1000 = df_eval_random_1000[['index','Idiom','Sentence','Emotion']] 

df_eval_random_1000

Unnamed: 0,index,Idiom,Sentence,Emotion
0,1954,jot down,The detective jotted down clues with fascinat...,Fascination
1,1165,find it in one's heart,Can you find it in your heart to give this po...,Pity
2,540,business as usual,"Even after winning the lottery, Jane returned...",Anxiety
3,703,close to home,When the speaker discussed the struggles of s...,Gratitude
4,1256,fourth wall,Whenever the main character in the novel brok...,Excitement
...,...,...,...,...
995,1083,fact is,She had to face the music after her team lost...,Envy
996,963,down the toilet,My hard work on the project went down the toi...,Frustration
997,718,come again,"Every time I think I've heard it all, you man...",Excitement
998,1419,go all out,"Sally went all out on her presentation, but t...",Disappointment


In [22]:
df_eval_random_1000.to_csv('sentence_eval_1000.csv')

Following the results of the evaluation, which can be witnessed in the notebook 'Evaluation', we further preprocess the dataset.

In [56]:
df_clean = pd.read_csv('clean_generated_sentences.csv',index_col = 0,encoding='latin-1')
df_clean

Unnamed: 0,Idiom,Sentence_1,Emotion_1,Sentence_2,Emotion_2,Sentence_3,Emotion_3,Sentence_4,Emotion_4,Sentence_5,Emotion_5
0,American Dream,"Sally finally achieved the American Dream, li...",Happiness,He felt that the American Dream was slipping ...,Frustration,She couldn't help but feel a sense of pride w...,Pride,They were envious of their neighbor's seeming...,Envy,The immigrant family tirelessly pursued the A...,Hope
1,Catch-22,Stuck in this bureaucratic mess feels like a ...,Anxiety,I can't believe I'm in this Catch- situation ...,Frustration,Having to choose between two equally undesira...,Desperation,Firing someone who is underperforming at thei...,Confusion,Not having a great credit score to rent an ap...,Anger
2,Christmas present,"Every year, my aunt surprises us with unexpec...",Surprise,The promotion at work felt like a Christmas p...,Happiness,The way she helped me through my darkest time...,Gratitude,His love and support during my struggles were...,Affection,Catching a breathtaking view of the sunset wa...,Pleasure
3,Downing Street,The new employee quickly found his way to Dow...,Admiration,"When I landed my dream job, I couldn't believ...",Excitement,She hesitated before entering the company's D...,Anxiety,"To others, he appeared to be on Downing Stree...",Regret,"They worked so hard, hoping that one day they...",Determination
4,Dutch courage,"After a few shots of Dutch courage, he finall...",Excitement,Relying on Dutch courage to confront her fear...,Shame,He couldn't face his boss without a bit of Du...,Anxiety,She sought Dutch courage before making the sp...,Relief,Turning to Dutch courage before every difficu...,Desperation
...,...,...,...,...,...,...,...,...,...,...,...
2025,last-ditch,In a last-ditch effort to save their marriage...,Hope,The team's last-ditch attempt to score a goal...,Frustration,Her last-ditch effort to ask for a raise befo...,Relief,The politician made a last-ditch effort to ra...,Determination,They tried a last-ditch recipe to salvage the...,Desperation
2026,latch onto,She quickly latched onto the idea of a weeken...,Excitement,Peter couldn't shake the feeling of anxiety a...,Anxiety,"Despite her skepticism, Sarah latched onto th...",Hope,The young boy latched onto his mother's arm d...,Fear,"He latched onto every compliment he received,...",Pride
2027,late bloomer,"Despite being a late bloomer, she eventually ...",Pride,"He felt slightly anxious, realizing that he w...",Anxiety,She couldn't help but feel envious when she r...,Envy,"As a late bloomer, he worked hard in his dete...",Determination,The excitement of finally coming into her own...,Excitement
2028,late model,"I just bought a late model car, and I can't h...",Happiness,Seeing my neighbor with his new late model sp...,Longing,"Despite having a late model computer, I'm fee...",Frustration,After spending a lot of money on my late mode...,Shock,The envy I feel when I see other people with ...,Envy


In [57]:
df_1 = df_clean[['Idiom', 'Sentence_1','Emotion_1']]
df_1 = df.rename(columns={'Sentence_1': 'Sentence', 'Emotion_1': 'Emotion'}) 

df_2 = df_clean[['Idiom', 'Sentence_2','Emotion_2']]
df_2 = df.rename(columns={'Sentence_2': 'Sentence', 'Emotion_2': 'Emotion'}) 

df_3 = df_clean[['Idiom', 'Sentence_3','Emotion_3']]
df_3 = df.rename(columns={'Sentence_3': 'Sentence', 'Emotion_3': 'Emotion'}) 

df_4 = df_clean[['Idiom', 'Sentence_4','Emotion_4']]
df_4 = df.rename(columns={'Sentence_4': 'Sentence', 'Emotion_4': 'Emotion'}) 

df_5 = df_clean[['Idiom', 'Sentence_5','Emotion_5']]
df_5 = df.rename(columns={'Sentence_5': 'Sentence', 'Emotion_5': 'Emotion'}) 

In [70]:
# Reshaping the dataset from 11 columns to 3 columns
df_all = pd.concat([df_1,df_2,df_3,df_4,df_5])
df_all = df_all.drop(['Sentence_1','Emotion_1','Sentence_2','Emotion_2','Sentence_3','Emotion_3','Sentence_3','Emotion_3','Sentence_4','Emotion_4','Sentence_5','Emotion_5'],axis=1)
df_all = df_all.sort_values('Idiom')
df_all = df_all.reset_index(drop = True)
df_all

Unnamed: 0,Idiom,Sentence,Emotion
0,American Dream,"Sally finally achieved the American Dream, li...",Happiness
1,American Dream,The immigrant family tirelessly pursued the A...,Hope
2,American Dream,He felt that the American Dream was slipping ...,Frustration
3,American Dream,She couldn't help but feel a sense of pride w...,Pride
4,American Dream,They were envious of their neighbor's seeming...,Envy
...,...,...,...
10145,laughing stock,"As a politician, it's crucial to avoid becomi...",Fear
10146,laughing stock,He couldn't believe he'd become the town's la...,Sadness
10147,laughing stock,They made me the laughing stock of the whole ...,Humiliation
10148,laughing stock,The new employee's constant mistakes turned h...,Frustration


In [71]:
# the 36 defined target emotions
emotion_list = [
    'Anger', 'Resentment', 'Frustration', 'Hate', 'Disgust', 'Boredom',
    'Reluctance', 'Sadness', 'Pity', 'Loneliness', 'Humiliation', 'Longing',
    'Envy', 'Guilt', 'Regret', 'Shame', 'Fear', 'Anxiety', 'Doubt',
    'Desperation', 'Confusion', 'Shock', 'Pleasure', 'Serenity', 'Relief',
    'Happiness', 'Lust', 'Affection', 'Gratitude', 'Admiration', 'Pride',
    'Determination', 'Fascination', 'Surprise', 'Excitement', 'Hope'
]

In [72]:
df_all['Emotion'] = df_all['Emotion'].str.strip()

In [73]:
# Observe the emotions that are not included in the emotion typology list but are found in our dataset
df_all[~df_all['Emotion'].isin(emotion_list)]

Unnamed: 0,Idiom,Sentence,Emotion
66,I take it,I take it you didn't see the movie since you'...,Curiosity
68,I take it,"They didn't turn up for the event, so I take ...",Indifference
69,I take it,"Since he left without a word, I take it he di...",Disappointment
138,Monopoly money,He threw the fake bills around like they were...,Disregard
170,Russian roulette,Whenever they attempted to fix the old wiring...,Dread
...,...,...,...
10057,laced-up,When he showed up to the casual event all lac...,Amusement
10110,last thing one needs,"After a long day at work, noisy neighbors are...",Annoyance
10112,last thing one needs,Realizing you left your wallet at home after ...,Irritation
10114,last thing one needs,Hearing your favorite musician is coming to t...,Disappointment


In [74]:
# Remove said emotions
df_all = df_all[df_all['Emotion'].isin(emotion_list)]
df_all = df_all.reset_index(drop = True)
df_all

Unnamed: 0,Idiom,Sentence,Emotion
0,American Dream,"Sally finally achieved the American Dream, li...",Happiness
1,American Dream,The immigrant family tirelessly pursued the A...,Hope
2,American Dream,He felt that the American Dream was slipping ...,Frustration
3,American Dream,She couldn't help but feel a sense of pride w...,Pride
4,American Dream,They were envious of their neighbor's seeming...,Envy
...,...,...,...
9638,laughing stock,"As a politician, it's crucial to avoid becomi...",Fear
9639,laughing stock,He couldn't believe he'd become the town's la...,Sadness
9640,laughing stock,They made me the laughing stock of the whole ...,Humiliation
9641,laughing stock,The new employee's constant mistakes turned h...,Frustration


In [75]:
# Get the index for the entries that include the emotion label word to word in the sentence
index_list = []
for i in range(len(df_all)):
    for emotion in emotion_list:
        if emotion.lower() in df_all['Sentence'][i].lower():
            index_list.append(i)

In [76]:
# Remove said entries from the dataframe
df_all = df_all.drop(index_list)
df_all = df_all.reset_index(drop = True)
df_all

Unnamed: 0,Idiom,Sentence,Emotion
0,American Dream,"Sally finally achieved the American Dream, li...",Happiness
1,American Dream,The immigrant family tirelessly pursued the A...,Hope
2,American Dream,He felt that the American Dream was slipping ...,Frustration
3,American Dream,They were envious of their neighbor's seeming...,Envy
4,Catch-22,I can't believe I'm in this Catch- situation ...,Frustration
...,...,...,...
7357,laughing stock,"As a politician, it's crucial to avoid becomi...",Fear
7358,laughing stock,He couldn't believe he'd become the town's la...,Sadness
7359,laughing stock,They made me the laughing stock of the whole ...,Humiliation
7360,laughing stock,The new employee's constant mistakes turned h...,Frustration


In [77]:
df_all.to_csv('gpt4_generated_sentences_clean.csv')