# 5. Extra Sentences Generation

In [31]:
import pandas as pd
from tqdm import tqdm
import csv
import os
import openai

In [4]:
# Take the OpenAI API key from the OPENAI_API_KEY environment variable
# (the lookup yields None when the variable is unset).
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [1]:
# Load the slide dataset as left by the clean-up of the preprocessing phase;
# the first csv column is used as the DataFrame index.
slide = pd.read_csv(
    'idioms.csv',
    index_col=0,
)
slide

Unnamed: 0,Idiom
0,American Dream
1,Catch-22
2,Christmas present
3,Downing Street
4,Dutch courage
...,...
4824,your man
4825,yours truly
4826,zero in on
4827,zero-day


In [None]:
# Send every idiom of the slide dataset to gpt-4 together with the instruction
# prompt below and store the replies in a csv file.
# The prompt asks for sentences that use the phrase in its literal
# (non-figurative) sense.

instructions = ' You are good at generating sentences containing phrases and labeling them based on the emotion that they carry. The list of emotion labels is the following: Anger, Resentment, Frustration, Hate, Disgust, Boredom, Reluctance, Sadness, Pity, Loneliness, Humiliation, Longing, Envy, Guilt, Regret, Shame, Fear, Anxiety, Doubt, Desperation, Confusion, Shock, Pleasure, Serenity, Relief, Happiness, Lust, Affection, Gratitude, Admiration, Pride, Determination, Fascination, Surprise, Excitement, Hope. Create 1 sentence per phrase on the list, but only when the phrase can have a literal sense. Label them according to the emotion but exclude the word of the label or any derivative of it from the generated sentence.'
sentence_dict = {}  # idiom -> reply text, kept in memory next to the csv


# NOTE(review): no encoding is passed, so the platform default is used when
# writing -- confirm it matches the encoding used when the file is read back.
with open('gpt4_sentence_generation_literal.csv', 'w', newline='') as csvfile:
    fieldnames = ['Phrase', 'Sentence']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # One chat completion per idiom: the instructions are the system message
    # and the idiom alone is the user message.
    for prompt in tqdm(slide['Idiom'], total=len(slide)):
        completion = openai.ChatCompletion.create(
            model="gpt-4",  # "gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": instructions},
                {"role": "user", "content": prompt}
            ]
        )

        # Newlines inside the reply are replaced by '$' before it is stored.
        pred = completion.choices[0].message.content.replace('\n', '$')
        sentence_dict[prompt] = pred  # key is prompt, value is pred

        writer.writerow({'Phrase': prompt, 'Sentence': pred})
        # Push the row to the file right away: the loop issues one API call
        # per idiom, and rows still sitting in the buffer would be lost if the
        # kernel were killed part-way through.
        csvfile.flush()



In [None]:
# Send every idiom of new_slide_3 to gpt-4 together with the instruction
# prompt below and store the replies in a csv file.
# The prompt asks for one sentence per idiom that uses the negation of the idiom.
# NOTE(review): new_slide_3 is not defined in the cells visible here -- it has
# to exist in the kernel before this cell is run.

instructions = ' You are good at generating sentences containing idioms and labeling them based on the emotion that they carry. The list of emotion labels is the following: Anger, Resentment, Frustration, Hate, Disgust, Boredom, Reluctance, Sadness, Pity, Loneliness, Humiliation, Longing, Envy, Guilt, Regret, Shame, Fear, Anxiety, Doubt, Desperation, Confusion, Shock, Pleasure, Serenity, Relief, Happiness, Lust, Affection, Gratitude, Admiration, Pride, Determination, Fascination, Surprise, Excitement, Hope. Create 1 sentence per idioms from the list, using the negation of the idiom in the sentence. Label them according to the emotion but exclude the word of the label or any derivative of it from the generated sentence.'
sentence_dict = {}  # idiom -> reply text, kept in memory next to the csv


# NOTE(review): no encoding is passed, so the platform default is used when
# writing -- confirm it matches the encoding used when the file is read back.
with open('gpt4_sentence_generation_negated_idioms_1.csv', 'w', newline='') as csvfile:
    fieldnames = ['Phrase', 'Sentence']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # One chat completion per idiom: the instructions are the system message
    # and the idiom alone is the user message.
    for prompt in tqdm(new_slide_3['Idiom'], total=len(new_slide_3)):
        completion = openai.ChatCompletion.create(
            model="gpt-4",  # "gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": instructions},
                {"role": "user", "content": prompt}
            ]
        )

        # Newlines inside the reply are replaced by '$' before it is stored.
        pred = completion.choices[0].message.content.replace('\n', '$')
        sentence_dict[prompt] = pred  # key is prompt, value is pred

        writer.writerow({'Phrase': prompt, 'Sentence': pred})
        # Push the row to the file right away: the loop issues one API call
        # per idiom, and rows still sitting in the buffer would be lost if the
        # kernel were killed part-way through.
        csvfile.flush()


What follows is the preprocessing of the generated datasets

In [73]:
# Load the generated multiple-idiom sentences; the file is decoded as latin-1.
df_multiple_idioms = pd.read_csv(
    'gpt4_sentence_generation_multiple_idioms.csv',
    encoding='latin-1',
)
df_multiple_idioms

Unnamed: 0,Idioms,Sentence,Emotions
0,"""blood,sweat and tears"", ""American Dream"", ""do...","Despite all the blood, sweat, and tears, he f...","Pride, Gratitude"
1,"""American Dream"", ""grass is greener on the oth...","She was chasing the American dream, hoping th...",Longing
2,"""American Dream"", ""house of cards"", ""wheel of ...",When they saw how their neighbor's American dr...,Anxiety
3,"""American Dream"", ""come up empty', ""once bitte...",After years of pursuing the American dream an...,Determination
4,"""American Dream"", ""slip through one's fingers""","He chased the American dream for years, but su...","Frustration,Disgust"
...,...,...,...
669,,The suspense was killing Laura as she waited ...,Anxiety
670,,The movie was so scary that it sent shivers d...,Fear
671,,Jack felt like a fish out of water at the for...,Discomfort
672,,That Johnny-come-lately thinks he's the cat'...,Envy


In [92]:
# The 36 target emotion labels, in the order used by the generation prompts.
emotion_list = (
    'Anger, Resentment, Frustration, Hate, Disgust, Boredom, '
    'Reluctance, Sadness, Pity, Loneliness, Humiliation, Longing, '
    'Envy, Guilt, Regret, Shame, Fear, Anxiety, Doubt, '
    'Desperation, Confusion, Shock, Pleasure, Serenity, Relief, '
    'Happiness, Lust, Affection, Gratitude, Admiration, Pride, '
    'Determination, Fascination, Surprise, Excitement, Hope'
).split(', ')


In [76]:
# Keep the rows whose 'Emotions' value is not exactly one of the target labels.
has_target_label = df_multiple_idioms['Emotions'].isin(emotion_list)
df_multiple_idioms_dirty = df_multiple_idioms.loc[~has_target_label]

In [59]:
# Replace every literal 'Sentence:' marker in the generated text with a single space.
# NOTE(review): df_literal is not created in the cells visible here -- it has
# to exist in the kernel before this cell is run.
raw_sentences = df_literal['Sentence']
df_literal['Sentence'] = raw_sentences.str.replace('Sentence:', ' ', regex=False)
df_literal

Unnamed: 0,Phrase,Sentence
0,American Dream,"Hope: Chasing the American Dream, he worked ti..."
1,Catch-22,"I can't seem to escape this absurd catch-22,..."
2,Christmas present,I can't wait to give you your Christmas presen...
3,Downing Street,"Excitement: ""The moment I turned onto Downing ..."
4,Dutch courage,Taking a shot of Dutch courage before the pres...
...,...,...
2254,jump rope,The children laughed and cheered as they enj...
2255,jump ship,"Seeing the business struggling, several empl..."
2256,jump the gun,I didn't even have a chance to prepare befor...
2257,jump the queue,She was so frustrated when someone tried to ju...


In [60]:
# Save the literal-sense sentences to disk, index column included.
df_literal.to_csv('df_literal.csv', index=True)

In [118]:
# Load the negated-idiom sentences (decoded as latin-1) and discard the
# 'Unnamed: 3' column.
df_negated = pd.read_csv(
    'gpt4_sentence_generation_negated_idioms.csv',
    encoding='latin-1',
).drop(columns=['Unnamed: 3'])
df_negated

Unnamed: 0,Phrase,Sentence,Emotion
0,American Dream,"They say it's the land of opportunity, but fo...",Desperation
1,Catch-22,"Even after trying every possible solution, he ...",Frustration
2,Christmas present,"Even though he couldn't afford it, he decided ...",Gratitude
3,Downing Street,Even if the new policy comes from Downing Stre...,Determination
4,Dutch courage,"Despite her strong appearance, she needed more...",Anxiety
...,...,...,...
2172,in the works,Even though they told me everything was not in...,Hope
2173,in the world,"Despite all the obstacles, she wouldn't trade...",Gratitude
2174,in the worst way,"In the worst way, he didn't want to admit how ...",Longing
2175,in the wrong place at the wrong time,"Although he tried his best, he couldn't shake ...",Anxiety


In [119]:
# Keep only the rows labelled with exactly one of the target emotions and
# renumber them from zero.
label_is_valid = df_negated['Emotion'].isin(emotion_list)
df_negated_clean = df_negated.loc[label_is_valid].reset_index(drop=True)
df_negated_clean

Unnamed: 0,Phrase,Sentence,Emotion
0,American Dream,"They say it's the land of opportunity, but fo...",Desperation
1,Catch-22,"Even after trying every possible solution, he ...",Frustration
2,Christmas present,"Even though he couldn't afford it, he decided ...",Gratitude
3,Downing Street,Even if the new policy comes from Downing Stre...,Determination
4,Dutch courage,"Despite her strong appearance, she needed more...",Anxiety
...,...,...,...
1948,in the works,Even though they told me everything was not in...,Hope
1949,in the world,"Despite all the obstacles, she wouldn't trade...",Gratitude
1950,in the worst way,"In the worst way, he didn't want to admit how ...",Longing
1951,in the wrong place at the wrong time,"Although he tried his best, he couldn't shake ...",Anxiety


In [120]:
# Collect the index labels of the sentences that contain any target emotion
# word (case-insensitive substring match), so that those rows can be dropped.
# NOTE(review): substring matching also flags unrelated words, e.g. 'anger'
# inside 'danger' -- confirm this is acceptable for the dataset.
lowered_emotions = [emotion.lower() for emotion in emotion_list]
index_list = []
for i, sentence in df_negated_clean['Sentence'].items():
    # A missing sentence (NaN) is not a string and cannot mention an emotion.
    if not isinstance(sentence, str):
        continue
    lowered_sentence = sentence.lower()
    # any() stops at the first hit, so each row is recorded at most once even
    # when several emotion words occur in the same sentence.
    if any(emotion in lowered_sentence for emotion in lowered_emotions):
        index_list.append(i)

In [121]:
# Remove the rows listed in index_list and renumber the remainder from zero.
# NOTE: index_list holds labels of the frame as it was before this drop, so
# this cell should not be re-run on its own against the renumbered frame.
df_negated_clean = (
    df_negated_clean
    .drop(index=index_list)
    .reset_index(drop=True)
)
df_negated_clean

Unnamed: 0,Phrase,Sentence,Emotion
0,American Dream,"They say it's the land of opportunity, but fo...",Desperation
1,Catch-22,"Even after trying every possible solution, he ...",Frustration
2,Christmas present,"Even though he couldn't afford it, he decided ...",Gratitude
3,Downing Street,Even if the new policy comes from Downing Stre...,Determination
4,Dutch courage,"Despite her strong appearance, she needed more...",Anxiety
...,...,...,...
1752,in the wind,Even though she thought she had everything und...,Frustration
1753,in the world,"Despite all the obstacles, she wouldn't trade...",Gratitude
1754,in the worst way,"In the worst way, he didn't want to admit how ...",Longing
1755,in the wrong place at the wrong time,"Although he tried his best, he couldn't shake ...",Anxiety


In [124]:
# Save the cleaned negated-idiom sentences to disk, index column included.
df_negated_clean.to_csv('gpt4_generated_sentences_negated_clean.csv', index=True)