-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_data.py
37 lines (32 loc) · 1.57 KB
/
generate_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import openai
import dotenv
import os
import json
import tqdm
# Load variables from a local .env file (if any) into the process environment,
# then hand the OpenAI key to the client library. The key is None when the
# OPENAI_API_KEY variable is not set.
dotenv.load_dotenv()
openai.api_key = os.environ.get('OPENAI_API_KEY')
# Template for the user message sent to the chat model. The generation loop
# fills it in with str.replace():
#   %ACTUAL%    -> the true label of the confused example (appears twice)
#   %PREDICTED% -> the label the classifier wrongly assigned
#   %TEXT%      -> the example text used as a style/length reference
# The literal starts and ends with a newline, which is sent as part of the prompt.
# NOTE(review): "lenght" is misspelled twice in the text below; it is runtime
# text that reaches the model, so it is left untouched here.
base_prompt = """
We are retraining a classifier. It is a multi class classifier. The classes are: anger, fear, joy, love, sadness, surprise.
After training we measured that in lot of cases it confuses %ACTUAL% as %PREDICTED%. Please generate some more text of class %ACTUAL%. You
can take inspiration in style and lenght from %TEXT%. Output only text without additional information. It should be one sentence in lenght.
"""
def chat_with_gpt3_5_turbo(prompt):
    """Send ``prompt`` to gpt-3.5-turbo as one user turn and return the reply text.

    The request consists of a fixed system message followed by ``prompt`` as
    the user message. Only the content of the first returned choice is used.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message['content']
# (actual, predicted) label pairs to augment. Each pair selects the
# '<actual>_confused_as_<predicted>' entry of confusion_dict below.
# Defined once here because it does not change between passes.
to_generate = [
    ('fear', 'surprise'),
    ('anger', 'fear'),
    ('sadness', 'fear'),
    ('sadness', 'anger'),
    ('anger', 'sadness'),
    ('joy', 'surprise'),
]

# Explicit UTF-8 so decoding does not depend on the platform's locale encoding.
# Presumably maps '<actual>_confused_as_<predicted>' to a list of example
# texts -- verify against whatever script produces this file.
with open('confusion_dict.json', 'r', encoding='utf-8') as fh:
    confusion_dict = json.load(fh)

# Append mode: records from earlier runs are kept and new ones are added after
# them, one JSON object per line.
with open('./data/emotion_dataset_generated_2.jsonl', 'a', encoding='utf-8') as fh:
    # Three full passes, i.e. three generated sentences per confused example.
    for _ in range(3):
        for actual, predicted in to_generate:
            # A missing key raises KeyError, exactly as before.
            examples = confusion_dict[actual + '_confused_as_' + predicted]
            for text in tqdm.tqdm(examples):
                # %TEXT% is substituted last, so placeholder-like substrings
                # inside the example text are never re-expanded.
                prompt = (
                    base_prompt
                    .replace('%ACTUAL%', actual)
                    .replace('%PREDICTED%', predicted)
                    .replace('%TEXT%', text)
                )
                new_text = chat_with_gpt3_5_turbo(prompt)
                # The new sample is labelled with the true class it was
                # generated for.
                record = {'label': actual, 'text': new_text}
                fh.write(json.dumps(record) + '\n')