# llm_feedback_rankings.py
import os
import csv
import time
import openai
import argparse
import pandas as pd
from tqdm import tqdm
from constants import RANKINGS_PROMPT
# Command-line options. They are parsed at import time and the resulting
# `args` namespace is read as a module-level global by main().
parser = argparse.ArgumentParser(
    description='Collect pairwise response rankings from an OpenAI chat model.')
parser.add_argument(
    '--gpt_version',
    choices=['gpt-3.5-turbo', 'gpt-4'],
    default='gpt-3.5-turbo',
    help='Chat model passed to the OpenAI API (default: %(default)s).')
parser.add_argument(
    '--input_csv',
    type=str,
    default='chatgpt_feedback/without_dolly/test_pairwise_data.csv',
    help='CSV with instruction, input, response1 and response2 columns '
         '(default: %(default)s).')
parser.add_argument(
    '--save_feedback_csv',
    type=str,
    default=None,
    help='CSV file that one ranking row per processed pair is appended to.')
parser.add_argument(
    '--start_index',
    type=int,
    default=0,
    help='Number of leading rows of the input CSV to skip '
         '(default: %(default)s).')
args = parser.parse_args()
# Templates used to render the task shown to the judge model:
#   prompt_input    - instruction followed by an "Input:" section
#   prompt_no_input - the bare instruction
PROMPT_DICT = dict(
    prompt_input="{instruction}\n\nInput:\n{input}",
    prompt_no_input="{instruction}",
)
def get_reward(instruction, input, output_1, output_2):
    """Build the chat messages asking the model to rank two responses.

    Args:
        instruction: Task instruction text.
        input: Optional extra context for the instruction. ``None``, a pandas
            missing value (such as the NaN that ``read_csv`` produces for an
            empty cell) and empty or whitespace-only text all mean "no input".
        output_1: Response substituted for ``{output_1}`` in RANKINGS_PROMPT.
        output_2: Response substituted for ``{output_2}`` in RANKINGS_PROMPT.

    Returns:
        A one-element list ``[{"role": "user", "content": prompt}]``.
    """
    # Comparing str(input) with "" is not enough: a missing CSV cell arrives
    # as NaN, whose string form is "nan", which would be rendered into the
    # prompt as a bogus "Input:\nnan" section.
    has_input = not pd.isna(input) and str(input).strip() != ""
    template = PROMPT_DICT['prompt_input' if has_input else 'prompt_no_input']
    # str.format ignores keyword arguments the template does not reference,
    # so passing `input` to the no-input template is harmless.
    task = template.format(instruction=instruction, input=input)
    prompt = RANKINGS_PROMPT.format(
        instruction=task, output_1=output_1, output_2=output_2)
    return [{"role": "user", "content": prompt}]
def _combine_feedback(feedback_1, feedback_2):
    """Merge the two judge replies into a single verdict.

    `feedback_1` is the reply for the pair in its original order and
    `feedback_2` the reply for the swapped pair, so a consistent preference
    shows up as opposite labels in the two replies.

    Returns '(a)', '(b)' or 'equal', or None when no label combination
    matches.
    """
    first_a, first_b = '(a)' in feedback_1, '(b)' in feedback_1
    second_a, second_b = '(a)' in feedback_2, '(b)' in feedback_2
    if first_a and second_b:
        return '(a)'
    if first_b and second_a:
        return '(b)'
    # The same label in both orderings means the two replies disagree about
    # which response is better.
    if (first_a and second_a) or (first_b and second_b):
        return 'equal'
    return None


def _query_preference(instruction, input_text, output_1, output_2):
    """Send one ranking prompt to the chat model and return its reply text."""
    completion = openai.ChatCompletion.create(
        model=args.gpt_version,
        messages=get_reward(instruction, input_text, output_1, output_2))
    return completion['choices'][0]['message']['content']


def main():
    """Rank every response pair in the input CSV with the chat model.

    Rows before ``args.start_index`` are skipped. Each remaining pair is
    judged twice, once in the given order and once swapped, and the combined
    verdict is printed and appended to ``args.save_feedback_csv`` as
    ``[instruction, input, response1, response2, feedback]``. Pairs whose
    replies contain no usable label are skipped. A row that raises is
    reported and skipped after a 5 second pause.

    Raises:
        KeyError: if the input CSV lacks one of the required columns.
    """
    df = pd.read_csv(args.input_csv)
    df = df.iloc[args.start_index:]
    # Selecting the columns up front makes a schema mismatch fail once,
    # immediately, instead of once per row inside the error handler.
    pairs = df[['instruction', 'input', 'response1', 'response2']]

    if args.save_feedback_csv is None:
        print('Warning: --save_feedback_csv not set; '
              'rankings are printed but not saved.')

    rows = pairs.itertuples(index=False, name=None)
    for j, row in enumerate(tqdm(rows, total=len(pairs))):
        instruction, input_text, output1, output2 = row
        # Pause after every 20 rows to stay clear of API rate limits.
        if j != 0 and j % 20 == 0:
            time.sleep(5)
        try:
            feedback_1 = _query_preference(
                instruction, input_text, output1, output2)
            feedback_2 = _query_preference(
                instruction, input_text, output2, output1)
            feedback = _combine_feedback(feedback_1, feedback_2)
            if feedback is None:
                continue
            print(feedback_1, feedback_2, feedback)
            if args.save_feedback_csv is not None:
                # Re-opened in append mode per row so finished rows survive a
                # crash; newline='' is what the csv module requires.
                with open(args.save_feedback_csv, 'a', newline='',
                          encoding='utf-8') as f:
                    csv.writer(f).writerow(
                        [instruction, input_text, output1, output2, feedback])
        except Exception as exc:
            # Exception, not a bare except, so Ctrl-C and SystemExit still
            # stop the run. The failed row is not retried.
            print('Row {} failed: {!r}'.format(args.start_index + j, exc))
            print('Sleeping...')
            time.sleep(5)


if __name__ == '__main__':
    main()