# 답변 LLM 평가

In [1]:
import os
from openai import OpenAI
import pandas as pd
import getpass
import time

### 시크릿 키, 파일 경로 설정

In [2]:
# Excel workbook that holds the answers to be graded.
data_path = "eval_input_3.xlsx"

# Prompt for the secret interactively (input is not echoed), publish it through
# the process environment, then read it back into a module-level name.
os.environ["OPENAI_API_KEY"] = getpass.getpass()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

### OpenAI 셋업

In [3]:
# OpenAI client used for every grading request, authenticated with OPENAI_API_KEY.
client = OpenAI(api_key=OPENAI_API_KEY)

# System prompt for the grader model: the grading rules followed by three
# few-shot examples. The field names in the rules are spelled exactly as in the
# examples ("correct_answer" / "llm_answer") so the model is never asked about
# a field that does not appear in the input.
instruct = """
You are an AI assistant EVALUATING the answer of Large Language Model.
Based on "correct_answer", determine whether the "llm_answer" is correct.
If the "llm_answer" is included in "correct_answer", determine as correct answer.
If the "llm_answer" is right then return O and if the "llm_answer" is wrong then return X
Answer ONLY WITH O, X

Q1: correct_answer = 이 약은 급ㆍ만성기관지염, 기관지천식, 후두염, 부비동염, 낭성섬유증의 질환에서 객담배출곤란에 사용합니다.
llm_answer = 뮤테론캡슐200밀리그램(아세틸시스테인)의 효능은 급·만성기관지염, 기관지천식, 후두염, 부비동염, 낭성섬유증의 객담배출곤란에 사용합니다.
A1: O

Q2: correct_answer = 이 약은 소화불량, 식욕감퇴(식욕부진), 과식, 체함, 소화촉진, 소화불량으로 인한 위부팽만감에 사용합니다.
llm_answer = 제공된 정보에는 "닥터베아제정"에 대한 정보가 없습니다. 따라서 해당 제품의 효능에 대해 알 수 없습니다.
A2: X

Q3: correct_answer = 빛을 피해 실온에서 보관하십시오. 어린이의 손이 닿지 않는 곳에 보관하십시오.
llm_answer = 어린이부루펜시럽(이부프로펜)은 15도~30도의 실온에서 보관해야 합니다. 직사광선을 피하고 습기가 없는 곳에 보관하는 것이 좋습니다. 어린이의 손이 닿지 않는 곳에 보관해야 합니다.
A3: O
"""

In [4]:
def get_openai_response(text, model="gpt-4-turbo", max_retries=3, retry_delay=2.0):
    """Ask the grader model for a verdict on one answer pair.

    Sends ``instruct`` as the system message and ``text`` as the user message
    through the module-level ``client`` with ``temperature=0``.

    Args:
        text: User message containing the correct answer and the LLM answer.
        model: Chat model name to query.
        max_retries: Maximum number of attempts; at least one is always made.
        retry_delay: Base pause in seconds between attempts; the pause grows
            linearly with the attempt number.

    Returns:
        The model reply with surrounding whitespace removed (an empty string
        if the reply has no content), or the sentinel string
        ``"Failed to get an answer"`` when every attempt raised.
    """
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": instruct},
                    {"role": "user", "content": text},
                ],
                temperature=0,
            )
            bot_resp = response.choices[0].message.content
        except Exception as e:
            # Best-effort boundary: one bad row must not abort a long batch,
            # so report the error and try again instead of raising.
            print(f"An error occurred: {e}")
            if attempt < attempts:
                time.sleep(retry_delay * attempt)
            continue
        # Normalise so callers can compare the verdict directly against "O"/"X";
        # the API may return None content or padded text.
        return (bot_resp or "").strip()
    return "Failed to get an answer"

### 데이터 셋업

In [5]:
# Load the evaluation sheet and remember how many rows it has.
data_list = pd.read_excel(data_path)
list_len = len(data_list.index)

# Pull both answer columns out as plain lists, replacing missing cells with the
# placeholder string 'none'.
correct_answer_list, llm_answer_list = (
    list(data_list[column_name].fillna(value='none'))
    for column_name in ('correct_answer', 'llm_answer')
)

### API 호출 및 결과 저장

In [6]:
result_list = []

# Walk both answer columns in lockstep. Rows with a missing reference answer or
# a missing model answer get a marker instead of an API call; the row index is
# only used for the progress printout.
for row_idx, (expected, predicted) in enumerate(zip(correct_answer_list, llm_answer_list)):
    if expected == 'none':
        result_list.append('crt_none')
    elif predicted == 'none':
        result_list.append('llm_none')
    else:
        # Same message layout as before, including the four-space indentation.
        text = f"\n    correct_answer = {expected}\n    llm_answer = {predicted}\n    "
        verdict = get_openai_response(text)
        print(f"{row_idx}:{verdict}")
        result_list.append(verdict)

0:O
1:O
3:X
6:O
7:O
8:O
10:O
11:O
12:O
13:O
14:O
15:O
16:O
17:O
18:O
19:O
20:O
21:O
22:O
24:X
25:O
26:O
27:O
28:O
29:X
31:X
32:O
34:O
35:O
36:O
38:X
39:O
40:O
41:O
42:O
43:O
44:O
45:O
46:X
47:X
48:O
49:O
50:O
52:O
53:O
54:O
55:X
56:O
57:O
58:X
59:O
60:O
61:O
62:O
63:O
64:O
66:X
67:O
68:O
69:O
70:O
71:O
73:O
74:O
75:O
76:O
77:O
78:O
80:X
82:X
83:O
84:O
85:O
87:O
88:O
89:O
90:O
91:O
92:O
94:O
96:O
97:O
98:O
99:O
101:X
103:O
104:O
105:O
106:O
108:O
110:O
111:O
112:O
113:O
114:O
115:X
116:O
117:X
118:O
119:O
120:O
122:O
123:O
124:X
125:O
126:O
127:O
129:X
131:O
132:O
133:O
134:O
136:X
138:O
139:O
140:O
141:O
142:O
143:O
144:O
145:X
146:O
147:O
148:O
150:O
151:X
153:O
154:O
155:O
157:X
159:O
160:O
161:O
162:O
164:X
166:O
167:O
168:O
169:O
171:X
172:O
173:O
174:O
175:O
176:O
178:X
180:O
181:O
182:O
183:O
185:X
186:O
187:O
188:O
189:O
190:O
192:X
194:O
195:X
196:O
197:O
199:O
200:X
201:O
202:O
203:O
204:O
206:O
207:O
208:O
209:O
210:O
211:O
213:X
214:O
215:O
216:O
217:O
218:O
219:O
220:O
221:

In [7]:
# Assemble the graded rows column by column, then write them to an Excel file
# without the index column.
output_columns = {}
output_columns['correct_answer'] = correct_answer_list
output_columns['llm_answer'] = llm_answer_list
output_columns['result'] = result_list

df = pd.DataFrame(data=output_columns)
df.to_excel('4차_3_large_k3_include.xlsx', index=False)