In [None]:
import os

import openai
import pandas as pd
from tqdm import tqdm


# PARAMETERS
# The OpenAI key is read from the OPENAI_API_KEY environment variable so the
# secret is never stored in (or committed with) the notebook. A missing
# variable fails immediately with a KeyError naming the variable.
openai.api_key = os.environ['OPENAI_API_KEY']

# Chat models evaluated, in the order they are run.
MODELS = [
    'gpt-3.5-turbo',
    'gpt-4',
]

# CSV holding the answer-triggering test set.
DATASET = 'test_sets/test_at.csv'

# Prompt templates. Each has two positional `{}` slots that are filled, in
# order, with the context and then the question.
PROMPT_EN_AT = (
    "This is an answer triggering task. "
    "Your task is that of telling if a question can be answered given the provided context. "
    "Your reply should be: 1: it can be answered; 0: it cannot be answered. "
    "Your reply should contain only the corresponding number and nothing else (i.e., 0 or 1). "
    "CONTEXT: {} QUESTION: {} ANSWER: "
)
PROMPT_PT_AT = (
    "Este é uma tarefa de answer triggering. "
    "Sua tarefa é dizer se uma pergunta pode ser respondida com base no contexto fornecido. "
    "Sua resposta deve ser: 1: pode ser respondido; 0: não pode ser respondido. "
    "Sua resposta deve conter apenas o número correspondente e nada mais (ou seja, 0 ou 1). "
    "CONTEXTO: {} PERGUNTA: {} RESPOSTA: "
)

# Columns read from DATASET.
USECOLS = [
    'id_qa',
    'corpus',
    'abstract',
    'abstract_translated_pt',
    'question_en_origin',
    'question_pt_origin',
    'question_en_paraphase',
    'question_pt_paraphase',
    'at_labels',
]

# Experiment name -> prompt template plus the dataset columns that supply the
# question, the context and the gold label.
EXPERIMENTS = {
    name: {
        'prompt': template,
        'question': question_column,
        'context': context_column,
        'answer': 'at_labels',
    }
    for name, template, question_column, context_column in (
        # 1) Question in English, text in English
        ('1_Qen_Cen', PROMPT_EN_AT, 'question_en_origin', 'abstract'),
        # 2) Question in Portuguese, text in Portuguese
        ('2_Qpt_Cpt', PROMPT_PT_AT, 'question_pt_origin', 'abstract_translated_pt'),
        # 3) Paraphrase in English, text in English
        ('3_Qparaen_Cen', PROMPT_EN_AT, 'question_en_paraphase', 'abstract'),
        # 4) Paraphrase in Portuguese, text in Portuguese
        ('4_Qparapt_Cpt', PROMPT_PT_AT, 'question_pt_paraphase', 'abstract_translated_pt'),
    )
}


def chatgpt_answer(prompt, question, context='', model='gpt-3.5-turbo'):
    """Ask an OpenAI chat model a single question and return its raw reply.

    Args:
        prompt: Template with two positional ``{}`` slots; it is filled with
            ``context`` first and ``question`` second.
        question: Text placed in the second slot of the template.
        context: Text placed in the first slot of the template.
        model: Name of the chat model to query (this notebook uses
            'gpt-3.5-turbo' and 'gpt-4').

    Returns:
        The message content of the first choice in the API response,
        returned as-is.
    """
    filled_prompt = prompt.format(context, question)

    # The whole filled-in prompt is sent as one system message.
    request = {
        'model': model,
        'temperature': 0.0,
        'max_tokens': 100,
        'top_p': 1,
        'frequency_penalty': 0,
        'presence_penalty': 0,
        'messages': [{'role': 'system', 'content': filled_prompt}],
    }
    completion = openai.ChatCompletion.create(**request)

    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# Read only the columns listed in USECOLS and keep the rows that have a
# non-null 'at_labels' value.
df = (
    pd.read_csv(DATASET, usecols=USECOLS)
    .dropna(subset=['at_labels'])
)
print(df.shape[0])
df.head()

In [None]:
# Run every (model, experiment) pair over the dataset and write one results
# CSV per pair. Each CSV holds the dataset columns plus 'gpt_answers' (the raw
# model reply) and 'is_right' (1 when the reply matches the gold label).
results_dir = 'results/experiments_chatgpt_at'

# Create the output folder before any API call is made, so a missing
# directory cannot fail the run after the answers have already been paid for.
os.makedirs(results_dir, exist_ok=True)

for model in MODELS:
    print(f'\n\n>>> MODEL: {model}')

    for exp, config in EXPERIMENTS.items():
        print(f'>>>>>> Experiment: {exp}')

        prompt = config['prompt']
        question_col = config['question']
        context_col = config['context']
        answer_col = config['answer']

        gpt = []
        is_right = []
        # iterrows() has no length, so pass the total for a usable progress bar.
        for _, row in tqdm(df.iterrows(), total=len(df)):
            question = row[question_col]
            # The context must come from the experiment's context column; an
            # empty column name means "no context", matching the default of
            # chatgpt_answer.
            context = row[context_col] if context_col else ''

            ans = chatgpt_answer(prompt, question, context, model)

            gpt.append(ans)
            # Only the first character of the label's string form is compared
            # with the reply (presumably so a float label such as 1.0 matches
            # '1' -- TODO confirm the dtype of the label column).
            is_right.append(1 if ans == str(row[answer_col])[0] else 0)

        # These two columns are overwritten on every (model, experiment) pass.
        df['gpt_answers'] = gpt
        df['is_right'] = is_right

        df.to_csv(f'{results_dir}/{model}_{exp}.csv', index=False)