In [None]:
!pip install -q -U openai
!pip install -q gdown

In [None]:
import os
import pandas as pd
from tqdm.auto import tqdm
import json

import datetime
import time

In [None]:
# Model name handed to GPTBot and sent with every chat-completion request.
GPT_MODEL = "gpt-3.5-turbo-0125"
# Directory holding the input CSV and the inference outputs (created on demand below).
DATA_DIR = 'SE2024'
# CSV that is downloaded (if missing) and loaded into `dataset`.
INPUT_DATA_PATH  = f'{DATA_DIR}/test_split.csv'
# JSONL file that receives one record per row while the experiment runs.
OUTPUT_DATA_PATH = f'{DATA_DIR}/inference.jsonl'
# JSONL file rewritten with all collected records once the experiment loop finishes.
BACKUP_OUTPUT_DATA_PATH = f'{DATA_DIR}/inference_backup.jsonl'

# Get dataset

In [None]:
def gdrive_download(file_id, file_name):
    """Download a Google Drive file with the ``gdown`` command-line tool.

    Args:
        file_id: Google Drive file id passed to ``gdown``.
        file_name: Destination path passed to ``gdown -O``.

    Raises:
        subprocess.CalledProcessError: if ``gdown`` exits with a non-zero status.
        FileNotFoundError: if the ``gdown`` executable is not installed.
    """
    # Local import keeps this cell self-contained.
    import subprocess

    # An argument list (no shell) avoids word-splitting and shell-metacharacter
    # problems when the id or the path contains spaces or special characters.
    # check=True surfaces a failed download instead of silently continuing.
    subprocess.run(["gdown", str(file_id), "-O", str(file_name)], check=True)

In [None]:
# Create the data directory (including missing parents); exist_ok avoids the
# check-then-create race and makes the cell safe to re-run.
os.makedirs(DATA_DIR, exist_ok=True)
# Download the test split only when it is not already on disk.
if not os.path.exists(INPUT_DATA_PATH):
    gdrive_download('1JcpBjTXv2OfaG6uYcIJO-Yk69nT9uN8i', INPUT_DATA_PATH)

In [None]:
# Load the test split; later cells read its 'QUESTION' and 'OPTION 1'..'OPTION 4' columns.
dataset = pd.read_csv(INPUT_DATA_PATH)

# Chat Bots

In [None]:
from openai import OpenAI
# Read the key from the environment so it is not committed with the notebook;
# the literal placeholder is kept only as a backward-compatible fallback.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "...")

class GPTBot:
    """Thin wrapper around the OpenAI chat-completions endpoint with retries.

    Attributes:
        model: Model name sent with every request.
        client: ``OpenAI`` client created with the module-level ``OPENAI_API_KEY``.
    """

    def __init__(self, model):
        """Store the model name and create the OpenAI client."""
        print("Initiating GPT chat bot...")

        self.model = model
        self.client = OpenAI(api_key=OPENAI_API_KEY)

        print("GPT chat bot Initiated!")

    def get_completion(self, prompt, max_retries=20, retry_delay=10):
        """Return the model's reply to ``prompt``, retrying on any error.

        Args:
            prompt: Text sent as a single user message.
            max_retries: Maximum number of attempts (default 20).
            retry_delay: Seconds to wait between attempts (default 10).

        Returns:
            The reply text, or the string ``"Failed"`` when every attempt raised.
        """
        for attempt in range(1, max_retries + 1):
            try:
                return self.__get_completion_handler(prompt)
            except Exception as e:
                # Report the actual error so repeated failures can be diagnosed.
                print(
                    f"GPT completion failed ::[{datetime.datetime.now()}]:: "
                    f"(attempt {attempt}/{max_retries}) {type(e).__name__}: {e}"
                )
                # No point sleeping or announcing a retry after the last attempt.
                if attempt == max_retries:
                    break
                time.sleep(retry_delay)
                print(f"Trying GPT completion ::[{datetime.datetime.now()}]::")
        return "Failed"

    def __get_completion_handler(self, prompt):
        """Send one chat-completion request and return the first choice's text."""
        messages = [{"role": "user", "content": prompt}]
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=0, # this is the degree of randomness of the model's output
        )
        return response.choices[0].message.content

# Prompt Setup

In [None]:
# Prompt template; `{riddle}`, `{option_1}`, `{option_2}` and `{option_3}` are filled
# in by get_prompt. A trailing backslash joins a line with the next one, so each
# joined line must end with a space to keep the sentences separated.
base_prompt = """\
You are given a brief example riddle and four options to choose the answer amongst them. \
A riddle is a question or statement intentionally phrased so as to require ingenuity in ascertaining its answer or meaning. \


Riddle: "{riddle}"

Options:
Option 1: "{option_1}"
Option 2: "{option_2}"
Option 3: "{option_3}"
Option 4: "None of the above options are correct"


To solve the riddle, think step by step for each option and consider providing an informative explanation or just the option number. \
Feel free to think creatively and consider alternative perspectives! \
At the end, announce the option you think is the best one in the format: 'Option 1' or 'Option 2' or 'Option 3' or 'Option 4':
"""

def get_prompt(ds):
    """Fill the riddle prompt template from one dataset row.

    Args:
        ds: Row-like object indexed by 'QUESTION', 'OPTION 1', 'OPTION 2'
            and 'OPTION 3'.

    Returns:
        ``base_prompt`` with the riddle and the first three options substituted.
    """
    template_fields = {
        'riddle': ds['QUESTION'],
        'option_1': ds['OPTION 1'],
        'option_2': ds['OPTION 2'],
        'option_3': ds['OPTION 3'],
    }
    return base_prompt.format(**template_fields)


# Read/Write utils

In [None]:
def save_inference(data, address):
    """Write ``data`` to ``address`` as JSON Lines, replacing any existing file.

    Args:
        data: Iterable of JSON-serialisable items; each becomes one line.
        address: Path of the file to (over)write.
    """
    with open(address, 'w') as sink:
        sink.writelines(json.dumps(record) + '\n' for record in data)

In [None]:
def add_inference(data, address):
    """Append ``data`` to ``address`` as JSON Lines, creating the file if needed.

    Args:
        data: Iterable of JSON-serialisable items; each becomes one line.
        address: Path of the file to append to.
    """
    with open(address, 'a+') as sink:
        for record in data:
            sink.write(f"{json.dumps(record)}\n")

In [None]:
def read_inference(address):
    """Load a JSON Lines file and return its records as a list.

    Args:
        address: Path of the file to read; every line must be a JSON document.

    Returns:
        List with one decoded object per line, in file order.
    """
    with open(address, 'r') as source:
        return [json.loads(raw_line) for raw_line in source]

# Experiment

In [None]:
# Build the chat bot used by the experiment loop, configured with GPT_MODEL.
chat_bot = GPTBot(model=GPT_MODEL)

## Execute experiment

In [None]:
# Query the model once per dataset row and persist every answer as it arrives.
# NOTE: add_inference appends to OUTPUT_DATA_PATH, so re-running this cell adds the
# same records to that file again; the backup file is rewritten from scratch instead.
results = []
itr = tqdm(dataset.iterrows(), total=len(dataset), desc="Processing")

for index, ds in itr:
    # get_completion retries internally and returns "Failed" if every attempt raised.
    prompt = get_prompt(ds)
    result = chat_bot.get_completion(prompt)

    # One record per riddle: the row's question/options plus the raw model reply.
    data = {
        "question": ds['QUESTION'],
        'option 1': ds['OPTION 1'],
        'option 2': ds['OPTION 2'],
        'option 3': ds['OPTION 3'],
        'option 4': ds['OPTION 4'],
        'gpt': result
    }
    # Append immediately so partial progress survives an interrupted run.
    add_inference([data], OUTPUT_DATA_PATH)
    results.append(data)
    
# After the loop, write all in-memory records to the backup file in one go.
save_inference(results, BACKUP_OUTPUT_DATA_PATH)

print(f"Dumped {len(results)} records to {OUTPUT_DATA_PATH}")