In [1]:
# !pip install -q -U openai
# !pip install -q gdown==v4.6.3

In [2]:
import os
import pandas as pd
from tqdm.auto import tqdm
import json

import datetime
import time

In [None]:
# Run configuration: the chat model to query and where the data lives on disk.
GPT_MODEL = "gpt-4"
DATA_DIR = 'SE2024'
# Both paths are plain '/'-joined children of DATA_DIR.
INPUT_DATA_PATH = DATA_DIR + '/test_split'
OUTPUT_DATA_PATH = DATA_DIR + '/infer.jsonl'

# Get dataset

In [5]:
def gdrive_download(file_id, file_name):
    """Download a file by running the ``gdown`` command-line tool.

    Args:
        file_id: Identifier passed to ``gdown`` as its positional argument.
        file_name: Destination path passed to ``gdown --output``.
    """
    # IPython shell escape: both values are interpolated into the command line
    # unquoted, so they must not contain spaces or shell metacharacters.
    !gdown $file_id --output $file_name

In [None]:
# Make sure the data directory exists. makedirs(exist_ok=True) is idempotent and
# also creates missing parents should DATA_DIR ever become a nested path.
os.makedirs(DATA_DIR, exist_ok=True)
# Download the test split only once; later runs reuse the local copy.
if not os.path.exists(INPUT_DATA_PATH):
    gdrive_download('1JcpBjTXv2OfaG6uYcIJO-Yk69nT9uN8i', INPUT_DATA_PATH)

In [None]:
# Load the downloaded test split into a DataFrame (parsed as CSV with pandas defaults).
dataset = pd.read_csv(INPUT_DATA_PATH)

# Chat Bots

In [None]:
import openai
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# saved inside the notebook; the placeholder remains as a fallback value.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "<API key here>")

class GPTBot:
    """Small wrapper that sends single-turn prompts to an OpenAI chat model.

    NOTE(review): the request goes through ``openai.ChatCompletion.create``,
    which looks like the pre-1.0 ``openai`` SDK interface -- confirm that the
    installed package version still provides it.
    """

    def __init__(self, model="gpt-4"):
        """Configure the global ``openai`` API key and remember the model name.

        Args:
            model: Name of the chat model used for every completion request.
        """
        print("Initiating GPT chat bot...")

        openai.api_key = OPENAI_API_KEY

        self.model = model
        print("GPT chat bot Initiated!")

    def get_completion(self, prompt, max_retries=10, retry_delay=10):
        """Return the model's answer to ``prompt``, retrying on failure.

        Args:
            prompt: Text sent as the single user message.
            max_retries: Maximum number of attempts before giving up.
            retry_delay: Seconds to wait between two consecutive attempts.

        Returns:
            The content string of the first choice in the response.

        Raises:
            RuntimeError: If every attempt failed; the last underlying error
                is attached as ``__cause__``.
        """
        last_error = None
        for attempt in range(1, max_retries + 1):
            try:
                return self.__get_completion_handler(prompt)
            # Only catch Exception so Ctrl-C (KeyboardInterrupt) still stops the run.
            except Exception as error:
                last_error = error
                print(f"GPT completion failed ::[{datetime.datetime.now()}]:: {error!r}")
                # No point in sleeping once the retry budget is exhausted.
                if attempt < max_retries:
                    time.sleep(retry_delay)
                    print(f"Trying GPT completion ::[{datetime.datetime.now()}]::")
        # Fail loudly instead of hitting an unbound local variable.
        raise RuntimeError(
            f"GPT completion failed after {max_retries} attempts"
        ) from last_error

    def __get_completion_handler(self, prompt):
        """Perform one chat-completion request and return the reply text."""
        messages = [{"role": "user", "content": prompt}]
        response = openai.ChatCompletion.create(
            model=self.model,
            messages=messages,
            temperature=0, # this is the degree of randomness of the model's output
        )
        return response.choices[0].message["content"]

# Prompt Setup

In [6]:
# Prompt template sent to the model for every riddle. The {RIDDLE}, {OPTION_1},
# {OPTION_2} and {OPTION_3} placeholders are filled in with str.format; Option 4
# is a fixed "none of the above" choice baked into the template.
base_prompt = """
You are given a riddle and four options to choose the answer amongst them. A riddle is a question or statement intentionally phrased so as to require ingenuity in ascertaining its answer or meaning, typically presented as a game.
Different ideas can be used in these riddles:
    1. Riddles often employ misdirection, leading you away from the actual solution.
    2. They include elements with double meanings, requiring a keen eye for words with dual interpretations.
    3. Metaphorical wordplay adds another layer, urging you to decipher figurative language.
    4. Look out for exaggeration, as riddles may present overly dramatic details to divert your attention.
    5. Common phrases and sayings may hide within the puzzle, demanding familiarity.
    6. Associations and irony play a crucial role, introducing unexpected connections.
    7. Numerical puzzles can also be part of the mystery, requiring you to decode their significance.
    8. Elemental imagery, drawn from nature, might hold key descriptors.
    9. Rhyming and sound clues can add a poetic dimension.
    10. Avoid sexism ans sex cliche, for example, gender bias for jobs, based on their positions or their outcome.
    11. Also, it is important to note you should decode the upcoming riddle using everyday logic and creativity.
Although a clever solution is required, avoid supernatural solutions and keep your answer within the limits of realistic imagination. For example, having superhuman abilities or unusual events or things are mostly a not preferred choice unless that is a better solution. Now which of the following options is the answer to the following riddle:

to solve the riddle, consider mentioned pont above and think step by step for each option, and it the end, mention the option you think is the best, in the format: 'Option 1' or 'Option 2' or 'Option 3' or 'Option 4'


Riddle: "{RIDDLE}"

Options:
Option 1: "{OPTION_1}"
Option 2: "{OPTION_2}"
Option 3: "{OPTION_3}"
Option 4: "None of the above options are correct"


Let's think step by step about each option, then at the end, choose the best and the most logical option:
"""

def get_prompt(ds):
    """Build the full prompt text for one dataset record.

    Args:
        ds: Mapping-like record providing the 'QUESTION', 'OPTION 1',
            'OPTION 2' and 'OPTION 3' entries.

    Returns:
        ``base_prompt`` with its placeholders replaced by the record's values.
    """
    substitutions = {
        'RIDDLE': ds['QUESTION'],
        'OPTION_1': ds['OPTION 1'],
        'OPTION_2': ds['OPTION 2'],
        'OPTION_3': ds['OPTION 3'],
    }
    return base_prompt.format(**substitutions)


# Read/Write utils

In [None]:
def save_inference(data, address):
    """Overwrite ``address`` with ``data`` serialized as JSON Lines.

    Args:
        data: Iterable of JSON-serializable items; each becomes one line.
        address: Path of the file to (re)create.
    """
    with open(address, 'w') as jsonl_file:
        jsonl_file.writelines(json.dumps(record) + '\n' for record in data)

In [None]:
def add_inference(data, address):
    """Append ``data`` to ``address`` as JSON Lines, creating the file if needed.

    Args:
        data: Iterable of JSON-serializable items; each becomes one line.
        address: Path of the file to append to.
    """
    with open(address, 'a+') as jsonl_file:
        jsonl_file.writelines(json.dumps(record) + '\n' for record in data)

In [None]:
def read_inference(address):
    """Load a JSON Lines file into a list.

    Args:
        address: Path of the file to read; every line must hold one JSON value.

    Returns:
        A list with the decoded value of each line, in file order.
    """
    with open(address, 'r') as file:
        return [json.loads(line) for line in file]

# Experiment

In [9]:
# Resume support: reload whatever a previous run already wrote to the output file.
if not os.path.exists(OUTPUT_DATA_PATH):
    results = []
else:
    results = read_inference(OUTPUT_DATA_PATH)

    print(f"Records recovered from {OUTPUT_DATA_PATH}")
    print(f".::{len(results)} records::.")

In [None]:
# Create the chat client used by the experiment loop, bound to the configured model.
chat_bot = GPTBot(model=GPT_MODEL)

## Execute experiment

In [10]:
# Number of records already answered by a previous run; those are skipped.
start = len(results)

# Iterating a DataFrame directly yields column labels, not rows, so convert it
# to a list of per-row dicts (with native Python values that json can dump).
records = dataset.to_dict("records")

for ds in tqdm(records[start:], total=len(records), initial=start):
    prompt = get_prompt(ds)
    result = chat_bot.get_completion(prompt)

    data = {
        "question": ds['QUESTION'],
        'option 1': ds['OPTION 1'],
        'option 2': ds['OPTION 2'],
        'option 3': ds['OPTION 3'],
        'option 4': ds['OPTION 4'],
        'gpt': result
    }
    # Persist each answer immediately so an interrupted run can be resumed.
    add_inference([data], OUTPUT_DATA_PATH)
    # Keep the in-memory list in sync with the file; rewriting the file from a
    # stale `results` afterwards would throw away everything appended above.
    results.append(data)

print(f"Dumped {len(results)} records to {OUTPUT_DATA_PATH}")

  0%|          | 0/60 [00:00<?, ?it/s]

Dumped 60 records to SentencePuzzleKD/KD_test_gpt-4.csv
