In [None]:
# Utils

In [None]:
import numpy as np
import os
import pandas as pd
from tqdm.auto import tqdm

## Chat Bots

In [None]:
import openai
import time


class GPTBot:
    """Chat bot that answers prompts through the OpenAI chat-completion API.

    Attributes:
        model: Name of the chat model sent with every request.
    """

    # Seconds to wait after a failed request before the next attempt.
    RETRY_DELAY_SECONDS = 20

    def __init__(self, model="gpt-4"):
        """Set the OpenAI API key and remember which model to query.

        Args:
            model: Name of the chat model to use for completions.

        Raises:
            ImportError: If ``OPENAI_API_KEY`` cannot be imported from ``secrets``.
        """
        print("Initiating GPT chat bot...")

        # NOTE(review): `secrets` is also a standard-library module name; this
        # presumably relies on a project-local secrets.py taking precedence on
        # the import path -- confirm, or rename the local module.
        from secrets import OPENAI_API_KEY
        openai.api_key = OPENAI_API_KEY

        self.model = model
        print("GPT chat bot Initiated!")

    def get_completion(self, prompt):
        """Return the model's reply to ``prompt``, retrying until a request succeeds.

        Failed requests are reported and retried after ``RETRY_DELAY_SECONDS``.
        There is no retry limit, so a persistent failure keeps this call looping.

        Args:
            prompt: Text sent as a single user message.

        Returns:
            The content of the first choice in the API response.
        """
        while True:
            try:
                return self.__get_completion_handler(prompt)
            except Exception as error:
                # Only ordinary errors are retried. KeyboardInterrupt and
                # SystemExit do not derive from Exception, so the loop can
                # still be interrupted from the notebook.
                print(f"GPT completion failed: {error!r}")
                time.sleep(self.RETRY_DELAY_SECONDS)

    def __get_completion_handler(self, prompt):
        """Perform one chat-completion request and return the reply text."""
        messages = [{"role": "user", "content": prompt}]
        response = openai.ChatCompletion.create(
            model=self.model,
            messages=messages,
            temperature=0,  # this is the degree of randomness of the model's output
        )
        return response.choices[0].message["content"]

In [None]:
import pyperclip
import datetime

class CopyCatBot:
    """Chat-bot stand-in that exchanges text through the system clipboard.

    The prompt is copied to the clipboard and the bot then waits until the
    clipboard holds something else, which it returns as the completion
    (presumably pasted there by a human operator -- confirm the workflow).
    """

    # Seconds to wait between two clipboard checks.
    POLL_INTERVAL_SECONDS = 20

    def __init__(self):
        print("CopyCatBot Initiated!")

    @staticmethod
    def get_completion(prompt):
        """Block until the clipboard content differs from ``prompt`` and return it.

        Args:
            prompt: Text placed on the clipboard.

        Returns:
            The clipboard content once it is no longer equal to ``prompt``.
        """
        pyperclip.copy(prompt)

        while True:
            completion = pyperclip.paste()
            if completion != prompt:
                # `prompt` is a string, so it cannot be called; the timestamp
                # has to be reported with print().
                print(f"Completion completed: {datetime.datetime.now()}")
                return completion

            # Put the prompt back on the clipboard before waiting again.
            pyperclip.copy(prompt)
            time.sleep(CopyCatBot.POLL_INTERVAL_SECONDS)


## Sentence Puzzle

In [None]:
from dataclasses import dataclass
from typing import List
from enum import Enum, auto

class ModeQ(Enum):
    """Mode of a puzzle record: ``Train`` or ``Test``."""

    # Explicit values match what auto() assigns in declaration order.
    Train = 1
    Test = 2

@dataclass
class SentencePuzzle:
    """A single multiple-choice sentence puzzle.

    After construction the instance also carries ``mode``: ``ModeQ.Test`` when
    ``answer`` is ``None`` and ``ModeQ.Train`` otherwise. ``mode`` is a plain
    attribute set in ``__post_init__``, not a dataclass field, so it takes no
    part in the generated ``__init__``, ``__repr__`` or ``__eq__``.
    """

    id: str
    question: str
    answer: str  # None marks a record without an answer (see __post_init__)
    label: int  # the loader in this file passes None when the key is absent
    choices: List[str]
    choice_order: List[int]  # the loader in this file passes None when the key is absent

    def __post_init__(self):
        # A missing answer is what distinguishes a test record from a train record.
        self.mode = ModeQ.Test if self.answer is None else ModeQ.Train


def load_sentence_puzzles(file_path: str) -> list[SentencePuzzle]:
    """Load puzzle records from a NumPy file and wrap each in a ``SentencePuzzle``.

    Every record must provide ``id``, ``question`` and ``choice_list``;
    ``answer``, ``label`` and ``choice_order`` are optional and become ``None``
    when absent.

    Args:
        file_path: Path of the ``.npy`` file to read.

    Returns:
        One ``SentencePuzzle`` per record, in file order.
    """
    print(f"Loading sentence puzzles from {file_path}")

    # allow_pickle=True unpickles arbitrary objects: only load trusted files.
    records = np.load(file_path, allow_pickle=True)

    puzzles = []
    for record in records:
        puzzle = SentencePuzzle(
            id=record['id'],
            question=record['question'],
            answer=record.get('answer'),
            label=record.get('label'),
            choices=record['choice_list'],
            choice_order=record.get('choice_order'),
        )
        puzzles.append(puzzle)

    print(f"Loaded {len(puzzles)} sentence puzzles")
    return puzzles


# Prompt Setup

In [None]:
# Sample question/answer pair for trying the prompt template by hand.
question = "A man shaves everyday, yet keeps his beard long."
answer = "He wants to maintain his appearance."

# Baseline template; `{question}` and `{option}` are filled by
# generate_prompt_baseline. The trailing backslashes join the first three
# sentences into a single line of the rendered prompt.
base_prompt = """
Your task is to generate a descriptive explanation from a question to an answer option. \
In the following, a question and an option as the answer to the question is provided. \
The answer might be or not be a correct answer.

Write a descriptive explanation in at most one paragraph and 200 words to show that path from question to the answer.

Question: ```{question}```
Answer Option: ```{option}```
"""


def generate_prompt_baseline(que, opt):
    """Render the baseline prompt for one question/option pair.

    Args:
        que: Question text inserted after ``Question:``.
        opt: Candidate answer inserted after ``Answer Option:``.

    Returns:
        ``base_prompt`` with both placeholders filled in.
    """
    return base_prompt.format(question=que, option=opt)


# Example: print(generate_prompt_baseline(question, answer))


# Experiment

## Config

In [None]:
# Run switches.
Demo = True  # True selects CopyCatBot in the setup cell, False selects GPTBot
PromptMode = "baseline"
Phase = "train"  # either "train" or "test"

# Dataset file per phase; only the entry for the active phase is kept.
QuestionsPath = dict(
    train="../datasets/data/SP-train.npy",
    test="../datasets/data/SP-val-nolabel.npy",
)[Phase]

# Output location of the CSV that the experiment loop writes.
DumpDir = "SentencePuzzleKD"
DumpPath = os.path.join(DumpDir, f"KD_{Phase}.csv")

## Setup

In [None]:
print("Initiating experiments pipeline...")

# Demo mode uses the clipboard-based bot; otherwise the OpenAI-backed bot.
if Demo:
    chat_bot = CopyCatBot()
else:
    chat_bot = GPTBot()

if PromptMode == "baseline":
    prompt_generator = generate_prompt_baseline
else:
    # Raise with the reason attached instead of printing it and raising an
    # empty Exception; ValueError is still caught by `except Exception`.
    raise ValueError(f"Unknown prompt generating method: {PromptMode}")

if not os.path.exists(DumpDir):
    # makedirs also handles a nested DumpDir and a directory that appears
    # between the check above and this call.
    os.makedirs(DumpDir, exist_ok=True)
    print("DumpDir created")

puzzles = load_sentence_puzzles(QuestionsPath)

In [None]:
# Column-oriented accumulator for the report: one empty list per CSV column,
# in the order the columns are written.
kd_report = {
    column: []
    for column in (
        "id",
        "question",
        "option_1",
        "hypothesis_1",
        "option_2",
        "hypothesis_2",
        "option_3",
        "hypothesis_3",
        "option_4",
        "answer",
        "label",
    )
}

# If an earlier run already wrote a dump, load its columns back.
if os.path.exists(DumpPath):
    df = pd.read_csv(DumpPath)
    for col in list(kd_report):
        kd_report[col] = df[col].tolist()

    print(f"Records recovered from {DumpPath}")
    print(f".::{len(kd_report['id'])} records::.")

## Execute experiment

In [None]:
# Resume after the last *complete* row. Columns are trimmed to a common length
# so that a row left half-filled by an interrupted run is rebuilt from scratch.
start = min(len(column) for column in kd_report.values())
for column in kd_report.values():
    del column[start:]
length = len(puzzles)

# NOTE: a dump written before this loop was fixed pairs each id/question with
# the following puzzle's options; delete such a file instead of resuming from it.
for idx in tqdm(range(start, length)):
    puzzle = puzzles[idx]

    # Build the whole row first so every column describes the same puzzle.
    row = {"id": puzzle.id, "question": puzzle.question}

    for i in (1, 2, 3):
        option = puzzle.choices[i - 1]
        prompt = prompt_generator(que=puzzle.question, opt=option)
        # TODO(review): placeholder hypothesis; the real call is still disabled.
        # hypothesis = chat_bot.get_completion(prompt)
        hypothesis = "9092301030"
        row[f"option_{i}"] = option
        row[f"hypothesis_{i}"] = hypothesis

    # The fourth option is recorded without a hypothesis.
    row["option_4"] = puzzle.choices[3]
    row["answer"] = puzzle.answer
    # Labels are stored 1-based; records without a label keep None.
    row["label"] = None if puzzle.label is None else puzzle.label + 1

    for column, value in row.items():
        kd_report[column].append(value)

    # Checkpoint after every puzzle so an interrupted run can be resumed.
    df = pd.DataFrame(kd_report)
    df.to_csv(DumpPath, index=False)

print(f"Dumped {len(kd_report['id'])} records to {DumpPath}")