In [None]:
!pip install -q cohere gdown tqdm

In [None]:
from cohere import Client, CohereAPIError
import os
import pandas as pd
from tqdm.notebook import tqdm

# Prefer the COHERE_API_KEY environment variable so the real secret never has to be
# saved inside the notebook; the '...' placeholder is kept only as a fallback.
api_key = os.environ.get('COHERE_API_KEY', '...')
client = Client(api_key)

In [None]:
# Folder holding both the downloaded input CSV and the generated output CSV.
DATA_DIR = "SE2024"
# CSV that is downloaded (if missing) and read as the input data.
RAW_DATA_PATH = f"{DATA_DIR}/train_gpt-4_raw_RAG.csv"
# CSV the generated results are written to.
RESULT_CSV_PATH = f"{DATA_DIR}/train_logical_relation.csv"


# Cohere API rate-limit safety

In [None]:
from time import sleep

class SafeCohere:
    """Wrapper around a Cohere client whose ``chat`` call retries on API errors.

    Args:
        client: Object exposing ``chat(prompt, **kwargs)``.
        wait_seconds: Pause between attempts, in seconds. Defaults to 60.
    """

    def __init__(self, client, wait_seconds=60):
        self.co = client
        self.wait_seconds = wait_seconds

    def chat(self, prompt, **kw_args):
        """Call ``client.chat`` and keep retrying while the API reports a retryable error.

        Args:
            prompt: Prompt forwarded positionally to the wrapped client.
            **kw_args: Extra keyword arguments forwarded unchanged.

        Returns:
            Whatever the wrapped client's ``chat`` returns.

        Raises:
            CohereAPIError: When the error carries a 4xx status other than 429.
                Such errors (bad key, malformed request) will not succeed on retry,
                so looping on them would hang forever.
        """
        while True:
            try:
                return self.co.chat(prompt, **kw_args)
            except CohereAPIError as e:
                # NOTE(review): assumes the exception exposes an integer `http_status`,
                # as the pre-5.x SDK does; if it is absent we retry as before.
                status = getattr(e, "http_status", None)
                is_client_error = isinstance(status, int) and 400 <= status < 500
                if is_client_error and status != 429:
                    raise
                print(f"Rate limit reached, waiting for {self.wait_seconds} seconds: {e}")
                sleep(self.wait_seconds)
                
# Retrying wrapper around `client`; the generation loop sends its chat requests through it.
co = SafeCohere(client)

# Prepare data

In [None]:
# Create the folders of the configured input/output paths rather than a separately
# hard-coded directory name, so they always match RAW_DATA_PATH / RESULT_CSV_PATH.
for _path in (RAW_DATA_PATH, RESULT_CSV_PATH):
    # `or "."` covers a bare file name, whose dirname is the empty string.
    os.makedirs(os.path.dirname(_path) or ".", exist_ok=True)

In [None]:
if os.path.exists(RAW_DATA_PATH):
    print('Data file already exists')
else:
    print("Data doesn't exist, start download from the google drive...")
    !gdown 15VK8MaOEg2gF8iwmI4bummXt8whZF9Bq -O $RAW_DATA_PATH

In [None]:
# Load the raw CSV into a DataFrame; the generation loop reads its
# 'ID', 'QUESTION', 'ANSWER' and 'HYPOTHESIS' columns.
data = pd.read_csv(RAW_DATA_PATH)

# Prepare prompt template

In [None]:
# Prompt sent to the model: an instruction section, one worked example, and the problem
# to solve. The {riddle}, {answer} and {thesis} placeholders are filled via str.format.
# The backslash right after the opening quotes suppresses the leading newline, and the
# one inside the instruction joins two source lines into a single prompt line.
prompt_template = """\
<|Instruction|>
I would provide you a Riddle and its Answer. I would Also provide you a hypothesis about the answer. \
Your task is to give me in short that what is the logical path from question to its answer.
Common tricks to be able to answer this riddles:
    1. Riddles often employ misdirection, leading you away from the actual solution.
    2. They include elements with double meanings, requiring a keen eye for words with dual interpretations.
    3. Metaphorical wordplay adds another layer, urging you to decipher figurative language.
    4. Look out for exaggeration, as riddles may present overly dramatic details to divert your attention.
    5. Common phrases and sayings may hide within the puzzle, demanding familiarity.
    6. Associations and irony play a crucial role, introducing unexpected connections.
    7. Numerical puzzles can also be part of the mystery, requiring you to decode their significance.
    8. Elemental imagery, drawn from nature, might hold key descriptors.
    9. Rhyming and sound clues can add a poetic dimension.
    10. Avoid sexism ans sex cliche, for example, gender bias for jobs, based on their positions or their outcome.
    11. make sure to put the most logical solution first.
Please keep your response as short as you can.

<|Example|>
Riddle: "Mr. and Mrs. Mustard have six daughters and each daughter has one brother. But there are only 9 people in the family, how is that possible?"
Answer: "Each daughter shares the same brother."
Hypothesis: "The question states that Mr. and Mrs. Mustard have six daughters and each daughter has one brother. This might initially lead one to think that there are 6 brothers, one for each daughter, which would total 14 people in the family (2 parents, 6 daughters, 6 brothers). However, the question also specifies that there are only 9 people in the family. The only way this is possible is if each daughter shares the same brother. This means there is only one brother who is the sibling to all six daughters. Therefore, the total number of people in the family is 9: Mr. and Mrs. Mustard, their six daughters, and one son."
Logical Relation: "The only way this is possible is if each daughter shares the same brother. This means there is only one brother who is the sibling to all six daughter"

<|Problem|>
Riddle: "{riddle}"
Answer: "{answer}"
Hypothesis: "{thesis}"
Logical Relation:
"""


In [None]:
# Hand-written sample (riddle, its answer, and a hypothesis) used to preview the
# rendered prompt before running over the whole dataset.
riddle = "Two girls have the same parents and were born at the same hour of the same day of the same month, but they are not twins. How can this be possible?"
answer = "They were not born in the same year."
thesis = """The question states that two girls have the same parents and were born at the same hour of the same day of the same month, but they are not twins. This might seem contradictory at first, as we usually associate the birth of two siblings at the same time to be twins. However, the question does not specify that the girls were born in the same year. Therefore, the girls could have been born exactly one year (or multiple years) apart, on the same day, at the same hour. This would make them not twins, but rather siblings with coincidentally synchronized birth times. Hence, the answer option "They were not born in the same year" is a plausible explanation to the question."""


In [None]:
# Fill the template with the sample values and show the exact text the model would receive.
prompt = prompt_template.format(
    riddle=riddle,
    answer=answer,
    thesis=thesis,
)
print(prompt)

# Generate logical relation

In [None]:
def fix_start_end_generated(text: str) -> str:
    """Remove any run of single or double quotes from both ends of ``text``.

    Quotes inside the text are left untouched. An empty string, or a string made
    only of quote characters, yields ``""`` (the previous index-based loops raised
    ``IndexError`` on such input).

    Args:
        text: Generated text, possibly wrapped in quotes.

    Returns:
        ``text`` without leading/trailing ``'`` and ``"`` characters.
    """
    return text.strip("'\"")


In [None]:
# One output record per input row, collected here and turned into a DataFrame later.
logical_revised = []

itr = tqdm(data.iterrows(), total=len(data))
for i, row in itr:
    # `row_id` instead of `id` so the builtin is not shadowed.
    row_id = row['ID']
    riddle = row['QUESTION']
    answer = row['ANSWER']
    thesis = row['HYPOTHESIS']
    prompt = prompt_template.format(riddle=riddle, answer=answer, thesis=thesis)
    prediction = co.chat(
        prompt,
        model='command',
        temperature=0.0,
    )
    # Drop quotes the model may have wrapped around its reply.
    logical_relation = fix_start_end_generated(prediction.text)
    # Named `record`, not `data`: rebinding `data` here would replace the input
    # DataFrame with a dict and break any re-run of this cell.
    record = {
        "id": row_id,
        "question": riddle,
        "answer": answer,
        "explanation": logical_relation
    }
    logical_revised.append(record)

# Save result

In [None]:
# Turn the collected records into a table and write it out without the index column.
result_df = pd.DataFrame(data=logical_revised)
result_df.to_csv(
    path_or_buf=RESULT_CSV_PATH,
    index=False,
)