In [1]:
import numpy as np

In [2]:
from dataclasses import dataclass
from typing import List, Optional

@dataclass()
class SentencePuzzle:
    """A single multiple-choice sentence puzzle.

    ``answer``, ``label`` and ``choice_order`` may be ``None``: the loader in
    this notebook fills them with ``None`` when the source record lacks the
    corresponding key. The hand-written ``__repr__`` below is used instead of
    the one ``dataclass`` would generate.
    """

    # Puzzle identifier.
    id: str
    # Question text shown to the solver.
    question: str
    # Answer text, or None when the record carries no answer.
    answer: Optional[str]
    # Label value from the record, or None when absent.
    label: Optional[int]
    # Candidate answers in the order they are stored in the record.
    choices: List[str]
    # One integer tag per stored choice, or None when absent.
    # NOTE(review): presumably each tag is the choice's pre-shuffle position - confirm.
    choice_order: Optional[List[int]]

    def __repr__(self):
        """Return a multi-line, human-readable description of the puzzle.

        Every choice is wrapped in double quotes. When ``choice_order`` is
        available the choices are listed by ascending ``choice_order`` tag;
        otherwise they are listed in stored order. A missing answer is shown
        as ``<question mode>``.
        """
        if self.choice_order is not None:
            # choice_order.index(idx) is the stored position of the choice
            # tagged ``idx``; walking idx = 0..n-1 lists choices by tag.
            ordered = [
                self.choices[self.choice_order.index(idx)]
                for idx in range(len(self.choices))
            ]
        else:
            ordered = self.choices
        # Quote in both branches so the output format does not depend on
        # whether choice_order happens to be present.
        choices = "\n\t".join(f'"{choice}"' for choice in ordered)
        answer = self.answer if self.answer is not None else '<question mode>'
        return (
            "SentencePuzzle(\n"
            f'id="{self.id}",\n'
            f'question="{self.question}",\n'
            f'answer="{answer}",\n'
            "choices=[\n"
            f"    {choices}\n"
            "])"
        )

    def json(self):
        """Return the puzzle's fields as a plain ``dict`` keyed by field name."""
        return {
            "id": self.id,
            "question": self.question,
            "answer": self.answer,
            "label": self.label,
            "choices": self.choices,
            "choice_order": self.choice_order
        }


class SentencePuzzles:
    """Collection of ``SentencePuzzle`` objects read from a NumPy ``.npy`` file."""

    def __init__(self, file_path):
        """Store ``file_path`` and load the puzzles from it right away."""
        self.file_path = file_path
        self.puzzles: list[SentencePuzzle] = []
        self.load()

    def load(self):
        """Read ``self.file_path`` and replace ``self.puzzles`` with its records.

        Each record must provide ``id``, ``question`` and ``choice_list``;
        ``answer``, ``label`` and ``choice_order`` fall back to ``None`` when
        the record does not have them. Progress is reported with ``print``.
        """
        print(f"Loading sentence puzzles from {self.file_path}")
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects, which can execute code - only point this at trusted files.
        records = np.load(self.file_path, allow_pickle=True)

        loaded = []
        for record in records:
            puzzle = SentencePuzzle(
                id=record['id'],
                question=record['question'],
                answer=record.get('answer'),
                label=record.get('label'),
                choices=record['choice_list'],
                choice_order=record.get('choice_order'),
            )
            loaded.append(puzzle)

        # Assign only after every record converted, so a failure part-way
        # through leaves the previous list untouched.
        self.puzzles = loaded
        print(f"Loaded {len(self.puzzles)} sentence puzzles")



In [15]:
# Dataset locations, relative to the notebook's working directory.
SP_PATH = "../datasets/data/SP-val-nolabel.npy"
# WP_PATH is read by the word-puzzle cell further down (np.load(WP_PATH, ...)).
# It previously existed only as a commented-out line, so running the notebook
# on a fresh kernel failed there with a NameError.
# NOTE(review): path restored from that commented-out line - confirm the file
# still lives at this location.
WP_PATH = "../datasets/pilot/word_puzzle.npy"

In [16]:
# Load the sentence puzzles from SP_PATH, then display the first one
# (a bare last expression is rendered through SentencePuzzle.__repr__).
sp = SentencePuzzles(file_path=SP_PATH)
sp.puzzles[0]

Loading sentence puzzles from ../datasets/data/SP-val-nolabel.npy
Loaded 60 sentence puzzles


SentencePuzzle(
id="SP-146",
question="A guy bet his neighbor 50 bucks that his dog could jump higher than a house. Thinking this was not possible, the neighbor took the bet and lost.
Why did he lose the bet?",
answer="<question mode>",
choices=[
    A house can not jump.
	The height of the tiny house is lower than the dog.
	That dog species were famous for their jumping ability.
	None of above.
])

In [17]:
import pandas as pd

# One list per choice position; the integer keys 1-4 become the CSV headers.
data = {column: [] for column in range(1, 5)}
for p in sp.puzzles:
    for column, bucket in data.items():
        # Column k collects each puzzle's stored choice at index k - 1.
        bucket.append(p.choices[column - 1])

# Write the four choice columns to test.csv without the row index.
pd.DataFrame(data).to_csv("test.csv", index=False)


In [5]:
# List the id of every loaded puzzle, one per line.
for puzzle in sp.puzzles:
    puzzle_id = puzzle.id
    print(puzzle_id)

SP-0
SP-0_SR
SP-0_CR
SP-1
SP-1_SR
SP-1_CR
SP-2
SP-2_SR
SP-2_CR
SP-3
SP-3_SR
SP-3_CR
SP-4
SP-4_SR
SP-4_CR
SP-5
SP-5_SR
SP-5_CR
SP-6
SP-6_SR
SP-6_CR
SP-7
SP-7_SR
SP-7_CR
SP-8
SP-8_SR
SP-8_CR
SP-9
SP-9_SR
SP-9_CR
SP-10
SP-10_SR
SP-10_CR
SP-11
SP-11_SR
SP-11_CR
SP-12
SP-12_SR
SP-12_CR
SP-13
SP-13_SR
SP-13_CR
SP-14
SP-14_SR
SP-14_CR
SP-15
SP-15_SR
SP-15_CR
SP-16
SP-16_SR
SP-16_CR
SP-17
SP-17_SR
SP-17_CR
SP-18
SP-18_SR
SP-18_CR
SP-19
SP-19_SR
SP-19_CR
SP-20
SP-20_SR
SP-20_CR
SP-21
SP-21_SR
SP-21_CR
SP-22
SP-22_SR
SP-22_CR
SP-23
SP-23_SR
SP-23_CR
SP-24
SP-24_SR
SP-24_CR
SP-25
SP-25_SR
SP-25_CR
SP-26
SP-26_SR
SP-26_CR
SP-27
SP-27_SR
SP-27_CR
SP-28
SP-28_SR
SP-28_CR
SP-29
SP-29_SR
SP-29_CR
SP-30
SP-30_SR
SP-30_CR
SP-31
SP-31_SR
SP-31_CR
SP-32
SP-32_SR
SP-32_CR
SP-33
SP-33_SR
SP-33_CR
SP-34
SP-34_SR
SP-34_CR
SP-35
SP-35_SR
SP-35_CR
SP-36
SP-36_SR
SP-36_CR
SP-37
SP-37_SR
SP-37_CR
SP-38
SP-38_SR
SP-38_CR
SP-39
SP-39_SR
SP-39_CR
SP-40
SP-40_SR
SP-40_CR
SP-41
SP-41_SR
SP-41_CR
SP-42
SP-42_SR
SP-42_C

In [6]:
# Print the first three puzzles; indexing (rather than slicing) keeps the
# IndexError if fewer than three puzzles are loaded.
for position in range(3):
    print(sp.puzzles[position])

SentencePuzzle(
id="SP-0",
question="Mr. and Mrs. Mustard have six daughters and each daughter has one brother. But there are only 9 people in the family, how is that possible?",
answer="Each daughter shares the same brother.",
choices=[
    "Each daughter shares the same brother."
	"Some daughters get married and have their own family."
	"Some brothers were not loved by family and moved away."
	"None of above."
])
SentencePuzzle(
id="SP-0_SR",
question="The six daughters of Mr. and Mrs. Mustard each have one brother. However, the family only consists of nine people; how is that possible?",
answer="Each daughter shares the same brother.",
choices=[
    "Each daughter shares the same brother."
	"Some daughters get married and have their own family."
	"Some brothers were not loved by family and moved away."
	"None of above."
])
SentencePuzzle(
id="SP-0_CR",
question="A chess team have five players and each player has one coach. But there are only six participants in the team. How is that

In [10]:
import pandas as pd

# Group the puzzles in consecutive runs of three and spread each run over
# three columns. Puzzles left over after the last full run are ignored.
# NOTE(review): this relies on sp.puzzles being stored as
# (original, semantic, context) triples in that order - confirm for the
# file SP_PATH points at.
variant_names = ("original", "semantic", "context")
group_count = len(sp.puzzles) // 3
sp_dict = {
    name: [sp.puzzles[group * 3 + offset].json() for group in range(group_count)]
    for offset, name in enumerate(variant_names)
}

# Each cell of the frame holds one puzzle's json() dict.
sp_df = pd.DataFrame(sp_dict)
sp_df.to_csv("./sentence_puzzle_grouped.csv", index=False)

In [8]:
# Load the word-puzzle records and show how many there are.
# NOTE(review): allow_pickle=True unpickles arbitrary objects - only use it
# on trusted files.
wp_ds = np.load(file=WP_PATH, allow_pickle=True)
len(wp_ds)

492

In [9]:
# Display the first word-puzzle record to inspect its fields.
wp_ds[0]

{'id': 'WP-0',
 'question': 'How do you spell COW in thirteen letters?',
 'answer': 'SEE O DOUBLE YOU.',
 'distrator1': 'COWCOWCOWCOWW',
 'distrator2': 'SEE OH DEREFORD',
 'distrator(unsure)': 'None of above.',
 'label': 0,
 'choice_list': ['SEE O DOUBLE YOU.',
  'SEE OH DEREFORD',
  'COWCOWCOWCOWW',
  'None of above.'],
 'choice_order': [0, 2, 1, 3]}