In [17]:
import os
import sys
import importlib

In [18]:
# Make the parent of the current working directory importable, so that the
# local `data` package used in the following cells can be found.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    # Guarded so re-running this cell does not add duplicate entries.
    sys.path.append(module_path)

In [19]:
# Import the local modules as module objects (not just names from them) so
# they can be handed to importlib.reload below.
import data.zebra
import data.format

# Re-execute both modules so that re-running this cell picks up their current
# source without restarting the kernel.
importlib.reload(data.zebra)
importlib.reload(data.format)

# Bind Zebra only after the reload: a from-import done before it would keep
# referring to the class object of the pre-reload module.
from data.zebra import Zebra

In [20]:
# Build the dataset object used by every following cell. The token is read
# from the HF_TOKEN environment variable rather than written into the
# notebook; os.environ[...] raises KeyError when the variable is not set.
# NOTE(review): presumably a Hugging Face access token for a gated dataset — confirm.
zebra_dataset = Zebra(hf_token=os.environ['HF_TOKEN'])

In [21]:
# Peek at the first few raw records exactly as the dataset returns them.
# num_samples is reused by the formatting cells below.
num_samples = 3
for sample_idx in range(num_samples):
    record = zebra_dataset[sample_idx]
    print(record)

{'question': 'Given There are 5 houses, numbered 1 to 5 from left to right, as seen from across the street. Each house is occupied by a different person. Each house has a unique attribute for each of the following characteristics:\n - Each person has a unique name: `Peter`, `Alice`, `Bob`, `Eric`, `Arnold`\n - The people are of nationalities: `norwegian`, `german`, `dane`, `brit`, `swede`\n - People have unique favorite book genres: `fantasy`, `biography`, `romance`, `mystery`, `science fiction`\n - Everyone has something unique for lunch: `stir fry`, `grilled cheese`, `pizza`, `spaghetti`, `stew`\n - Each person has a favorite color: `red`, `green`, `blue`, `yellow`, `white`\n - The people keep unique animals: `bird`, `dog`, `cat`, `horse`, `fish`\n\n## Clues:\n1. The person who loves fantasy books is the Norwegian.\n2. The cat lover and the person who loves biography books are next to each other.\n3. The German is Bob.\n4. The person who loves yellow is Bob.\n5. The person whose favo

In [22]:
# Causal-LM view: lm_format_qa_instance renders one record as a single
# string, which is what we print for the first num_samples records.
for sample_idx in range(num_samples):
    record = zebra_dataset[sample_idx]
    formatted = data.format.lm_format_qa_instance(record)
    print(formatted)

### Question Given There are 5 houses, numbered 1 to 5 from left to right, as seen from across the street. Each house is occupied by a different person. Each house has a unique attribute for each of the following characteristics:
 - Each person has a unique name: `Peter`, `Alice`, `Bob`, `Eric`, `Arnold`
 - The people are of nationalities: `norwegian`, `german`, `dane`, `brit`, `swede`
 - People have unique favorite book genres: `fantasy`, `biography`, `romance`, `mystery`, `science fiction`
 - Everyone has something unique for lunch: `stir fry`, `grilled cheese`, `pizza`, `spaghetti`, `stew`
 - Each person has a favorite color: `red`, `green`, `blue`, `yellow`, `white`
 - The people keep unique animals: `bird`, `dog`, `cat`, `horse`, `fish`

## Clues:
1. The person who loves fantasy books is the Norwegian.
2. The cat lover and the person who loves biography books are next to each other.
3. The German is Bob.
4. The person who loves yellow is Bob.
5. The person whose favorite color is 

In [23]:
# Instruction-tuned view: chat_format_qa_instance renders one record as a
# list of chat message objects, printed here for the first num_samples records.
for sample_idx in range(num_samples):
    record = zebra_dataset[sample_idx]
    formatted = data.format.chat_format_qa_instance(record)
    print(formatted)

[{'role': 'user', 'content': 'Given There are 5 houses, numbered 1 to 5 from left to right, as seen from across the street. Each house is occupied by a different person. Each house has a unique attribute for each of the following characteristics:\n - Each person has a unique name: `Peter`, `Alice`, `Bob`, `Eric`, `Arnold`\n - The people are of nationalities: `norwegian`, `german`, `dane`, `brit`, `swede`\n - People have unique favorite book genres: `fantasy`, `biography`, `romance`, `mystery`, `science fiction`\n - Everyone has something unique for lunch: `stir fry`, `grilled cheese`, `pizza`, `spaghetti`, `stew`\n - Each person has a favorite color: `red`, `green`, `blue`, `yellow`, `white`\n - The people keep unique animals: `bird`, `dog`, `cat`, `horse`, `fish`\n\n## Clues:\n1. The person who loves fantasy books is the Norwegian.\n2. The cat lover and the person who loves biography books are next to each other.\n3. The German is Bob.\n4. The person who loves yellow is Bob.\n5. The p

In [24]:
# Few-shot prompting for a causal LM: record 0 supplies the question to be
# answered, records 1-4 serve as the worked examples placed in the prompt.
example_query = zebra_dataset[0]['question']
examples = [zebra_dataset[shot_idx] for shot_idx in range(1, 5)]
prompt = data.format.lm_create_fewshot_prompt(
    example_query,
    examples,
)
print(prompt)

### Question Given There are 4 houses, numbered 1 to 4 from left to right, as seen from across the street. Each house is occupied by a different person. Each house has a unique attribute for each of the following characteristics:
 - Each person has a unique name: `Alice`, `Eric`, `Arnold`, `Peter`
 - Each person has an occupation: `artist`, `engineer`, `teacher`, `doctor`
 - People have unique favorite book genres: `fantasy`, `science fiction`, `mystery`, `romance`
 - People use unique phone models: `google pixel 6`, `iphone 13`, `oneplus 9`, `samsung galaxy s21`

## Clues:
1. The person who is an engineer is directly left of the person who uses a Samsung Galaxy S21.
2. The person who loves fantasy books is in the second house.
3. Alice is not in the second house.
4. Eric is the person who is a teacher.
5. The person who uses a Samsung Galaxy S21 is the person who loves fantasy books.
6. The person who uses an iPhone 13 is the person who loves science fiction books.
7. The person who l

In [25]:
# The few shot prompt can also be formatted for instruction tuned models.
# Reuses `examples` built in the causal-LM few-shot cell, so that cell must
# have been run first.
# NOTE(review): example_query is rebound here to the whole dataset record,
# whereas the causal-LM few-shot cell passes only record['question'] —
# confirm that chat_create_fewshot_prompt expects the full instance.
example_query = zebra_dataset[0]
chat_prompt = data.format.chat_create_fewshot_prompt(example_query, examples)
print(chat_prompt)

[{'role': 'user', 'content': 'Given There are 4 houses, numbered 1 to 4 from left to right, as seen from across the street. Each house is occupied by a different person. Each house has a unique attribute for each of the following characteristics:\n - Each person has a unique name: `Alice`, `Eric`, `Arnold`, `Peter`\n - Each person has an occupation: `artist`, `engineer`, `teacher`, `doctor`\n - People have unique favorite book genres: `fantasy`, `science fiction`, `mystery`, `romance`\n - People use unique phone models: `google pixel 6`, `iphone 13`, `oneplus 9`, `samsung galaxy s21`\n\n## Clues:\n1. The person who is an engineer is directly left of the person who uses a Samsung Galaxy S21.\n2. The person who loves fantasy books is in the second house.\n3. Alice is not in the second house.\n4. Eric is the person who is a teacher.\n5. The person who uses a Samsung Galaxy S21 is the person who loves fantasy books.\n6. The person who uses an iPhone 13 is the person who loves science ficti