In [2]:
from cot import Collection
from cot.generate import TEMPLATES
from pprint import pprint

# Model Chain of Thought (CoT) Generation

## Examples of available templates
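Templates for CoT generation come in three groups, each stored under its own key in `TEMPLATES`:
1) Instructions (`"instructions"`)
2) CoT-Triggers (`"cot-triggers"`)
3) Answer-Extractions (`"answer-extractions"`)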

In [3]:
# Look up the "qa-01" instruction template and display it
instruction_template = TEMPLATES["instructions"]["qa-01"]
pprint(instruction_template)

'Answer the following question through step-by-step reasoning.'


In [4]:
# Look up the "kojima-01" CoT trigger template and display it
cot_trigger_template = TEMPLATES["cot-triggers"]["kojima-01"]
pprint(cot_trigger_template)

"Answer: Let's think step by step."


In [5]:
# Look up the "kojima-01" answer-extraction template and display it
answer_extraction_template = TEMPLATES["answer-extractions"]["kojima-01"]
pprint(answer_extraction_template)

'Therefore, the answer is'


## Generating Chain of Thought examples

In [6]:
# Load only the worldtree dataset (the generate_samples notebook explains this in detail).
collection = Collection(["worldtree"], verbose=False)

# Select 100 randomly chosen samples from the train split, seeded with 0.
worldtree_100_random = collection.select(
    split="train",
    number_samples=100,
    random_samples=True,
    seed=0,
)

# Last expression of the cell: display the selected collection.
worldtree_100_random

Loading worldtree...


| Name      |   Train | Valid   | Test   |
|-----------|---------|---------|--------|
| worldtree |     100 | -       | -      |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'open_book_qa', 'qed', 'strategy_qa', 'svamp']

In [7]:
# Alternatively, load a previously saved collection from an existing JSON file:
# collection = Collection.from_json("worldtree_random_100_only_dataset.json")

#### Example of config file

In [8]:
"""
config={
    "idx_range": None, # Determines which indices the generate_and_extract routine is applied to, Default: None (All items are used)
    "debug": True, # Determines whether an API is called or a mock is returned, used for debugging, Default: True (API is not used)
    "instruction_keys": ["qa-01"], # Determines which instructions are used from templates.json, Default: None (All are used)
    "cot_trigger_keys": ["kojima-01"], # Determines which cot triggers are used from templates.json, Default: None (All are used)
    "answer_extraction_keys": ["kojima-01"], # Determines which answer extraction prompts are used from templates.json, Default: None (All are used)
    "author" : "", # Name of the person responsible for generation, Default: ""
    "api_service": "openai", # Name of the API called ("openai", "huggingface_hub"), Default: "openai"
    "engine": "text-davinci-002", # Name of the engine used (for "huggingface_hub" use for example "google/flan-t5-xl"), Default: "text-davinci-002"
    "temperature": 0, # Sampling temperature used by the engine during generation, Default: 0
    "max_tokens": 128, # Maximum length of output generated by the model, Default: 128
    "api_time_interval": 1.0, # Pause between two API calls in seconds, Default: 1.0
    "warn": False, # NOTE(review): not documented here; presumably toggles the API-cost warning prompt - TODO confirm
}
"""

# Settings used for the generation run in this notebook.
config = {
    # Debug switch (left on here).
    "debug": True,
    # Template selection: a single None entry for instructions, plus one
    # CoT trigger and one answer-extraction prompt.
    "instruction_keys": [None],
    "cot_trigger_keys": ["kojima-01"],
    "answer_extraction_keys": ["kojima-01"],
    # Who ran the generation.
    "author": "simon",
    # API service and engine, with their generation parameters.
    "api_service": "openai",
    "engine": "text-davinci-002",
    "temperature": 0,
    "max_tokens": 512,
    "api_time_interval": 1.0,
    # NOTE(review): "verbose" is not listed in the reference string of this
    # cell - confirm that the library reads this key.
    "verbose": False,
}

In [9]:
# Generate chains of thought and answer extractions using the settings in `config`
worldtree_100_random.generate(config=config, name="worldtree")

n_samples: 100, n_instruction_keys: 1, n_cot_trigger_keys: 1, n_answer_extraction_keys: 1
You are about to call the openai API which produces costs.
Due to your settings you are about to call the openai API in total 200 times.
Number API calls for CoT generation: n_samples * n_instruction_keys * n_cot_trigger_keys
Number API calls for answer extraction: n_samples * n_instruction_keys * n_cot_trigger_keys * n_answer_extraction_keys
Do you want to continue? y/n



In [11]:
import json

# Read the generation settings from config.json. The context manager closes the
# file handle as soon as parsing is done instead of leaving it open for the
# lifetime of the kernel.
with open("config.json", encoding="utf-8") as config_file:
    config = json.load(config_file)

# NOTE(review): the recorded run of this cell ended in KeyError: 'None'. JSON has
# no None literal, so config.json presumably contains the string "None" where
# null was intended - TODO confirm against config.json.
worldtree_100_random.generate(config=config)

n_samples: 100, n_instruction_keys: 1, n_cot_trigger_keys: 1, n_answer_extraction_keys: 1
You are about to call the openai API which produces costs.
Due to your settings you are about to call the openai API in total 200 times.
Number API calls for CoT generation: n_samples * n_instruction_keys * n_cot_trigger_keys
Number API calls for answer extraction: n_samples * n_instruction_keys * n_cot_trigger_keys * n_answer_extraction_keys
Do you want to continue? y/n



KeyError: 'None'

#### Question, choices and right answer

In [79]:
# Display the question, the answer choices and the answer of the first train item
first_train_item = worldtree_100_random["worldtree"]["train"][0]
for field in ("question", "choices", "answer"):
    pprint(first_train_item[field])

('A parent and a child share several characteristics. Both individuals are '
 'tall, have curly hair, are good cooks, and have freckles. Which of these '
 'characteristics is a learned behavior?')
['being tall', 'having curly hair', 'being a good cook', 'having freckles']
['being a good cook']


#### Model-generated chain of thought and extracted answer (correct)

In [80]:
# Display the first generated chain of thought of the first train item,
# followed by the first answer stored with it
first_generated_cot = worldtree_100_random["worldtree"]["train"][0]["generated_cot"][0]
pprint(first_generated_cot["cot"])
pprint(first_generated_cot["answers"][0]["answer"])

(' A parent and child share characteristics because they have the same genes. '
 'So, being tall, having curly hair, and having freckles are all '
 'characteristics that are passed down from the parent to the child. However, '
 'being a good cook is a learned behavior and is not passed down from the '
 'parent to the child.')
' C) being a good cook.'


### Save generated CoTs and extracted answers

In [40]:
# Save the collection, including the appended model output, to a file
output_path = "worldtree_100_model.json"
worldtree_100_random.dump(output_path)