## Generate and extract

In [17]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate 
from langchain.chains.llm import LLMChain
from cot import Collection
import json

In [18]:
"""CoT Chain"""

llm = OpenAI(temperature=.0,model_name="ada") #for chat: gpt-3.5-turbo


template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

{cot_trigger}
"""

prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger"], template=template)
cot_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="cot")

In [26]:
"""answer extraction chain"""

extraction_template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

Cot: {cot_trigger}{cot}
{answer_extraction}
"""

prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger","cot","answer_extraction"], template=extraction_template)
answer_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="predicted_answer")


In [27]:
"""CoT-Ans_extraction chain"""

from langchain.chains import SequentialChain
overall_chain = SequentialChain(chains=[cot_chain, answer_chain],
                                input_variables=["instruction","question","answer_choices","cot_trigger","answer_extraction"],
                                output_variables=["cot", "predicted_answer"],
                                verbose=True)

In [21]:
"""Generate CoT with use of TS-schema"""
#compare with config used before; what about max tokens?
#config contains what the chain needs
input_dict = {
    "instruction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is" 
}

worldtree = Collection(["worldtree"], verbose=False)
worldtree_1 = worldtree.select(split="train", number_samples=1, random_samples=True, seed=0)



Loading worldtree...


In [28]:
test = worldtree_1.generate_extract_flexibly(chain=overall_chain,input_dict=input_dict)
#test

Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m
processed_example:
{'id': '1577', 'ref_id': '', 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?', 'type': 'multiplechoice', 'choices': ['being tall', 'having curly hair', 'being a good cook', 'having freckles'], 'context': '', 'cot': ['Skills are learned characteristics.', 'A behavior is a kind of characteristic.', 'Cooking is a kind of skill for preparing food.'], 'answer': ['being a good cook'], 'generated_cot': [{'id': '0f5c6874-5187-4e5d-b645-16a941f2e40f', 'fragments_version': '0.01', 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n

In [29]:
#worldtree_1['worldtree']['train'][0]
worldtree_new = {'worldtree':{'train':test}}

In [30]:
#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(worldtree_new, outfile)
collect = Collection.from_json('sample.json')

In [31]:
collect

| Name      |   Train | Valid   | Test   |
|-----------|---------|---------|--------|
| worldtree |       1 | -       | -      |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'medmc_qa', 'open_book_qa', 'pubmed_qa', 'qed', 'strategy_qa', 'svamp']

## Generate or extract

In [None]:
"""Generate CoT with use of TS-schema"""
#compare with config used before; what about max tokens?
#config contains what the chain needs
input_dict = {
    "instruction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is" 
}
cot_dataset = Collection.from_json("worldtree_10.json") #input dataset
cot_dataset = cot_dataset.select(split="train", number_samples=1, random_samples=True, seed=0)


extract = cot_dataset.extract_flexible(chain=answer_chain,input_dict=input_dict)

In [34]:
input_dict = {
    "instruction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is" 
}
worldtree = Collection(["worldtree"], verbose=False)
worldtree_1 = worldtree.select(split="train", number_samples=1, random_samples=True, seed=0)

generate_only = extract = worldtree_1.generate_flexible(chain=cot_chain,input_dict=input_dict)

Loading worldtree...
Generating worldtree...
processed_example:
{'id': '1577', 'ref_id': '', 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?', 'type': 'multiplechoice', 'choices': ['being tall', 'having curly hair', 'being a good cook', 'having freckles'], 'context': '', 'cot': ['Skills are learned characteristics.', 'A behavior is a kind of characteristic.', 'Cooking is a kind of skill for preparing food.'], 'answer': ['being a good cook'], 'generated_cot': [{'id': '7622c1a5-c03b-444f-9847-ef1a6fe3a7d1', 'fragments_version': '0.01', 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned beh

## Reflection

In [44]:
reflect_template = """
    Question: {question}
    Answer_choices: {answer_choices}

    Cot: {cot_trigger}{cot}
    {answer_extraction}
    Answer: {answer}
    
    {reflection_prompt}
    """
prompt_template = PromptTemplate(input_variables=["question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt'], template=reflect_template)
cot_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="reflection")

extraction_template = """{instruction}

    Question: {question}
    Answer_choices: {answer_choices}

    Cot: {cot_trigger}{cot}
    {answer_extraction}{answer}
    {reflection_prompt}
    
    {reflect_answer_extraction}
    """
    #Get reflection
prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt','reflect_answer_extraction'], template=extraction_template)
answer_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="reflection_answer")

    # This is the overall chain where we run these two chains in sequence.
from langchain.chains import SequentialChain
reflect_overall_chain = SequentialChain(chains=[cot_chain, answer_chain],input_variables=["instruction","question","answer_choices","cot_trigger","answer_extraction",'cot','answer','reflection_prompt','reflect_answer_extraction'],
        output_variables=["reflection", "reflection_answer"],
        verbose=True)




In [46]:
#check for what is already in item
input_dict = {
    'cot_trigger':"", 
    'answer':"", 
    'answer_extraction': "", 
    'cot': "", 
    'instruction': "",
    'reflection_prompt':"Double check this",
    'reflect_answer_extraction':'Based on the reflection, what is the definite answer?'
}
metareason= worldtree_1.metareason_flexible(chain=reflect_overall_chain,input_dict=input_dict)

Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3369, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/wq/slcg80w1143270jpznm3k9hm0000gn/T/ipykernel_2435/4289028613.py", line 11, in <cell line: 11>
    metareason= worldtree_1.metareason_flexible(chain=reflect_overall_chain,input_dict=input_dict)
  File "/Users/robertpraas/Desktop/ThoughtSource/libs/cot/cot/dataloader.py", line 370, in metareason_flexible
    return self_reason(self[name], chain, input_dict)
  File "/Users/robertpraas/Desktop/ThoughtSource/libs/cot/cot/new_generate.py", line 212, in self_reason
    processed_example = _self_reason(example,input_dict,chain)
  File "/Users/robertpraas/Desktop/ThoughtSource/libs/cot/cot/new_generate.py", line 236, in _self_reason
    answer["answer"] = lang_chain['predicted_answer']
KeyError: 'predicted_answer'

During handling of the above exception, another exception occurr

## ...

In [None]:
supervision = "Double check this idea, are the reasoning and answer sound yes/no?"


In [1]:
# Example data:
instruction = "Answer the following question through step-by-step reasoning."
question = "Animals may fight, make threatening sounds, and act aggressively toward members of the same species. These behaviors usually occur as the result of",
answer_choices = [
                    "competition",
                    "conservation",
                    "decomposition",
                    "pollution"
                ]
cot_trigger = "Answer: Let's think step by step."
answer_extraction = "Therefore, the answer is"

## Function Creation

In [2]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate 
from langchain.chains.llm import LLMChain

In [3]:
import os
from cot import Collection
from cot.generate import FRAGMENTS
from rich.pretty import pprint
import json

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate 
from langchain.chains.llm import LLMChain

"""From collection to changed collection"""
def cot_to_context(example):
    example['context'] = example['generated_cot'][0]['cot']
    example ['generated_cot'] = list()
    return example

def main(data_dict,instruction,answer_extraction,cot_trigger):
    var_dataset = {"instruction":instruction,"answer_extraction":answer_extraction,"cot_trigger":cot_trigger} # "cot_trigger":cot_trigger,
    cot_in_context = data_dict.map(cot_to_context)
    final = cot_in_context.map(generate_cot,fn_kwargs = var_dataset) #
    return final

def generate_cot(item,instruction,answer_extraction,cot_trigger):

    llm = OpenAI(temperature=.0)
    template = """{instruction}

    Question: {question}
    Answer_choices: {answer_choices}

    {cot_trigger}
    """
    prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger"], template=template)
    cot_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="cot")

    """This chain tries answer extraction and supervision at once"""
    extraction_template = """{instruction}

    Question: {question}
    Answer_choices: {answer_choices}

    Cot: {cot_trigger}{cot}
    {answer_extraction}
    """
    prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger","cot","answer_extraction"], template=extraction_template)
    answer_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="answer")

    # This is the overall chain where we run these two chains in sequence.
    from langchain.chains import SequentialChain
    overall_chain = SequentialChain(chains=[cot_chain, answer_chain],input_variables=["instruction","question","answer_choices","cot_trigger","answer_extraction"],
    # overall_chain = SequentialChain(chains=[cot_chain, supervision_chain],input_variables=["instruction","question","answer_choices","cot_trigger",'cot',"answer_extraction","supervision"],
        # Here we return multiple variables
        output_variables=["cot", "answer"],
        verbose=True)
    answer_cot = overall_chain({"instruction":instruction,"question":item['question'],"answer_choices":item['choices'],"cot_trigger":cot_trigger,"answer_extraction":answer_extraction})
    # item['generated_cot']['cot'] = answer_cot['cot']
    # item['generated_cot']['answers']['answer'] = answer_cot['answer']
    # generated_cot["cot"] = cot
    # generated_cot["date"] = print_now(1)
    # answer["answer"] = predicted_answer
    # generated_cot["answers"].append(answer)
    # item["generated_cot"].append(generated_cot)

    return answer_cot

In [None]:
#reflection template has new instruction


In [None]:
def generate_reflection(item, cot_chain, reflection_prompt,reflect_answer_extraction):
    llm = OpenAI(temperature=.0)
    reflect_template = """
    Question: {question}
    Answer_choices: {answer_choices}

    Cot: {cot_trigger}{cot}
    {answer_extraction}
    Answer: {answer}
    
    {reflection_prompt}
    """
    prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt'], template=reflect_template)
    cot_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="reflection")

In [4]:

cot_dataset = Collection.from_json("worldtree_10.json") #input dataset
cot_dataset = cot_dataset.select(split="train", number_samples=1, random_samples=True, seed=0) #input # samples,seed
cot_dataset = cot_dataset['worldtree']



In [8]:
cot_dataset['train'][0]

{'id': '1722',
 'ref_id': '',
 'question': 'Sharpening a pencil and tearing paper are examples of physical changes. Which statement describes why these are physical changes?',
 'type': 'multiplechoice',
 'choices': ['There is a change in how the objects are used.',
  'There is a change in the appearance of the objects.',
  'There is a change in the materials from which the objects are made.',
  'There is a change in both the appearance of the objects and the materials from which they are made.'],
 'context': '',
 'cot': ['Shape is a property of the appearance of an object.',
  'If something undergoes physical change then the chemical properties of that something will remain unchanged.',
  'Material composition is a kind of chemical property.',
  'Changed is the opposite of unchanged.',
  'Composed of means made of.',
  'If something undergoes a physical change then the physical properties of that something will change.',
  'Appearance is a kind of physical property.',
  'Sharpening an 

In [9]:

llm = OpenAI(temperature=.0,model_name="gpt-3.5-turbo")


In [None]:
#instruction = "Answer the following question through step-by-step reasoning."
#test = generate_cot(cot_dataset['train'],instruction,answer_extraction,cot_trigger)
llm = OpenAI(temperature=.0)
reflect_template = """
Question: {question}
Answer_choices: {answer_choices}

Cot: {cot_trigger}{cot}
{answer_extraction}
Answer: {answer}

{reflection_prompt}
"""
prompt_template = PromptTemplate(input_variables=["question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt'], template=reflect_template)
cot_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="reflection")
cot_chain.run()




cot_trigger = "Answer: Let's think step by step."
answer_extraction = "Therefore, the answer is"
reflection_prompt = 'Do you think the Answer is really the correct answer?' #try with {answer}



gen_test = generate_reflection(cot_dataset['train'],cot_chain,reflection_prompt, reflect_answer_extraction)

item, cot_chain, reflection_prompt,reflect_answer_extraction

In [None]:

    """This chain tries answer extraction and supervision at once"""
    extraction_template = """{instruction}

    Question: {question}
    Answer_choices: {answer_choices}

    Cot: {cot_trigger}{cot}
    {answer_extraction}{answer}
    {reflection_prompt}
    
    {reflect_answer_extraction}
    """
    #Get reflection
    prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt','reflect_answer_extraction'], template=extraction_template)
    answer_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="reflection_answer")

    # This is the overall chain where we run these two chains in sequence.
    from langchain.chains import SequentialChain
    overall_chain = SequentialChain(chains=[cot_chain, answer_chain],input_variables=["instruction","question","answer_choices","cot_trigger","answer_extraction",'cot','answer'],

        output_variables=["reflection", "reflection_answer"],
        verbose=True)
    answer_cot = overall_chain({"instruction":instruction,"question":item['question'],"answer_choices":item['choices'],"cot_trigger":cot_trigger,"answer_extraction":answer_extraction})


    return answer_cot

In [58]:
cot_dataset = Collection.from_json("worldtree_10.json") #input dataset
cot_dataset = cot_dataset.select(split="train", number_samples=1, random_samples=True, seed=0) #input # samples,seed
cot_dataset = cot_dataset['worldtree']

instruction = "Answer the following question through step-by-step reasoning."
cot_trigger = "Answer: Let's think step by step."
answer_extraction = "Therefore, the answer is"

test = generate_cot(cot_dataset['train'],instruction,answer_extraction,cot_trigger)



[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


In [59]:
test
test['cot']
test['answer']

{'instruction': 'Answer the following question through step-by-step reasoning.',
 'question': ['Sharpening a pencil and tearing paper are examples of physical changes. Which statement describes why these are physical changes?'],
 'answer_choices': [['There is a change in how the objects are used.',
   'There is a change in the appearance of the objects.',
   'There is a change in the materials from which the objects are made.',
   'There is a change in both the appearance of the objects and the materials from which they are made.']],
 'cot_trigger': "Answer: Let's think step by step.",
 'answer_extraction': 'Therefore, the answer is',
 'cot': '\n    Step 1: Physical changes involve a change in the physical properties of an object.\n    \n    Step 2: Sharpening a pencil involves a change in the shape of the pencil, while tearing paper involves a change in the shape of the paper.\n    \n    Step 3: Therefore, these are physical changes because there is a change in the appearance of the o

In [44]:
cot_dataset['train'][0]['generated_cot']

[{'id': '8a1b6bdc-4c0a-4a93-be8e-46ff33b8aa31',
  'fragments_version': '0.01',
  'instruction': None,
  'cot_trigger': 'kojima-01',
  'cot_trigger_template': '{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}',
  'prompt_text': '',
  'cot': ' When you sharpen a pencil, the appearance of the pencil changes, but the materials from which it is made remain the same. When you tear paper, the appearance of the paper changes, but the materials from which it is made remain the same. Therefore, the correct answer is B) There is a change in the appearance of the objects.',
  'answers': [{'id': '1a4ad0a8-54f8-4632-8bea-55946b4f33dd',
    'answer_extraction': 'kojima-A-D',
    'answer_extraction_template': '{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}{cot}\n{answer_extraction}',
    'answer_extraction_text': '',
    'answer': ' B.',
    'correct_answer': True}],
  'author': 'your_name',
  'date': '2023/01/27 18:22:27',
  'api_service': 'openai',
  'model': "{'name': 

In [None]:
#add to thoughtsource

## Arxiv

In [3]:
"""Upgrades"""
#langchain.__version__ #old 0.0.14
#!pip install --upgrade langchain #langchain-0.0.92
#!pip install -U openai

## Dataset manipulation function

In [1]:
import os
from cot import Collection
from cot.generate import FRAGMENTS
from rich.pretty import pprint
import json

In [31]:
"""From collection to changed collection"""
def cot_to_context(example):
    example['context'] = example['generated_cot'][0]['cot']
    example ['generated_cot'] = list()
    return example

cot_dataset = Collection.from_json("worldtree_10.json") #input dataset
cot_dataset = cot_dataset.select(split="train", number_samples=1, random_samples=True, seed=0)
updated_dataset = cot_dataset['worldtree'].map(cot_to_context) #input dataset name

#force dataset into the right format
dataset = {"train":[updated_dataset['train'][0]]}
dict_dataset = {"worldtree":dataset} 

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(dict_dataset, outfile)
collect = Collection.from_json('sample.json')

  0%|          | 0/1 [00:00<?, ?ex/s]

In [32]:
collect

| Name      |   Train | Valid   | Test   |
|-----------|---------|---------|--------|
| worldtree |       1 | -       | -      |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'medmc_qa', 'open_book_qa', 'pubmed_qa', 'qed', 'strategy_qa', 'svamp']

In [33]:
collect['worldtree']['train'][0]

{'id': '1722',
 'ref_id': '',
 'question': 'Sharpening a pencil and tearing paper are examples of physical changes. Which statement describes why these are physical changes?',
 'type': 'multiplechoice',
 'choices': ['There is a change in how the objects are used.',
  'There is a change in the appearance of the objects.',
  'There is a change in the materials from which the objects are made.',
  'There is a change in both the appearance of the objects and the materials from which they are made.'],
 'context': ' When you sharpen a pencil, the appearance of the pencil changes, but the materials from which it is made remain the same. When you tear paper, the appearance of the paper changes, but the materials from which it is made remain the same. Therefore, the correct answer is B) There is a change in the appearance of the objects.',
 'cot': ['Shape is a property of the appearance of an object.',
  'If something undergoes physical change then the chemical properties of that something will