## Generate and extract

In [9]:
from langchain.llms import OpenAI 
from langchain.prompts import PromptTemplate 
from langchain.chains.llm import LLMChain
from cot import Collection
import json
from langchain.chat_models import ChatOpenAI


In [10]:
"""CoT Chain"""

# Chat model used by the chains in this notebook (temperature 0).
# Author's original note: #ADA #for chat: gpt-3.5-turbo
llm = ChatOpenAI(
    temperature=0.0,
    model_name="gpt-3.5-turbo",
)

# Reasoning prompt: instruction, question and answer choices, followed by the
# trigger phrase that opens the chain of thought.
template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

{cot_trigger}
"""

prompt_template = PromptTemplate(
    input_variables=[
        "instruction",
        "question",
        "answer_choices",
        "cot_trigger",
    ],
    template=template,
)

# The completion of this chain is published under the "cot" key.
cot_chain = LLMChain(
    llm=llm,
    prompt=prompt_template,
    output_key="cot",
)

In [11]:
"""answer extraction chain"""

# Answer-extraction prompt: repeats the question together with the chain of
# thought and ends with the answer-extraction phrase.
extraction_template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

Cot: {cot_trigger}{cot}
{answer_extraction}
"""

prompt_template = PromptTemplate(
    input_variables=[
        "instruction",
        "question",
        "answer_choices",
        "cot_trigger",
        "cot",
        "answer_extraction",
    ],
    template=extraction_template,
)

# The completion of this chain is published under the "predicted_answer" key.
answer_chain = LLMChain(
    llm=llm,
    prompt=prompt_template,
    output_key="predicted_answer",
)


In [12]:
"""CoT-Ans_extraction chain"""

from langchain.chains import SequentialChain

# Reasoning chain first, extraction chain second; the "cot" output of the
# first chain fills the {cot} slot of the extraction prompt.
overall_chain = SequentialChain(
    chains=[cot_chain, answer_chain],
    input_variables=[
        "instruction",
        "question",
        "answer_choices",
        "cot_trigger",
        "answer_extraction",
    ],
    output_variables=["cot", "predicted_answer"],
    verbose=True,
)

In [13]:
"""Generate CoT with use of TS-schema"""
#compare with config used before; what about max tokens?
#config contains what the chain needs
input_dict = {
    "instruction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is",
}

# Load worldtree and request one random sample from the train split (seed 0).
worldtree = Collection(["worldtree"], verbose=False)
worldtree_1 = worldtree.select(
    split="train",
    number_samples=1,
    random_samples=True,
    seed=0,
)



Loading worldtree...


In [14]:
# Run the sequential chain on the selected sample.
test = worldtree_1.generate_extract_flexibly(
    chain=overall_chain,
    input_dict=input_dict,
)
#test

Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m
processed_example:
{'id': '1577', 'ref_id': '', 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?', 'type': 'multiplechoice', 'choices': ['being tall', 'having curly hair', 'being a good cook', 'having freckles'], 'context': '', 'cot': ['Skills are learned characteristics.', 'A behavior is a kind of characteristic.', 'Cooking is a kind of skill for preparing food.'], 'answer': ['being a good cook'], 'generated_cot': [{'id': '21f570c1-b403-41a6-a792-e2f56bc80007', 'fragments_version': '0.01', 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': '\n\nA) Being tall: This characteristic is mostly determined by genetics and is not a learned beh

In [15]:
#Force langchain into TS structure
worldtree_new = {
    "worldtree": {
        "train": test,
    },
}

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(worldtree_new, outfile)
collect = Collection.from_json("sample.json")

collect

| Name      |   Train | Valid   | Test   |
|-----------|---------|---------|--------|
| worldtree |       1 | -       | -      |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'medmc_qa', 'open_book_qa', 'pubmed_qa', 'qed', 'strategy_qa', 'svamp']

## Generate or extract

In [17]:
#from langchain.llms.openai import OpenAIChat

from langchain.llms import OpenAI 
from langchain.prompts import PromptTemplate 
from langchain.chains.llm import LLMChain
from cot import Collection
import json

In [23]:
"""Extract script: Assumes there are CoTs in the dataset already"""

input_dict = {
    "instruction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is",
}

# Input dataset; per the cell title it is expected to contain CoTs already.
cot_dataset = Collection.from_json("worldtree_10.json")
cot_dataset = cot_dataset.select(
    split="train",
    number_samples=1,
    random_samples=True,
    seed=0,
)

# Only the answer-extraction chain is passed here.
extract = cot_dataset.extract_flexible(
    chain=answer_chain,
    input_dict=input_dict,
)

Generating worldtree...
processed_example:
{'id': '1722', 'ref_id': '', 'question': 'Sharpening a pencil and tearing paper are examples of physical changes. Which statement describes why these are physical changes?', 'type': 'multiplechoice', 'choices': ['There is a change in how the objects are used.', 'There is a change in the appearance of the objects.', 'There is a change in the materials from which the objects are made.', 'There is a change in both the appearance of the objects and the materials from which they are made.'], 'context': '', 'cot': ['Shape is a property of the appearance of an object.', 'If something undergoes physical change then the chemical properties of that something will remain unchanged.', 'Material composition is a kind of chemical property.', 'Changed is the opposite of unchanged.', 'Composed of means made of.', 'If something undergoes a physical change then the physical properties of that something will change.', 'Appearance is a kind of physical property.'

In [29]:
# Show the entry at index 3 of the first example's 'generated_cot' list.
extract[0]['generated_cot'][3]

{'id': '8038f951-c775-488b-94b2-b9380cb4bb48',
 'fragments_version': '0.01',
 'instruction': 'Be faithful and a little hopeful',
 'cot_trigger': "Answer: Let's think step by step.",
 'cot_trigger_template': '',
 'prompt_text': '',
 'cot': ' When you sharpen a pencil, the appearance of the pencil changes, but the materials from which it is made remain the same. When you tear paper, the appearance of the paper changes, but the materials from which it is made remain the same. Therefore, the correct answer is B) There is a change in the appearance of the objects.',
 'answers': [],
 'author': '',
 'date': '2023/03/13 11:59:17',
 'api_service': '',
 'model': "{'name': '', 'temperature': 0, 'max_tokens': 800}",
 'comment': 'self_reflection cot',
 'annotations': []}

In [30]:
"""Generate only"""
input_dict = {
    "instruction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is",
}

# Load worldtree and request one random sample from the train split (seed 0).
worldtree = Collection(["worldtree"], verbose=False)
worldtree_1 = worldtree.select(
    split="train",
    number_samples=1,
    random_samples=True,
    seed=0,
)

# Only the CoT chain is passed here; no answer extraction.
generate_only = worldtree_1.generate_flexible(
    chain=cot_chain,
    input_dict=input_dict,
)

Loading worldtree...
Generating worldtree...
processed_example:
{'id': '1577', 'ref_id': '', 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?', 'type': 'multiplechoice', 'choices': ['being tall', 'having curly hair', 'being a good cook', 'having freckles'], 'context': '', 'cot': ['Skills are learned characteristics.', 'A behavior is a kind of characteristic.', 'Cooking is a kind of skill for preparing food.'], 'answer': ['being a good cook'], 'generated_cot': [{'id': '793240c3-b1fd-4777-98c2-9cae8cb98ad6', 'fragments_version': '0.01', 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned beh

In [31]:
# Display the value returned by generate_flexible.
generate_only

[{'id': '1577',
  'ref_id': '',
  'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?',
  'type': 'multiplechoice',
  'choices': ['being tall',
   'having curly hair',
   'being a good cook',
   'having freckles'],
  'context': '',
  'cot': ['Skills are learned characteristics.',
   'A behavior is a kind of characteristic.',
   'Cooking is a kind of skill for preparing food.'],
  'answer': ['being a good cook'],
  'generated_cot': [{'id': '793240c3-b1fd-4777-98c2-9cae8cb98ad6',
    'fragments_version': '0.01',
    'instruction': 'Be faithful and a little hopeful',
    'cot_trigger': "Answer: Let's think step by step.",
    'cot_trigger_template': '',
    'prompt_text': '',
    'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned behavior.\n

## Reflection

In [47]:
#input dataset
cot_dataset = Collection.from_json("worldtree_10.json")

# Request one random sample from the train split (seed 0).
worldtree_1 = cot_dataset.select(
    split="train",
    number_samples=1,
    random_samples=True,
    seed=0,
)


In [24]:
# Chat model for the reflection chains (temperature 0).
llm = ChatOpenAI(temperature=.0,model_name="gpt-3.5-turbo")

# NOTE: the template bodies below are deliberately flush-left. Everything
# between the triple quotes is sent to the model verbatim, so indenting the
# text would prefix every prompt line with spaces and leave trailing
# whitespace at the end of the prompt. The CoT and answer-extraction templates
# defined earlier in this notebook are flush-left as well.

# Step 1 - reflection: show the question, the existing chain of thought and its
# answer, then append the reflection prompt.
reflect_template = """
Question: {question}
Answer_choices: {answer_choices}

Cot: {cot_trigger}{cot}
{answer_extraction}
Answer: {answer}

{reflection_prompt}
"""
prompt_template = PromptTemplate(
    input_variables=[
        "question",
        "answer_choices",
        "cot_trigger",
        "cot",
        "answer_extraction",
        "answer",
        "reflection_prompt",
    ],
    template=reflect_template,
)
# The model's reflection is published under the "reflection" key.
reflect_chain = LLMChain(llm=llm, prompt=prompt_template, output_key="reflection")

# Step 2 - answer after reflection: replay everything from step 1 plus the
# generated {reflection}, then ask for the final answer.
extraction_template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

Cot: {cot_trigger}{cot}
{answer_extraction}{answer}
{reflection_prompt}{reflection}

{reflect_answer_extraction}
"""
prompt_template = PromptTemplate(
    input_variables=[
        "instruction",
        "question",
        "answer_choices",
        "cot_trigger",
        "cot",
        "answer_extraction",
        "answer",
        "reflection_prompt",
        "reflection",
        "reflect_answer_extraction",
    ],
    template=extraction_template,
)
# The post-reflection answer is published under the "reflection_answer" key.
reflect_answer_chain = LLMChain(llm=llm, prompt=prompt_template, output_key="reflection_answer")

# This is the overall chain where we run these two chains in sequence.
from langchain.chains import SequentialChain
reflect_overall_chain = SequentialChain(
    chains=[reflect_chain, reflect_answer_chain],
    input_variables=[
        "instruction",
        "question",
        "answer_choices",
        "cot_trigger",
        "answer_extraction",
        "cot",
        "answer",
        "reflection_prompt",
        "reflect_answer_extraction",
    ],
    output_variables=["reflection", "reflection_answer"],
    verbose=True,
)




In [49]:
#check for what is already in item
input_dict = {
    "cot_trigger": "",
    "answer": "",
    "answer_extraction": "",
    "cot": "",
    "instruction": "",
    "reflection_prompt": "Double check this",
    "reflect_answer_extraction": "Based on the reflection, what is the definite answer?",
}
metareason = worldtree_1.metareason_flexible(
    chain=reflect_overall_chain,
    input_dict=input_dict,
)

Generating worldtree...
[{'id': '8a1b6bdc-4c0a-4a93-be8e-46ff33b8aa31', 'fragments_version': '0.01', 'instruction': None, 'cot_trigger': 'kojima-01', 'cot_trigger_template': '{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}', 'prompt_text': '', 'cot': ' When you sharpen a pencil, the appearance of the pencil changes, but the materials from which it is made remain the same. When you tear paper, the appearance of the paper changes, but the materials from which it is made remain the same. Therefore, the correct answer is B) There is a change in the appearance of the objects.', 'answers': [{'id': '1a4ad0a8-54f8-4632-8bea-55946b4f33dd', 'answer_extraction': 'kojima-A-D', 'answer_extraction_template': '{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}{cot}\n{answer_extraction}', 'answer_extraction_text': '', 'answer': ' B.', 'correct_answer': True}], 'author': 'your_name', 'date': '2023/01/27 18:22:27', 'api_service': 'openai', 'model': "{'name': 'text-davinci-003'

In [55]:
# Show the 'context' field of the first 'generated_cot' entry of the first example.
metareason[0]['generated_cot'][0]['context']

' \n                                                                                                                                                                                                                                                              '

## Experiments

In [7]:
from cot import Collection

# Load the ThoughtSource-100 collection together with its pre-generated CoTs.
col = Collection.load_thoughtsource_100(load_pregenerated_cots=True)
col.select_generated_cots(
    author="thoughtsource",
    cot_trigger="kojima-01",
)
dataset = col["worldtree"]


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/robertpraas/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


  0%|          | 0/100 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ex/s]

In [8]:
# Load worldtree and request one random sample from the train split (seed 0).
worldtree = Collection(["worldtree"], verbose=False)
worldtree_1 = worldtree.select(
    split="train",
    number_samples=1,
    random_samples=True,
    seed=0,
)

Loading worldtree...


In [55]:
# Show the first item of the train split of the selected worldtree sample.
worldtree_1['worldtree']['train'][0]

{'id': '1577',
 'ref_id': '',
 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?',
 'type': 'multiplechoice',
 'choices': ['being tall',
  'having curly hair',
  'being a good cook',
  'having freckles'],
 'context': '',
 'cot': ['Skills are learned characteristics.',
  'A behavior is a kind of characteristic.',
  'Cooking is a kind of skill for preparing food.'],
 'answer': ['being a good cook'],
 'generated_cot': [],
 'feedback': []}

Reflect

Generate and extract

In [9]:
# Prompt fragments passed as input_dict to the chain-running calls below.
input_dict = {
    # Fills the {instruction} slot at the top of the prompt templates.
    "instruction": "Be faithful and a little hopeful",
    # Fills the {cot_trigger} slot.
    "cot_trigger": "Answer: Let's think step by step.",
    # Fills the {answer_extraction} slot of the extraction prompt.
    "answer_extraction": "Therefore, among A through D, the answer is",
}

In [10]:
# Run the sequential chain on the selected sample.
generate_test = worldtree_1.generate_extract_flexibly(
    chain=overall_chain,
    input_dict=input_dict,
)

Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m
processed_example:
{'id': '1577', 'ref_id': '', 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?', 'type': 'multiplechoice', 'choices': ['being tall', 'having curly hair', 'being a good cook', 'having freckles'], 'context': '', 'cot': ['Skills are learned characteristics.', 'A behavior is a kind of characteristic.', 'Cooking is a kind of skill for preparing food.'], 'answer': ['being a good cook'], 'generated_cot': [{'id': '2db4a9e4-301c-4197-8796-b40931fd7d6f', 'fragments_version': '0.01', 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n

In [14]:
#Force langchain into TS structure
worldtree_new = {
    "worldtree": {
        "train": generate_test,
    },
}

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(worldtree_new, outfile)
collect = Collection.from_json("sample.json")

collect["worldtree"]["train"]["generated_cot"]

[[{'id': '2db4a9e4-301c-4197-8796-b40931fd7d6f',
   'fragments_version': '0.01',
   'instruction': 'Be faithful and a little hopeful',
   'cot_trigger': "Answer: Let's think step by step.",
   'cot_trigger_template': '',
   'prompt_text': '',
   'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned behavior.\n\n6. F) is a learned behavior.\n\n7. G) is a learned behavior.\n\n8. H) is a learned behavior.\n\n9. I) is a learned behavior.\n\n10. J) is a learned behavior.\n\n11. K) is a learned behavior.\n\n12. L) is a learned behavior.\n\n13. M) is a learned behavior.\n\n14. N) is a learned behavior.\n\n15. O) is a learned behavior.\n\n16. P) is a learned behavior.\n\n17. Q) is a learned behavior.\n\n18. R) is a learned behavior.\n\n19. S) is a learned behavior.\n\n20. T) is a learned behavior.\n\n21. U) is a learned behavior.\n\n22. V) is a learned behavior.\n\n23. W) is a learned behavio

Generate

In [17]:
# Only the CoT chain is passed here; no answer extraction.
generate_only = worldtree_1.generate_flexible(
    chain=cot_chain,
    input_dict=input_dict,
)
generate_only

Generating worldtree...
processed_example:
{'id': '1577', 'ref_id': '', 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?', 'type': 'multiplechoice', 'choices': ['being tall', 'having curly hair', 'being a good cook', 'having freckles'], 'context': '', 'cot': ['Skills are learned characteristics.', 'A behavior is a kind of characteristic.', 'Cooking is a kind of skill for preparing food.'], 'answer': ['being a good cook'], 'generated_cot': [{'id': '7d977578-5359-49d9-bd26-c373911bf3c9', 'fragments_version': '0.01', 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned behavior.\n\n6. F) is a 

[{'id': '1577',
  'ref_id': '',
  'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?',
  'type': 'multiplechoice',
  'choices': ['being tall',
   'having curly hair',
   'being a good cook',
   'having freckles'],
  'context': '',
  'cot': ['Skills are learned characteristics.',
   'A behavior is a kind of characteristic.',
   'Cooking is a kind of skill for preparing food.'],
  'answer': ['being a good cook'],
  'generated_cot': [{'id': '7d977578-5359-49d9-bd26-c373911bf3c9',
    'fragments_version': '0.01',
    'instruction': 'Be faithful and a little hopeful',
    'cot_trigger': "Answer: Let's think step by step.",
    'cot_trigger_template': '',
    'prompt_text': '',
    'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned behavior.\n

Extract

In [18]:
#Force langchain into TS structure -- from generate
worldtree_new = {
    "worldtree": {
        "train": generate_only,
    },
}

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(worldtree_new, outfile)
collect = Collection.from_json("sample.json")

In [23]:
# Only the answer-extraction chain is passed here.
extract = collect.extract_flexible(
    chain=answer_chain,
    input_dict=input_dict,
)

Generating worldtree...
processed_example:
{'id': '1577', 'ref_id': '', 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?', 'type': 'multiplechoice', 'choices': ['being tall', 'having curly hair', 'being a good cook', 'having freckles'], 'context': '', 'cot': ['Skills are learned characteristics.', 'A behavior is a kind of characteristic.', 'Cooking is a kind of skill for preparing food.'], 'answer': ['being a good cook'], 'generated_cot': [{'id': '7d977578-5359-49d9-bd26-c373911bf3c9', 'fragments_version': '0.01', 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned behavior.\n\n6. F) is a 

In [33]:
# Load only worldtree from ThoughtSource-100, including pre-generated CoTs.
coll = Collection.load_thoughtsource_100(
    names="worldtree",
    load_pregenerated_cots=True,
)
coll = coll.select(split="all", number_samples=1)
coll.select_generated_cots(
    author="thoughtsource",
    cot_trigger=None,
)