## Data

In [44]:
"""Example dataset with one sample for two datasets"""

from cot import Collection
coll = Collection.load_thoughtsource_100(names=['worldtree'],load_pregenerated_cots=True) #random_sample=False?
coll = coll.select(split="all", number_samples=1)
coll.select_generated_cots(cot_trigger = "kojima-01", api_service='cohere') #have one


In [10]:
coll['worldtree']['test'][0]

{'id': 'worldtree_test_906',
 'ref_id': '',
 'question': 'Gravity on Earth is caused by',
 'type': 'multiplechoice',
 'choices': ['the mass of Earth.',
  'the rotation of Earth.',
  'the revolution of Earth.',
  'weather patterns on Earth.'],
 'context': '',
 'cot': ['The mass of a planet causes the pull of gravity on that planet.',
  'Earth is a kind of planet.'],
 'answer': ['the mass of Earth.'],
 'generated_cot': [{'id': '301fe39a-cc43-4771-9694-eda6bddfc1c2',
   'fragments_version': '0.01',
   'instruction': None,
   'cot_trigger': 'kojima-01',
   'cot_trigger_template': '{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}',
   'prompt_text': '',
   'cot': "\n1. The mass of Earth causes a gravitational attraction between Earth and you.\n2. The rotation of Earth causes centrifugal force, which is a pseudo-force that appears to be a reaction force in a rotating reference frame.\n3. The revolution of Earth around the Sun causes Earth to be pulled by the Sun's gravity.\n4. 

## Generate and extract

In [11]:
from langchain.llms import OpenAI 
from langchain.prompts import PromptTemplate 
from langchain.chains.llm import LLMChain
import json
from langchain.chat_models import ChatOpenAI


In [12]:
"""CoT Chain"""

llm = ChatOpenAI(temperature=.0,model_name="gpt-3.5-turbo") #ADA #for chat: gpt-3.5-turbo




template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

{cot_trigger}
"""

prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger"], template=template)
cot_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="cot")

In [13]:
"""answer extraction chain"""

extraction_template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

Cot: {cot_trigger}{cot}
{answer_extraction}
"""

prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger","cot","answer_extraction"], template=extraction_template)
answer_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="predicted_answer")


In [14]:
"""CoT-Ans_extraction chain"""

from langchain.chains import SequentialChain
overall_chain = SequentialChain(chains=[cot_chain, answer_chain],
                                input_variables=["instruction","question","answer_choices","cot_trigger","answer_extraction"],
                                output_variables=["cot", "predicted_answer"],
                                verbose=True)

In [18]:
"""Generate CoT with use of TS-schema"""
#compare with config used before; what about max tokens?
#config contains what the chain needs
input_dict = {
    "instruction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is",
    'model':"gpt-3.5-turbo",
    'temperature': 0,
    'max_tokens': 800 
}
coll.generate_extract_flexible(chain=overall_chain,input_dict=input_dict)




Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


In [29]:
coll['worldtree']['test'][0]['generated_cot'][27]

{'id': '2763c9a3-0b71-45ce-b5e3-33c9334559c0',
 'fragments_version': '0.01',
 'instruction': 'Be faithful and a little hopeful',
 'cot_trigger': "Answer: Let's think step by step.",
 'cot_trigger_template': '',
 'prompt_text': '',
 'cot': 'A) Is the mass of Earth a factor in causing gravity on Earth? Yes, it is. The more massive an object, the stronger its gravitational pull. So, option A is a possible answer.\n\nB) Is the rotation of Earth a factor in causing gravity on Earth? No, it is not. The rotation of Earth affects other phenomena, such as day and night, but not gravity. So, option B is not a correct answer.\n\nC) Is the revolution of Earth a factor in causing gravity on Earth? No, it is not. The revolution of Earth around the Sun affects the seasons, but not gravity. So, option C is not a correct answer.\n\nD) Are weather patterns on Earth a factor in causing gravity on Earth? No, they are not. Weather patterns are caused by atmospheric conditions, but they do not affect gravit

## Generate or extract

In [2]:
#from langchain.llms.openai import OpenAIChat

from langchain.llms import OpenAI 
from langchain.prompts import PromptTemplate 
from langchain.chains.llm import LLMChain
from cot import Collection
import json

In [36]:
"""Extract script: Assumes there are CoTs in the dataset already"""

input_dict = {
    'instruction': None,
    "answer_extraction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    'api_service': "chat_openai",
    'model':"gpt-3.5-turbo",
    'temperature': 0,
    'max_tokens': 800 
}
coll.extract_flexible(chain=answer_chain,input_dict=input_dict)

Generating worldtree...


In [None]:
coll['strategy_qa']['train'][0]['generated_cot']

In [38]:
coll['worldtree']['test'][0]['generated_cot'][1]

{'id': '6b3d0b3b-bc87-4ecb-a271-ee7d181cc2a2',
 'fragments_version': '0.01',
 'instruction': None,
 'cot_trigger': "Answer: Let's think step by step.",
 'cot_trigger_template': '',
 'prompt_text': '',
 'cot': "\n1. The mass of Earth causes a gravitational attraction between Earth and you.\n2. The rotation of Earth causes centrifugal force, which is a pseudo-force that appears to be a reaction force in a rotating reference frame.\n3. The revolution of Earth around the Sun causes Earth to be pulled by the Sun's gravity.\n4. Weather patterns on Earth do not cause gravity.\nThe correct answer is A) the mass of Earth.",
 'answers': [{'id': '65a672ce-5b05-4c74-aaac-5a05051c66a2',
   'answer_extraction': 'Be faithful and a little hopeful',
   'answer_extraction_template': '',
   'answer_extraction_text': '',
   'answer': 'as you approach each question, and use logical reasoning to eliminate incorrect answer choices.',
   'correct_answer': None}],
 'author': '',
 'date': '2023/04/12 11:06:02',

In [40]:
"""Generate only"""
input_dict = {
    "instruction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    'api_service': "chat_openai",
    'model': "gpt-3.5-turbo",
    "answer_extraction": "Therefore, among A through D, the answer is",
    'temperature': 0,
    'max_tokens': 800  
}

coll.generate_flexible(chain=cot_chain,input_dict=input_dict)

Generating worldtree...


In [None]:
coll['worldtree']['test'][0]['generated_cot'][2]

## Reflection

In [42]:
llm = ChatOpenAI(temperature=.0,model_name="gpt-3.5-turbo")

reflect_template = """
    Question: {question}
    Answer_choices: {answer_choices}

    Cot: {cot_trigger}{cot}
    {answer_extraction}
    Answer: {answer}
    
    {reflection_prompt}
    """
prompt_template = PromptTemplate(input_variables=["question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt'], template=reflect_template)
reflect_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="reflection")

extraction_template = """{instruction}

    Question: {question}
    Answer_choices: {answer_choices}

    Cot: {cot_trigger}{cot}
    {answer_extraction}{answer}
    {reflection_prompt}{reflection}

    {reflect_answer_extraction}
    """
    #Get reflection
prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt','reflection','reflect_answer_extraction'], template=extraction_template)
reflect_answer_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="reflection_answer")

    # This is the overall chain where we run these two chains in sequence.
from langchain.chains import SequentialChain
reflect_overall_chain = SequentialChain(chains=[reflect_chain, reflect_answer_chain],input_variables=["instruction","question","answer_choices","cot_trigger","answer_extraction",'cot','answer','reflection_prompt','reflect_answer_extraction'],
        output_variables=["reflection", "reflection_answer"],
        verbose=True)




In [45]:
#check for what is already in item
input_dict = {
    'cot_trigger':"", 
    'answer':"", 
    'answer_extraction': "", 
    'cot': "", 
    'instruction': "",
    'api_service': "chat_openai", 
    'model': "gpt-3.5-turbo",
    'reflection_prompt':"Double check this",
    'reflect_answer_extraction':'Based on the reflection, what is the definite answer?',
    'temperature': 0,
    'max_tokens': 800 

}
coll.metareason_flexible(chain=reflect_overall_chain,input_dict=input_dict)

Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


In [46]:
coll['worldtree']['test'][0]['generated_cot'][1]

{'id': 'ddbcca69-1ded-4593-a80f-03f3644190db',
 'fragments_version': '0.01',
 'instruction': '',
 'cot_trigger': 'Double check this',
 'cot_trigger_template': '',
 'prompt_text': '',
 'cot': 'Confirmed, the correct answer is A) the mass of Earth.',
 'answers': [{'id': 'ea0f47f5-c8f8-41ee-ada0-f59618e8748f',
   'answer_extraction': 'Based on the reflection, what is the definite answer?',
   'answer_extraction_template': '',
   'answer_extraction_text': 'self_reflection',
   'answer': 'The definite answer is A) the mass of Earth.',
   'correct_answer': None}],
 'author': '',
 'date': '2023/04/12 11:09:03',
 'api_service': 'chat_openai',
 'model': "{'name': 'gpt-3.5-turbo', 'temperature': 0, 'max_tokens': 800}",
 'comment': 'self_reflection cot',
 'annotations': []}

## Experiments

In [7]:
from cot import Collection
col = Collection.load_thoughtsource_100(load_pregenerated_cots=True)
col.select_generated_cots(author='thoughtsource', cot_trigger='kojima-01')
dataset = col['worldtree']


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/robertpraas/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


  0%|          | 0/100 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ex/s]

In [8]:
worldtree = Collection(["worldtree"], verbose=False)
worldtree_1 = worldtree.select(split="train", number_samples=1, random_samples=True, seed=0)

Loading worldtree...


In [55]:
worldtree_1['worldtree']['train'][0]

{'id': '1577',
 'ref_id': '',
 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?',
 'type': 'multiplechoice',
 'choices': ['being tall',
  'having curly hair',
  'being a good cook',
  'having freckles'],
 'context': '',
 'cot': ['Skills are learned characteristics.',
  'A behavior is a kind of characteristic.',
  'Cooking is a kind of skill for preparing food.'],
 'answer': ['being a good cook'],
 'generated_cot': [],
 'feedback': []}

Reflect

Generate and extract

In [9]:
input_dict = {
    "instruction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is" 
}

In [10]:
generate_test = worldtree_1.generate_extract_flexibly(chain=overall_chain,input_dict=input_dict)

Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m
processed_example:
{'id': '1577', 'ref_id': '', 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?', 'type': 'multiplechoice', 'choices': ['being tall', 'having curly hair', 'being a good cook', 'having freckles'], 'context': '', 'cot': ['Skills are learned characteristics.', 'A behavior is a kind of characteristic.', 'Cooking is a kind of skill for preparing food.'], 'answer': ['being a good cook'], 'generated_cot': [{'id': '2db4a9e4-301c-4197-8796-b40931fd7d6f', 'fragments_version': '0.01', 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n

In [14]:
#Force langchain into TS structure 
worldtree_new = {'worldtree':{'train':generate_test}}

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(worldtree_new, outfile)
collect = Collection.from_json('sample.json')

collect['worldtree']['train']['generated_cot']

[[{'id': '2db4a9e4-301c-4197-8796-b40931fd7d6f',
   'fragments_version': '0.01',
   'instruction': 'Be faithful and a little hopeful',
   'cot_trigger': "Answer: Let's think step by step.",
   'cot_trigger_template': '',
   'prompt_text': '',
   'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned behavior.\n\n6. F) is a learned behavior.\n\n7. G) is a learned behavior.\n\n8. H) is a learned behavior.\n\n9. I) is a learned behavior.\n\n10. J) is a learned behavior.\n\n11. K) is a learned behavior.\n\n12. L) is a learned behavior.\n\n13. M) is a learned behavior.\n\n14. N) is a learned behavior.\n\n15. O) is a learned behavior.\n\n16. P) is a learned behavior.\n\n17. Q) is a learned behavior.\n\n18. R) is a learned behavior.\n\n19. S) is a learned behavior.\n\n20. T) is a learned behavior.\n\n21. U) is a learned behavior.\n\n22. V) is a learned behavior.\n\n23. W) is a learned behavio

Generate

In [17]:
generate_only = worldtree_1.generate_flexible(chain=cot_chain,input_dict=input_dict)
generate_only

Generating worldtree...
processed_example:
{'id': '1577', 'ref_id': '', 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?', 'type': 'multiplechoice', 'choices': ['being tall', 'having curly hair', 'being a good cook', 'having freckles'], 'context': '', 'cot': ['Skills are learned characteristics.', 'A behavior is a kind of characteristic.', 'Cooking is a kind of skill for preparing food.'], 'answer': ['being a good cook'], 'generated_cot': [{'id': '7d977578-5359-49d9-bd26-c373911bf3c9', 'fragments_version': '0.01', 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned behavior.\n\n6. F) is a 

[{'id': '1577',
  'ref_id': '',
  'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?',
  'type': 'multiplechoice',
  'choices': ['being tall',
   'having curly hair',
   'being a good cook',
   'having freckles'],
  'context': '',
  'cot': ['Skills are learned characteristics.',
   'A behavior is a kind of characteristic.',
   'Cooking is a kind of skill for preparing food.'],
  'answer': ['being a good cook'],
  'generated_cot': [{'id': '7d977578-5359-49d9-bd26-c373911bf3c9',
    'fragments_version': '0.01',
    'instruction': 'Be faithful and a little hopeful',
    'cot_trigger': "Answer: Let's think step by step.",
    'cot_trigger_template': '',
    'prompt_text': '',
    'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned behavior.\n

Extract

In [18]:
#Force langchain into TS structure -- from geneate
worldtree_new = {'worldtree':{'train':generate_only}}

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(worldtree_new, outfile)
collect = Collection.from_json('sample.json')

In [23]:
extract = collect.extract_flexible(chain=answer_chain,input_dict=input_dict)

Generating worldtree...
processed_example:
{'id': '1577', 'ref_id': '', 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?', 'type': 'multiplechoice', 'choices': ['being tall', 'having curly hair', 'being a good cook', 'having freckles'], 'context': '', 'cot': ['Skills are learned characteristics.', 'A behavior is a kind of characteristic.', 'Cooking is a kind of skill for preparing food.'], 'answer': ['being a good cook'], 'generated_cot': [{'id': '7d977578-5359-49d9-bd26-c373911bf3c9', 'fragments_version': '0.01', 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': '\n1. A) is a learned behavior.\n\n2. B) is a learned behavior.\n\n3. C) is a learned behavior.\n\n4. D) is a learned behavior.\n\n5. E) is a learned behavior.\n\n6. F) is a 

In [33]:
coll = Collection.load_thoughtsource_100(names='worldtree',load_pregenerated_cots=True)
coll = coll.select(split="all", number_samples=1)
coll.select_generated_cots(author='thoughtsource',cot_trigger = None)