## Data

In [None]:
"""Example dataset with one sample for two datasets"""

from cot import Collection
coll = Collection.load_thoughtsource_100(names=['worldtree'],load_pregenerated_cots=True) #random_sample=False?
coll = coll.select(split="all", number_samples=1)
coll.select_generated_cots(cot_trigger = "kojima-01", api_service='cohere') #have one


In [1]:
from cot import Collection

coll = Collection("worldtree", load_pregenerated_cots=True,generate_mode='recache')
coll = coll.select(split="test", number_samples=1)
coll.select_generated_cots(cot_trigger = "kojima-01", api_service='cohere') 

Loading worldtree...
Downloading and preparing dataset worldtree_dataset/thoughtsource to /Users/robertpraas/.cache/huggingface/datasets/worldtree_dataset/thoughtsource/1.0.0/4ec0cd827b41f05891af9a27bf461fecd407e2fe7c1beebfed1eb00193c2cd52...


Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Dataset worldtree_dataset downloaded and prepared to /Users/robertpraas/.cache/huggingface/datasets/worldtree_dataset/thoughtsource/1.0.0/4ec0cd827b41f05891af9a27bf461fecd407e2fe7c1beebfed1eb00193c2cd52. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [2]:
coll['worldtree']['test'][0]

{'id': 'worldtree_test_788',
 'ref_id': '',
 'question': 'When light hits a mirror, most of the light is',
 'type': 'multiplechoice',
 'choices': ['refracted.', 'reflected.', 'absorbed.', 'transmitted.'],
 'context': '',
 'cot': ['A mirror reflects light.',
  'Reflection is when a wave bounces off a surface and travels in the opposite direction relative to the angle of incidence.',
  'Light is a kind of wave.',
  'When light hits a reflective object , that light bounces off that object.',
  'A mirror is a kind of reflective object.'],
 'answer': ['reflected.'],
 'generated_cot': [],
 'feedback': []}

## Generate and extract

In [2]:
from langchain.llms import OpenAI 
from langchain.prompts import PromptTemplate 
from langchain.chains.llm import LLMChain
import json
from langchain.chat_models import ChatOpenAI


In [3]:
"""CoT Chain"""

llm = ChatOpenAI(temperature=.0,model_name="gpt-3.5-turbo") #ADA #for chat: gpt-3.5-turbo




template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

{cot_trigger}
"""

prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger"], template=template)
cot_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="cot")

In [4]:
"""answer extraction chain"""

extraction_template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

Cot: {cot_trigger}{cot}
{answer_extraction}
"""

prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger","cot","answer_extraction"], template=extraction_template)
answer_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="predicted_answer")


In [5]:
"""CoT-Ans_extraction chain"""

from langchain.chains import SequentialChain
overall_chain = SequentialChain(chains=[cot_chain, answer_chain],
                                input_variables=["instruction","question","answer_choices","cot_trigger","answer_extraction"],
                                output_variables=["cot", "predicted_answer"],
                                verbose=True)

In [6]:
coll

| Name      | Train   | Valid   |   Test |
|-----------|---------|---------|--------|
| worldtree | -       | -       |      1 |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'med_qa_open', 'medmc_qa', '_init_', 'mmlu_clinical_knowledge', 'mmlu_college_biology', 'mmlu_college_medicine', 'mmlu_medical_genetics', 'mmlu_professional_medicine', '_init_', 'mmlu_anatomy', 'open_book_qa', 'pubmed_qa', 'qed', 'strategy_qa', 'svamp']

In [6]:
"""Generate CoT with use of TS-schema"""
#compare with config used before; what about max tokens?
#config contains what the chain needs
input_dict = {'input_dict':
              {
                  'chain': overall_chain,
                  "instruction": "Be faithful and a little hopeful",
                  "cot_trigger": "Answer: Let's think step by step.",
                  "answer_extraction": "Therefore, among A through D, the answer is",
                  'model': "gpt-3.5-turbo",
                  'temperature': 0,
                  'max_tokens': 800
              }
              }
    
coll.generate_extract_flexible(input_dict=input_dict)




Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m
{'id': 'worldtree_test_788', 'ref_id': '', 'question': 'When light hits a mirror, most of the light is', 'type': 'multiplechoice', 'choices': ['refracted.', 'reflected.', 'absorbed.', 'transmitted.'], 'context': '', 'cot': ['A mirror reflects light.', 'Reflection is when a wave bounces off a surface and travels in the opposite direction relative to the angle of incidence.', 'Light is a kind of wave.', 'When light hits a reflective object , that light bounces off that object.', 'A mirror is a kind of reflective object.'], 'answer': ['reflected.'], 'generated_cot': [{'id': '5c3620a5-058a-42db-9f3d-ebee29d1d3ae', 'fragments_version': None, 'instruction': 'Be faithful and a little hopeful', 'cot_trigger': "Answer: Let's think step by step.", 'cot_trigger_template': '', 'prompt_text': '', 'cot': 'When light hits a mirror, it bounces back off the surface of the mirror. This process is called 

In [11]:
second_coll = Collection.to_Collection(coll,"worldtree",'test','file_test')


In [13]:
worldtree_2 = {'worldtree':{'test':coll}}

In [16]:
#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(worldtree_2, outfile)
collect = Collection.from_json('sample.json')

collect['worldtree']['test']['generated_cot']

[[{'id': '470bcc15-51e6-40da-a19d-d575123a07a1',
   'fragments_version': '0.01',
   'instruction': 'Be faithful and a little hopeful',
   'cot_trigger': "Answer: Let's think step by step.",
   'cot_trigger_template': '',
   'prompt_text': '',
   'cot': 'When light hits a mirror, it bounces back off the surface of the mirror. This process is called reflection. Therefore, the correct answer is B) reflected.',
   'answers': [{'id': 'c755d584-135b-4610-b6b5-680b834142f5',
     'answer_extraction': 'Therefore, among A through D, the answer is',
     'answer_extraction_template': '',
     'answer_extraction_text': '',
     'answer': 'B) reflected.',
     'correct_answer': None}],
   'author': '',
   'date': '2023/05/07 13:11:15',
   'api_service': '',
   'model': "{'name': 'gpt-3.5-turbo', 'temperature': 0, 'max_tokens': 800}",
   'comment': 'generated and extracted',
   'annotations': []}]]

In [18]:
collect['worldtree']

DatasetDict({
    test: Dataset({
        features: ['id', 'ref_id', 'question', 'type', 'choices', 'context', 'cot', 'answer', 'generated_cot', 'feedback'],
        num_rows: 1
    })
})

In [19]:
third_coll = Collection("worldtree", load_pregenerated_cots=True)
third_coll = third_coll.select(split="test", number_samples=1)
third_coll.select_generated_cots(cot_trigger = "kojima-01", api_service='cohere') 

Reusing dataset worldtree_dataset (/Users/robertpraas/.cache/huggingface/datasets/worldtree_dataset/thoughtsource/1.0.0/4ec0cd827b41f05891af9a27bf461fecd407e2fe7c1beebfed1eb00193c2cd52)


Loading worldtree...


  0%|          | 0/3 [00:00<?, ?it/s]

## Generate or extract

In [7]:
from cot import Collection
new_coll = Collection("worldtree", load_pregenerated_cots=True)
new_coll = new_coll.select(split="test", number_samples=1)

new_coll.select_generated_cots(cot_trigger = "kojima-01", api_service='cohere') 


Reusing dataset worldtree_dataset (/Users/robertpraas/.cache/huggingface/datasets/worldtree_dataset/thoughtsource/1.0.0/4ec0cd827b41f05891af9a27bf461fecd407e2fe7c1beebfed1eb00193c2cd52)


Loading worldtree...


  0%|          | 0/3 [00:00<?, ?it/s]

In [15]:
new_coll

| Name      | Train   | Valid   |   Test |
|-----------|---------|---------|--------|
| worldtree | -       | -       |      1 |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'med_qa_open', 'medmc_qa', '_init_', 'mmlu_clinical_knowledge', 'mmlu_college_biology', 'mmlu_college_medicine', 'mmlu_medical_genetics', 'mmlu_professional_medicine', '_init_', 'mmlu_anatomy', 'open_book_qa', 'pubmed_qa', 'qed', 'strategy_qa', 'svamp']

In [11]:
"""Here use the single langchain"""
input_dict = {
    'input_dict': {
        'chain': cot_chain,
        "instruction": "Be faithful and a little hopeful",
        "cot_trigger": "Answer: Let's think step by step.",
        "answer_extraction": "Therefore, among A through D, the answer is",
        'model': "gpt-3.5-turbo",
        'api_service': 'OpenAI',
        'temperature': 0,
        'max_tokens': 800
    }
} 

new_coll.generate_flexible(input_dict=input_dict,name='worldtree',split='test')
new_coll

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer')).


Generating worldtree...


| Name      | Train   | Valid   |   Test |
|-----------|---------|---------|--------|
| worldtree | -       | -       |      1 |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'med_qa_open', 'medmc_qa', '_init_', 'mmlu_clinical_knowledge', 'mmlu_college_biology', 'mmlu_college_medicine', 'mmlu_medical_genetics', 'mmlu_professional_medicine', '_init_', 'mmlu_anatomy', 'open_book_qa', 'pubmed_qa', 'qed', 'strategy_qa', 'svamp']

In [28]:
"""Extract script: Assumes there are CoTs in the dataset already"""

input_dict = {
    'input_dict':{
    'chain': answer_chain,
    'instruction': None,
    "answer_extraction": "Be faithful and a little hopeful",
    "cot_trigger": "Answer: Let's think step by step.",
    'api_service': "chat_openai",
    'model':"gpt-3.5-turbo",
    'temperature': 0,
    'max_tokens': 800 
}}
new_coll.extract_flexible(input_dict=input_dict)

Generating worldtree...
[{'id': '2e815d15-e354-4956-8df6-68b38ffdbe38', 'answer_extraction': 'Be faithful and a little hopeful', 'answer_extraction_template': '', 'answer_extraction_text': '', 'answer': 'As an AI language model, I do not have beliefs or emotions, but I will always provide accurate and helpful responses to your questions.', 'answer_from_choices': '', 'correct_answer': None}]
################
[{'id': '2e815d15-e354-4956-8df6-68b38ffdbe38', 'answer_extraction': 'Be faithful and a little hopeful', 'answer_extraction_template': '', 'answer_extraction_text': '', 'answer': 'As an AI language model, I do not have beliefs or emotions, but I will always provide accurate and helpful responses to your questions.', 'answer_from_choices': '', 'correct_answer': None}, {'id': 'c4b30671-576e-4654-9bd5-931e28f3c024', 'answer_extraction': 'Be faithful and a little hopeful', 'answer_extraction_template': '', 'answer_extraction_text': '', 'answer': 'As an AI language model, I do not have b

## Reflection

In [29]:
llm = ChatOpenAI(temperature=.0,model_name="gpt-3.5-turbo")

reflect_template = """
    Question: {question}
    Answer_choices: {answer_choices}

    Cot: {cot_trigger}{cot}
    {answer_extraction}
    Answer: {answer}
    
    {reflection_prompt}
    """
prompt_template = PromptTemplate(input_variables=["question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt'], template=reflect_template)
reflect_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="reflection")

extraction_template = """{instruction}

    Question: {question}
    Answer_choices: {answer_choices}

    Cot: {cot_trigger}{cot}
    {answer_extraction}{answer}
    {reflection_prompt}{reflection}

    {reflect_answer_extraction}
    """
    #Get reflection
prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt','reflection','reflect_answer_extraction'], template=extraction_template)
reflect_answer_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="reflection_answer")

    # This is the overall chain where we run these two chains in sequence.
from langchain.chains import SequentialChain
reflect_overall_chain = SequentialChain(chains=[reflect_chain, reflect_answer_chain],input_variables=["instruction","question","answer_choices","cot_trigger","answer_extraction",'cot','answer','reflection_prompt','reflect_answer_extraction'],
        output_variables=["reflection", "reflection_answer"],
        verbose=True)




In [32]:
#check for what is already in item
input_dict = {'input_dict':{
    'chain':reflect_overall_chain,
    'cot_trigger':"", 
    'answer':"", 
    'answer_extraction': "", 
    'cot': "", 
    'instruction': "",
    'api_service': "chat_openai", 
    'model': "gpt-3.5-turbo",
    'reflection_prompt':"Double check this",
    'reflect_answer_extraction':'Based on the reflection, what is the definite answer?',
    'temperature': 0,
    'max_tokens': 800 
}
}
coll.metareason_flexible(input_dict=input_dict)

Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


In [33]:
coll['worldtree']['test'][0]['generated_cot'][1]

{'id': '6d6af7a2-9246-4569-bf59-82f87217c732',
 'fragments_version': '',
 'instruction': '',
 'cot_trigger': 'Double check this',
 'cot_trigger_template': '',
 'prompt_text': '',
 'cot': 'As an AI language model, I can confirm that the answer is correct. When light hits a mirror, it is reflected back off the surface of the mirror. This is due to the smooth surface of the mirror, which allows the light to bounce back in a predictable way. Therefore, the correct answer is B) reflected.',
 'answers': [{'id': '85ab99c5-5651-45b2-b9bb-f934aab50e37',
   'answer_extraction': 'Based on the reflection, what is the definite answer?',
   'answer_extraction_template': '',
   'answer_extraction_text': 'self_reflection',
   'answer': 'The definite answer is B) reflected.',
   'answer_from_choices': '',
   'correct_answer': None}],
 'author': '',
 'date': '2023/05/12 10:42:30',
 'api_service': 'chat_openai',
 'model': "{'name': 'gpt-3.5-turbo', 'temperature': 0, 'max_tokens': 800}",
 'comment': 'self