## Imports and chains

In [1]:
from langchain.llms import OpenAI 
from langchain.prompts import PromptTemplate 
from langchain.chains.llm import LLMChain
from cot import Collection
import json
from langchain.chat_models import ChatOpenAI
# from dataloader import to_Collection

In [32]:
#llm = OpenAI(temperature=.0,model_name="text-davinci-003")   #gpt-3.5-turbo #Chat
llm = ChatOpenAI(temperature=.0,model_name="gpt-3.5-turbo")  

reflect_template = """
    Question: {question}
    Answer_choices: {answer_choices}

    {cot_trigger} {cot}
    {answer_extraction} {answer}
    
    Reflection: {reflection_prompt}
    """
reflect_prompt_template = PromptTemplate(input_variables=["question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt'], template=reflect_template)
reflect_chain = LLMChain(llm=llm, prompt=reflect_prompt_template,output_key="reflection")

#{instruction}
extraction_template = """

    Question: {question}
    Answer_choices: {answer_choices}

    {cot_trigger} {cot}
    {answer_extraction} {answer}
    
    Reflection: {reflection_prompt}
    {reflection}

    {reflect_answer_extraction}
    """
    #Get reflection
ans_prompt_template = PromptTemplate(input_variables=["question","answer_choices","cot_trigger","cot","answer_extraction",'answer','reflection_prompt','reflection','reflect_answer_extraction'], template=extraction_template)
reflect_answer_chain = LLMChain(llm=llm, prompt=ans_prompt_template,output_key="reflection_answer")

    # This is the overall chain where we run these two chains in sequence.
from langchain.chains import SequentialChain
reflect_overall_chain = SequentialChain(chains=[reflect_chain, reflect_answer_chain],input_variables=["question","answer_choices","cot_trigger","answer_extraction",'cot','answer','reflection_prompt','reflect_answer_extraction'],
        output_variables=["reflection", "reflection_answer"],
        verbose=True)



In [3]:
instruction = "Answer the following question through step-by-step reasoning."
question = "Animals may fight, make threatening sounds, and act aggressively toward members of the same species. These behaviors usually occur as the result of",
answer_choices = [
                    "competition",
                    "conservation",
                    "decomposition",
                    "pollution"
                ]
cot_trigger = "Answer: Let's think step by step."
cot = "Aggression is needed to defend something, one needs to defend their spot when they are in competition"
answer_extraction = "Therefore, the answer is"
answer = "competition"
reflection_prompt = "do you agree with the cot yes or no"
reflection = "Great reasoning mate!"
reflect_answer_extraction = "Based on the text above the answer is:"

In [29]:
print(reflect_prompt_template.format(question=question,answer_choices=answer_choices,cot_trigger=cot_trigger,
                                     cot=cot,answer_extraction=answer_extraction,
                                     answer=answer,reflection_prompt=reflection_prompt))


    Question: ('Animals may fight, make threatening sounds, and act aggressively toward members of the same species. These behaviors usually occur as the result of',)
    Answer_choices: ['competition', 'conservation', 'decomposition', 'pollution']

    Answer: Let's think step by step.Aggression is needed to defend something, one needs to defend their spot when they are in competition
    Therefore, the answer is competition
    
    Reflection: do you agree with the cot yes or no
    


In [37]:
print(ans_prompt_template.format(question=question,answer_choices=answer_choices,cot_trigger=cot_trigger,
                                     cot=cot,answer_extraction=answer_extraction,
                                     answer=answer,reflection_prompt=reflection_prompt,
                                     reflection=reflection,reflect_answer_extraction=reflect_answer_extraction))



    Question: ('Animals may fight, make threatening sounds, and act aggressively toward members of the same species. These behaviors usually occur as the result of',)
    Answer_choices: ['competition', 'conservation', 'decomposition', 'pollution']

    Answer: Let's think step by step. Aggression is needed to defend something, one needs to defend their spot when they are in competition
    Therefore, the answer is competition
    
    Reflection: do you agree with the cot yes or no
    Great reasoning mate!

    Based on the text above the answer is:
    


## Data


In [16]:
coll = Collection.load_thoughtsource_100(names='strategy_qa',load_pregenerated_cots=True) #random_sample=False?
coll = coll.select(split="all", number_samples=30)
coll.select_generated_cots(cot_trigger = "kojima-01", api_service='cohere') #have one

In [23]:
coll['strategy_qa']['train'][0]['generated_cot']

[{'id': '7d78d689-e644-485e-8954-46c9ed2eda33',
  'fragments_version': '0.01',
  'instruction': None,
  'cot_trigger': 'kojima-01',
  'cot_trigger_template': '{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}',
  'prompt_text': '',
  'cot': '\nThe largest crustacean is the Japanese spider crab, which has a leg span of up to\n4 meters (13 feet).\nThe largest king-sized mattress is about 180 cm (70 inches) wide.\n4 meters (13 feet) is about 170 cm (67 inches).\nThe Japanese spider crab cannot stretch out completely on a king-sized mattress.',
  'answers': [{'id': '290ca17e-3b1a-47cb-917b-b7a4b8350212',
    'answer_extraction': 'kojima-yes-no',
    'answer_extraction_template': '{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}{cot}\n{answer_extraction}',
    'answer_extraction_text': '',
    'answer': '\nNo.',
    'correct_answer': True}],
  'author': 'thoughtsource',
  'date': '2023/02/14 17:33:32',
  'api_service': 'cohere',
  'model': "{'name': 'command-xlarg

## Run reflection

In [5]:
input_dict = {
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is", 
    'answer':"", 
    'cot': "", 
    'reflection_prompt':"Double check this",
    'reflect_answer_extraction':'Based on the reflection, what is the definite answer?'
}
metareason = coll.metareason_flexible(chain=reflect_overall_chain,input_dict=input_dict)


Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


In [9]:
#@Konstantin
test = to_Collection(metareason,"strategy_qa",'train','file_test')

NameError: name 'to_Collection' is not defined

In [43]:
#to dataloader

"""Creates json and collection from Thoughtsource ouptut; chain_output from chain_generation"""
def to_Collection(chain_output,dataset_name,split,file_name):

    #Force langchain into TS structure 
    ts_set = {dataset_name:{split:chain_output}}

    #create and collect a json to make collection
    with open(f"{file_name}.json", "w") as outfile:
        json.dump(ts_set, outfile)
    collect = Collection.from_json('sample.json')

    return collect

In [100]:
#Force langchain into TS structure 
worldtree_new = {'worldtree':{'test':metareason}}

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(worldtree_new, outfile)
collect = Collection.from_json('sample.json')

collect

| Name      | Train   | Valid   |   Test |
|-----------|---------|---------|--------|
| worldtree | -       | -       |     30 |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'medmc_qa', 'open_book_qa', 'pubmed_qa', 'qed', 'strategy_qa', 'svamp']

In [None]:
#Conclusion: with these prompts, there is no new information and no changed answers

In [108]:
input_dict = {
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is", 
    'answer':"", 
    'cot': "", 
    'reflection_prompt':"Critique the reasoning above",
    'reflect_answer_extraction':'Based on the reasoning and the reflection, what is the final answer? (A-D)?'
}
metareason = coll.metareason_flexible(chain=reflect_overall_chain,input_dict=input_dict)

Generating worldtree...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m



In [109]:
#Force langchain into TS structure 
worldtree_new = {'worldtree':{'test':metareason}}

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(worldtree_new, outfile)
collect = Collection.from_json('sample.json')

collect

| Name      | Train   | Valid   |   Test |
|-----------|---------|---------|--------|
| worldtree | -       | -       |     30 |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'medmc_qa', 'open_book_qa', 'pubmed_qa', 'qed', 'strategy_qa', 'svamp']

In [None]:
#chatgpt seems to be a bad corrector, what about its competitors?
#but davinci is no better

In [54]:
#example cot and ans

metareason[0]['generated_cot'][3]
metareason[0]['generated_cot'][3]['answers']

[{'id': '8b8ddb55-e1fc-4fac-85c7-333a76cacd91',
  'answer_extraction': 'Therefore, among A through D, the answer is',
  'answer_extraction_template': '',
  'answer_extraction_text': 'self_reflection',
  'answer': 'The definite answer is B) There is a change in the appearance of the objects.',
  'correct_answer': None}]

Next: evaluate in annotator

## Strategy_qa and yes/no

In [12]:
# coll = Collection.load_thoughtsource_100(names='strategy_qa',load_pregenerated_cots=True)
# coll = coll.select(split="all", number_samples=1)
# coll.select_generated_cots(cot_trigger = "kojima-01", api_service='openai') #have one

In [13]:
coll['strategy_qa']['train'][0]['generated_cot']

[{'id': '6067839f-c5f5-494b-a71c-10dde2739954',
  'fragments_version': '0.01',
  'instruction': None,
  'cot_trigger': 'kojima-01',
  'cot_trigger_template': '{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}',
  'prompt_text': '',
  'cot': ' First, we need to convert 900,000 pounds to US dollars. As of June 2020, 1 pound is equal to 1.25 US dollars. So, 900,000 pounds is equal to 1,125,000 US dollars.\n\nNext, we need to compare this amount to the net worth of a billionaire. According to Forbes, the minimum net worth of a US billionaire is currently $1.1 billion.\n\nTherefore, a 900,000 pound net worth person would not be an American billionaire if they exchange currency in June 2020.',
  'answers': [{'id': '5d8d5fee-f1ee-4d8f-bd71-8261f260519c',
    'answer_extraction': 'kojima-yes-no',
    'answer_extraction_template': '{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}{cot}\n{answer_extraction}',
    'answer_extraction_text': '',
    'answer': ' No.',
    '

In [14]:
from cot.stats import evaluation_as_table
eval = coll.evaluate()
evaluation_as_table(eval)

Unnamed: 0_level_0,kojima-01
Unnamed: 0_level_1,text-davinci-003
strategy_qa,0.0


In [None]:
input_dict = {
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is", 
    'answer':"", 
    'cot': "", 
    'reflection_prompt':"Do you have any reason to believe that the reasoning or the answer might be wrong? Answer with one word Yes or No",
    'reflect_answer_extraction':'Based on the reflection, what is the definite answer?'
}
metareason = coll.metareason_flexible(chain=reflect_overall_chain,input_dict=input_dict)
#These prompts only let the model say no reason to believe the answer is wrong

In [30]:
input_dict = {
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is", 
    'answer':"", 
    'cot': "", 
    'reflection_prompt':"Improve the step-by-step thinking",
    'reflect_answer_extraction':'Based on the reflection, what is the definite answer?'
}
metareason = coll.metareason_flexible(chain=reflect_overall_chain,input_dict=input_dict)

Generating strategy_qa...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m


In [49]:
input_dict = {
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is", 
    'answer':"", 
    'cot': "", 
    'reflection_prompt':"Is there any irrelevant or incorrect information in the Answer",
    'reflect_answer_extraction':'Based on the reflection, what is the definite answer?',
    'model_name':"gpt-3.5-turbo"
}
metareason = coll.metareason_flexible(chain=reflect_overall_chain,input_dict=input_dict)

Generating strategy_qa...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m


In [50]:
st_new = {'strategy_qa':{'train':metareason}}

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(st_new, outfile)
collect = Collection.from_json('sample.json')

collect

| Name        |   Train | Valid   | Test   |
|-------------|---------|---------|--------|
| strategy_qa |      30 | -       | -      |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'medmc_qa', 'open_book_qa', 'pubmed_qa', 'qed', 'svamp', 'worldtree']

In [41]:
collect['strategy_qa']['train'][0]['generated_cot'][1]

{'id': '03e45537-96e6-4f2d-adfc-86137df0fae0',
 'fragments_version': '0.01',
 'instruction': '',
 'cot_trigger': "Answer: Let's think step by step.",
 'cot_trigger_template': '',
 'prompt_text': '',
 'cot': 'Yes.',
 'answers': [{'id': 'cde8940d-e39c-45b4-be9d-1ad14c4d48c6',
   'answer_extraction': 'Therefore, among A through D, the answer is',
   'answer_extraction_template': '',
   'answer_extraction_text': 'self_reflection',
   'answer': 'The definite answer is False.',
   'correct_answer': True}],
 'author': '',
 'date': '2023/03/16 13:32:18',
 'api_service': '',
 'model': "{'name': '', 'temperature': 0, 'max_tokens': 800}",
 'comment': 'self_reflection cot',
 'annotations': []}

In [43]:
from cot.stats import evaluation_as_table
eval = coll.evaluate()
evaluation_as_table(eval)

  0%|          | 0/30 [00:00<?, ?ex/s]

  0%|          | 0/2207 [00:00<?, ?ex/s]

  0%|          | 0/1664 [00:00<?, ?ex/s]

  0%|          | 0/496 [00:00<?, ?ex/s]

['strategy_qa.train.accuracy.command-xlarge-nightly.None_kojima-01_kojima-yes-no']


Unnamed: 0_level_0,kojima-01
Unnamed: 0_level_1,command-xlarge-nightly
strategy_qa,0.6
worldtree,


In [51]:
from cot.stats import evaluation_as_table
eval = collect.evaluate()
evaluation_as_table(eval)

  0%|          | 0/30 [00:00<?, ?ex/s]

["strategy_qa.train.accuracy.._Answer: Let's think step by step._Therefore, among A through D, the answer is", 'strategy_qa.train.accuracy.command-xlarge-nightly.None_kojima-01_kojima-yes-no']


ValueError: too many values to unpack (expected 5)

In [55]:
from rich.pretty import pprint
pprint(eval)

In [56]:
input_dict = {
    "cot_trigger": "Answer: Let's think step by step.",
    "answer_extraction": "Therefore, among A through D, the answer is", 
    'answer':"", 
    'cot': "", 
    'reflection_prompt':"The goal is to correct the Answer if needed, let's think step by step",
    'reflect_answer_extraction':'Based on the reflection, what is the definite answer?',
    'model_name':"gpt-3.5-turbo"
}
metareason = coll.metareason_flexible(chain=reflect_overall_chain,input_dict=input_dict)

Generating strategy_qa...


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


[1m> Entering new SequentialChain chain...[0m


In [57]:
st_new = {'strategy_qa':{'train':metareason}}

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(st_new, outfile)
collect = Collection.from_json('sample.json')

collect

| Name        |   Train | Valid   | Test   |
|-------------|---------|---------|--------|
| strategy_qa |      30 | -       | -      |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'medmc_qa', 'open_book_qa', 'pubmed_qa', 'qed', 'svamp', 'worldtree']

In [58]:
from rich.pretty import pprint
pprint(eval)