## Dataset manipulation function

In [1]:
import os
from cot import Collection
from cot.generate import FRAGMENTS
from rich.pretty import pprint
import json

In [4]:
"""From collection to changed collection"""
def cot_to_context(example):
    example['context'] = example['generated_cot'][0]['cot']
    example ['generated_cot'] = list()
    return example

cot_dataset = Collection.from_json("worldtree_10.json") #input dataset
cot_dataset = cot_dataset.select(split="train", number_samples=1, random_samples=True, seed=0) #input # samples,seed
updated_dataset = cot_dataset['worldtree'].map(cot_to_context) #input dataset name

#force dataset into the right format
dataset = {"train":[updated_dataset['train'][0]]}
dict_dataset = {"worldtree":dataset} 

#create and collect a json to make collection
with open("sample.json", "w") as outfile:
    json.dump(dict_dataset, outfile)
collect = Collection.from_json('sample.json')

  0%|          | 0/1 [00:00<?, ?ex/s]

In [5]:
collect

| Name      |   Train | Valid   | Test   |
|-----------|---------|---------|--------|
| worldtree |       1 | -       | -      |

Not loaded: ['aqua', 'asdiv', 'commonsense_qa', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'medmc_qa', 'open_book_qa', 'pubmed_qa', 'qed', 'strategy_qa', 'svamp']

In [None]:
"""Write function that it works for Thoughtsource_100"""

## Langchain Implementation

In [3]:
"""Upgrades"""
#langchain.__version__ #old 0.0.14
#!pip install --upgrade langchain #langchain-0.0.92
#!pip install -U openai

In [6]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate 
from langchain.chains.llm import LLMChain
from langchain.chains.constitutional_ai.base import ConstitutionalChain
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple

# Example data:
instruction = "Answer the following question through step-by-step reasoning."
question = "Animals may fight, make threatening sounds, and act aggressively toward members of the same species. These behaviors usually occur as the result of",
answer_choices = [
                    "competition",
                    "conservation",
                    "decomposition",
                    "pollution"
                ]
cot_trigger = "Answer: Let's think step by step."
answer_extraction = "Therefore, the answer is"

In [7]:
# This is an LLMChain to write a synopsis given a title of a play.
llm = OpenAI(temperature=.0)
template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

{cot_trigger}
"""


prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger"], template=template)
cot_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="cot")

In [8]:
cot = cot_chain.run({"instruction":instruction,"question":question,"answer_choices":answer_choices,"cot_trigger":cot_trigger})

In [13]:
cot

"\n1. Competition: Animals may fight, make threatening sounds, and act aggressively toward members of the same species as a result of competition. Competition can occur when animals are competing for resources such as food, water, or mates.\n\n2. Conservation: Conservation is the protection of natural resources and ecosystems. It does not directly relate to animals fighting, making threatening sounds, or acting aggressively toward members of the same species.\n\n3. Decomposition: Decomposition is the process by which organic matter is broken down into simpler forms of matter. It does not directly relate to animals fighting, making threatening sounds, or acting aggressively toward members of the same species.\n\n4. Pollution: Pollution is the introduction of contaminants into the environment. It does not directly relate to animals fighting, making threatening sounds, or acting aggressively toward members of the same species.\n\nTherefore, the correct answer is 'competition'."

In [29]:
"""This chain tries answer extraction and supervision at once"""
supervision_template = """{instruction}

Question: {question}
Answer_choices: {answer_choices}

Cot: {cot_trigger}{cot}
{answer_extraction}
{supervision}
"""

supervision = "Double check this idea, are the reasoning and answer sound yes/no?"

prompt_template = PromptTemplate(input_variables=["instruction","question","answer_choices","cot_trigger","cot","answer_extraction","supervision"], template=supervision_template)
supervision_chain = LLMChain(llm=llm, prompt=prompt_template,output_key="self_supervision")

In [21]:
self_supervision = supervision_chain.run({"instruction":instruction,"question":question,"answer_choices":answer_choices,"cot_trigger":cot_trigger,"cot":cot,"answer_extraction":answer_extraction,"supervision":supervision})

In [30]:
self_supervision

'\nYes, the reasoning and answer are sound.'

In [34]:
"""TODO double check cot input here, which does not correspond to documentation
https://langchain.readthedocs.io/en/latest/modules/chains/generic/sequential_chains.html"""

# This is the overall chain where we run these two chains in sequence.
from langchain.chains import SequentialChain
overall_chain = SequentialChain(chains=[cot_chain, supervision_chain],input_variables=["instruction","question","answer_choices","cot_trigger",'cot',"answer_extraction","supervision"],
    # Here we return multiple variables
    output_variables=["cot", "self_supervision"],
    verbose=True)
overall_supervision = overall_chain({"instruction":instruction,"question":question,"answer_choices":answer_choices,"cot_trigger":cot_trigger,"cot":cot,"answer_extraction":answer_extraction,"supervision":supervision})



[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


In [35]:
overall_supervision

{'instruction': 'Answer the following question through step-by-step reasoning.',
 'question': ('Animals may fight, make threatening sounds, and act aggressively toward members of the same species. These behaviors usually occur as the result of',),
 'answer_choices': ['competition',
  'conservation',
  'decomposition',
  'pollution'],
 'cot_trigger': "Answer: Let's think step by step.",
 'cot': "\n1. Competition: Animals may fight, make threatening sounds, and act aggressively toward members of the same species as a result of competition. Competition can occur when animals are competing for resources such as food, water, or mates.\n\n2. Conservation: Conservation is the protection of natural resources and ecosystems. It does not directly relate to animals fighting, making threatening sounds, or acting aggressively toward members of the same species.\n\n3. Decomposition: Decomposition is the process by which organic matter is broken down into simpler forms of matter. It does not directly