In [1]:
%load_ext autoreload
%autoreload 2

import datetime
import json
import os
import pkgutil
import time
import uuid
import pydantic
from langchain.prompts import BasePromptTemplate
from pydantic import BaseModel

from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

import datasets as ds

# Parse the templates.json resource that is looked up in the `cot` package.
TEMPLATES = json.loads(
    pkgutil.get_data(package="cot", resource="templates.json")
)

In [2]:
from cot.config import Config
from cot import Collection
from cot.generate import multiple_choice_answer_formatting
from dataclasses import asdict

In [3]:
# Settings dict for generation: debug on, letter-style multiple-choice
# answers, and one key each for instruction, CoT trigger and answer extraction.
config = dict(
    debug=True,
    multiple_choice_answer_format="Letters",
    instruction_keys=["qa-01"],
    cot_trigger_keys=["kojima-01"],
    answer_extraction_keys=["kojima-A-D"],
    warn=False,
    verbose=False,
)

In [4]:
# Set both prompt templates in the settings dict to the same literal string.
config.update(
    template_cot_generation="Does this work?",
    template_answer_extraction="Does this work?",
)

In [4]:
# Build a Config from the settings dict; as the last expression of the cell
# its repr is what gets displayed.
# NOTE(review): the recorded output lists the multi-line default-looking
# templates rather than "Does this work?", and this cell shares execution
# count 4 with the override cell -- the output may be stale; re-run to confirm.
Config.from_dict(config)

Config(idx_range='all', multiple_choice_answer_format='Letters', instruction_keys=['qa-01'], cot_trigger_keys=['kojima-01'], answer_extraction_keys=['kojima-A-D'], template_cot_generation='\n{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}\n', template_answer_extraction='\n{instruction}\n\n{question}\n{answer_choices}\n\n{cot_trigger}\n{cot}\n{answer_extraction}\n', author='', api_service='huggingface_hub', engine='google/flan-t5-xl', temperature=0.0, max_tokens=128, api_time_interval=1.0, debug=True, verbose=False, warn=False)

In [31]:
# 1) Load the worldtree dataset and keep a single randomly chosen train sample
collection = Collection(["worldtree"], verbose=False).select(
    split="train",
    number_samples=1,
    random_samples=True,
    seed=False,
)

Loading worldtree...


In [6]:
# Rebind `collection` to the data stored in the JSON file under the
# unit-test data directory.
test_dataset_path = "../libs/cot/tests/unit_tests/data/test_1_dataset.json"
collection = Collection.from_json(test_dataset_path)

[nltk_data] Downloading package punkt to /home/kon/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [7]:
# Run generation on the current collection using the settings in `config`.
# NOTE(review): the record displayed further down has 'test' as both cot and
# answer -- presumably debug=True stubs out the model call; confirm.
collection.generate(config=config)

  0%|          | 0/1 [00:00<?, ?ex/s]

In [10]:
# Display the collection; the recorded output is a per-dataset table of
# train/valid/test counts plus the list of datasets that are not loaded.
collection

| Name           |   Train | Valid   | Test   |
|----------------|---------|---------|--------|
| commonsense_qa |       1 | -       | -      |
| worldtree      |    2207 | 496     | 1664   |

Not loaded: ['aqua', 'asdiv', 'entailment_bank', 'gsm8k', 'mawps', 'med_qa', 'medmc_qa', 'open_book_qa', 'pubmed_qa', 'qed', 'strategy_qa', 'svamp']

In [9]:
# Display the first train record of commonsense_qa, including the
# `generated_cot` entries attached to it.
collection["commonsense_qa"]["train"][0]

{'id': '12345',
 'ref_id': '',
 'question': 'Question',
 'type': 'multiplechoice',
 'choices': ['choice A', 'choice B', 'choice C', 'choice D'],
 'context': '',
 'cot': ['Given cot line 1', 'Given cot line 2', 'Given cot line 3'],
 'answer': ['choice 3'],
 'generated_cot': [{'id': '92e31eb8-04ee-4ff7-b81a-e4e69a86246c',
   'templates_version': '0.01',
   'instruction': 'qa-01',
   'cot_trigger': 'kojima-01',
   'prompt_text': 'Does this work?',
   'answers': [{'id': 'c04f5109-edae-4d33-821a-f84c33d6b8e8',
     'answer_extraction': 'kojima-A-D',
     'answer_extraction_text': 'Does this work?',
     'answer': 'test',
     'correct_answer': None}],
   'cot': 'test',
   'author': '',
   'date': '2022/12/09 15:51:48',
   'api_service': 'huggingface_hub',
   'model': "{'name': 'google/flan-t5-xl', 'temperature': 0.0, 'max_tokens': 128}",
   'comment': '',
   'annotation': []}],
 'feedback': []}

In [44]:
# Print the CoT prompt text and the answer-extraction prompt text stored on
# the first generated chain of thought of the first worldtree train record.
first_generated_cot = collection["worldtree"]["train"][0]["generated_cot"][0]
print(first_generated_cot["prompt_text"])
print(first_generated_cot["answers"][0]["answer_extraction_text"])

Does this work?
Does this work?


In [6]:

# 1) Load worldtree and keep one randomly selected train sample
collection = Collection(["worldtree"], verbose=False).select(
    split="train",
    number_samples=1,
    random_samples=True,
    seed=False,
)

# 2) Generate chains of thought and extract answers with these settings
config = dict(
    debug=True,
    multiple_choice_answer_format="Letters",
    instruction_keys=["qa-01"],
    cot_trigger_keys=["kojima-01"],
    answer_extraction_keys=["kojima-A-D"],
    warn=False,
    verbose=False,
)
collection.generate(config=config)

# 3) Evaluate; kept as the last expression so the result is displayed
collection.evaluate()


Loading worldtree...
{'accuracy': {'qa-01_kojima-01_kojima-A-D': 0.0}}


In [17]:
# Prompt layout for the chain-of-thought generation step, written one
# template line per literal (adjacent literals are concatenated).
template_cot_generation = (
    "\n{instruction}\n"
    "\n{question}\n"
    "{answer_choices}\n"
    "\n{cot_trigger}"
)

# Prompt layout for the answer-extraction step: the same leading part,
# followed by the chain of thought and the answer-extraction text.
template_answer_extraction = (
    "\n{instruction}\n"
    "\n{question}\n"
    "{answer_choices}\n"
    "\n{cot_trigger}\n"
    "{cot}\n"
    "{answer_extraction}\n"
)

In [58]:
# Keys used to look up the instruction, CoT trigger and answer-extraction
# entries, plus the multiple-choice answer format.
instruction_key, cot_trigger_key, answer_extraction_key = (
    'qa-01',
    'kojima-01',
    'kojima-A-D',
)
multiple_choice_answer_format = "Letters"

# No chain of thought is set at this point.
cot = None

In [59]:
# Collect the values for each template placeholder: texts looked up in
# TEMPLATES by key, the question and formatted choices of `item`, and `cot`.
# NOTE(review): `item` is not assigned in any cell visible here -- confirm it
# is defined before this cell runs on a fresh kernel.
template_dict = dict(
    instruction=TEMPLATES["instructions"][instruction_key],
    question=item["question"],
    answer_choices=multiple_choice_answer_formatting(
        multiple_choice_answer_format, item["choices"]
    ),
    cot_trigger=TEMPLATES["cot_triggers"][cot_trigger_key],
    cot=cot,
    answer_extraction=TEMPLATES["answer_extractions"][answer_extraction_key],
)

In [12]:
# Names of the input variables, and the keys expected in the template dict.
input_variables = ["instruction", "question", "answer_choices", "cot_trigger", "cot", "answer_extraction"]
template_dict_keys = ["instruction", "question", "answer_choices", "cot_trigger", "cot", "answer_extraction"]

# Sanity check: every input variable must also appear among the template dict
# keys. The previous commented-out assertion was not valid Python, so the
# check never ran; this is the working equivalent.
missing_variables = [var for var in input_variables if var not in template_dict_keys]
assert not missing_variables, f"input variables without a template_dict key: {missing_variables}"