## Task decompositions for arbitrary NLP tasks

### Task Selections

In [6]:
import seqio
import os
# Point CURL_CA_BUNDLE at the system CA bundle before bigbench is imported
# (presumably so HTTPS downloads can verify certificates — TODO confirm).
os.environ['CURL_CA_BUNDLE'] = "/etc/ssl/certs/ca-bundle.crt"
# `tasks` is not referenced in the visible cells; presumably imported for its
# side effect of registering the "bigbench:*" SeqIO tasks — TODO confirm.
from bigbench.bbseqio import tasks
# NOTE(review): absolute, machine-specific path to the SentencePiece model.
vocabulary=seqio.SentencePieceVocabulary("/gscratch/zlab/bparan/projects/cascades/models/t5-spiece.model")
from sklearn.metrics import accuracy_score
import json
# NOTE(review): a later cell runs `from tqdm import tqdm`, which rebinds this name to the class.
import tqdm
import numpy as np

In [7]:
import tensorflow as tf
# Sanity check: report how many GPU devices TensorFlow can see in this kernel.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  0


In [9]:
# Look up the zero-shot "all JSON tasks" BIG-bench mixture and list its members.
bb_mix = seqio.get_mixture_or_task("bigbench:all_json.mix.t5_default_vocab.0_shot.all_examples")

# Alphabetical list of every member task name.
all_subtasks = sorted(subtask.name for subtask in bb_mix.tasks)

print("\nNumber of SeqIO Tasks in this Mixture:", len(bb_mix.tasks))
print("\nSubtasks:")
print("\n".join(all_subtasks) + "\n...")
print(len(all_subtasks))


Number of SeqIO Tasks in this Mixture: 1747

Subtasks:
bigbench:abstract_narrative_understanding.mul.t5_default_vocab.0_shot.all_examples.4_distractors
bigbench:abstract_narrative_understanding.mul.t5_default_vocab.0_shot.all_examples.99_distractors
bigbench:abstract_narrative_understanding.mul.t5_default_vocab.0_shot.all_examples.9_distractors
bigbench:anachronisms.mul.t5_default_vocab.0_shot.all_examples
bigbench:analogical_similarity.mul.t5_default_vocab.0_shot.all_examples
bigbench:analytic_entailment.mul.t5_default_vocab.0_shot.all_examples
bigbench:arithmetic.gen.t5_default_vocab.0_shot.all_examples.1_digit_addition
bigbench:arithmetic.gen.t5_default_vocab.0_shot.all_examples.1_digit_division
bigbench:arithmetic.gen.t5_default_vocab.0_shot.all_examples.1_digit_multiplication
bigbench:arithmetic.gen.t5_default_vocab.0_shot.all_examples.1_digit_subtraction
bigbench:arithmetic.gen.t5_default_vocab.0_shot.all_examples.2_digit_addition
bigbench:arithmetic.gen.t5_default_vocab.0_shot.

In [None]:
# Explore one BIG-bench task: print the raw input/target text of its first example.
n = 10  # not used below; kept because other cells may read this notebook global
seq_length = 1024
task = seqio.get_mixture_or_task(
    "bigbench:bridging_anaphora_resolution_barqa.gen.t5_default_vocab.0_shot.all_examples"
)
ds = task.get_dataset(
    split="all",
    sequence_length=dict(inputs=seq_length, targets=seq_length),
)
for ex in ds.take(1):
    # One print call: input, target, then an empty line (same output as three prints).
    print(ex['inputs_pretokenized'], ex['targets_pretokenized'], "", sep="\n")

2022-09-23 10:42:47.445853: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory
2022-09-23 10:42:47.445892: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2022-09-23 10:42:47.447203: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-23 10:43:48.18

### Few-shot performance

In [10]:
import openai
from typing import List
# from utils.constants import OPENAI_API_KEY
from tqdm import tqdm

# SECURITY: API keys must never be committed to the notebook. The keys that were
# previously hardcoded in this cell are exposed in the file history and must be
# revoked and rotated. The key is now read from the environment.
import os

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this notebook."
    )
openai.api_key = OPENAI_API_KEY

def request(
    prompt: str,
    engine='davinci',
    max_tokens=60,
    temperature=1.0,
    top_p=1.0,
    n=1,
    stop='\n',
    presence_penalty=0.0,
    frequency_penalty=0.0,
    max_retries=None,
    retry_delay=60,
    ):
    """Query the OpenAI completion endpoint and return the non-empty generations.

    Args:
        prompt: Text to complete.
        engine, max_tokens, temperature, top_p, n, stop, presence_penalty,
            frequency_penalty: Forwarded unchanged to ``openai.Completion.create``.
        max_retries: Maximum number of retries after a failed call. ``None``
            (the default) retries forever, which is the original behaviour.
        retry_delay: Seconds to sleep between attempts (default 60, as before).

    Returns:
        A list of generated strings with leading whitespace removed. Empty
        generations are dropped, so the list may be shorter than ``n`` and may
        even be empty; callers must not assume index 0 exists.

    Raises:
        Exception: The last error from the API, but only when ``max_retries``
            is set and has been exhausted.
    """
    import time

    failures = 0
    # Retry loop: handles connection errors, timeouts, and an overloaded API.
    while True:
        try:
            response = openai.Completion.create(
                engine=engine,
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                n=n,
                stop=stop,
                presence_penalty=presence_penalty,
                frequency_penalty=frequency_penalty,
            )
            break
        except Exception as e:
            failures += 1
            # Opt-in escape hatch so permanent failures (bad key, invalid
            # request) do not spin forever.
            if max_retries is not None and failures > max_retries:
                raise
            tqdm.write(str(e))
            tqdm.write("Retrying...")
            time.sleep(retry_delay)

    generations = [gen['text'].lstrip() for gen in response['choices']]
    return [text for text in generations if text != '']

In [21]:
class Task:
    """Interface for objects that turn raw task data into prompt text.

    Every hook raises ``NotImplementedError`` here; subclasses override the
    ones they support.
    """

    def transform_instruction(self, instruction, **kwargs):
        """Convert a raw task instruction into prompt text."""
        raise NotImplementedError()

    def transform_example(self, example, **kwargs):
        """Render one demonstration example as prompt text."""
        raise NotImplementedError()

    def transform_few_shot_prompt(self, prefix, new_instance, **kwargs):
        """Build the final prompt for ``new_instance`` given a ``prefix``."""
        raise NotImplementedError()
        

class SentimentAnalysisNI(Task):
    """Prompt formatter for a tweet-sentiment task stored in a JSON file."""

    def __init__(self, filename):
        # File name of the task definition; read by TeachNaturalInstructionsTask.
        self.filename = filename

    def transform_instruction(self, instruction):
        """Return the first element of the task's definition."""
        return instruction[0]

    def transform_example(self, example):
        """Render a labelled demonstration: the tweet, then its sentiment line."""
        tweet_part = "Tweet: " + example["input"]
        label_part = "\nThis tweet is {0}\n".format(example["output"])
        return tweet_part + label_part

    def transform_few_shot_prompt(self, prefix, new_instance):
        """Append the unlabelled query tweet to ``prefix``; the model completes the label."""
        query = "Tweet: {0}\nThis tweet is".format(new_instance["input"])
        return prefix + query
    

class AnachronismsBB(Task):
    """Prompt formatter for the BIG-bench anachronisms task."""

    def __init__(self, name, seq_length):
        self.name = name
        # The attribute name is misspelled, but TeachBigBenchTask reads
        # `task.sequence_lenth`, so it must stay as is.
        self.sequence_lenth = seq_length

    def transform_instruction(self, **kwargs):
        """Not used for this task."""
        raise NotImplementedError

    def transform_example(self, example, **kwargs):
        """Not used for this task."""
        raise NotImplementedError

    def transform_few_shot_prompt(self, prefix, new_instance, **kwargs):
        """Return ``prefix``, a newline, the instance's input text, and a trailing newline.

        The previous version also assembled an "Input:/Answer:" prompt that was
        never returned and that read the notebook-global ``data[0]``. That dead
        code made the method fail (NameError/IndexError) depending on kernel
        state, so it has been removed; the returned string is unchanged.
        """
        return prefix + "\n" + new_instance["input"] + "\n"

class DisfluentQABB(Task):
    """Task wrapper whose prompt is simply the instance's own input text."""

    def __init__(self, name, seq_length):
        self.name = name
        # Misspelled attribute name is what TeachBigBenchTask reads; keep it.
        self.sequence_lenth = seq_length

    def transform_instruction(self, **kwargs):
        """Not used for this task."""
        raise NotImplementedError()

    def transform_example(self, example, **kwargs):
        """Not used for this task."""
        raise NotImplementedError()

    def transform_few_shot_prompt(self, prefix, new_instance, **kwargs):
        """Return the instance input unchanged; ``prefix`` is ignored."""
        return new_instance["input"]
    
class GeneralBB(Task):
    """Placeholder task wrapper: stores name and sequence length, implements no hooks."""

    def __init__(self, name, seq_length):
        self.name = name
        # Misspelled attribute name is what TeachBigBenchTask reads; keep it.
        self.sequence_lenth = seq_length

    def transform_instruction(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def transform_example(self, example, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def transform_few_shot_prompt(self, prefix, new_instance, **kwargs):
        """Not implemented."""
        raise NotImplementedError()
        

class TeachArbitrayTask:
    """Base class for evaluating a prompted task and collecting its errors.

    Wraps a ``Task`` object (``self.task``) that formats prompts. Subclasses
    provide the data loading in ``get_task_data`` / ``get_heldout_data``.
    """

    def __init__(self, task, **kwargs):
        self.task = task

    def get_task_data(self, **kwargs):
        """Load the task's instances; must be overridden."""
        raise NotImplementedError

    def get_heldout_data(self, **kwargs):
        """Load held-out instances; must be overridden."""
        # Was `raise NotImplimentedErorr`, which is itself a NameError.
        raise NotImplementedError

    @staticmethod
    def _candidates(predict):
        """Return the lower-cased candidate generations for one instance.

        A bare string is treated as a single candidate; iterating over it
        would compare the label against individual characters.
        """
        if isinstance(predict, str):
            predict = [predict]
        return [p.lower() for p in predict]

    def exact_match(self, labels, predictions):
        """Fraction of instances whose label equals (case-insensitively) any candidate.

        Args:
            labels: Iterable of gold label strings.
            predictions: Iterable, aligned with ``labels``, of candidate lists
                (or single strings).

        Returns:
            Accuracy as a float in [0, 1]; 0.0 when there are no instances.
        """
        correct = 0
        count = 0
        for label, predict in zip(labels, predictions):
            if label.lower() in self._candidates(predict):
                correct += 1
            count += 1
        return (1.0 * correct) / count if count else 0.0

    def substring_match(self, labels, predictions):
        """Fraction of instances whose label occurs (case-insensitively) inside any candidate.

        Returns 0.0 when there are no instances.
        """
        correct = 0
        count = 0
        for label, predict in zip(labels, predictions):
            needle = label.lower()
            if any(needle in p for p in self._candidates(predict)):
                correct += 1
            count += 1
        return (1.0 * correct) / count if count else 0.0

    def get_teaching_data(self, instances, predictions):
        """Return ``(instance, lower-cased candidates)`` pairs for exact-match failures.

        Currently cheating :( — the gold label is used to decide what counts as an error.
        """
        errors = []
        for ex, predict in zip(instances, predictions):
            predict = self._candidates(predict)
            if ex["label"].lower() not in predict:
                errors.append((ex, predict))
        return errors

    def evaluate_few_shot(self, instances, prompt, engine):
        """Query ``engine`` once per instance.

        Each prompt is built by ``self.task.transform_few_shot_prompt(prompt, ex)``.

        Returns:
            ``(predicted, labels, prompts)``: the generations per instance, the
            gold labels, and the exact prompts sent, all aligned by index.
        """
        predicted = []
        labels = []
        prompts = []
        for ex in tqdm(instances):
            final_prompt = self.task.transform_few_shot_prompt(prompt, ex)
            predicted.append(request(final_prompt, engine=engine))
            labels.append(ex["label"])
            prompts.append(final_prompt)
        return predicted, labels, prompts
        
        
class TeachBigBenchTask(TeachArbitrayTask):
    """Loads instances of a BIG-bench SeqIO task for prompting experiments."""

    def __init__(self, task, **kwargs):
        self.task = task
        self.task_name = task.name
        # The task wrappers store this under the misspelled name `sequence_lenth`.
        self.task_sequence_length = task.sequence_lenth

    def get_task_data(self, zero=False, few=False, explain=False):
        """Materialise every example of the task's "all" split.

        Args:
            zero: Use the registered task name as is. Takes priority over ``few``.
            few: Use the variant whose name has '0_shot' replaced by '3_shot'.
            explain: Unused.

        Returns:
            A list of dicts with keys ``guid`` (running index), ``input`` and
            ``label`` (decoded pretokenized input/target strings).

        When neither flag is set the registered task name is used. Previously
        that case raised UnboundLocalError because ``task`` was never assigned.
        """
        seq_length = self.task_sequence_length
        if few and not zero:
            task_name = self.task_name.replace('0_shot', '3_shot')
        else:
            task_name = self.task_name
        task = seqio.get_mixture_or_task(task_name)

        ds = task.get_dataset(split="all", sequence_length={"inputs": seq_length, "targets": seq_length})
        return [
            {
                "guid": enum,
                "input": ex['inputs_pretokenized'].numpy().decode(),
                "label": ex["targets_pretokenized"].numpy().decode(),
            }
            for enum, ex in enumerate(ds)
        ]
    
    
class TeachNaturalInstructionsTask(TeachArbitrayTask):
    """Loads a Natural Instructions task definition from a JSON file."""

    def __init__(self, task, **kwargs):
        """
        Args:
            task: Task wrapper exposing ``filename`` plus the transform hooks.
            **kwargs: Optional ``nq_path`` overriding the directory that holds
                the task JSON files (defaults to the original cluster path).
        """
        self.task = task
        self.task_name = task.filename
        self.nq_path = kwargs.get(
            "nq_path",
            "/mmfs1/gscratch/zlab/bparan/projects/cascades/src/natural-instructions/tasks",
        )

    def get_task_data(self, zero=False, few=False, explain=False):
        """Read the task file and return its instruction and instances.

        Args:
            zero: Return ``(instruction, instances)``. Takes priority over ``few``.
            few: Return ``(instruction, examples, instances)``, where ``examples``
                is the concatenation of the transformed "Positive Examples".
            explain: Unused.

        Returns:
            See above. When neither flag is set the zero-shot pair is returned
            (previously the method silently returned ``None``).
        """
        task_path = os.path.join(self.nq_path, self.task_name)
        # The context manager closes the file; the old open(...).read() leaked the handle.
        with open(task_path, encoding="utf-8") as task_file:
            task_data = json.load(task_file)

        instances = [
            {"guid": instance['id'], "input": instance["input"], "label": instance["output"][0]}
            for instance in task_data["Instances"]
        ]
        instruction = self.task.transform_instruction(task_data["Definition"])

        if few and not zero:
            examples = "".join(
                self.task.transform_example(ex) for ex in task_data["Positive Examples"]
            )
            return instruction, examples, instances
        return instruction, instances


In [22]:
# Few-shot sentiment baseline: instruction + positive examples as the prompt prefix.
task = SentimentAnalysisNI("task195_sentiment140_classification.json")
teacher = TeachNaturalInstructionsTask(task)
instruction, examples, instances = teacher.get_task_data(few=True)

em_runs = []
for run in range(2):
    # Generations are sampled (temperature 1.0 by default), so repeat and aggregate.
    predictions, labels, prompts = teacher.evaluate_few_shot(instances[:1000], instruction + "\n" + examples, engine="curie")
    em_runs.append(teacher.exact_match(labels, predictions))
print("Mean:", np.mean(em_runs))
# np.std returns the standard deviation; the old label said "Variance".
print("Std:", np.std(em_runs))

100%|████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:16<00:00,  6.22it/s]


0.54

In [20]:
# Inspect the concatenated few-shot demonstrations used as the prompt prefix.
examples

"Tweet: @justinchuan Awww! I was thinking about you lot up there! Glad you enjoyed it\nThis tweet is positive\nTweet: @jamiesmart I can't  but if you like it, you can see it with me again next week! Yay! x\nThis tweet is negative\nTweet: @mangopickle i like stab movies.... idk  im scared of paranormal shit\nThis tweet is negative\nTweet: Hanging out with basshunter! Omg! He is so nice! But its the to go home to my mike.\nThis tweet is positive\n"

## Anachronisms 

Task description: https://github.com/google/BIG-bench/tree/main/bigbench/benchmark_tasks/anachronisms
Intuition for breakdown: 
* Reverse the labels ("anachronism" is harder to parse than "contemporary").
* Identify the named entities.
* Identify the time period of each named entity.
* Compare the time periods.

In [503]:
# Load the anachronisms task; few=True selects the '3_shot' variant of the task name.
task = AnachronismsBB(
    name="bigbench:anachronisms.mul.t5_default_vocab.0_shot.all_examples",
    seq_length=1024,
)
teacher = TeachBigBenchTask(task)
data = teacher.get_task_data(few=True)

def reverse_output_lists(inputs):
    """Flip "Yes"/"No" inside every string of every inner list.

    A string containing "No" has each "No" replaced by "Yes"; otherwise a
    string containing "Yes" has each "Yes" replaced by "No"; anything else is
    copied unchanged. The nesting of ``inputs`` is preserved.
    """
    def _flip(text):
        if "No" in text:
            return text.replace("No", "Yes")
        if "Yes" in text:
            return text.replace("Yes", "No")
        return text

    return [[_flip(text) for text in item] for item in inputs]

# Instruction-prefixed few-shot baseline on the first 100 examples.
# Other prefixes tried earlier: an empty prefix, the same text asking for
# "contemporaneous" (scored after reverse_output_lists), and a shorter
# "identify two entities" instruction, all with engine="curie".
anachronism_instruction = "An anachronism is an act of attributing a custom, event, or object to a period to which it does not belong. Each of these sentences have two entities which either belong to the same period or do not. Find the entities and check if they are non-contemporaneous. Explain your answer"

em_runs = []
for run in range(5):
    predictions, labels, prompts = teacher.evaluate_few_shot(data[:100], anachronism_instruction, engine="davinci")
    em_runs.append(teacher.exact_match(labels, predictions))
print("Mean:", np.mean(em_runs))
# np.std returns the standard deviation; the old label said "Variance".
print("Std:", np.std(em_runs))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:53<00:00,  1.87it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:59<00:00,  1.68it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:52<00:00,  1.91it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:55<00:00,  1.79it/s]
100%|███████████████████████████████████████████████████████████████████

Mean: 0.51
Variance: 0.02280350850198278





### Anachronism Program

In [25]:
# def program_anachronism(data, engine):


# Load the few-shot anachronism data and keep the first 10 examples.
task = AnachronismsBB(
    name="bigbench:anachronisms.mul.t5_default_vocab.0_shot.all_examples",
    seq_length=1024,
)
teacher = TeachBigBenchTask(task)
data = teacher.get_task_data(few=True)[:10]

# The query sentence is the first line of the last blank-line-separated chunk of each input.
instances = [ex['input'].split("\n\n")[-1].partition("\n")[0] for ex in data]
labels = [ex['label'] for ex in data]


# Identify Entities
# Step 1 of the program: ask the model for the '&'-separated entities of each sentence.
indentify_entities_prompt = """Identify the main entities in the sentence. These are typically popular people, products, places or objects.

Sentence: Homo neanderthalensis consumed meat as a main staple of their diet.
Entities: Homo neanderthalensis

Sentence: Charlamagne and the unrest under his rule is credited with the creation of Encarta, internet for the masses.
Entities: Charlamagne & Encarta & internet

Sentence: Alexander the Great received tutelage from Seneca the Elder, a respected philosopher.
Entities: Alexander the Great & Seneca the Elder

Sentence: Beats from the MPC3000 helped inspire many modern hip hop artists.
Entities: MPC3000 & hip hop artists

Sentence: Tycho Brahe's favorite Game Boy game was Super Mario Land.
Entities: Tycho Brahe &  Game Boy & Super Mario Land

Sentence: """

print("Findng entities")
prompts, entity_outputs = [], []
for ex in tqdm(data):
    instance = ex['input'].split("\n\n")[-1].split("\n")[0]
    node_prompt = indentify_entities_prompt + instance.strip() + "\nEntities:"
    request_output = request(node_prompt, n=5, engine='davinci')
    output = None
    for o in request_output:
        # Strip the padding left by splitting on '&'. Without this,
        # "Tycho Brahe " fails the check against "Tycho Brahe's ..." purely
        # because of the trailing space.
        entities = [ent.strip() for ent in o.split('&')]
        # Prefer a candidate whose entities all occur verbatim in the sentence
        # (the last such candidate wins, as before).
        if all(ent in instance for ent in entities):
            output = o
    if not output:
        # request() drops empty generations, so the list can be empty.
        output = request_output[0] if request_output else ""
    entity_outputs.append(output)
    prompts.append(node_prompt)


# Process nodes : Separate entities by ampersand
print("Processing entities")
def process_entities(inputs):
    """Convert '&'-separated entity strings into comma-separated ones.

    Each piece is stripped and empty pieces are dropped, so
    ``"A &  B"`` becomes ``"A, B"`` — the format used by the comparison
    prompt's demonstrations — instead of ``"A ,   B"``.

    Args:
        inputs: Iterable of strings such as ``"Charlamagne & Encarta"``.

    Returns:
        A list with one comma-separated string per input, in order.
    """
    outputs = []
    for item in inputs:
        entities = [ent.strip() for ent in item.split("&")]
        outputs.append(", ".join(ent for ent in entities if ent))
    return outputs

inputs_to_date = process_entities(entity_outputs)

# Compare entities
# Step 2 of the program: ask whether the listed entities are contemporary.
compare_prompt = """Here are a list of entities - a bunch of popular persons, locations or objects. Are these entities contemporary?

Entities: Charles Babbage, Haskell
No

Entities: Lewis Hamilton, the Queen
Yes

Entities: International Atomic Energy Agency, President Woodrow Wilson
No

Entities: Abraham Lincoln, Charles Darwin
Yes

Entities: Sally Hemings, HBO's True Detective
No

Entities: Charlamagne, Internet
No

Entities: Utahraptor, Iguanodon
Yes

Entities"""
prompts, contemporary_outputs = [], []
print("Checking if entities are contemporaneous")
for instance in tqdm(inputs_to_date):
    # The template ends with a bare "Entities"; add the ": " separator so the
    # query line matches the demonstrations ("Entities: A, B") instead of
    # producing "EntitiesA, B".
    node_prompt = compare_prompt + ": " + instance.strip() + "\n"
    request_output = request(node_prompt, n=5, engine='davinci')
    output = None
    for o in request_output:
        # Keep a candidate that mentions yes/no (the last such candidate wins).
        if "yes" in o.lower() or 'no' in o.lower():
            output = o
    if not output:
        if len(request_output):
            output = request_output[0]
        else:
            output = "Cannot tell."
    contemporary_outputs.append(output)
    prompts.append(node_prompt)

print("Reversing label for Anachronism")
def reverse_outputs(inputs):
    """Flip "Yes"/"No" in each string and wrap each result in a one-element list.

    "No" is checked first: a string containing "No" has every "No" replaced by
    "Yes"; otherwise every "Yes" is replaced by "No"; otherwise the string is
    kept unchanged.
    """
    flipped = []
    for item in inputs:
        if "No" in item:
            swapped = item.replace("No", "Yes")
        elif "Yes" in item:
            swapped = item.replace("Yes", "No")
        else:
            swapped = item
        flipped.append([swapped])
    return flipped

# "Contemporary? Yes" means "anachronistic? No", so flip the answers before scoring.
reversed_predictions = reverse_outputs(contemporary_outputs)
# Exact-match accuracy against the gold labels; displayed as the cell's output.
teacher.exact_match(labels, reversed_predictions)
# return teacher.exact_match(labels, reversed_predictions)


# em_runs = []
# for run in range(1):
#     em_runs.append(program_anachronism(data[:10], 'davinci'))
# print("Mean:", np.mean(em_runs))
# print("Variance:", np.std(em_runs))



Findng entities


100%|██████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:07<00:00,  1.32it/s]


Processing entities
Checking if entities are contemporaneous


100%|██████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:12<00:00,  1.29s/it]

Reversing label for Anachronism





0.3

### Anachronism Program by Marco

In [None]:

# Load the few-shot anachronism data (all examples).
task = AnachronismsBB(
    name="bigbench:anachronisms.mul.t5_default_vocab.0_shot.all_examples",
    seq_length=1024,
)
teacher = TeachBigBenchTask(task)
data = teacher.get_task_data(few=True)

# The query sentence is the first line of the last blank-line-separated chunk of each input.
instances = [ex['input'].split("\n\n")[-1].partition("\n")[0] for ex in data]
labels = [ex['label'] for ex in data]


# Identify Entities
# Step 1 of the program: ask the model for the '&'-separated entities of each sentence.
indentify_entities_prompt = """Identify the main entities in the sentence. These are typically popular people, products, places or objects.

Sentence: Homo neanderthalensis consumed meat as a main staple of their diet.
Entities: Homo neanderthalensis

Sentence: Charlamagne and the unrest under his rule is credited with the creation of Encarta, internet for the masses.
Entities: Charlamagne & Encarta & internet

Sentence: Alexander the Great received tutelage from Seneca the Elder, a respected philosopher.
Entities: Alexander the Great & Seneca the Elder

Sentence: Beats from the MPC3000 helped inspire many modern hip hop artists.
Entities: MPC3000 & hip hop artists

Sentence: Tycho Brahe's favorite Game Boy game was Super Mario Land.
Entities: Tycho Brahe &  Game Boy & Super Mario Land

Sentence: """

print("Findng entities")
prompts, entity_outputs = [], []
for ex in tqdm(data):
    instance = ex['input'].split("\n\n")[-1].split("\n")[0]
    node_prompt = indentify_entities_prompt + instance.strip() + "\nEntities:"
    request_output = request(node_prompt, n=5, engine='davinci')
    output = None
    for o in request_output:
        # Strip the padding left by splitting on '&'; otherwise the verbatim
        # check fails on whitespace alone (e.g. "Tycho Brahe " vs "Tycho Brahe's").
        entities = [ent.strip() for ent in o.split('&')]
        # Prefer a candidate whose entities all occur verbatim in the sentence
        # (the last such candidate wins, as before).
        if all(ent in instance for ent in entities):
            output = o
    if not output:
        # request() drops empty generations, so the list can be empty.
        output = request_output[0] if request_output else ""
    entity_outputs.append(output)
    prompts.append(node_prompt)


# Map entities to centuries
# Step 2 of the program: ask the model for the century of each '&'-separated entity.
century_prompt = """Given entities, give me their centuries.

Charles Babbage & Haskell
18th century & 20th century

Lewis Hamilton & the Queen
21st century & 20th century

International Atomic Energy Agency & President Woodrow Wilson
21st century & 20th century

Abraham Lincoln & Charles Darwin
19th century & 19th century

Sally Hemings & HBO's True Detective
20th century & 21st century

Internet & Charlemagne
20th century & 9th century

Machine Learning & The queen of england
21st century & 16th century

Norah Fatehi
21st century


"""
prompts, century_outputs = [], []
print("Output centuries for entities")
for instance in tqdm(entity_outputs):
    node_prompt = century_prompt + instance.strip() + "\n"
    request_output = request(node_prompt, n=5, engine='davinci')
    output = None
    for o in request_output:
        # Prefer a candidate that actually names a century. The previous
        # check looked for "yes"/"no", copied from the yes/no comparison step,
        # which century answers essentially never contain.
        if "century" in o.lower():
            output = o
    if not output:
        if len(request_output):
            output = request_output[0]
        else:
            output = "Cannot tell."
    century_outputs.append(output)
    prompts.append(node_prompt)

print("Compare centuries to output Anachronism")
# Step 3 of the program: differing centuries -> "Yes" (anachronism), equal -> "No".
compare_prompt="""
18th century & 20th century
Yes

21st century & 20th century
Yes

19th century & 19th century
No

20th century & 9th century
Yes

18th century & 18th century
No


"""

prompts, anachronism_outputs = [], []
for instance in tqdm(century_outputs):
    node_prompt = compare_prompt + instance.strip() + "\n"
    request_output = request(node_prompt, n=5, engine='davinci')
    output = None
    for o in request_output:
        # Keep a candidate that mentions yes/no (the last such candidate wins).
        if "yes" in o.lower() or 'no' in o.lower():
            output = o
    if not output:
        if len(request_output):
            output = request_output[0]
        else:
            output = "Cannot tell."
    anachronism_outputs.append(output)
    prompts.append(node_prompt)

# exact_match expects a list of candidate strings per instance. Passing the
# bare strings made it compare each label against single characters, so wrap
# every answer in a one-element list.
teacher.exact_match(labels, [[answer] for answer in anachronism_outputs])


# em_runs = []
# for run in range(1):
#     em_runs.append(program_anachronism(data[:10], 'davinci'))
# print("Mean:", np.mean(em_runs))
# print("Variance:", np.std(em_runs))



Findng entities


100%|██████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.17it/s]


Output centuries for entities


100%|██████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.21it/s]


Compare centuries to output Anachronism


 40%|██████████████████████████████████████                                                         | 4/10 [00:01<00:02,  2.09it/s]

In [27]:
# Inspect the '&'-separated entities chosen for each sentence.
entity_outputs

['The author & Queen Elizabeth II',
 'Lewis Hamilton & Queen',
 'Ottoman Empire & WTO & Covid19',
 'The Student & Tlahuicole',
 'mammoth & Shih Tzu',
 'The pet chihuahua & Pangea',
 'Igor Stravinsky & the Symphonie Fantastique',
 'Otto von Bismarck',
 'Cold War',
 'The sun']

In [30]:
# Inspect the century strings produced for each entity list.
century_outputs

['2nd century BC & 3rd century AD',
 '20th century & 21st century',
 '11th century',
 'Morning Star & Night Star',
 '1st century',
 '18th century',
 '21st century & 21st century',
 '22st century & 20th century',
 '20th century & 18th century',
 '20th century & 19th century']

In [31]:
# Inspect the final answer kept for each instance.
anachronism_outputs

['No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'No', 'No']

In [514]:
# Side-by-side dump for the first 10 examples: input, gold label,
# extracted entities, and the (reversed) prediction, each followed by a blank line.
for i in range(10):
    print(data[i]['input'], end="\n\n")
    print(data[i]['label'], end="\n\n")
    print(entity_outputs[i], end="\n\n")
    print(reversed_predictions[i], end="\n\n")

Marco Polo commissioned Leonardo Da Vinci for a portrait of Kublai Khan.
Does the preceding sentence contain non-contemporaneous (anachronistic) elements?
Yes

Abraham Lincoln and Charles Darwin kept a regular correspondence.
Does the preceding sentence contain non-contemporaneous (anachronistic) elements?
No

Eric the Red is credited with establishing the first permanent settlements in Greenland.
Does the preceding sentence contain non-contemporaneous (anachronistic) elements?
No

King Richard the Lionheart led the English armies in the Third Crusade.
Does the preceding sentence contain non-contemporaneous (anachronistic) elements?

No

King Richard the Lionheart & the English armies

['No']

Genghis Khan built the satellite which captured the first image of the Great Wall of China in space.
Does the preceding sentence contain non-contemporaneous (anachronistic) elements?
Yes

The first ever movie in color depicted the life of Sacagawea.
Does the preceding sentence contain non-contemp

In [371]:
# Identify Entities
# NOTE(review): `errors` is not defined by any earlier visible cell. The only
# visible producer (get_teaching_data) yields (example, predictions) tuples,
# which would not support ex['input'] — confirm where this list comes from.
indentify_entities_prompt = """Identify the main entities in the sentence. These are typically popular people, products, places or objects.

Sentence: Homo neanderthalensis consumed meat as a main staple of their diet.
Entities: Homo neanderthalensis

Sentence: Charlamagne and the unrest under his rule is credited with the creation of Encarta, internet for the masses.
Entities: Charlamagne & Encarta & internet

Sentence: Alexander the Great received tutelage from Seneca the Elder, a respected philosopher.
Entities: Alexander the Great & Seneca the Elder

Sentence: Beats from the MPC3000 helped inspire many modern hip hop artists.
Entities: MPC3000 & hip hop artists

Sentence: Tycho Brahe's favorite Game Boy game was Super Mario Land.
Entities: Tycho Brahe &  Game Boy & Super Mario Land

Sentence: """
prompts, entity_outputs = [], []
for ex in errors:
    instance = ex['input'].split("\n\n")[-1].split("\n")[0]
    node_prompt = indentify_entities_prompt + instance.strip() + "\nEntities:"
    request_output = request(node_prompt, n=5, engine='curie')
    output = None
    for o in request_output:
        # Strip the padding left by splitting on '&'; otherwise the verbatim
        # check fails on whitespace alone (e.g. "Tycho Brahe " vs "Tycho Brahe's").
        entities = [ent.strip() for ent in o.split('&')]
        # Prefer a candidate whose entities all occur verbatim in the sentence
        # (the last such candidate wins, as before).
        if all(ent in instance for ent in entities):
            output = o
    if not output:
        # request() drops empty generations, so the list can be empty.
        output = request_output[0] if request_output else ""
    entity_outputs.append(output)
    prompts.append(node_prompt)

In [372]:
# Show the extracted entities next to the sentence for (at most) the first five errors.
for i in range(min(len(errors), 5)):
    print(entity_outputs[i])
    print(instances[i] + "\n")

Triceratops fossil & pet dog
The pet dog was barking loudly at the triceratops fossil outside.

Southern U.S. & slaves & U.S. Constitution
Virtually all of the Southern U.S. slaves were aware of what the 13th amendment to the U.S. Constitution meant for them.

Queen Himiko & Yayoi silk robes
Queen Himiko showed off her elegant Yayoi silk robes to her female attendants after finishing her mystical rituals at the shrine.

Claude Monet & the Renaissance
Claude Monet participated in the Renaissance.

Dr. Phil & cholera
Dr. Phil is credited with the discovery of cholera.



In [373]:
# Process nodes : Separate entities by ampersand
def process_entities(inputs):
    """Convert '&'-separated entity strings into comma-separated ones.

    Each piece is stripped and empty pieces are dropped, so
    ``"A &  B"`` becomes ``"A, B"`` — the format used by the comparison
    prompt's demonstrations — instead of ``"A ,   B"``.

    Args:
        inputs: Iterable of strings such as ``"Charlamagne & Encarta"``.

    Returns:
        A list with one comma-separated string per input, in order.
    """
    outputs = []
    for item in inputs:
        entities = [ent.strip() for ent in item.split("&")]
        outputs.append(", ".join(ent for ent in entities if ent))
    return outputs
        
# Comma-separated entity lists, consumed by the comparison prompt in the next cell.
inputs_to_date = process_entities(entity_outputs)

In [376]:
# Compare entities
# Ask whether the listed entities are contemporary.
compare_prompt = """Here are a list of entities - a bunch of popular persons, locations or objects. Are these entities contemporary?

Entities: Charles Babbage, Haskell
No

Entities: Lewis Hamilton, the Queen
Yes

Entities: International Atomic Energy Agency, President Woodrow Wilson
No

Entities: Abraham Lincoln, Charles Darwin
Yes

Entities: Sally Hemings, HBO's True Detective
No

Entities: Charlamagne, Internet
No

Entities: Utahraptor, Iguanodon
Yes

Entities"""
prompts, contemporary_outputs = [], []
for instance in inputs_to_date:
    # The template ends with a bare "Entities"; add the ": " separator so the
    # query line matches the demonstrations ("Entities: A, B") instead of
    # producing "EntitiesA, B".
    node_prompt = compare_prompt + ": " + instance.strip() + "\n"
    request_output = request(node_prompt, n=5, engine='curie')
    output = None
    for o in request_output:
        # Keep a candidate that mentions yes/no (the last such candidate wins).
        if "yes" in o.lower() or 'no' in o.lower():
            output = o
    if not output:
        # request() drops empty generations, so the list can be empty;
        # indexing [0] unconditionally raised IndexError in that case.
        output = request_output[0] if request_output else "Cannot tell."
    contemporary_outputs.append(output)
    prompts.append(node_prompt)

In [378]:
def reverse_outputs(inputs):
    """Flip "Yes"/"No" in each string and wrap each result in a one-element list.

    "No" is checked first: a string containing "No" has every "No" replaced by
    "Yes"; otherwise every "Yes" is replaced by "No"; otherwise the string is
    kept unchanged.
    """
    def _flip(text):
        if "No" in text:
            return text.replace("No", "Yes")
        if "Yes" in text:
            return text.replace("Yes", "No")
        return text

    return [[_flip(item)] for item in inputs]
# "Contemporary? Yes" means "anachronistic? No", so flip the answers before scoring.
reversed_predictions = reverse_outputs(contemporary_outputs)
# NOTE(review): `labels` comes from an earlier cell; confirm it is aligned with the instances scored here.
print(teacher.exact_match(labels, reversed_predictions))

0.4


In [207]:
# Retrieve dates (a range would be better, but that can be harder to retrieve).
# NOTE(review): the demonstrations use "Entitity:/Answer:" but the template ends
# with "Year: " — presumably it should end with "Entitity: "; confirm before use.
# This prompt is not referenced by any visible cell.
date_entities_prompt = """When was this entitity born, created or invented?

Entitity: Alexander the Great
Answer: July 356 BC

Entitity: Magna Carta
Answer: 15 June 1215
Year: """

## Disfluent Question Answering

In [439]:
task = DisfluentQABB(name = "bigbench:disfl_qa.gen.t5_default_vocab.0_shot.all_examples",
                      seq_length=1024)
teacher = TeachBigBenchTask(task)
# Load this task's data here. With the load commented out, the following cells
# silently evaluated whatever `data` was left in the kernel (e.g. the
# anachronism examples), which breaks Restart & Run All.
data = teacher.get_task_data(few=True)

In [445]:
# Few-shot baseline with an empty instruction prefix, scored by substring match.
# An alternative prefix tried earlier: "To answer the questions, make the
# question fluent and select the part that entails the question."
em_runs = []
for run in range(1):
    predictions, labels, prompts = teacher.evaluate_few_shot(data[:100], "", engine="curie")
    em_runs.append(teacher.substring_match(labels, predictions))
print("Mean:", np.mean(em_runs))
# np.std returns the standard deviation; the old label said "Variance".
print("Std:", np.std(em_runs))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:19<00:00,  5.11it/s]

Mean: 0.07
Variance: 0.0





In [450]:
# Collect the instances whose predictions miss the label (exact match) and
# print the input text of the first ten.
errors = teacher.get_teaching_data(data[:100], predictions)
for err in errors[:10]:
    failed_example = err[0]  # each error is an (instance, predictions) pair
    print(failed_example['input'])

Question: What is the lower canal regulation of the Dornbirner Ach or uh the Rhine ?  
context: A regulation of the Rhine was called for , with an upper canal near Diepoldsau and a lower canal at Fußach , in order to counteract the constant flooding and strong sedimentation in the western Rhine Delta . The Dornbirner Ach had to be diverted , too , and it now flows parallel to the canalized Rhine into the lake . Its water has a darker color than the Rhine ; the latter ' s lighter suspended load comes from higher up the mountains . It is expected that the continuous input of sediment into the lake will silt up the lake . This has already happened to the former Lake Tuggenersee . 
Answer: Fußach

Question: When damaged no sorry for most organisms , what is the dominant system of defense ?  
context: Microorganisms or toxins that successfully enter an organism encounter the cells and mechanisms of the innate immune system . The innate response is usually triggered when microbes are identif

### Disfluency program

In [487]:
instances = [ex['input'].split("\n\n")[-1] for ex in data[:10]]
labels = [ex['label'] for ex in data[:10]]

# Separate Question and Context from instance
def separate(instances):
    """Split every instance into its question line and its context line.

    Each instance must consist of exactly three newline-separated parts; the
    first is returned as the question, the second as the context and the third
    is dropped.

    Args:
        instances: Iterable of strings.

    Returns:
        A pair ``(questions, contexts)`` of lists, in input order.

    Raises:
        ValueError: If an instance does not split into exactly three parts.
    """
    pairs = []
    for text in instances:
        question_line, context_line, _unused = text.split("\n")
        pairs.append((question_line, context_line))
    return [q for q, _ in pairs], [c for _, c in pairs]

# Split the instances into parallel lists of question lines and context lines.
questions, contexts = separate(instances)

# Make question fluent.
# Few-shot prompt: one instruction sentence followed by "Question:" / "Fluent Question:"
# example pairs. The text ends with a blank line, so a new "Question: ..." line can be
# appended directly after it.
fluent_question_prompt = """The following questions is not fluent because the speaker changes their intention midway. Make the question fluent. Remove disfluent words like 'uh' and 'no'.

Question: What is the lower canal regulation of the Dornbirner Ach or uh the Rhine ?
Fluent Question: What is the lower canal regulation of the Rhine?

Question: Some elements of the Brotherhood directed what action against Al - Banna oh I mean the government ? 
Fluent Question: Some elements of the Brotherhood directed what action against the government ? 

Question: What did or like instead when did the Court of Justice rule that the Commission could only propose that there must be some criminal sanctions ?
Fluent Question: When did the Court of Justice rule that the Commission could only propose that there must be some criminal sanctions ?

Question: The Huguenots were the first Europeans to live in what modern Canadian no New York borough ?
Fluent Question: The Huguenots were the first Europeans to live in what modern New York borough ?

Question: Who financed the original physical campus , uh pardon me , th board of trustees ?
Fluent Question: Who financed the original board of trustees?

Question: Can any body sorry no no Who may introduce new laws or amendments to laws already on the books as a bill ?
Fluent Question: Who may introduce new laws or amendments to laws already on the books as a bill ?

Question: Where rather when did economists reach a conclusion with the S & P ' s rating agency ?
Fluent Question: When did economists reach a conclusion with the S & P ' s rating agency ?

"""

# Send every question through the few-shot prompt and keep one rewrite per question.
# `prompts` and `fluent_outputs` stay index-aligned with `questions`.
print("Making Questions Fluent")
prompts, fluent_outputs = [], []
for question in tqdm(questions):
    # The question line is appended verbatim (whitespace-stripped) after the few-shot
    # examples, followed by the cue the model is expected to complete.
    node_prompt = fluent_question_prompt + question.strip() + "\nFluent Question: "
    request_output = request(node_prompt, n=5, engine='curie')
    if len(request_output):
        # Five completions are requested, but only the first one is used.
        output = request_output[0]
    else:
        # Nothing came back: fall back to the original question without its "Question: " prefix.
        output = question.replace("Question: ", "")
    fluent_outputs.append(output)
    prompts.append(node_prompt)

Making Questions Fluent


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  6.11it/s]


In [488]:
# Print each original question followed by its rewritten version, for at most the
# first ten pairs. Iterating over zip instead of range(10) avoids an IndexError
# when fewer than ten questions were processed.
for question, fluent_output in zip(questions[:10], fluent_outputs[:10]):
    print(question)
    print(fluent_output)

Question: How many direct uh I think their called uh general questions are available to opposition leaders ?  
How many general questions are available to opposition leaders?
Question: Where rather when did economists reach a conclusion with the S & P ' s rating agency ?  
Where rather when did economists reach a conclusion with the S & P ' s rating agency ?  
Question: At what temperature do weak and radioactivity , uh electromagnetic forces , rather , appear the same ?  
At what temperature do weak and radioactivity , uh electromagnetic forces , rather , appear the same ?  
Question: Can any body sorry no no Who may introduce new laws or amendments to laws already on the books as a bill ?  
Can any body sorry no no Who may introduce new laws or amendments to laws already on the books as a bill ?  
Question: What cells er uh I guess hormones are produced mostly when awake ?  
What cells produce hormones in most frequency while awake?
Question: What is the goal of widespread change no 

In [425]:
def program_disfluency(data):
    """Rewrite the disfluent question of every example into a fluent one (work in progress).

    Each example's ``'input'`` is reduced to the text after its last blank
    line. That text must consist of exactly three newline-separated parts: the
    question, the context and a third part that is ignored here. Every question
    is appended to a few-shot prompt and sent to ``request``; the first
    completion is kept, or the question without its "Question: " prefix when
    nothing is returned.

    The question line is assumed to already start with "Question: " (the
    fallback above strips that prefix), so the prompt does not add it again.

    Args:
        data: Sequence of dicts with ``'input'`` and ``'label'`` keys.

    Returns:
        None. Selecting the entailing context, answering the fluent question
        and scoring against the labels are not implemented yet.

    Raises:
        ValueError: If an instance does not split into exactly three parts.
    """
    instances = [ex['input'].split("\n\n")[-1] for ex in data]
    # Not used yet; needed once the answers are scored.
    labels = [ex['label'] for ex in data]

    # Separate Question and Context from instance
    def separate(instances):
        questions, contexts = [], []
        for ex in instances:
            question, context, _ = ex.split("\n")
            # Append the question string itself (not the accumulating list).
            questions.append(question)
            contexts.append(context)
        return questions, contexts

    questions, contexts = separate(instances)

    # Make question fluent.
    # The prompt is assembled from parts so that the source indentation of this
    # function does not leak into the text sent to the model.
    instruction = "The following questions is not fluent because the speaker changes their intention midway. Disambiguate the question and make it fluent."
    few_shot_examples = [
        (
            "What is the lower canal regulation of the Dornbirner Ach or uh the Rhine ?",
            "What is the lower canal regulation of the Rhine?",
        ),
        (
            "What did or like instead when did the Court of Justice rule that the Commission could only propose that there must be some criminal sanctions ?",
            "When did the Court of Justice rule that the Commission could only propose that there must be some criminal sanctions ?",
        ),
        (
            "The Huguenots were the first Europeans to live in what modern Canadian no New York borough ?",
            "The Huguenots were the first Europeans to live in what modern New York borough ?",
        ),
        (
            "Where rather when did economists reach a conclusion with the S & P ' s rating agency ?",
            "When did economists reach a conclusion with the S & P ' s rating agency ?",
        ),
    ]
    # Ends with a blank line; the per-example "Question: ..." line is appended below.
    fluent_question_prompt = instruction + "\n\n" + "".join(
        "Question: " + disfluent + "\nFluent Question: " + fluent + "\n\n"
        for disfluent, fluent in few_shot_examples
    )

    print("Making Questions Fluent")
    prompts, fluent_outputs = [], []
    for question in tqdm(questions):
        node_prompt = fluent_question_prompt + question.strip() + "\nFluent Question: "
        request_output = request(node_prompt, n=5, engine='curie')
        if len(request_output):
            # Five completions are requested, but only the first one is used.
            output = request_output[0]
        else:
            # Nothing came back: keep the original question without its prefix.
            output = question.replace("Question: ", "")
        fluent_outputs.append(output)
        prompts.append(node_prompt)

    new_data = []
    def combine_instances(new_questions, instances, data):
        # Placeholder; not implemented yet.
        pass

    # TODO: Find context that entails questions.
    # TODO: Answer fluent question given smaller context.

# Repeat the program ten times on the first 100 examples and summarise the scores.
em_runs = []
for run in range(10):
    # NOTE(review): this calls program_anachronism, while the function defined
    # directly above in this cell is program_disfluency — confirm which is intended.
    em_runs.append(program_anachronism(data[:100]))
print("Mean:", np.mean(em_runs))
# np.std returns the standard deviation, so the label says "Std" (it used to say "Variance").
print("Std:", np.std(em_runs))

True

In [386]:
class Program:
    """Holds the list of nodes that make up a program."""

    def __init__(self, node_list):
        """Store ``node_list`` on the instance as given (no copy is made)."""
        self.node_list = node_list
        


class Node:
    """Base class for a node; it carries no state yet."""

    def __init__(self):
        pass


class LMNode(Node):
    """Node that is constructed with an input prompt."""

    def __init__(self, input_prompt=None):
        """Keep ``input_prompt`` on the instance.

        The default of ``None`` keeps a no-argument ``LMNode()`` call working.
        """
        super().__init__()
        self.input_prompt = input_prompt
    
class TeachTaskDecomposition(TeachArbitraryTask):
    """Subclass of TeachArbitraryTask with a hook for executing a program (stub)."""

    def __init__(self, task, **kwargs):
        # NOTE(review): `task` and `kwargs` are accepted but not forwarded to the
        # parent initializer — confirm what TeachArbitraryTask.__init__ expects.
        super().__init__()

    def execute_program(self, program):
        """Execute ``program``; not implemented yet."""
        pass

### One-node task-decomposition 

### Multi-node task-decomposition

### Big Bench tasks
* Common Morpheme
* Anachronisms 
* SST-2