In [1]:
import functools
import json
import numpy as np
from colorama import Fore, Style

from bigbench.api import json_task
import bigbench.models.huggingface_models as huggingface_models
import bigbench.api.model as api_model


# Load models

In [2]:
# Show the model names exposed by the Hugging Face wrapper module
# (huggingface_models.MODEL_NAMES), materialised as a list for printing.
print('Model Types: ', list(huggingface_models.MODEL_NAMES))

Model Types:  ['gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl', 'openai-gpt']


In [3]:
# Name of the checkpoint to evaluate; 'gpt2' is one of the entries printed
# from huggingface_models.MODEL_NAMES.
model_type = 'gpt2'
# Build the BIG-bench Hugging Face model wrapper for that checkpoint; later
# cells pass `model` to the task evaluation and to eval_example.
model = huggingface_models.BIGBenchHFModel(model_type)

All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


# Load data and create JsonTask

In [4]:
# Read the task definition. The encoding is given explicitly because JSON
# files are UTF-8, while open() otherwise falls back to the platform's locale
# encoding and can fail or mis-decode non-ASCII text on some systems.
with open('../data/task.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Wrap the parsed task definition in a JsonTask; later cells use
# `current_task` for evaluation and for reading the task's formatting options.
current_task = json_task.JsonTask(
    task_data=data
)

# Quantitative Evaluation

multiple_choice_grade: Accuracy

low_score: Accuracy expected from random guessing (chance level)

high_score: Perfect Accuracy

In [5]:
# Run the task's own evaluation against the loaded model and keep the result
# under a name so it can be inspected again without re-running the evaluation.
score_data = current_task.evaluate_model(model)
# Bare trailing expression: the notebook renders its repr as the cell output
# (a list of ScoreData entries in the recorded run).
score_data

evaluating deducing_implicit_relations for 0 shots...


[ScoreData(score_dict={'multiple_choice_grade': 0.0}, preferred_score='multiple_choice_grade', number_of_shots=0, low_score=0.27777777777777773, high_score=1.0, subtask_description='deducing_implicit_relations')]

# See predictions

In [6]:
def pretty_print_output(example, scores):
    """Print an example's input followed by the model's probability per choice.

    Args:
        example: Mapping with an 'input' string, a 'choice' sequence and a
            'target_scores' mapping from each choice to its label.
        scores: Per-choice log scores, positionally aligned with
            example['choice']; they are exponentiated before being shown.

    Each choice is printed as a percentage rounded to two decimals. The choice
    whose label equals 1 is coloured green, every other choice black.
    """
    print(example['input'] + "\n")
    # Keyed by choice text, so a repeated choice keeps only its last score.
    prob_by_choice = {
        example['choice'][idx]: prob
        for idx, prob in enumerate(np.exp(scores))
    }
    print("Model_predictions:")
    for choice, prob in prob_by_choice.items():
        percent = round(prob * 100, 2)
        is_target = example['target_scores'][choice] == 1
        color = Fore.GREEN if is_target else Fore.BLACK
        tag = choice + ":"
        # Left-align the "choice:" tag in a 20-character column.
        print(f"{color}{tag:<20} {percent}%{Style.RESET_ALL}")
    print("\n")
    
def eval_example(model, example):
    """Score every choice of one formatted example and print the result.

    Args:
        model: Object exposing ``cond_log_prob(inputs=..., targets=...)``.
        example: Mapping with at least 'input' and 'choice' keys (plus
            whatever pretty_print_output reads from it).

    Returns nothing; the scores are handed to pretty_print_output for display.
    """
    score_fn = model.cond_log_prob
    choice_scores = score_fn(inputs=example['input'], targets=example['choice'])
    pretty_print_output(example, choice_scores)

In [7]:
# See all predictions
# eval_example reads example['input'] and example['choice'], so every entry of
# data['examples'] must already carry a 'choice' key at this point.
# NOTE(review): presumably JsonTask formatted the examples in place when
# `current_task` was built (the printed inputs show the "Q: " prefix and the
# appended options) - confirm, since this cell depends on that earlier cell.
for example in data['examples']:
    eval_example(model, example)

Q: "You are over-scrupulous, surely. I dare say Mr. Bingley will be very glad to see you; and I will send a few lines by you to assure him of my hearty consent to his marrying whichever he chooses of the girls; though I must throw in a good word for my little Lizzy." "I desire you will do no such thing. Lizzy is not a bit better than the others; and I am sure she is not half so handsome as Jane, nor half so good-humoured as Lydia. But you are always giving _her_ the preference." "They have none of them much to recommend them," replied he; "they are all silly and ignorant like other girls; but Lizzy has something more of quickness than her sisters." What is the relation between Jane Bennet and Elizabeth Bennet? option: cousin option: brother or sister option: child A: 

Model_predictions:
[32mbrother or sister:   14.51%[0m
[30mcousin:              41.53%[0m
[30mchild:               43.96%[0m


Q: "But consider your daughters. Only think what an establishment it would be for one of

In [8]:
# Hand-written example: an 'input' passage ending in a question, plus
# 'target_scores' mapping each candidate answer to a label (the choice labelled
# 1 is the one pretty_print_output prints in green).
# It has no 'choice' key yet; the formatted version is built further down with
# json_task.default_format_fn.
sample = {
    "input" : "\"You are over-scrupulous, surely. I dare say Mr. Bingley will be very glad to see you; and I will send a few lines by you to assure him of my hearty consent to his marrying whichever he chooses of the girls; though I must throw in a good word for my little Lizzy.\" \"I desire you will do no such thing. Lizzy is not a bit better than the others; and I am sure she is not half so handsome as Jane, nor half so good-humoured as Lydia. But you are always giving _her_ the preference.\" \"They have none of them much to recommend them,\" replied he; \"they are all silly and ignorant like other girls; but Lizzy has something more of quickness than her sisters.\" What is the relation between Jane Bennet and Elizabeth Bennet?",
    "target_scores" : {
        "brother or sister" : 1,
        "cousin" : 0,
        "child" : 0
    }
}

# Prepare sample
# Reuse the task's own formatting options, falling back to the same defaults
# when the task definition does not set them.
task_cfg = current_task.task_data
input_prefix = task_cfg.get("example_input_prefix", "Q: ")
output_prefix = task_cfg.get("example_output_prefix", " A: ")
choice_prefix = task_cfg.get("choice_prefix", " choice: ")
append_choices_to_input = task_cfg.get("append_choices_to_input", True)

# Format the hand-written sample with those options.
format_options = dict(
    input_prefix=input_prefix,
    output_prefix=output_prefix,
    choice_prefix=choice_prefix,
    append_choices_to_input=append_choices_to_input,
)
prepared_sample = json_task.default_format_fn(sample, **format_options)

# Score the formatted sample and print the per-choice probabilities.
eval_example(model, prepared_sample)

Q: "You are over-scrupulous, surely. I dare say Mr. Bingley will be very glad to see you; and I will send a few lines by you to assure him of my hearty consent to his marrying whichever he chooses of the girls; though I must throw in a good word for my little Lizzy." "I desire you will do no such thing. Lizzy is not a bit better than the others; and I am sure she is not half so handsome as Jane, nor half so good-humoured as Lydia. But you are always giving _her_ the preference." "They have none of them much to recommend them," replied he; "they are all silly and ignorant like other girls; but Lizzy has something more of quickness than her sisters." What is the relation between Jane Bennet and Elizabeth Bennet? option: brother or sister option: cousin option: child A: 

Model_predictions:
[32mbrother or sister:   41.39%[0m
[30mcousin:              18.53%[0m
[30mchild:               40.08%[0m


