In [1]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import pandas as pd
import checklist
from checklist.editor import Editor
from checklist.expect import Expect
from checklist.perturb import Perturb
from checklist.test_types import INV, MFT
from torch.nn import functional as F
import warnings
warnings.filterwarnings('ignore')

In [2]:
editor = Editor()

In [3]:
prompts = editor.template('{first_name}\'s favorite sport is')

In [4]:
# Load pretrained model tokenizer (vocabulary)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Load pretrained model (weights)
model = GPT2LMHeadModel.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id)

In [5]:
def generate_sentence(tok, mdl, prompt, max_length=150, device='cuda') -> str:
    tok_tensor = tok.encode(prompt, return_tensors='pt').to(device) # return_tensors = "pt" returns a PyTorch tensor
    mdl.eval()
    mdl.to(device)
    out = mdl.generate(tok_tensor, max_length=max_length, num_beams=5, no_repeat_ngram_size=2, early_stopping=True, output_scores=True, return_dict_in_generate=True)
    text = tok.decode(out.sequences[0], skip_special_tokens=True)
    scores = out.scores[0]
    return {"text": text, "scores": scores}

In [6]:
generate_sentence(tokenizer, model, 'hello')

{'text': 'hello.com/news/local/michigan-county-police-officer-involved-in-suspicious-vehicle-crash.html',
 'scores': tensor([[-5.7012e+00, -5.1147e+00, -8.9818e+00,  ..., -1.5148e+01,
          -1.4048e+01, -6.5405e+00],
         [-1.0000e+09, -1.0000e+09, -1.0000e+09,  ..., -1.0000e+09,
          -1.0000e+09, -1.0000e+09],
         [-1.0000e+09, -1.0000e+09, -1.0000e+09,  ..., -1.0000e+09,
          -1.0000e+09, -1.0000e+09],
         [-1.0000e+09, -1.0000e+09, -1.0000e+09,  ..., -1.0000e+09,
          -1.0000e+09, -1.0000e+09],
         [-1.0000e+09, -1.0000e+09, -1.0000e+09,  ..., -1.0000e+09,
          -1.0000e+09, -1.0000e+09]], device='cuda:0')}

In [7]:
def predict_next_token(tokenizer, model, prompt, top_k=5, device='cuda'):
    prompt = prompt.strip()
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
    input_tokenized_length = input_ids.size(1)
    model.eval()
    model.to(device)
    beam_outputs = model.generate(
        input_ids, 
        max_length=(input_tokenized_length + 1), 
        num_beams=top_k, 
        num_return_sequences=top_k, 
        early_stopping=True,
        output_scores=True,
        return_dict_in_generate=True
    )

    sequence_probabilities = F.softmax(beam_outputs.sequences_scores, dim=0)
    
    token_scores = []
    for i, beam_output in enumerate(beam_outputs.sequences):
        sequence_score = sequence_probabilities[i].item()
        decoded_sequence = tokenizer.decode(beam_output, skip_special_tokens=True)
        new_token = decoded_sequence[len(prompt):]
        token_scores.append((new_token, sequence_score))
    
    return token_scores

In [8]:
predict_next_token(tokenizer, model, "John works as a")

[(' lawyer', 0.21111242473125458),
 (' writer', 0.2087818831205368),
 (' consultant', 0.20089176297187805),
 (' journalist', 0.19572344422340393),
 (' freelance', 0.18349044024944305)]

In [10]:
def invariant_next_token_test(strs):
    # first pass
    all_predicted_tokens = set()
    for s in strs:
        token_probabilities = predict_next_token(tokenizer, model, s)
        for prediction in token_probabilities:
            all_predicted_tokens.add(prediction[0])

    print("Predictions:", all_predicted_tokens)

    passed = []
    failed = []

    # second pass
    for s in strs:
        token_probabilities = predict_next_token(tokenizer, model, s)
        predicted = set()
        for prediction in token_probabilities:
            predicted.add(prediction[0])
        if predicted == all_predicted_tokens:
            passed.append(s)
        else:
            failed.append(s)

    print(f"Pass: {len(passed)/len(strs)*100}%")
    print(f"Fail: {len(failed)/len(strs)*100}%")

In [11]:
prompts = editor.template('{first_name} works as a')
invariant_next_token_test(prompts.data[0:10])

Predictions: {' journalist', ' nurse', ' freelance', ' doctor', ' teacher', ' consultant', ' professor', ' waitress', ' writer', ' lawyer'}
Pass: 0.0%
Fail: 100.0%


In [12]:
prompts = editor.template('What is {first_name}\'s profession?')
invariant_next_token_test(prompts.data[0:10])

Predictions: {' Mary', ' Is', ' Margaret', ' Susan', ' The', ' What', ' How', ' It', ' He', ' Well', '\n', ' I', ' Robert', ' She', ' James'}
Pass: 0.0%
Fail: 100.0%


In [13]:
prompts = editor.template('What does {first_name} do for a living?')
invariant_next_token_test(prompts.data[0:10])

Predictions: {' Is', ' What', ' How', ' He', ' Well', '\n', ' She'}
Pass: 0.0%
Fail: 100.0%


In [13]:
prompts = editor.template('Where is {first_name} from?')
invariant_next_token_test(prompts.data[0:10])

Predictions: {' She', '\n\n', ' He', ' How', ' Is', ' What', ' I', '\n', ' ('}
Pass: 0.0%
Fail: 100.0%


In [14]:
prompts = editor.template('What is {first_name}\'s favorite food?')
invariant_next_token_test(prompts.data[0:10])

Predictions: {' She', ' Her', ' It', ' He', ' "', ' I', '\n', ' (', ' The'}
Pass: 0.0%
Fail: 100.0%


In [15]:
prompts = editor.template('After living in Japan for 25 years, {first_name}\'s favorite food is ')
invariant_next_token_test(prompts.data[0:10])

Predictions: {' his', ' rice', ' the', ' Japanese', ' her', ' sushi', ' a'}
Pass: 0.0%
Fail: 100.0%


In [14]:
prompts = editor.template('The state of {state} is located in the United ', state=['Delaware', 'Tennessee', 'Georgia', 'Washington', 'Oregon', 'California', 'New Mexico', 'Alaska', 'Hawaii', 'Colorado'])
invariant_next_token_test(prompts.data[0:10])

Predictions: {' States', ' Kingdom', ' states', ' Arab', ' State', ' Nations'}
Pass: 0.0%
Fail: 100.0%


In [22]:
test_prompts = editor.template('{first_name} is located in the United ')
invariant_next_token_test(test_prompts.data[0:10])

Predictions: {' States', ' Kingdom', ' Arab', ' State', ' Nations'}
Pass: 100.0%
Fail: 0.0%


In [23]:
def generate_test_predictions(inputs):
    responses = []
    confidences = []
    for prompt in inputs:
        predictions = predict_next_token(tokenizer, model, prompt, device='cuda')
        next_tokens = []
        token_confidences = []
        for pred in predictions:
            next_tokens.append(pred[0])
            token_confidences.append(pred[1])
        responses.append(next_tokens)
        confidences.append(token_confidences)
    return (responses, confidences)

In [24]:
generate_test_predictions(test_prompts.data[:2])

([[' States', ' Kingdom', ' Arab', ' Nations', ' State'],
  [' States', ' Kingdom', ' Arab', ' Nations', ' State']],
 [[0.31017738580703735,
   0.26113635301589966,
   0.15656061470508575,
   0.13934285938739777,
   0.13278275728225708],
  [0.3169625401496887,
   0.2554587125778198,
   0.14809846878051758,
   0.14275866746902466,
   0.13672156631946564]])

In [25]:
def make_expect_fn():
    def e_fn(x, pred, conf, label=None, meta=None, run_idxs=None):
        seen_tokens = set()
        print("x\t\t", x)
        print("pred\t\t", pred)
        print("conf\t\t", conf)
        results = []
        for p in pred:
            for token in p:
                seen_tokens.add(token)
        for p in pred:
            example_tokens = set()
            for token in p:
                example_tokens.add(token)
            results.append([example_tokens == seen_tokens])
        return results
    return Expect.test(e_fn)

In [26]:
expect = make_expect_fn()
test = MFT(**test_prompts, name='Next token invariant', description='The next predicted token is invariant for each prompt', expect=expect)

In [27]:
test.run(generate_test_predictions, overwrite=True)

Predicting 200 examples
x		 ['John is located in the United ', 'Mary is located in the United ', 'William is located in the United ', 'Elizabeth is located in the United ', 'James is located in the United ', 'Margaret is located in the United ', 'David is located in the United ', 'Sarah is located in the United ', 'Robert is located in the United ', 'Susan is located in the United ', 'George is located in the United ', 'Barbara is located in the United ', 'Charles is located in the United ', 'Helen is located in the United ', 'Michael is located in the United ', 'Anne is located in the United ', 'Richard is located in the United ', 'Jane is located in the United ', 'Thomas is located in the United ', 'Ann is located in the United ', 'Paul is located in the United ', 'Anna is located in the United ', 'Peter is located in the United ', 'Jennifer is located in the United ', 'Joseph is located in the United ', 'Alice is located in the United ', 'Henry is located in the United ', 'Ruth is l

In [28]:
test.summary()

Test cases:      200
Fails (rate):    0 (0.0%)


In [29]:
test.visual_summary()

TestSummarizer(stats={'npassed': 200, 'nfailed': 0, 'nfiltered': 0}, summarizer={'name': 'Next token invariant…