In [2]:
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

import torch  # imported here so this cell works on a fresh kernel

# Choose the compute device instead of assuming Apple-silicon "mps":
# moving the model to a backend that is not available raises at `.to(device)`.
# Preference order keeps the original behavior wherever MPS exists.
_mps_backend = getattr(torch.backends, "mps", None)  # absent on older torch builds
if _mps_backend is not None and _mps_backend.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Checkpoint name shared by the model and its tokenizer.
model_id = "gpt2"
# Load the pretrained causal-LM weights and move them to the selected device.
model = GPT2LMHeadModel.from_pretrained(model_id).to(device)
# Fast tokenizer for the same checkpoint.
tokenizer = GPT2TokenizerFast.from_pretrained(model_id)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from datasets import load_dataset

# Load the raw WikiText-2 test split.
test = load_dataset(
    "wikitext",
    "wikitext-2-raw-v1",
    split="test",
)

# Join every row into one document (rows separated by a blank line) and
# tokenize it in a single call, asking for PyTorch tensors back.
corpus_text = "\n\n".join(test["text"])
encodings = tokenizer(corpus_text, return_tensors="pt")

Found cached dataset wikitext (/Users/kevinblin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126)


KeyboardInterrupt: 

In [None]:
# Display the loaded split to inspect its features and row count.
test

Dataset({
    features: ['text'],
    num_rows: 4358
})

In [None]:
import torch
from tqdm import tqdm

# Sliding-window perplexity of `model` over the tokenized corpus in `encodings`.
#
# Each window holds at most `max_length` tokens and starts `stride` tokens after
# the previous one. Tokens already scored by an earlier window stay in the input
# as context but are masked out of the loss with the label value -100.
max_length = model.config.n_positions
stride = 512
seq_len = encodings.input_ids.size(1)

nlls = []  # per-window summed negative log-likelihoods
n_tokens = 0  # total number of predictions that contributed to the loss
prev_end_loc = 0
for begin_loc in tqdm(range(0, seq_len, stride)):
    end_loc = min(begin_loc + max_length, seq_len)
    trg_len = end_loc - prev_end_loc  # may be different from stride on last loop
    input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
    target_ids = input_ids.clone()
    target_ids[:, :-trg_len] = -100  # context-only positions are ignored by the loss

    with torch.no_grad():
        outputs = model(input_ids, labels=target_ids)

    # `outputs.loss` is a mean over the labels that were actually scored. The
    # model shifts labels by one position internally, so the label at index 0 of
    # a window is never predicted. Count the scored labels exactly rather than
    # assuming the count is `trg_len`; otherwise the first window is over-weighted
    # and the final average uses the wrong denominator.
    num_loss_tokens = (target_ids[:, 1:] != -100).sum().item()

    # Turn the per-window mean back into a sum so windows of different sizes
    # are weighted by how many predictions they contain.
    neg_log_likelihood = outputs.loss * num_loss_tokens

    nlls.append(neg_log_likelihood)
    n_tokens += num_loss_tokens

    prev_end_loc = end_loc
    if end_loc == seq_len:
        # The final window reached the end of the corpus; later start offsets
        # would only re-read tokens that have already been scored.
        break

# Perplexity = exp(mean negative log-likelihood per scored token).
# NOTE(review): the result recorded in this notebook was produced on the MPS
# backend - confirm it against a CPU run before relying on the number.
ppl = torch.exp(torch.stack(nlls).sum() / n_tokens)

100%|█████████▉| 560/562 [02:24<00:00,  3.87it/s]


In [None]:
ppl

  nonzero_finite_vals = torch.masked_select(


tensor(5.0490, device='mps:0')

In [4]:
import evaluate
from tqdm import tqdm

# Score a single prompt with the `evaluate` perplexity metric, using GPT-2.
perplexity = evaluate.load("perplexity", module_type="metric")
predictions = ["What is the 2nd root of 343?"]
results = perplexity.compute(
    model_id="gpt2",
    predictions=predictions,
)

Using pad_token, but it is not set yet.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 1/1 [00:00<00:00, 17.39it/s]


In [5]:
# Perplexity of one string under GPT-2, with `add_start_token` disabled.
perplexity = evaluate.load("perplexity", module_type="metric")
input_texts = ["lorem ipsum Wanna Bonjour cane"]
results = perplexity.compute(
    predictions=input_texts,
    model_id="gpt2",
    add_start_token=False,
)

# Report the returned keys, then the mean and the first per-text score,
# each rounded to two decimals.
print(list(results))
print(round(results["mean_perplexity"], 2))
first_score = results["perplexities"][0]
print(round(first_score, 2))

# Leave the full result dict as the cell's displayed value.
results

Using pad_token, but it is not set yet.
100%|██████████| 1/1 [00:00<00:00, 28.27it/s]

['perplexities', 'mean_perplexity']
438.12
438.12





{'perplexities': [438.1231384277344], 'mean_perplexity': 438.1231384277344}

In [16]:
## Perplexity from a dataset
perplexity = evaluate.load("perplexity", module_type="measurement")

# Take the first 50 rows of the WikiText-2 test split ...
wikitext_test = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
input_texts = wikitext_test["text"][:50]
# ... and discard the rows that are empty strings.
input_texts = list(filter(lambda s: s != '', input_texts))

results = perplexity.compute(data=input_texts, model_id="gpt2")

# Returned keys, then mean and first per-text perplexity (two decimals).
print(list(results))
print(round(results["mean_perplexity"], 2))
print(round(results["perplexities"][0], 2))

Found cached dataset wikitext (/Users/kevinblin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126)
Using pad_token, but it is not set yet.
100%|██████████| 2/2 [00:00<00:00,  6.33it/s]

['perplexities', 'mean_perplexity']
489.41
567.91





In [10]:
## Perplexity from a dataset
# NOTE(review): this cell repeats the previous "Perplexity from a dataset" cell
# verbatim; consider keeping only one of them.
perplexity = evaluate.load("perplexity", module_type="measurement")

# First 50 rows of the WikiText-2 test split, with empty rows removed.
input_texts = load_dataset(
    "wikitext",
    "wikitext-2-raw-v1",
    split="test",
)["text"][:50]
input_texts = [row for row in input_texts if row != ""]

results = perplexity.compute(
    data=input_texts,
    model_id="gpt2",
)

# Print the result keys followed by the mean and first perplexity values.
result_keys = list(results.keys())
print(result_keys)
print(round(results["mean_perplexity"], 2))
print(round(results["perplexities"][0], 2))

Found cached dataset wikitext (/Users/kevinblin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126)
Using pad_token, but it is not set yet.
100%|██████████| 2/2 [00:00<00:00,  6.59it/s]

['perplexities', 'mean_perplexity']
489.41
567.91





In [21]:
# Read one sentence per line from sentences.txt.
# The encoding is passed explicitly so decoding does not depend on the
# platform's default locale (assumes the file is UTF-8 - TODO confirm).
with open('sentences.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

# Remove surrounding whitespace, including the trailing newline, from each line.
strings = [line.strip() for line in lines]

print(strings)


['What is the 2nd root of 343?\n', 'What is the square root of 343?']
