# <u>Chapter 9</u>: Generating Text in Chatbots

In [3]:
import sys
import subprocess
import pkg_resources

# Find out which packages are missing.
# `pkg_resources` exposes each installed distribution under its lower-cased
# name (`dist.key`), so the required names below are written in lower case.
installed_packages = {dist.key for dist in pkg_resources.working_set}
# Every third-party package imported by this notebook. `tqdm` (progress bar)
# and `datasets` (dataset loading) are imported by later cells, so they are
# listed here as well; otherwise a fresh environment fails with ImportError.
required_packages = {'torch', 'transformers', 'datasets', 'tqdm'}
missing_packages = required_packages - installed_packages

# If there are missing packages install them.
if missing_packages:
    print('Installing the following packages: ' + str(missing_packages))
    # Run pip through the interpreter that backs this kernel so the packages
    # are installed into the environment the notebook actually uses.
    python = sys.executable
    # Sort the names so the pip command line is the same on every run.
    subprocess.check_call([python, '-m', 'pip', 'install', *sorted(missing_packages)], stdout=subprocess.DEVNULL)

## Perplexity

In the code that follows, we measure the perplexity of the `gpt2` model on three datasets. We start by loading the model and its tokenizer.

In [None]:
import torch 
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Name of the pretrained checkpoint shared by the model and the tokenizer.
model_name = "gpt2"

# Load the language model and move it to the selected device.
model = GPT2LMHeadModel.from_pretrained(model_name)
model = model.to(device)

# Load the tokenizer that matches the checkpoint.
tokenizer = GPT2TokenizerFast.from_pretrained(model_name)

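The perplexity calculation slides a window over the tokenized text, scores only the tokens that are new in each window (the earlier tokens serve as context), and finally exponentiates the average negative log-likelihood of the scored tokens.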

In [None]:
from tqdm import tqdm

# Calculate the perplexity of the model.
def calc_perplexity(encodings, stride=512):
    """Compute the perplexity of the global `model` on a tokenized text.

    The text is processed with a sliding window: each window ends `stride`
    tokens after the previous one and is at most `model.config.n_positions`
    tokens long. Only the tokens that are new in a window are scored; the
    tokens before them are masked with -100 and act as context only.

    Args:
        encodings: Object whose `input_ids` attribute is a 2-D tensor of token
            ids (the sequence is read along dimension 1).
        stride: Number of new tokens scored per window. Must lie between 1 and
            `model.config.n_positions`; smaller values give more context per
            scored token at the cost of more forward passes.

    Returns:
        The perplexity as a Python float, i.e. the exponential of the average
        negative log-likelihood over all scored tokens.

    Raises:
        ValueError: If `stride` is out of range, or if the input is too short
            to contain any scored token (fewer than two tokens).
    """
    max_length = model.config.n_positions
    if not 0 < stride <= max_length:
        raise ValueError(
            "stride must be between 1 and %d, got %r" % (max_length, stride)
        )

    seq_len = encodings.input_ids.size(1)

    nlls = []
    n_scored_total = 0

    # Read the data using a sliding window for the context.
    for i in tqdm(range(0, seq_len, stride)):
        begin_loc = max(i + stride - max_length, 0)
        end_loc = min(i + stride, seq_len)
        # May be shorter than `stride` in the last window.
        trg_len = end_loc - i
        input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
        target_ids = input_ids.clone()
        # Mask the context tokens so that they do not contribute to the loss.
        target_ids[:, :-trg_len] = -100

        # The model shifts the labels by one position internally (see the
        # transformers documentation), so the label in the first position of a
        # window is never scored. Count the labels that are really scored
        # instead of assuming `trg_len`; the two differ in the first window.
        n_scored = int((target_ids[:, 1:] != -100).sum())
        if n_scored == 0:
            # Nothing to score here; the mean loss would be undefined (NaN).
            continue

        # Calculate the negative log likelihood.
        with torch.no_grad():
            outputs = model(input_ids, labels=target_ids)
            # The returned loss is a mean over the scored tokens; turn it back
            # into a sum so that windows of different size are weighted fairly.
            neg_log_likelihood = outputs[0] * n_scored

        # Negative log-likelihood stack.
        nlls.append(neg_log_likelihood)
        n_scored_total += n_scored

    if n_scored_total == 0:
        raise ValueError(
            "cannot compute perplexity: the input must contain at least two tokens"
        )

    return torch.exp(torch.stack(nlls).sum() / n_scored_total).item()

It's time to evaluate the model on three diverse datasets: WikiText-2, Tiny Shakespeare, and a small sample of IMDb reviews.

In [None]:
from datasets import load_dataset

# Datasets to evaluate on, in order: (display name, positional arguments for
# `load_dataset`, i.e. dataset path and configuration name).
eval_datasets = [
    ("wikitext", ("wikitext", "wikitext-2-raw-v1")),
    ("tiny_shakespeare", ("tiny_shakespeare", "default")),
    ("tiny-imdb", ("iamholmes/tiny-imdb", "iamholmes--tiny-imdb")),
]

# Loading the datasets can take several minutes.
for dataset_name, dataset_args in eval_datasets:
    # Load the test split of the dataset.
    test = load_dataset(*dataset_args, split="test")

    # Join all rows into one long text and tokenize it as a single sequence;
    # `calc_perplexity` then slides its window over that sequence.
    encodings = tokenizer("\n\n".join(test["text"]), return_tensors="pt")

    # Report which model was scored on which dataset, in one consistent format.
    print("The perplexity of the %s model on the %s dataset: %.2f"
          % (model_name, dataset_name, calc_perplexity(encodings)))