In [3]:
# Check if GPU is available
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the pre-trained DialoGPT model
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium").to(device)

In [5]:
# load the dataset for fine-tuning
from datasets import load_dataset

# Load DailyDialog dataset
dataset = load_dataset("daily_dialog", split="validation", trust_remote_code=True)

In [6]:
# Load the tokenizer for DialoGPT
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
tokenizer.special_tokens_map

{'bos_token': '<|endoftext|>',
 'eos_token': '<|endoftext|>',
 'unk_token': '<|endoftext|>'}

In [7]:
tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
tokenizer.special_tokens_map

{'bos_token': '<|endoftext|>',
 'eos_token': '<|endoftext|>',
 'unk_token': '<|endoftext|>',
 'pad_token': '<|endoftext|>'}

In [8]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
import torch
from tqdm import tqdm

def calculate_perplexity(model, tokenizer, dataset, max_context_length=400):
    model.eval()  # Set the model to evaluation mode
    total_log_likelihood = 0
    total_token_count = 0

    for dialog in tqdm(dataset, desc="Calculating Perplexity"):
        for i in range(len(dialog['dialog']) - 1):
            prompt = dialog['dialog'][i]
            response = dialog['dialog'][i + 1]

            # Concatenate prompt and response for language modeling
            input_text = prompt + tokenizer.eos_token + response + tokenizer.eos_token
            input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

            if input_ids.shape[1] > max_context_length:
                input_ids = input_ids[:, -max_context_length:]

            # Generate predictions for all tokens
            with torch.no_grad():
                outputs = model(input_ids, labels=input_ids)
                log_likelihood = outputs.loss * input_ids.shape[1]  # Total log-likelihood for the sequence

            total_log_likelihood += log_likelihood.item()
            total_token_count += input_ids.shape[1]

    avg_log_likelihood = total_log_likelihood / total_token_count
    perplexity = torch.exp(torch.tensor(avg_log_likelihood))

    return perplexity.item()

# Load the DailyDialog validation dataset
dataset = load_dataset("daily_dialog", split="validation")

# Load the fine-tuned model and tokenizer
model_name = "microsoft/DialoGPT-medium"
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Calculate perplexity
perplexity = calculate_perplexity(model, tokenizer, dataset)
print(f"Perplexity: {perplexity}")

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
Calculating Perplexity: 100%|██████████| 1000/1000 [01:34<00:00, 10.61it/s]

Perplexity: 103.19329071044922



