In [5]:
# Check if GPU is available
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [6]:
# Load the fine-tuned model and tokenizer

from transformers import AutoTokenizer, AutoModelForCausalLM
from pathlib import Path
path = Path("./models/fine-tuned-dialogpt_new")
#path.absolute()
tokenizer = AutoTokenizer.from_pretrained(path.absolute())
model = AutoModelForCausalLM.from_pretrained(path.absolute()).to(device)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
# load the dataset for fine-tuning
from datasets import load_dataset

# Load DailyDialog dataset
dataset = load_dataset("daily_dialog", split="validation", trust_remote_code=True)

In [8]:
from tqdm import tqdm

def calculate_perplexity(model, tokenizer, dataset, max_context_length=400):
    model.eval()  # Set the model to evaluation mode
    total_log_likelihood = 0
    total_token_count = 0

    for dialog in tqdm(dataset, desc="Calculating Perplexity"):
        for i in range(len(dialog['dialog']) - 1):
            prompt = dialog['dialog'][i]
            response = dialog['dialog'][i + 1]

            # Concatenate prompt and response for language modeling
            input_text = prompt + tokenizer.eos_token + response + tokenizer.eos_token
            input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

            if input_ids.shape[1] > max_context_length:
                input_ids = input_ids[:, -max_context_length:]

            # Generate predictions for all tokens
            with torch.no_grad():
                outputs = model(input_ids, labels=input_ids)
                log_likelihood = outputs.loss * input_ids.shape[1]  # Total log-likelihood for the sequence

            total_log_likelihood += log_likelihood.item()
            total_token_count += input_ids.shape[1]

    avg_log_likelihood = total_log_likelihood / total_token_count
    perplexity = torch.exp(torch.tensor(avg_log_likelihood))

    return perplexity.item()


# Calculate perplexity
perplexity = calculate_perplexity(model, tokenizer, dataset)
print(f"Perplexity: {perplexity}")

Calculating Perplexity: 100%|██████████| 1000/1000 [01:35<00:00, 10.47it/s]

Perplexity: 20.560758590698242



