# In [10]:
import time
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import transformers
from datasets import load_dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# All tensors and the model in this script are placed on the CPU.
device = torch.device("cpu")

# Checkpoint directory that supplies both the model weights and the tokenizer.
model_path = "../ML/LLM-s/test_silu/checkpoint-12500"
model = GPT2LMHeadModel.from_pretrained(model_path)
model = model.to(device)
tokenizer = GPT2Tokenizer.from_pretrained(model_path)

# When the tokenizer defines no padding token, reuse the end-of-sequence
# token for it and record the matching id on the model config.
if tokenizer.pad_token is None:
    model.config.pad_token_id = tokenizer.eos_token_id
    tokenizer.pad_token = tokenizer.eos_token

# Put the model in evaluation mode before any forward pass.
model.eval()

# Evaluation text: the WikiText-2 (raw) test split.  Only the first 1000 rows
# are considered, and rows whose stripped text is 10 characters or shorter
# are dropped.
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
first_rows = dataset["text"][:1000]
prompts = [row for row in first_rows if len(row.strip()) > 10]

# Perplexity measurement.
#
# Each prompt is scored on its own (one sequence per forward pass, truncated
# to 128 tokens).  The logits at position i are compared with the token at
# position i + 1, and the prompt's perplexity is exp(mean cross-entropy over
# those positions).
#
# NOTE: ``mean_ppl`` is the arithmetic mean of the per-prompt perplexities.
# That is not the same quantity as a corpus-level perplexity (exp of the
# token-weighted mean loss); keep this in mind when comparing numbers.
ppls = []
# Label positions equal to the pad token id are excluded from the loss.
loss_fct = torch.nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

with torch.no_grad():
    for prompt in prompts:
        # Move the encoded prompt to the model's device, as the speed section
        # does, so the loop keeps working if ``device`` is changed.
        inputs = tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=128
        ).to(device)
        input_ids = inputs["input_ids"]

        # At least two tokens are needed to form one (input, next-token) pair.
        if input_ids.size(1) < 2:
            continue

        outputs = model(**inputs)
        # Drop the last logit and the first label so position i predicts
        # token i + 1.
        shift_logits = outputs.logits[..., :-1, :].contiguous()
        shift_labels = input_ids[..., 1:].contiguous()

        loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
                        shift_labels.view(-1))
        ppl = torch.exp(loss).item()
        ppls.append(ppl)

# NaN when no prompt was scored, instead of a NumPy empty-mean warning.
mean_ppl = np.mean(ppls) if ppls else float("nan")

# Speed measurement.
#
# Times ``num_repeats`` runs of ``num_iterations`` forward passes each over
# one fixed short prompt (a single sequence per pass, no text generation).
num_iterations = 100
num_repeats = 5
times = []

inputs = tokenizer("This is a speed test.", return_tensors="pt").to(device)
with torch.no_grad():
    # Untimed warm-up pass so any first-call overhead for this input is not
    # charged to the first timed run.
    model(**inputs)

    for _ in range(num_repeats):
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short durations; time.time() can jump if the system
        # clock is adjusted.
        start = time.perf_counter()
        for _ in range(num_iterations):
            outputs = model(**inputs)
        end = time.perf_counter()
        times.append(end - start)

# Statistics are over whole runs: each entry of ``times`` covers
# ``num_iterations`` forward passes.
mean_time = np.mean(times)
std_time = np.std(times)  # NumPy default: population std (ddof=0)
latency = mean_time / num_iterations  # seconds per forward pass
# Tokens processed in one run = tokens in the prompt * passes per run.
num_tokens = inputs["input_ids"].numel() * num_iterations
tokens_per_sec = num_tokens / mean_time

# Write the collected metrics to a one-row CSV file.
#
# "Inference Time (s)" and "Std Dev (s)" describe a whole run of
# ``num_iterations`` forward passes; "Latency (ms)" is per forward pass.
results = pd.DataFrame([{
    "Model": model_path,
    # Derived from ``device`` so the label cannot drift from where the model
    # actually ran ("CPU" for the current configuration).
    "Device": device.type.upper(),
    "Perplexity": mean_ppl,
    "Inference Time (s)": mean_time,
    "Std Dev (s)": std_time,
    "Latency (ms)": latency * 1000,
    "Throughput (tokens/sec)": tokens_per_sec
}])

output_path = Path("../ML/Metrics/Silu_metrics_cpu.csv")
# Create the target directory if needed so the measurements are not lost to
# a missing-directory error at the very last step.
output_path.parent.mkdir(parents=True, exist_ok=True)
results.to_csv(output_path, index=False)