In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import sys

import torch
from transformers import (
    LlamaForCausalLM,
    LlamaTokenizer
)
from peft import PeftModel

# Data
A real dialogue between the chatbot and a user.

In [3]:
# --- Inputs -----------------------------------------------------------------
# JSON log that contains the dialogue to score.
data_path = "../logs/user_history_338217620.json"

# --- Models -----------------------------------------------------------------
# Base checkpoint identifier and the location of the LoRA adapter weights.
model_path = "meta-llama/Llama-2-7b-chat-hf"
adapter_weights_path = "../models/llama-chat-7b-lora-friendly-dialogue"

# Keyword arguments forwarded verbatim to `LlamaForCausalLM.from_pretrained`.
model_load_params = dict(
    device_map="auto",
    load_in_8bit=True,
    torch_dtype=torch.float16,
)

In [4]:
# JSON is UTF-8 by specification; pass the encoding explicitly so non-ASCII dialogue
# text decodes the same way regardless of the platform's locale default.
with open(data_path, 'r', encoding='utf-8') as fp:
    # The file holds a dict; take the 'prompt' field of its first entry.
    dialogue = list(json.load(fp).values())[0]['prompt']

# Perplexity

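How confident is our model when generating such a dialogue? Perplexity is the exponential of the model's average per-token loss on the dialogue, so a lower value means the model finds the text more predictable.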

### Original model

In [5]:
# Base Llama-2 chat checkpoint, loaded with the options collected in `model_load_params`.
model = LlamaForCausalLM.from_pretrained(model_path, **model_load_params)

# Tokenizer shipped with the same checkpoint.
tokenizer = LlamaTokenizer.from_pretrained(model_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Tokenize the whole dialogue in one call and place the tensor on the model's device.
# `tokenizer.encode` returns a CPU tensor; with `device_map="auto"` the model may live
# on a GPU, and the forward pass hits a device mismatch whenever nothing relocates the
# inputs automatically.
input_ids = tokenizer.encode(dialogue, return_tensors="pt").to(model.device)

In [15]:
# Score the dialogue with the base model: passing the inputs as `labels` makes the
# forward pass return a loss, and perplexity is the exponential of that loss.
model.eval()  # inference behaviour for the forward pass
with torch.no_grad():  # scoring only, so no gradients are tracked
    loss = model(input_ids=input_ids, labels=input_ids).loss
perplexity = loss.float().exp()

In [17]:
# Report the base model's score; the f-string renders the 0-dim tensor as a plain number.
print(f'Perplexity of the original model: {perplexity}')

Perplexity of the original model: 5.301388263702393


### LoRA fine-tuned model

In [19]:
# Attach the LoRA adapter stored at `adapter_weights_path` to the already-loaded base model.
# NOTE(review): PEFT is understood to inject adapter layers into `model` in place, so
# re-running the "Original model" cells after this one would presumably no longer
# measure the untouched base model - confirm, and restart the kernel before re-measuring.
# NOTE(review): `torch_dtype` is forwarded as an extra keyword argument; verify that
# `PeftModel.from_pretrained` actually honours it.
peft_model = PeftModel.from_pretrained(model, adapter_weights_path, torch_dtype=torch.float16)

In [20]:
# Score the same dialogue with the LoRA-adapted model.
# Switch the model that is actually evaluated into eval mode: `eval()` recurses into
# submodules, so calling it on `peft_model` also covers the wrapped base model,
# whereas calling it on `model` relies on the adapter living inside that object.
peft_model.eval()
with torch.no_grad():  # scoring only, so no gradients are tracked
    # Passing the inputs as `labels` makes the forward pass return a loss.
    loss = peft_model(input_ids, labels=input_ids).loss
# Perplexity is the exponential of the loss; cast to float32 before exponentiating.
perplexity = torch.exp(loss.float())

In [21]:
# Report the LoRA-adapted model's score; the f-string renders the 0-dim tensor as a plain number.
print(f'Perplexity of slightly fine-tuned model: {perplexity}')

Perplexity of slightly fine-tuned model: 3.2949230670928955
