<a href="https://colab.research.google.com/github/G0nkly/pytorch_sandbox/blob/main/gpts/finetuning/TinyLlama_gsm8k_eval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [40]:
import os
import math
import torch
from torch.utils.data import DataLoader
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, default_data_collator
from peft import PeftModel

In [41]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
adapter_path = "/home/bonda/tinyllama-lora-tuned-adapther-math"

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
).eval()

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

tmp_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tuned_model = PeftModel.from_pretrained(tmp_model, adapter_path)
tuned_model = tuned_model.merge_and_unload().eval()

In [42]:
def tokenize(batch):
  texts = [
      f"### Instruction:\n{instruction}\n### Response:\n{out}"
      for instruction, out in zip(batch['question'], batch['answer'])
  ]

  tokens = tokenizer(
      texts,
      padding="max_length",
      max_length=256,
      truncation=True,
      return_tensors="pt"
  )

  tokens["labels"] = tokens["input_ids"].clone()

  return tokens

In [43]:
eval_ds = load_dataset("openai/gsm8k", "main", split="train[200:300]")
eval_ds = eval_ds.map(tokenize, batched=True, remove_columns=["question", "answer"])
eval_ds = eval_ds.with_format("torch")

In [44]:
eval_loader = DataLoader(
    eval_ds,
    batch_size=8,
    collate_fn=default_data_collator
)

In [45]:
@torch.no_grad()
def compute_perplexity(model):
  losses = []

  for batch in eval_loader:
    batch = {k: v.to("cuda") for k,v in batch.items()}
    loss = model(**batch).loss
    losses.append(loss.item())

  return math.exp(sum(losses) / len(losses))

In [46]:
print(f"Base Model Perplexity: {compute_perplexity(base_model):.2f}")
print(f"Tuned Model Perplexity: {compute_perplexity(tuned_model):.2f}")

Base Model Perplexity: 229.65
Tuned Model Perplexity: 3.77


In [48]:
import random
raw_data = load_dataset("openai/gsm8k", "main", split="train[200:300]")
refs = raw_data["answer"]

def generate(model, instruction):
  token_ids = tokenizer(f"### Instruction:\n{instruction}\n### Response:\n", return_tensors="pt").input_ids.to("cuda")
  with torch.no_grad():
    out = model.generate(token_ids, max_new_tokens=256)

  return tokenizer.decode(out[0], skip_special_tokens=True)

In [49]:
raw_data["question"][0]

'Sansa is a famous artist, she can draw a portrait and sell it according to its size. She sells an 8-inch portrait for $5, and a 16-inch portrait for twice the price of the 8-inch portrait. If she sells three 8-inch portraits and five 16-inch portraits per day, how many does she earns every 3 days?'

In [50]:
refs[0]

'Sansa earns $5 x 3 = $<<5*3=15>>15 every day by selling three 8-inch portraits.\nThe price of the 16-inch portrait is $5 x 2 = $<<5*2=10>>10 each.\nSo, she earns $10 x 5 = $<<10*5=50>>50 every day by selling five 16-inch portraits.\nHer total earnings is $50 + $15 = $<<50+15=65>>65 every day.\nTherefore, the total amount she earns after 3 days is $65 x 3 = $<<65*3=195>>195.\n#### 195'

In [51]:
generate(base_model, raw_data["question"][0])

'### Instruction:\nSansa is a famous artist, she can draw a portrait and sell it according to its size. She sells an 8-inch portrait for $5, and a 16-inch portrait for twice the price of the 8-inch portrait. If she sells three 8-inch portraits and five 16-inch portraits per day, how many does she earns every 3 days?\n### Response:\nSansa earns $100 per day, which means she earns $300 per week, and $1,200 per month, and $5,000 per year.'

In [52]:
generate(tuned_model, raw_data["question"][0])

'### Instruction:\nSansa is a famous artist, she can draw a portrait and sell it according to its size. She sells an 8-inch portrait for $5, and a 16-inch portrait for twice the price of the 8-inch portrait. If she sells three 8-inch portraits and five 16-inch portraits per day, how many does she earns every 3 days?\n### Response:\nShe draws 8+16= <<8+16=24>>24 portraits per day\nShe sells them 5x24 = $ <<5*24=120>>120\nShe earns 120/3=$ <<120/3=40>>40 every three days\n#### 40'