In [1]:
import transformers
import torch
import datasets
from torch.utils.data import DataLoader
from functools import partial
from torch import nn
import dataclasses

import sys
sys.path.append('..')
import token_data
import micro_model

@dataclasses.dataclass
class TesterConfig:
    batch_size: int = 10
    max_seq_len: int = 1024
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

class Tester:
    def __init__(self, model, dataset, tokenizer, loss, config: TesterConfig = None):
        self.tokenizer = tokenizer
        self.model = model
        self.dataset = dataset
        self.config = config if config is not None else TesterConfig()
        self.dataloader = DataLoader(self.dataset, batch_size=self.config.batch_size, num_workers=1, collate_fn=partial(
            token_data.data_collator, self.tokenizer, self.config.max_seq_len))
        self.model = self.model.to(self.config.device)
        self.model.eval()
        self.model.to(torch.bfloat16)
        self.loss = loss

    @torch.no_grad()
    def test(self):
        i = 0
        for batch in self.dataloader:
            batch["input_ids"] = batch["input_ids"].to(self.config.device)
            batch["labels"] = batch["labels"].to(self.config.device)
            i += 1
            logits = self.model.forward(batch["input_ids"])
            if not isinstance(logits, torch.Tensor):
                logits = logits.logits
            loss = self.loss(logits.view(-1, logits.size(-1)), batch["labels"].view(-1))
            print(f"Loss: {loss}")
            if i == 10:
                break

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
dataset = datasets.load_dataset(path="cerebras/SlimPajama-627B", split='test', trust_remote_code=True, streaming=True)

In [13]:
loss = nn.CrossEntropyLoss()

In [14]:

tokenizer = transformers.AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v0.1")
model = transformers.AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v0.1")

In [15]:
tester1 = Tester(model, dataset, tokenizer, loss)
tester1.test()

Loss: 3.0625
Loss: 2.484375
Loss: 3.453125
Loss: 3.328125
Loss: 2.84375
Loss: 2.953125
Loss: 3.515625
Loss: 2.65625
Loss: 3.28125
Loss: 3.03125


In [16]:
tokenizer = token_data.load_tokenizer()

model = micro_model.get_model()
ckpt = torch.load('/home/tl2020/train_micro/checkpoints/micro_29oct.ckpt')
model.load_state_dict({ k[6:]: v for k, v in ckpt["state_dict"].items() })
tester2 = Tester(model, dataset, tokenizer, loss)
tester2.test()

  ckpt = torch.load('/home/tl2020/train_micro/checkpoints/micro_29oct.ckpt')


Loss: 3.2799911499023438
Loss: 2.6166486740112305
Loss: 3.383049249649048
Loss: 3.2342450618743896
Loss: 3.2254295349121094
Loss: 3.1496567726135254
Loss: 2.8281991481781006
Loss: 2.8293678760528564
Loss: 3.2402546405792236
Loss: 2.7298648357391357
