In [None]:
# Import libraries

In [None]:

import torch
import evaluate 
import numpy as np
from transformers import T5Tokenizer, T5ForConditionalGeneration, get_scheduler
from torch.utils.data import DataLoader, random_split
from torch.optim import AdamW
from utils import read_json, collote_fn, MAX_TARGET_LENGTH
from dataset import MengziT5Dataset
from pathlib import Path
from tqdm import tqdm 
from dotenv import load_dotenv 
# Load variables from a local .env file into the process environment
# (presumably credentials or cache settings for the libraries above — TODO confirm).
load_dotenv()

# Hugging Face model id; used later for both the tokenizer and the base T5 weights.
checkpoint = "Langboat/mengzi-t5-base"

# Preprocess: load the dev data and build the evaluation DataLoader

In [None]:
DATA_DEV_PATH = "data/dev.json"
# Evaluation batch size; lower it if generation runs out of memory.
VALID_BATCH_SIZE = 8

valid_data = read_json(DATA_DEV_PATH)
print("First valid data: ", valid_data[0])

valid_dataset = MengziT5Dataset(valid_data)

# collote_fn receives the model and the tokenizer, so both must exist before
# the first batch is drawn below (otherwise a fresh kernel hits a NameError).
# The lambda resolves `model` / `tokenizer` at call time, so if a later cell
# rebinds them (e.g. after loading fine-tuned weights) the new objects are used.
tokenizer = T5Tokenizer.from_pretrained(checkpoint)
model = T5ForConditionalGeneration.from_pretrained(checkpoint)

test_dataloader = DataLoader(
    valid_dataset,
    shuffle=False,  # keep evaluation order deterministic
    batch_size=VALID_BATCH_SIZE,
    collate_fn=lambda x: collote_fn(x, model, tokenizer),
)

# Sanity check: pull one collated batch and inspect its tensors.
test_data = next(iter(test_dataloader))
print("test input_ids: ", test_data['input_ids'])
print("test attention_mask: ", test_data['attention_mask'])
print("test decoder_input_ids: ", test_data['decoder_input_ids'])
print("test labels:", test_data['labels'])

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

best_model_name = "best_t5.pt"
# TODO: replace the placeholder with the real checkpoint folder name.
foldername =  '???????????_ckpt'
checkpoint_path = Path(f"./checkpoint/{foldername}")
file_path = checkpoint_path / best_model_name

# Build the base architecture first; `checkpoint` (the base model id) is
# defined in the imports cell. The fine-tuned weights are loaded on top below.
model = T5ForConditionalGeneration.from_pretrained(checkpoint)
tokenizer = T5Tokenizer.from_pretrained(checkpoint)

# weights_only=True restricts unpickling to tensors and plain containers;
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
state_dict = torch.load(file_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)

# test_loop moves every batch to `device`, so the model must live there too.
model = model.to(device)

In [None]:
def test_loop(dataloader, model, tokenizer):
    """Generate predictions for every batch and score them with BLEU.

    Predictions and references from *all* batches are accumulated and scored
    once at the end, so the result reflects the whole dataloader.

    Args:
        dataloader: Sized iterable of collated batches. Each batch must
            support ``.to(device)`` and expose ``input_ids``,
            ``attention_mask`` and ``labels`` (``-100`` marks label positions
            that are replaced by the pad id before decoding).
        model: Model exposing ``eval()`` and ``generate(...)``.
        tokenizer: Tokenizer exposing ``batch_decode`` and ``pad_token_id``.

    Returns:
        dict: ``bleu-1`` .. ``bleu-N`` hold the entries of the metric's
        ``precisions`` list (in order), and ``avg`` is their arithmetic mean.

    Raises:
        ValueError: If ``dataloader`` yields no examples.
    """
    model.eval()
    bleu = evaluate.load("bleu")
    all_preds, all_refs = [], []

    with torch.no_grad():
        # Iterating through tqdm advances the progress bar once per batch.
        for batch_data in tqdm(dataloader, total=len(dataloader)):
            batch_data = batch_data.to(device)
            outputs = model.generate(
                batch_data["input_ids"],
                attention_mask=batch_data["attention_mask"],
                max_new_tokens=MAX_TARGET_LENGTH,
                num_beams=4,
            )
            decoded_outputs = tokenizer.batch_decode(
                outputs,
                skip_special_tokens=True,
            )

            label_ids = batch_data['labels']
            # -100 is not a vocabulary id; swap in the pad id so the labels
            # can be decoded (pad tokens are then dropped as special tokens).
            label_ids = torch.where(label_ids != -100, label_ids, tokenizer.pad_token_id)
            decoded_labels = tokenizer.batch_decode(
                label_ids,
                skip_special_tokens=True,
            )

            # Space-separate the characters so each character is scored as
            # its own token (presumably because the text has no word
            # boundaries — confirm against the dataset).
            all_preds.extend(' '.join(pred.strip()) for pred in decoded_outputs)
            all_refs.extend(' '.join(label.strip()) for label in decoded_labels)

    if not all_preds:
        raise ValueError("dataloader produced no examples; nothing to evaluate")

    bleu_result = bleu.compute(predictions=all_preds, references=all_refs)
    result = {f"bleu-{i}": value for i, value in enumerate(bleu_result["precisions"], start=1)}
    # Materialise the values first: np.mean cannot reduce a dict_values view,
    # and the mean must be taken before the 'avg' key itself is added.
    result['avg'] = np.mean(list(result.values()))
    print(
        f"Test result: BLEU1={result['bleu-1']}, BLEU2={result['bleu-2']}, "
        f"BLEU3={result['bleu-3']}, BLEU4={result['bleu-4']}"
    )
    return result

In [None]:
# Run the evaluation; as the cell's last expression, the returned metrics
# dict is also shown as the cell output.
test_loop(dataloader=test_dataloader, model=model, tokenizer=tokenizer)