<a href="https://colab.research.google.com/github/HamdanXI/nlp_adventure/blob/main/Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch



In [2]:
from datasets import load_dataset, load_metric
from transformers import AutoTokenizer, DataCollatorForSeq2Seq, AutoModelForSeq2SeqLM
from transformers import DataCollatorForSeq2Seq
from torch.utils.data import DataLoader
import torch

# Load the aslg_pc12 dataset and the tokenizer/model of the checkpoint under evaluation.
raw_datasets = load_dataset("aslg_pc12")
tokenizer = AutoTokenizer.from_pretrained("HamdanXI/test-trainer")
model = AutoModelForSeq2SeqLM.from_pretrained("HamdanXI/test-trainer")

# Generate on the GPU when one is available. The evaluation loop moves its
# inputs to `model.device`, so nothing else has to change.
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# The reference sentences are stored in "text"; expose them under "labels".
raw_datasets = raw_datasets.rename_column("text", "labels")
# Hold out 10% of the train split for evaluation. The fixed seed keeps the
# held-out examples identical across runs so that scores are comparable.
raw_datasets = raw_datasets["train"].train_test_split(test_size=0.1, seed=42)
test_data = raw_datasets['test']

# Tokenize
def tokenize_function(example):
    """Tokenize the gloss (model input) and the reference text (labels).

    Args:
        example: A dataset row holding the source under "gloss" and the
            reference under "labels".

    Returns:
        A dict with "input_ids", "attention_mask" and "labels" as plain
        Python lists of token ids, each truncated to at most 512 tokens.
    """
    # No `return_tensors="pt"`: Dataset.map stores Arrow lists anyway, and
    # plain lists avoid building a one-row tensor batch just to index it.
    # This form also keeps working if map() is later called with batched=True.
    encoded_source = tokenizer(example["gloss"], truncation=True, max_length=512)
    encoded_target = tokenizer(example["labels"], truncation=True, max_length=512)

    return {
        "input_ids": encoded_source["input_ids"],
        "attention_mask": encoded_source["attention_mask"],
        "labels": encoded_target["input_ids"],
    }

# Replace the raw columns with the tokenized features. Dropping the original
# columns matters for two reasons: the collator can only pad numeric fields
# (a leftover string column such as "gloss" cannot be turned into a tensor),
# and the string "labels" column is swapped for token ids rather than updated
# in place with a different type.
tokenized_test_data = test_data.map(
    tokenize_function,
    remove_columns=test_data.column_names,
)

# DataCollatorForSeq2Seq pads the inputs and the labels of each batch.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

# The DataLoader does not shuffle, so predictions stay aligned with `test_data`.
test_loader = DataLoader(tokenized_test_data, batch_size=16, collate_fn=data_collator)

# Upper bound on generated tokens. Without an explicit value, generate() uses
# the model's default length cap, which can truncate outputs. Tune as needed.
MAX_GENERATION_LENGTH = 128

# Evaluate
model.eval()
predictions = []
with torch.no_grad():
    for data in test_loader:
        inputs = data['input_ids'].to(model.device)
        attention_mask = data['attention_mask'].to(model.device)

        output = model.generate(
            inputs,
            attention_mask=attention_mask,
            max_length=MAX_GENERATION_LENGTH,
        )
        # Move results to the CPU so generated batches do not pile up on the GPU.
        predictions.extend(output.cpu())

decoded_preds = [tokenizer.decode(pred, skip_special_tokens=True) for pred in predictions]

# References are the untokenized target sentences of the same test split.
references = test_data['labels']

# Compute Metrics
# NOTE(review): `load_metric` is deprecated in recent `datasets` releases in
# favour of the separate `evaluate` package - confirm the installed version.
bleu_metric = load_metric("bleu")
rouge_metric = load_metric("rouge")
meteor_metric = load_metric("meteor")

# The "bleu" metric works on pre-tokenized text: each prediction is a list of
# tokens and each example carries a list of tokenized references.
bleu_score = bleu_metric.compute(
    predictions=[pred.split() for pred in decoded_preds],
    references=[[ref.split()] for ref in references],
)
# ROUGE and METEOR take the raw strings directly.
rouge_score = rouge_metric.compute(predictions=decoded_preds, references=references)
meteor_score = meteor_metric.compute(predictions=decoded_preds, references=references)

print("BLEU:", bleu_score)
print("ROUGE:", rouge_score)
print("METEOR:", meteor_score)

Map:   0%|          | 0/8771 [00:00<?, ? examples/s]

AttributeError: ignored