# Evaluation of Modell

In [1]:
import json

import pytorch_lightning as pl
import pytorch_lightning.loggers
import torch
import torch.nn.functional as F
import wandb
import numpy as np
import evaluate
from tqdm import tqdm
from datasets import load_dataset, load_metric
from pytorch_lightning import Trainer, seed_everything
from torch.utils.data import DataLoader
from torch.optim import AdamW
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, get_linear_schedule_with_warmup
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training, TaskType
from sklearn.model_selection import train_test_split

class DataModule(pl.LightningDataModule):
    def __init__(self, model_name, batch_size: int = 32):
        super().__init__()
        self.test_dataset = None
        self.validation_dataset = None
        self.train_dataset = None
        self.datasets = None
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, truncation_side='left')
        # make sure the tokenizer truncates the beginning of the input, not the end
        self.tokenizer.padding_side = "left"
        self.batch_size = batch_size

    def prepare_data(self):
        # load data
        self.datasets = load_dataset('csv', data_files={'train': './data/cleaned_with_context.csv',
                                                        'test': './data/cleaned_with_context_test.csv'})

        # tokenize
        self.datasets = self.datasets.map(self.tokenize_data, batched=True)

        # remove unused columns
        self.datasets = self.datasets.remove_columns(['humor', 'context', 'target'])

        # set correct format
        self.datasets.set_format(type="torch")

    def tokenize_data(self, datasets, padding="max_length"):
        # tokenize inputs
        model_inputs = self.tokenizer(list(map(str, datasets['context'])), max_length=512, padding=padding,
                                      truncation=True, return_tensors="pt")

        # Tokenize targets with the `text_target` keyword argument
        labels = self.tokenizer(text_target=list(map(str, datasets['target'])), max_length=512, padding=padding,
                                truncation=True, return_tensors="pt")

        # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore
        # padding in the loss.
        if padding == "max_length":
            labels["input_ids"] = [
                [(l if l != self.tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
            ]

        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    def setup(self, stage: str):
        # split data
        message_tree_ids = self.datasets['train']['message_tree_id']
        train_ids, val_ids = train_test_split(list(set(message_tree_ids)), test_size=0.1)
        print(len(train_ids))
        print(len(val_ids))
        self.train_dataset = self.datasets['train'].filter(lambda sample: sample['message_tree_id'] in train_ids)
        self.validation_dataset = self.datasets['train'].filter(lambda sample: sample['message_tree_id'] in val_ids)
        self.test_dataset = self.datasets['test']
        
        self.train_dataset = self.train_dataset.remove_columns(['message_tree_id'])
        self.validation_dataset = self.validation_dataset.remove_columns(['message_tree_id'])
        self.test_dataset = self.test_dataset.remove_columns(['message_tree_id'])

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size)

    def val_dataloader(self):
        return DataLoader(self.validation_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size)

    def predict_dataloader(self):
        pass


class Model(pl.LightningModule):
    def __init__(self, model_name, batch_size, learning_rate):
        super().__init__()
        self.learning_rate = learning_rate
        self.batch_size = batch_size

        self.save_hyperparameters()

        lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q", "v"],
            lora_dropout=0.05,
            bias="none",
            task_type=TaskType.SEQ_2_SEQ_LM
        )
        native_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
        self.model = get_peft_model(native_model, lora_config)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, truncation_side='left')

        #self.metric = load_metric('bleu')

    def forward(self, batch):
        return self.model(**batch)

    def training_step(self, batch, batch_nb):
        outputs = self(batch)

        self.log('train_loss', outputs.loss)

        return {'loss': outputs.loss}

    def validation_step(self, batch, batch_nb):
        outputs = self(batch)

        if batch_nb < 3:
            inputs = self.tokenizer.decode(batch['input_ids'][0])

            label_ids = batch['labels'][0]
            # Replace -100 in the prediction with the pad token id in the tokenizer, otherwise an error occures while decoding
            label_ids[label_ids == -100] = self.tokenizer.pad_token_id

            generated_ids = self.model.generate(**batch, max_new_tokens=200)
            label = self.tokenizer.decode(label_ids)
            generated_text = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
            columns = ["Input", "Label", "Prediction"]
            data = [[inputs, label, generated_text[0]]]
            self.logger.log_text(key=f"Sample-Epoch{self.current_epoch}-Batch{batch_nb}", columns=columns, data=data)

        self.log('val_loss', outputs.loss)
        return {'val_loss': outputs.loss}

    def configure_optimizers(self):
        return AdamW(self.parameters(), lr=self.hparams.learning_rate)

    def _num_steps(self) -> int:
        """Get number of steps"""
        train_dataloader = self.trainer.datamodule.train_dataloader()
        dataset_size = len(train_dataloader.dataset)
        num_steps = dataset_size * self.trainer.max_epochs // self.batch_size
        return num_steps

  warn(f"Failed to load image Python extension: {e}")
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = Model.load_from_checkpoint(checkpoint_path="checkpoints/google/flan-t5-base-batch4-v19.ckpt")

In [3]:
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base", truncation_side='left')

In [5]:
model.model.eval()

PeftModelForSeq2SeqLM(
  (base_model): LoraModel(
    (model): T5ForConditionalGeneration(
      (shared): Embedding(32128, 768)
      (encoder): T5Stack(
        (embed_tokens): Embedding(32128, 768)
        (block): ModuleList(
          (0): T5Block(
            (layer): ModuleList(
              (0): T5LayerSelfAttention(
                (SelfAttention): T5Attention(
                  (q): Linear(
                    in_features=768, out_features=768, bias=False
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=768, out_features=16, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=16, out_features=768, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
                    (lora_embedding_B):

In [6]:
datasets = load_dataset('csv', data_files={'train': './data/cleaned_with_context.csv'})

message_tree_ids = datasets['train']['message_tree_id']
_, val_ids = train_test_split(list(set(message_tree_ids)), test_size=0.1, random_state=42)
print(len(val_ids))
validation_dataset = datasets['train'].filter(lambda sample: sample['message_tree_id'] in val_ids)

Downloading and preparing dataset csv/default to /home/jovyan/.cache/huggingface/datasets/csv/default-50483a3a9d6decac/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1...


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2299.51it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 551.59it/s]
                                                                   

Dataset csv downloaded and prepared to /home/jovyan/.cache/huggingface/datasets/csv/default-50483a3a9d6decac/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1. Subsequent calls will reuse this data.


100%|██████████| 1/1 [00:00<00:00, 378.72it/s]


985


                                                                       

In [7]:
# Metric
metric = evaluate.load("rouge")

In [10]:
def batches(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

In [14]:
len(validation_dataset) / 32

162.90625

In [20]:
# run predictions
predictions, references = [] , []
for i, sample in tqdm(enumerate(batches(validation_dataset, 32))):
    if i < 2:
        print(i)
        model_inputs = tokenizer(sample['context'], max_length=512, padding="max_length",
                                 truncation=True, return_tensors="pt").to('cuda')
        output = model.model.generate(input_ids=model_inputs['input_ids'], do_sample=True, top_p=0.9, max_new_tokens=200)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
        predictions.extend(output_text)
        references.extend(sample['target'])
        print('Sample of Batch\nContext: ')
        print(sample['context'][-1])
        print('Label: ')
        print(sample['target'][-1])
        print('Prediction: ')
        print(output_text[-1])
        

# compute metric 
rogue = metric.compute(predictions=predictions, references=references, use_stemmer=True)

# print results 
print(f"Rogue1: {rogue['rouge1']* 100:2f}%")
print(f"rouge2: {rogue['rouge2']* 100:2f}%")
print(f"rougeL: {rogue['rougeL']* 100:2f}%")

0it [00:00, ?it/s]

0


1it [00:12, 12.16s/it]

Sample of Batch
Context: 
 Crea un script en Bash que recorra todos los ficheros del directorio donde se ejecuta el script y los renombre de por la fecha y hora de creación del mismo respetando su extensión
Label: 
No tengo acceso a los recursos necesarios, como un sistema operativo que permita la ejecución de scripts en Bash.
Prediction: 
Sin embargo, esto es una script para que se recorra todos los ficheros del directorio en el que ejecuta el script. No siempre es el script de trás definiciones. Estos siete son solo los ficheros en una directorio en una directorio. En primer lugar, estos siete son en una directorio en una directorio en una directorio. Sin embargo, estos siete son el script de trás definiciones. Espero que estos siete son las ficheros de un directorio en una directorio. No siete siete en una directorio
1


163it [00:24,  6.69it/s]

Sample of Batch
Context: 
 Sigue los siguientes pasos. Dado el nombre de una profesión, escribe un evento feliz y uno trágico relacionado con dicha profesión.

1. Ingeniería electrónica 1. Ingeniería electrónica:

Evento feliz: Un ingeniero electrónico recibe un premio por su innovadora solución tecnológica que ha mejorado la vida de las personas de manera significativa.

Evento trágico: Un ingeniero electrónico es responsable de un fallo en un sistema crítico que resulta en una catástrofe, causando la pérdida de vidas y daños materiales considerables.

¿Hay algo más con lo que pueda ayudarte? Qué tipo de catástrofe, con pérdida de vidas y daños materiales, podría ocasionar un simple ingeniero electrónico?
Label: 
Un ingeniero electrónico podría ocasionar una catástrofe con pérdida de vidas y daños materiales si se produjera una falla en un sistema crítico, como una red eléctrica, una central nuclear o un sistema de control de tráfico aéreo. Un fallo en uno de estos sistemas podría pro




Rogue1: 18.489594%
rouge2: 3.086718%
rougeL: 12.063082%
