# Notebook used for evaluate the wav2vec with the huggingface model

In [1]:
import torch
import torchaudio
from datasets import load_dataset, load_metric
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
from enelvo import normaliser
import re
import os
import numpy as np
import pytorch_lightning as pl

## Parameters

In [None]:
# version = "wav2vec2_adam_test4" #@param {type: "string"}
lr = 1e-05#@param {type: "number"}
w_decay = 0#@param {type: "number"}
bs = 25#@param {type: "integer"}
accum_grads = 1#@param {type: "integer"}
patience = 20#@param {type: "integer"}
max_epochs = 300#@param {type: "integer"}
hold_epochs = 20#@param {type: "integer"}

# Define hyperparameters
hparams = {"version": version,
          "lr": lr,
          "w_decay": w_decay,
          "bs": bs,
          "pretrained": 'facebook/wav2vec2-large-xlsr-53', 
          "patience": patience,
          "accum_grads": accum_grads,
          "max_epochs": max_epochs}
hparams

In [3]:
processor = Wav2Vec2Processor.from_pretrained('Wav2Vec2/wav2vec2-large-xlsr-portuguese/wav2vec2_adam_test4')
print(len(processor.tokenizer))

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


46


In [4]:
class Wav2VecNet(pl.LightningModule):
    def __init__(self, *args, **kwargs):
        super().__init__()

        self.hparams = hparams

        # Note como a arquitetura esta dependente dos hiperparâmetros salvos.
        self.model = Wav2Vec2ForCTC.from_pretrained(
                    hparams["pretrained"],
                    gradient_checkpointing=True,
                    mask_time_prob=0.1,
                    mask_time_length=10,
                    mask_feature_prob=0.1,
                    mask_feature_length=10,
                    ctc_loss_reduction="mean",
                    pad_token_id=processor.tokenizer.pad_token_id,
                    vocab_size=44
                    )
        self.model.freeze_feature_extractor()
    
    def predict_step(self, batch, batch_idx, dataloader_idx=None):
        
        input_values, labels = batch['input_values'], batch['labels']
        attention_mask = batch['attention_mask'] 
        
        logits = self.model(input_values, attention_mask=attention_mask).logits
        pred_ids = torch.argmax(logits, dim=-1)
        return pred_ids
    
    def forward(self, input_values, attention_mask):
        logits = self.model(input_values, attention_mask=attention_mask).logits
        return logits

    def training_step(self, train_batch, batch_idx):
        
        input_values, labels = train_batch['input_values'], train_batch['labels']
        attention_mask = train_batch['attention_mask'] 

        # loss ctc compute
        loss = self.model(input_values, attention_mask=attention_mask, labels=labels).loss
        # print('loss', loss)

        self.log('ctc_loss_step', loss, prog_bar=True)
        
        return loss

    def training_epoch_end(self, outputs):
        loss = torch.stack([x['loss'] for x in outputs]).mean()       

        self.log("train_loss", loss, prog_bar=True)
  
    def validation_step(self, val_batch, batch_idx):
        
        input_values, labels = val_batch['input_values'], val_batch['labels']
        attention_mask = val_batch['attention_mask'] 

        # predict 
        val_loss = self.model(input_values, attention_mask=attention_mask, labels=labels).loss

        logits = self.forward(input_values, attention_mask=attention_mask)

        # print('preds', preds.shape)
        # print('labels', labels.shape) 

        wer = compute_metric_wer(logits, labels)
        # wer = compute_metric_wer(labels, labels)

        self.log('val_loss_step', val_loss, prog_bar=True)
        self.log('val_wer_step', wer, prog_bar=True)

        return {"val_loss_step": val_loss, "val_wer_step": wer}

    def validation_epoch_end(self, outputs):
        val_loss = torch.stack([x['val_loss_step'] for x in outputs]).mean()
        val_wer = np.stack([x['val_wer_step'] for x in outputs]).mean()

        self.log("val_loss", val_loss, prog_bar=True)
        self.log("val_wer", val_wer, prog_bar=True)
  
    def test_step(self, test_batch, batch_idx):
        
        input_values, labels = test_batch['input_values'], test_batch['labels']
        attention_mask = test_batch['attention_mask']

        test_loss = self.model(input_values, attention_mask=attention_mask, labels=labels).loss

        logits = self.forward(input_values, attention_mask=attention_mask)

        wer = compute_metric_wer(logits, labels)

        self.log("test_loss_step", test_loss, prog_bar=True)
        self.log("test_wer_step", wer, prog_bar=True)
        
        # Retornamos as losses do batch para podermos fazer a média no validation_epoch_end.
        return {"test_loss_step": test_loss, "test_wer_step": wer}

    def test_epoch_end(self, outputs):
        loss = torch.stack([x['test_loss_step'] for x in outputs]).mean()
        wer = np.stack([x['test_wer_step'] for x in outputs]).mean()

        self.log("test_loss", loss, prog_bar=True)
        self.log("test_wer", wer, prog_bar=True)
    
    # learning rate warm-up
    def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, optimizer_closure,
        on_tpu=False, using_native_amp=False, using_lbfgs=False):
        # skip the first 500 steps
        if self.trainer.global_step < 500:
            lr_scale = min(1., float(self.trainer.global_step + 1) / 500.)
            for pg in optimizer.param_groups:
                pg['lr'] = lr_scale * self.hparams["lr"]

        # update params
        optimizer.step(closure=optimizer_closure)

    def configure_optimizers(self):  

        optimizer = torch.optim.Adam(self.parameters(),
                         lr=self.hparams["lr"],
                         weight_decay=self.hparams["w_decay"])
        
        scheduler = LinearWarmupCosineAnnealingLR(optimizer, 
                                                  eta_min=0, # final-lr
                                                  warmup_start_lr=self.hparams["lr"],
                                                  warmup_epochs=self.hparams["hold_epochs"], # hold_epochs
                                                  max_epochs=self.hparams["max_epochs"])
        
        return {'optimizer': optimizer, 'lr_scheduler': scheduler, 'monitor':'val_loss'}  

In [5]:
best_model = '/home/nm/phd-wav2vec2-xlsr-53/notebooks/Wav2Vec2/wav2vec2_adam_test4-epoch=99-step=37799.ckpt'

trainer = pl.Trainer(resume_from_checkpoint=best_model)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores


In [6]:
model = Wav2VecNet.load_from_checkpoint(best_model, hparams=hparams)
model.to("cuda")

Some weights of the model checkpoint at facebook/wav2vec2-large-xlsr-53 were not used when initializing Wav2Vec2ForCTC: ['project_q.weight', 'quantizer.weight_proj.weight', 'project_hid.bias', 'project_hid.weight', 'quantizer.weight_proj.bias', 'quantizer.codevectors', 'project_q.bias']
- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-xlsr-53 and are newly initialized: ['lm_head.weight', 'lm_head.bias']
You should probably TRAIN this model on a down-stream task to be able to u

Wav2VecNet(
  (model): Wav2Vec2ForCTC(
    (wav2vec2): Wav2Vec2Model(
      (feature_extractor): Wav2Vec2FeatureExtractor(
        (conv_layers): ModuleList(
          (0): Wav2Vec2LayerNormConvLayer(
            (conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,))
            (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          )
          (1): Wav2Vec2LayerNormConvLayer(
            (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,))
            (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          )
          (2): Wav2Vec2LayerNormConvLayer(
            (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,))
            (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          )
          (3): Wav2Vec2LayerNormConvLayer(
            (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,))
            (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          )
          (4): Wav2Vec2LayerNor

In [7]:
test_dataset = load_dataset("common_voice", "pt", split="test")
wer = load_metric("wer")


chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\'\�]'
resampler = torchaudio.transforms.Resample(48_000, 16_000)
norm = normaliser.Normaliser()

Reusing dataset common_voice (/home/nm/.cache/huggingface/datasets/common_voice/pt/6.1.0/bb59ce0bb532485ab64b5d488a8dd2addc3104f694e06bcd2c272dc608bb1112)


# Evaluate using enelvo (4-gram)

In [8]:
# Preprocessing the datasets.
# We need to read the aduio files as arrays
def speech_file_to_array_fn(batch):
    batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    batch["speech"] = resampler(speech_array).squeeze().numpy()
    return batch

test_dataset = test_dataset.map(speech_file_to_array_fn)

HBox(children=(FloatProgress(value=0.0, max=4641.0), HTML(value='')))




In [9]:
# Preprocessing the datasets.
# We need to read the aduio files as arrays
def evaluate(batch):
    inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)

    with torch.no_grad():
        logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda"))

    pred_ids = torch.argmax(logits, dim=-1)
    
    pred_strings = processor.batch_decode(pred_ids)
    
    batch["pred_strings"] = pred_strings
    
    batch["pred_strings_norm"] = [norm.normalise(i) for i in pred_strings]
    return batch

result = test_dataset.map(evaluate, batched=True, batch_size=8)



HBox(children=(FloatProgress(value=0.0, max=581.0), HTML(value='')))

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)





## no-LM

In [10]:
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))

WER: 13.608008


## Enelvo (4-gram LM)

In [11]:
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings_norm"], references=result["sentence"])))

WER: 12.504962


In [12]:
del result
torch.cuda.empty_cache()

# Using PTT5 LM

In [16]:
class PTT5Net(pl.LightningModule):
    def __init__(self, *args, **kwargs):
        super().__init__()

        self.hparams = hparams

        # Note como a arquitetura esta dependente dos hiperparâmetros salvos.
        self.model = model_pt

        # for param in self.model.bert.parameters():
        #     param.requires_grad = False

        self.tokenizer = tokenizer

    def forward(self, input_values):
        logits = self.model(**input_values).prediction_logits
        return logits

    def predict_step(self, batch, batch_idx, dataloader_idx=None, gen_type=None):

        if gen_type==0:
          # normal beam search generation  
          pred_tokens = self._generate_tokens(batch["input_ids"])
        elif gen_type==1:
          # fast beam search generation
          pred_tokens = self._fast_generate_tokens(batch["input_ids"])  
        elif gen_type==2:
          # Top-K Sampling  generation
          pred_tokens = self._topK_generate_tokens(batch["input_ids"])  
        elif gen_type==3:   
          # Top-p generation
          pred_tokens = self._topp_generate_tokens(batch["input_ids"]) 
        elif gen_type==4:  
          # # Top-p e Top-K Sampling generation
          pred_tokens = self._toppK_generate_tokens(batch["input_ids"]) 
        else:
            # greedy decoding
          pred_tokens = self._greedy_generate_tokens(batch["input_ids"])

        # Tokens -> String
        decoded_pred = self.tokenizer.batch_decode(pred_tokens, skip_special_tokens=True)
        
        if 'labels' in batch:
            target = batch["labels"]
            decoded_target = [self.tokenizer.decode(tokens[tokens!=-100], skip_special_tokens=True) for tokens in target] 
            print(f"\nSample Target: {decoded_target[0]}\nPrediction: {decoded_pred[0]}\n")
            return decoded_pred, decoded_target   
        else:
            return decoded_pred
    
    def _greedy_generate_tokens(self, input_ids):
        
        decoded_ids = self.model.generate(
            input_ids, 
            # max_length=512,
            max_length=input_ids.shape[1]+1, 
            # pad_token_id=0,
            # eos_token_id=1,
            # early_stopping=True
        )


        return decoded_ids

    def _fast_generate_tokens(self, input_ids):
        '''
        Token generation
        '''
          
        # print(input_ids.shape)
        decoded_ids = self.model.generate(
            input_ids, 
            max_length=input_ids.shape[1]+1, 
            num_beams=50, 
            temperature=0.1,
            no_repeat_ngram_size=2, 
            num_return_sequences=1,
            length_penalty=0.8,
            repetition_penalty=0.8,
            num_beam_groups=5,
#             pad_token_id=0,
#             eos_token_id=1,
#             early_stopping=True
        )

        return decoded_ids   
    
    def _generate_tokens(self, input_ids):
        '''
        Token generation
        '''
          
        # print(input_ids.shape)
        decoded_ids = self.model.generate(
            input_ids, 
            max_length=input_ids.shape[1]+1, 
            num_beams=300, 
            temperature=0.1,
            no_repeat_ngram_size=2, 
            num_return_sequences=1,
            length_penalty=0.8,
            repetition_penalty=0.8,
            num_beam_groups=5,
#             pad_token_id=0,
#             eos_token_id=1,
#             early_stopping=True
        )

        return decoded_ids   
    
    def _topK_generate_tokens(self, input_ids):
        '''
        Token TopK generation
        '''
          
        # print(input_ids.shape)
        decoded_ids = self.model.generate(
            input_ids, 
            max_length=input_ids.shape[1]+1, 
            do_sample=True,  
            top_k=500, 
            temperature=0.1,
            num_return_sequences=1,
            length_penalty=0.8,
            repetition_penalty=0.8,
#             pad_token_id=0,
#             eos_token_id=1,
#             early_stopping=True
        )

        return decoded_ids
    
    def _topp_generate_tokens(self, input_ids):
        '''
        Token TopK generation
        '''
          
        # print(input_ids.shape)
        decoded_ids = self.model.generate(
            input_ids, 
            max_length=input_ids.shape[1]+1, 
            do_sample=True,  
            top_p=0.92, 
            top_k=0,
            temperature=0.1,
            num_return_sequences=1,
            length_penalty=0.8,
            repetition_penalty=0.8,
            pad_token_id=0,
            eos_token_id=1,
            early_stopping=True
        )

        return decoded_ids
    
    def _toppK_generate_tokens(self, input_ids):
        '''
        Token TopK generation
        '''
          
        # print(input_ids.shape)
        decoded_ids = self.model.generate(
            input_ids, 
            max_length=input_ids.shape[1]+1, 
            do_sample=True,  
            top_p=0.95, 
            top_k=1500,
            temperature=0.1,
            num_return_sequences=1,
            length_penalty=0.5,
            repetition_penalty=0.8,
#             pad_token_id=0,
#             eos_token_id=1,
#             early_stopping=True
        )

        return decoded_ids

    def training_step(self, train_batch, batch_idx):
        # loss compute
        loss = self.model(**train_batch).loss
        # print('loss', loss)

        self.log('cross_loss_step', loss, on_step=True, prog_bar=True)
        
        return loss

    def training_epoch_end(self, outputs):
        loss = torch.stack([x['loss'] for x in outputs]).mean()       

        self.log("train_loss", loss, prog_bar=True)
  
    def validation_step(self, val_batch, batch_idx):

        # predict 
        target = val_batch["labels"]
        val_loss = self.model(**val_batch).loss

        # pred_tokens = self._fast_generate_tokens(val_batch["input_ids"])
        pred_tokens = self._greedy_generate_tokens(val_batch["input_ids"])
        # Tokens -> String
        # decoded_pred = [self.tokenizer.decode(tokens, skip_special_tokens=True) for tokens in pred_tokens]
        decoded_pred = self.tokenizer.batch_decode(pred_tokens, skip_special_tokens=True)
        
        decoded_target = [self.tokenizer.decode(tokens[tokens!=-100], skip_special_tokens=True) for tokens in target]
        return {"val_loss_step": val_loss, "pred": decoded_pred, "target": decoded_target}

    def validation_epoch_end(self, outputs):
        val_loss = torch.stack([x['val_loss_step'] for x in outputs]).mean()
        trues = sum([list(x['target']) for x in outputs], [])
        preds = sum([list(x['pred']) for x in outputs], [])
        
        n = random.choices(range(len(trues)), k=2)
        for i in n:
          print(f"\nSample Target: {trues[i]}\nPrediction: {preds[i]}\n")
        
        f1 = []
        for true, pred in zip(trues, preds):
            f1.append(compute_f1(a_gold=true, a_pred=pred))
        f1_val = np.mean(f1)


        self.log("val_loss", val_loss, prog_bar=True)
        self.log("val_f1", f1_val, prog_bar=True)
  
    def test_step(self, test_batch, batch_idx):
        
        # input_values, labels = test_batch['input_values'], test_batch['labels']
        target = test_batch["labels"]
        test_loss = self.model(**test_batch).loss
        
        pred_tokens = self._greedy_generate_tokens(test_batch["input_ids"])
        decoded_pred = self.tokenizer.batch_decode(pred_tokens, skip_special_tokens=True)

        decoded_target = [self.tokenizer.decode(tokens[tokens!=-100], skip_special_tokens=True) for tokens in target]
        return {"test_loss_step": test_loss, "pred": decoded_pred, "target": decoded_target}

    def test_epoch_end(self, outputs):
        loss = torch.stack([x['test_loss_step'] for x in outputs]).mean()
        trues = sum([list(x['target']) for x in outputs], [])
        preds = sum([list(x['pred']) for x in outputs], [])

        n = random.choices(range(len(trues)), k=2)
        for i in n:
          print(f"\nSample Target: {trues[i]}\nPrediction: {preds[i]}\n")
        
        f1 = []
        for true, pred in zip(trues, preds):
            f1.append(compute_f1(a_gold=true, a_pred=pred))
        f1_test = np.mean(f1)

        self.log("test_loss", loss, prog_bar=True)
        self.log("test_f1", f1_test, prog_bar=True)

    def configure_optimizers(self):

        optimizer = torch.optim.Adam(self.parameters(),
                         lr=self.hparams["lr"],
                         weight_decay=self.hparams["w_decay"])
        
        scheduler = LinearWarmupCosineAnnealingLR(optimizer, 
                                                  eta_min=0, # final-lr
                                                  warmup_start_lr=self.hparams["lr"],
                                                  warmup_epochs=self.hparams["hold_epochs"], # hold_epochs
                                                  max_epochs=self.hparams["max_epochs"])
        
        return {'optimizer': optimizer, 'lr_scheduler': scheduler, 'monitor':'val_f1'}  

In [17]:
def evaluate_ptt5(batch, gen_type):
    inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)

    with torch.no_grad():
        logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda"))

    pred_ids = torch.argmax(logits, dim=-1)
    print('labels:', batch['sentence'][0])
    
    preds_str = processor.batch_decode(pred_ids)
    print('preds_str:', preds_str[0])
    
    inputs = tokenizer(preds_str,
                       padding=True,
                       return_tensors='pt')
    # print(inputs)
    inputs['input_ids'] = inputs['input_ids'].cuda()
    # inputs['attention_mask'] = inputs['attention_mask'].cuda()
    with torch.no_grad():
        pred_strings = model_ptt5.predict_step(inputs, 0, gen_type=gen_type)
    print('pred_ptt5:', pred_strings[0])
    print('-----------')
    
    batch["pred_strings"] = pred_strings
     
    return batch

In [18]:
from transformers import T5Tokenizer
from transformers import T5ForConditionalGeneration

In [19]:
tokenizer = T5Tokenizer.from_pretrained('unicamp-dl/ptt5-base-portuguese-vocab')
model_pt = T5ForConditionalGeneration.from_pretrained('unicamp-dl/ptt5-base-portuguese-vocab')

sentence = 'a garoa fria vai parar a unidade de ligação'
input_tokens = tokenizer(sentence)
print(input_tokens)

{'input_ids': [7, 6367, 43, 44, 10139, 1057, 20, 33, 7, 1589, 4, 2496, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [20]:
PATH = "PTT5_LM/PTT5_LM/PTT5_noise_oldvocab_adafactor_test7-epoch=13-step=2071.ckpt"

model_ptt5 = PTT5Net.load_from_checkpoint(PATH, hparams=hparams).cuda().eval()

## Greedy decoding

In [18]:
result_ptt5 = test_dataset.map(evaluate_ptt5, batched=True, batch_size=8, fn_kwargs={'gen_type':None})

HBox(children=(FloatProgress(value=0.0, max=581.0), HTML(value='')))

labels: nem o radar nem os outros instrumentos detectaram o bombardeiro stealth
preds_str: nem um vadamen os autros infrumentos de tet um bambader sta
pred_ptt5: nem um pouco os altos investimentos deteve um bambader keras
-----------
labels: duas mulheres que sentamse para baixo lendo jornais
preds_str: duas mnescas sentamse para baixo de ndo jornais
pred_ptt5: duas meninas sentamse para baixo de dois jornais
-----------
labels: nenhum quarto pode ser visto no país
preds_str: nenhum quarto pode ser visto no país
pred_ptt5: nenhum quarto pode ser visto no país
-----------
labels: todo mundo é especial
preds_str: todo mundo é especial
pred_ptt5: todo mundo é especial
-----------
labels: também esperando pelo carro
preds_str: também ele esperando pelo carro
pred_ptt5: também ele esperando pelo carro
-----------
labels: antônia rodrigues de araújo
preds_str: antonia rodrigue dera úsel
pred_ptt5: antonia rodrigue deusel
-----------
labels: o rumo do páis é incerto
preds_str: o mundo paz é 

In [19]:
print("WER: {:2f}".format(100 * wer.compute(predictions=result_ptt5["pred_strings"], references=result_ptt5["sentence"])))

WER: 12.417059


In [20]:
del result_ptt5
torch.cuda.empty_cache()

## Beam-decoder

In [21]:
result_ptt5 = test_dataset.map(evaluate_ptt5, batched=True, batch_size=2, fn_kwargs={'gen_type':0})

HBox(children=(FloatProgress(value=0.0, max=2321.0), HTML(value='')))

labels: nem o radar nem os outros instrumentos detectaram o bombardeiro stealth
preds_str: nem um vadamen os autros infrumentos de tet um bambader sta


  "Passing `max_length` to BeamSearchScorer is deprecated and has no effect."


pred_ptt5: nem um pouco os altos equipamentos de ter um badminton está
-----------
labels: oito
preds_str: oito
pred_ptt5: oito
-----------
labels: realizar uma investigação para resolver o problema
preds_str: realizar uma investigação para resolver o problema
pred_ptt5: realizar uma investigação para resolver o problema
-----------
labels: menina e menino beijando nas sombras
preds_str: menina e menino beijando nas sombras
pred_ptt5: menina e menino beijando nas sombras
-----------
labels: duas mulheres que sentamse para baixo lendo jornais
preds_str: duas mnescas sentamse para baixo de ndo jornais
pred_ptt5: duas mulheres sentamse para baixo de dois jornais
-----------
labels: o mago lançou um feitiço muito poderoso sobre a cidade
preds_str: o mavo laxou o feitixo muito poderoso nas cidade
pred_ptt5: o menino lavou o nariz muito poderoso nas cidades
-----------
labels: duas mulheres e uma menina levantam com troféus
preds_str: duas mulheres e uma menina levantam com trofés
pred_ptt5:

In [22]:
print("WER: {:2f}".format(100 * wer.compute(predictions=result_ptt5["pred_strings"], references=result_ptt5["sentence"])))

WER: 12.246923


In [23]:
del result_ptt5
torch.cuda.empty_cache()

## Top-p and Top-K  decoding

In [22]:
result_ptt5 = test_dataset.map(evaluate_ptt5, batched=True, batch_size=8, fn_kwargs={'gen_type':4})

HBox(children=(FloatProgress(value=0.0, max=581.0), HTML(value='')))

labels: nem o radar nem os outros instrumentos detectaram o bombardeiro stealth
preds_str: nem um vadamen os autros infrumentos de tet um bambader sta
pred_ptt5: nem um pouco os altos investimentos deteve um bambader keras
-----------
labels: duas mulheres que sentamse para baixo lendo jornais
preds_str: duas mnescas sentamse para baixo de ndo jornais
pred_ptt5: duas mulheres sentamse para baixo de dois jornais
-----------
labels: nenhum quarto pode ser visto no país
preds_str: nenhum quarto pode ser visto no país
pred_ptt5: nenhum quarto pode ser visto no país
-----------
labels: todo mundo é especial
preds_str: todo mundo é especial
pred_ptt5: todo mundo é especial
-----------
labels: também esperando pelo carro
preds_str: também ele esperando pelo carro
pred_ptt5: também ele esperando pelo carro
-----------
labels: antônia rodrigues de araújo
preds_str: antonia rodrigue dera úsel
pred_ptt5: antonia rodrigue paraús
-----------
labels: o rumo do páis é incerto
preds_str: o mundo paz é

In [23]:
print("WER: {:2f}".format(100 * wer.compute(predictions=result_ptt5["pred_strings"], references=result_ptt5["sentence"])))

WER: 12.425566


In [24]:
del result_ptt5
torch.cuda.empty_cache()