 # **Fine-tuning Wav2Vec2 for Torgo DataSet with 🤗 Transformers**

# **Ensure that GPU and RAM is set up: will be needed for training purpose**

In [1]:
speaker = "M02"
repo_name = "torgo_xlsr_finetune-{}-2".format(speaker)


In [2]:

gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Mon Jul 24 22:21:41 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P8    11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# ensure enough memory present so that training does not stop
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

Your runtime has 13.6 gigabytes of available RAM

Not using a high-RAM runtime


# **Packages needed:** <br>
`datasets`: to transform the dataset <br>
`transformers`: upgraded version of RNN (allows to process a large quantity of text) <br>
`librosa`: needed for the audio files <br>
`jiwer`: **most important:** WER metric

In [4]:
%%capture
!pip install datasets==1.18.3
!pip install transformers==4.26.1
!pip install jiwer
!pip install librosa
!pip install huggingface-hub
# %cd /content/espnet/tools
!make CUDA_VERSION=10.2

# **Download the torgo dataset that will be finetuned against the state of the art model**

In [5]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
%cd /content
#!gdown 1TaP_UX2QiiKR_qWMeYtlMgHaXlYRliJ9

!mkdir downloads
%cd downloads
# Original Link
# !gdown 1Olr1d6Ro9gb5r9wF7JOk9YSmmbW97djs
# !tar -xzf torgo.tar.gz && ls torgo
# !ls torgo


/content
/content/downloads


In [7]:
!pwd

/content/downloads


In [51]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [52]:
!pwd

/content/downloads


In [19]:
# copy Torgo dataset from Google Drive
!pwd
!cp -r /content/drive/MyDrive/Torgo /content/downloads/Torgo


/content/downloads


In [53]:
!cd ..

In [54]:
!pwd

/content/downloads


In [6]:
%cd /content

/content


In [7]:
!pwd

/content


In [8]:
# download the output.csv file
!cp -r /content/drive/MyDrive/output_modified.csv /content/

cp: cannot stat '/content/drive/MyDrive/output_modified.csv': No such file or directory


# **Connect to Hugging Face to store the results of the model**

In [83]:
# to store the model checkpoints, we will need to install another package
%%capture
!apt install git-lfs

# **Crucial stage: Preparation of data, Tokenizer and Feature Extractor**

ASR models transcribe speech to text which leads to the requirement of a feature extractor and tokenizer: <br>
`feature extractor`: processes speech signal to the required input format: audio processing: feature vector <br>
`tokenizer`: converts the model's output to text format <br>
`wave2vec2` has the following tokenizer: `wave2vec2CTCTokenizer` and feature extractor: `wave2vec2FeatureExtractor`

# **Tokenizer**

In [9]:
# load the dataset, observe structure, divide into training and test set (evaluation later)
from datasets import load_dataset, load_metric, DatasetDict, Dataset, Audio

data = load_dataset('csv', data_files='/content/output_modified.csv')
print(data)

FileNotFoundError: ignored

In [85]:
repo_name

'torgo_xlsr_finetune-M02-2'

In [86]:
# creating a train and testing dataset
torgo_dataset = DatasetDict()

torgo_dataset['train'] = data['train'].filter(lambda x: x != speaker, input_columns=['speaker_id'])
torgo_dataset['test'] = data['train'].filter(lambda x: x == speaker, input_columns=['speaker_id'])

  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

In [87]:
# remove columns that we do not need
torgo_dataset = torgo_dataset.remove_columns(["Unnamed: 0"])
torgo_dataset

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0.1', 'session', 'text', 'audio', 'speaker_id', 'duration'],
        num_rows: 4663
    })
    test: Dataset({
        features: ['Unnamed: 0.1', 'session', 'text', 'audio', 'speaker_id', 'duration'],
        num_rows: 764
    })
})

In [88]:
# ignore special characters: with no language model hard to classify them
# also convert all the text into lowercase: makes life much more easier
import re
chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"]'

def remove_special_characters(batch):
    batch["text"] = re.sub(chars_to_ignore_regex, '', batch["text"]).lower() + " "
    return batch

In [89]:
# use map function to carry out the process/transformation
torgo_dataset = torgo_dataset.map(remove_special_characters)

0ex [00:00, ?ex/s]

0ex [00:00, ?ex/s]

In [90]:
# write a function that will first concatenate all the transcriptions to one single transcription and them we map them to characters
# In short: creating tokens: determine the length of array

def extract_all_chars(batch):
  all_text = " ".join(batch["text"])
  vocab = list(set(all_text))
  return {"vocab": [vocab], "all_text": [all_text]}

vocabs = torgo_dataset.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=torgo_dataset.column_names["train"])


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [91]:
# we create the union of all distinct letters in the training dataset and test dataset and convert the resulting list into
# an enumerated dictionary

vocab_list = list(set(vocabs["train"]["vocab"][0]) | set(vocabs["test"]["vocab"][0]))
vocab_dict = {v: k for k, v in enumerate(vocab_list)}
vocab_dict

{'p': 0,
 'a': 1,
 'r': 2,
 'o': 3,
 'q': 4,
 'g': 5,
 'm': 6,
 'b': 7,
 'y': 8,
 'i': 9,
 'd': 10,
 'n': 11,
 "'": 12,
 'h': 13,
 't': 14,
 'l': 15,
 's': 16,
 'c': 17,
 'j': 18,
 'x': 19,
 'z': 20,
 'e': 21,
 'k': 22,
 ' ': 23,
 'u': 24,
 'w': 25,
 'f': 26,
 'v': 27}

In [92]:
# from the above tokens: we given the space token visibility by using the symbol (|)
vocab_dict["|"] = vocab_dict[" "]
del vocab_dict[" "]

In [93]:
# adding tokens for anything unknown discovered and padding for the blank token
vocab_dict["[UNK]"] = len(vocab_dict)
vocab_dict["[PAD]"] = len(vocab_dict)
print(len(vocab_dict))

30


Linear layer we add on top of the pretrained checkpoint will have an output dimension of 32.

In [94]:
# jsonify the file next
import json
with open('vocab.json', 'w') as vocab_file:
    json.dump(vocab_dict, vocab_file)

In [95]:
# instantiate an object of the tokenizer class
from transformers import Wav2Vec2CTCTokenizer

tokenizer = Wav2Vec2CTCTokenizer("./vocab.json", unk_token="[UNK]", pad_token="[PAD]", word_delimiter_token="|")

In [96]:
# upload tokenizer to the Hugging Face Repo
# repo_name = "base-torgo"

In [97]:
# push it to Hugging face to use it later
tokenizer.push_to_hub(repo_name)

CommitInfo(commit_url='https://huggingface.co/monideep2255/torgo_xlsr_finetune-M02-2/commit/18455693f0308c0c1c84479b96d95b10623682ef', commit_message='Upload tokenizer', commit_description='', oid='18455693f0308c0c1c84479b96d95b10623682ef', pr_url=None, pr_revision=None, pr_num=None)

# **Feature Extractor**

To convert speech to text: it has to first discretized: create individual units: called **sampling**

A higher sampling rate leads to a better approximation of the real speech signal but also necessitates more values per second

A Wav2Vec2 feature extractor object requires the following parameters to be instantiated:

- `feature_size`: Speech models take a sequence of feature vectors as an input. While the length of this sequence obviously varies, the feature size should not. In the case of Wav2Vec2, the feature size is 1 because the model was trained on the raw speech signal ${}^2$.
- `sampling_rate`: The sampling rate at which the model is trained on.
- `padding_value`: For batched inference, shorter inputs need to be padded with a specific value
- `do_normalize`: Whether the input should be *zero-mean-unit-variance* normalized or not. Usually, speech models perform better when normalizing the input
- `return_attention_mask`: Whether the model should make use of an `attention_mask` for batched inference. In general, models should **always** make use of the `attention_mask` to mask padded tokens. However, due to a very specific design choice of `Wav2Vec2`'s "base" checkpoint, better results are achieved when using no `attention_mask`.

In [98]:
from transformers import Wav2Vec2FeatureExtractor

feature_extractor = Wav2Vec2FeatureExtractor(feature_size=1, sampling_rate=16000, padding_value=0.0, do_normalize=True, return_attention_mask=True)

In [99]:
# wrap the feature extractor and tokenizer into a single processor class: when testing will only need model and processor object
from transformers import Wav2Vec2Processor

processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)

#**Prepare Dataset**

In [100]:
torgo_dataset = torgo_dataset.cast_column("audio", Audio(sampling_rate=16000))

In [101]:
torgo_dataset["train"][4]["audio"]

{'path': '/content/downloads/Torgo/F01/Session1/wav_arrayMic/0012.wav',
 'array': array([-0.00201416, -0.00595093, -0.00567627, ...,  0.0005188 ,
         0.00094604, -0.00186157], dtype=float32),
 'sampling_rate': 16000}

In [102]:
# testing out sample audio files that have been loaded
import IPython.display as ipd
import numpy as np
import random

rand_int = random.randint(0, len(torgo_dataset["train"]))

#print(torgo_dataset["train"][rand_int]["text"])
#ipd.Audio(data=np.asarray(torgo_dataset["train"][rand_int]["audio"]["array"]), autoplay=True, rate=16000)

# **Processing the dataset expected by the model**

1. load and resample the audio data: call batch["audio"]
2. extract values from the loaded audio file
3. encode the transcriptions to label ids

In [103]:
def prepare_dataset(batch):
    # load the the audio data into batch
    audio = batch["audio"]

    # extract the values from the audio files
    batch["input_values"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0]
    batch["input_length"] = len(batch["input_values"])

    # encode it to the label ids
    with processor.as_target_processor():
        batch["labels"] = processor(batch["text"]).input_ids
    return batch

In [104]:
torgo_dataset = torgo_dataset.map(prepare_dataset, remove_columns=torgo_dataset.column_names["train"], num_proc=4)



Long input sequences require a lot of memory. Since `Wav2Vec2` is based on `self-attention` the memory requirement scales quadratically with the input length for long input sequences.

In [105]:
torgo_dataset

DatasetDict({
    train: Dataset({
        features: ['input_values', 'input_length', 'labels'],
        num_rows: 4663
    })
    test: Dataset({
        features: ['input_values', 'input_length', 'labels'],
        num_rows: 764
    })
})

### changed

In [106]:
max_input_length_in_sec = 9.0
min_input_length_in_sec= 1.0
torgo_dataset["train"] = torgo_dataset["train"].filter(lambda x: x < max_input_length_in_sec * processor.feature_extractor.sampling_rate, input_columns=["input_length"])
torgo_dataset["train"] = torgo_dataset["train"].filter(lambda x: x > min_input_length_in_sec * processor.feature_extractor.sampling_rate, input_columns=["input_length"])
torgo_dataset["test"] = torgo_dataset["test"].filter(lambda x: x < max_input_length_in_sec * processor.feature_extractor.sampling_rate, input_columns=["input_length"])
torgo_dataset["test"] = torgo_dataset["test"].filter(lambda x: x > min_input_length_in_sec * processor.feature_extractor.sampling_rate, input_columns=["input_length"])


  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [107]:
!git pull

fatal: not a git repository (or any of the parent directories): .git


# **Training and Evaluation**

**Need for a  data collabtor** <br>
wave2vec2 has a much larger input length as compared to the output length. For the input size, it is efficient to pad training batches to the longest sample in the batch (not overall sample)

In [108]:
# data collator

import torch

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union

@dataclass
class DataCollatorCTCWithPadding:
    """
    Data collator that will dynamically pad the inputs received.
    Args:
        processor (:class:`~transformers.Wav2Vec2Processor`)
            The processor used for proccessing the data.
        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
            among:
            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
              sequence if provided).
            * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
              maximum acceptable input length for the model if that argument is not provided.
            * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
              different lengths).
    """

    processor: Wav2Vec2Processor
    padding: Union[bool, str] = True

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        # split inputs and labels since they have to be of different lenghts and need
        # different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

        batch = self.processor.pad(
            input_features,
            padding=self.padding,
            return_tensors="pt",
        )
        with self.processor.as_target_processor():
            labels_batch = self.processor.pad(
                label_features,
                padding=self.padding,
                return_tensors="pt",
            )

        # replace padding with -100 to ignore loss correctly
        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)

        batch["labels"] = labels

        return batch

In [109]:
data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)

- Evaluation metric. During training, the model should be evaluated on the word error rate. We should define a `compute_metrics` function accordingly

- Load a pretrained checkpoint. We need to load a pretrained checkpoint and configure it correctly for training.

- Define the training configuration.

After having fine-tuned the model, we will correctly evaluate it on the test data and verify that it has indeed learned to correctly transcribe speech.

In [110]:
# load the word error rate metric
wer_metric = load_metric("wer")

Downloading:   0%|          | 0.00/1.90k [00:00<?, ?B/s]

In [111]:
def compute_metrics(pred):
    pred_logits = pred.predictions
    pred_ids = np.argmax(pred_logits, axis=-1)

    pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id

    pred_str = processor.batch_decode(pred_ids)
    # we do not want to group tokens when computing the metrics
    label_str = processor.batch_decode(pred.label_ids, group_tokens=False)

    print(pred_str)
    wer = wer_metric.compute(predictions=pred_str, references=label_str)

    return {"wer": wer}

### **Model assigning**

In [112]:
# assign the model
from transformers import Wav2Vec2ForCTC

model = Wav2Vec2ForCTC.from_pretrained(
    # "yip-i/uaspeech-pretrained",
    "facebook/wav2vec2-large-xlsr-53",
    ctc_loss_reduction="mean",
    pad_token_id=processor.tokenizer.pad_token_id,
)


# model: yongjian/wav2vec2-large-a contains [self-training] and has best wer of 0.557

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.77k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.27G [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/wav2vec2-large-xlsr-53 were not used when initializing Wav2Vec2ForCTC: ['project_q.weight', 'quantizer.codevectors', 'project_hid.weight', 'project_q.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias']
- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-xlsr-53 and are newly initialized: ['lm_head.weight', 'lm_head.bias']
You should probably TRAIN this model on a down-stream task to be able to u

The first component of wav2vec2 has acoustic features from the raw speech signal. This portion has been pretrained sufficiently and does not need to be pretrained anymore and hence freezed.

# **Define the parameters that are related to model training**

To give more explanation on some of the parameters:
- `group_by_length` makes training more efficient by grouping training samples of similar input length into one batch. This can significantly speed up training time by heavily reducing the overall number of useless padding tokens that are passed through the model
- `learning_rate` and `weight_decay` were heuristically tuned until fine-tuning has become stable. Note that those parameters strongly depend on the Timit dataset and might be suboptimal for other speech datasets.

In [113]:
model.freeze_feature_encoder()

In [114]:
for name, param in model.named_parameters():
     print(name, param.requires_grad)

wav2vec2.masked_spec_embed True
wav2vec2.feature_extractor.conv_layers.0.conv.weight False
wav2vec2.feature_extractor.conv_layers.0.conv.bias False
wav2vec2.feature_extractor.conv_layers.0.layer_norm.weight False
wav2vec2.feature_extractor.conv_layers.0.layer_norm.bias False
wav2vec2.feature_extractor.conv_layers.1.conv.weight False
wav2vec2.feature_extractor.conv_layers.1.conv.bias False
wav2vec2.feature_extractor.conv_layers.1.layer_norm.weight False
wav2vec2.feature_extractor.conv_layers.1.layer_norm.bias False
wav2vec2.feature_extractor.conv_layers.2.conv.weight False
wav2vec2.feature_extractor.conv_layers.2.conv.bias False
wav2vec2.feature_extractor.conv_layers.2.layer_norm.weight False
wav2vec2.feature_extractor.conv_layers.2.layer_norm.bias False
wav2vec2.feature_extractor.conv_layers.3.conv.weight False
wav2vec2.feature_extractor.conv_layers.3.conv.bias False
wav2vec2.feature_extractor.conv_layers.3.layer_norm.weight False
wav2vec2.feature_extractor.conv_layers.3.layer_norm.bia

In [115]:
# clear out cuda memory
import torch
torch.cuda.empty_cache()

### changed

In [116]:
# parameters for the training
from transformers import TrainingArguments

training_args = TrainingArguments(
  output_dir = repo_name,
  group_by_length=True,
  per_device_train_batch_size=8,
  evaluation_strategy="steps",
  num_train_epochs=30,
  fp16=False,
  gradient_checkpointing=True,
  save_steps=500,
  eval_steps=500,
  logging_steps=500,
  learning_rate=1e-4,
  weight_decay=0.005,
  warmup_steps=1000,
  save_total_limit=2,
)

In [117]:
# pass all instances to the trainer as the final step before training
from transformers import Trainer

trainer = Trainer(
    model=model,
    data_collator=data_collator,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=torgo_dataset["train"],
    eval_dataset=torgo_dataset["test"],
    tokenizer=processor.feature_extractor,
)

In [None]:
trainer.train()

The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4416
  Num Epochs = 30
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 16560
  Number of trainable parameters = 311261344


Step,Training Loss,Validation Loss,Wer
500,23.2194,3.297529,0.981969
1000,3.3856,3.222471,0.981969
1500,2.9403,2.780463,0.981969
2000,2.6255,2.382952,0.983356
2500,1.8901,1.792012,1.377947
3000,1.2594,1.784516,1.334258
3500,1.0008,1.692319,1.31484
4000,0.7896,1.544352,1.282247
4500,0.6373,1.554703,1.26699
5000,0.5639,1.592442,1.193481


The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i',

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-500/preprocessor_config.json
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i',

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-1000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-1000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-1000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-1000/preprocessor_config.json
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i',

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-1500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-1500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-1500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-1500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'r  i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', '

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-2000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-2000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-2000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-2000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-1000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['r  i', 'd i', 'p  i', 'pt  i', 'm  i', 'ip i', 't i', 'r i', 'r i', 't i', 'ip    i', 'prp  i', 'gd fs  t rmtn nn lc wit i', 'pr  i', 'pt   i', 's  nr nne t   i', 't  i', 'cr i', 'r  i', 'i', 'gt i', 'n  i', 'trn  i', 'g  i', 'wrm   i', 'r  i', 'trn i', 'iy cnmt  i', 't  i', 'trp  i', 'rt  i', 'd  i', 'tp i', 'n  i', 'i', 'tr  i', 'c i', 'wr  i', 'bt  i', 'tbb i', 'trp    i', 'h t dcktns t r  t i', 'd  i', 't  i', 'pg    i', 'e  i', 't  i', 'cr  i', 'n   i', 'n  i', 'yt st tingngt w l   i', 'rinn btn   i', 'cr  i', 'trn  i', 'bt  i', 'bw ent tm t wt nnm gs i', 'hr r   nntpt i', 'bt  i', 'pt  i', 'n  i', 'rt  i', 'r  i', 'ip  i', 'tik  i', 'trn  i', 'db  i', 'dw  i', 'i', 'g  i', 'frm  i', 'fn  i', 'br  i', 'bnd  i', 'crp  i', 'drk  i', 'tm  i', 'mrt  i', 'fr  i', 'pt  i', 'mnf rnby pdc t ws c nd i', 'drk  i', 'rgc  i', 'r i', 'r  i', 'igy pnr  i', 'r    i', 'ct  i', 'thnc mtr cn r   tht i', 'c i', 'rt i', 'prk  i', 'fr  i', 'c  i', 'tgt i', 'crn i', 'pr i', 't  i', 'the cpmfcwm th   

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-2500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-2500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-2500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-2500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-1500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['ro   i', 'fed i', 'alha  i', 'pat  i', 'farm  i', 'sirp i', 'dage i', 'rin i', 'fair i', 'sreat i', 'ship i', 'parp i', 'garfatleslov to by mdan anas langa i', 'buble i', 'bit  i', 'vl sh as snearly nintev lisorslov     i', 'trubble i', 'chair i', 'rin i', 'tho    i', 'goglet i', 'varm i', 'porn i', 'grow i', 'swarm i', 'shair i', 'i o tarant i', 'i can mroad  i', 'st i', 'starp   i', 'rite i', 'doum  i', 'trip i', 'no i', 'lef i', 'storm i', 'chair i', 'slorm i', 'bat  i', 'stubble i', 'rap  i', 'she avs hor douk so anihewas wbatther al yar  i', 'horm  i', 'sheat  i', 'bauga i', 'sisiv  i', 'sfeap   i', 'arm  i', 'ktin  i', 'know  i', 'yatstel thinks theos wer il yoslevr i', 'msy inars sevl bn i', 'flor   i', 'triaig  i', 'biup i', 'bwe avvovehore ntim t wal mor an smo glas i', 'ver jou mntollaf i', 'brn  i', 'beat  i', 'd    i', 'rite      i', 'rangng  i', 'sip i', 'stik i', 'storm i', 'dubble  i', 'dowm i', 'fom i', 'glow i', 'form  i', 'fwlarm i', 'brve i', 'varm   i', 'shirp i',

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-3000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-3000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-3000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-3000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-2000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['ggrof  i', 'fed i', 'alha  i', 'pat  i', 'fum  i', 'serip i', 'tagge i', 'rad i', 'fai i', 'sreat    i', 'sip i', 'parf i', 'godfather loved  to be mo tan anas lacka i', 'puble i', 'bet  i', 'wil she yess nearly nonjus pi yo sv   i', 'trouble i', 'char i', 'rade i', 'thea   i', 'gaget i', 'boun i', 'torn i', 'go i', 'swarm i', 'shr i', 'i hal tarn i', 'i can mead i', 'sfat     i', 'starp  i', 'wright i', 'do i', 'tar i', 'kno  i', 'le i', 'storm i', 'chair i', 'slorm  i', 'bat  i', 'stuble i', 'trape    i', 'she had yor doksu anwic ahe fatthera er i', 'phon  i', 'shet     i', 'paca i', 'se     i', 'stink  i', 'car  i', 'nuting i', 'no   i', 'yet is sevthingthecs  weps the yeslever i', 'usuly mina sevr bon i', 'ftlor   i', 'trae  i', 'bu i', 'we hav ofte nos chim wal more and smol gles i', 'the jugged nontol let i', 'bond  i', 'beat  i', 'ne i', 'wright   i', 'rang   i', 'sip i', 'step     i', 'storm i', 'tube i', 'tow i', 't i', 'glof i', 'form  i', 'florm i', 'brave i', 'born  i', '

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-3500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-3500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-3500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-3500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-2500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow  i', 'fed  i', 'alpha  i', 'pat  i', 'farm  i', 'sirp i', 'dagger i', 'raind i', 'fair i', 'sht  i', 'ship i', 'par    i', 'god fathere leoved to be mo dern anars lenka i', 'buble   i', 'bet  i', 'whi she is nearly notive tw a er lo   i', 'trouble  i', 'chair i', 'raive  i', 'th  i', 'godglet i', 'van   i', 'torn  i', 'gow i', 'swarm i', 'shair  i', 'i le ctaron i', 'i can mede   i', 'shit  i', 'steirp  i', 'right  i', 'dume  i', 'tiep  i', 'known  i', 'leftt i', 'stor  i', 'chair i', 'swarm  i', 'bat  i', 'stuble i', 'trape  i', 'she ad or dok sut  andgiwh vatther a yere  i', 'pown  i', 'shet  i', 'paa i', 'uet  i', 'pik  i', 'car  i', 'knutting  i', 'known  i', 'yt  seve fing futs weps ther yeevr i', 'usue mina several bn i', 'flor    i', 'trainee  i', 'bu   i', 'we have ofe nuse jhim towa more and smo gleas i', 'ver jlou nontl left i', 'bowrn  i', 'bet  i', 'ther  i', 'rihte  i', 'ranke  i', 'sip   i', 'stick  i', 'storm i', 'duble  i', 'down   i', 'fit i', 'glow i', 'form  i

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-4000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-4000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-4000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-4000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-3000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'fed i', 'alpha  i', 'pot  i', 'farm i', 'si i', 'dagger i', 'rain i', 'fair i', 'set     i', 'ship i', 'park i', 'god father lifed to be modarn anas langage i', 'puble i', 'bat    i', 'vi sh as nearly nine t years o i', 'troble i', 'chair i', 'rage i', 'thugh i', 'goadgeet i', 'farn i', 'torn i', 'go i', 'swarm i', 'shaire i', 'ia teon i', 'i cane mead i', 'shite i', 'stirp    i', 'wrighte i', 'no i', 'tair i', 'no i', 'lef i', 'stor i', 'chair i', 'slor i', 'bat    i', 'stuble i', 'trape i', 'she ad your dok shu  in griace ash fater a yer i', 'pon  i', 'shet        i', 'pacha i', 'si i', 'spane   i', 'car  i', 'kneain i', 'kno i', 'yeet hy stethins s thucs wery yuseer i', 'musiminas several bne i', 'flor i', 'trae i', 'bug i', 'we have oft nouge jimtwal more and smo glas i', 'ta jous montal lest i', 'bon i', 'beat   i', 'nw i', 'wright i', 'range i', 'sip i', 'stic i', 'storm i', 'touble i', 'tow i', 'fu i', 'glow i', 'form i', 'florm i', 'brave i', 'born i', 'chair i', 's

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-4500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-4500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-4500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-4500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-3500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'fed i', 'alpha  i', 'pot  i', 'farm i', 'sip i', 'dager i', 'range i', 'fair i', 'st i', 'ship i', 'park i', 'godfather licf to be modernanerslackage i', 'spuble i', 'bit   i', 'whei she snearly notepisarso i', 'trouble i', 'chair i', 'raite i', 'thegh i', 'godglet i', 'varm i', 'torn i', 'go i', 'swarm i', 'share i', 'i atarn i', 'i canmeate i', 'fite i', 'starp     i', 'write i', 'do i', 'tair i', 'no i', 'left i', 'store i', 'chair i', 'swore i', 'bat    i', 'stuble i', 'trope i', 'she had our dok in gdwhwather a year i', 'por   i', 'shet  i', 'paa i', 'e i', 'pake i', 'carm  i', 'ne i', 'no i', 'yet thestethinks  thu wer  ebere i', 'usuinus severoa bnd i', 'flore i', 'traee i', 'bu i', 'we hav oftenoe jump wa more and smok gles i', 'thejug monl left i', 'brn i', 'beat  i', 'de i', 'rite i', 'rage i', 'sip i', 'stick   i', 'storm i', 'tuble i', 'tom i', 'two i', 'glow i', 'form i', 'form i', 'rav i', 'bornm i', 'chair i', 'dark i', 'storm i', 'groat  i', 'swarm   i', 'ho

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-5000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-5000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-5000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-5000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-4000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'fed i', 'alpha  i', 'pot  i', 'farm i', 'sharp i', 'dagoe i', 'range i', 'fair i', 'sht     i', 'ship i', 'park i', 'god fatherslife to be mo drn ans lackage i', 'parble i', 'bit  i', 'whil sh as nearly nocive xis years  ov i', 'trouble i', 'hair i', 'rate i', 'though i', 'godglet i', 'vam i', 'torn i', 'go i', 'swarm i', 'share i', 'i a chairn  i', 'i can meat i', 'shate  i', 'starp  i', 'write i', 'new    i', 'tair i', 'no   i', 'left i', 'stor i', 'chair i', 'swore i', 'bat    i', 'stubble i', 'trap      i', 'she had your toxsiv and griace walse fater al yhere  i', 'form  i', 'sht  i', 'para i', 'sbit   i', 'fk  i', 'car  i', 'kne  i', 'kno  i', 'yeet he stiltinks s thoucs swocly ys av i', 'musua minus several bons i', 'fflor   i', 'trafe    i', 'bou i', 'we have often hoed jiumd wal more and smo glas i', 'there is juges munal left i', 'fon i', 'fet  i', 'd  i', 'rite   i', 'range  i', 'sip i', 'stick i', 'storm i', 'stubble  i', 'down i', 'chwo i', 'glow i', 'form i', '

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-5500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-5500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-5500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-5500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-4500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'fed i', 'alpha  i', 'pat i', 'farm i', 'sip i', 'daggoe i', 'rave i', 'fair i', 'shet i', 'ship i', 'park i', 'godfathher licetd to be sm tearn ansers langage i', 'sprgble i', 'pit  i', 'will she is nearly not teyears o i', 'trouble i', 'chair i', 'raide i', 'theh i', 'goadglet i', 'varm i', 'torn i', 'gow i', 'swarm i', 'share i', 'i  tarn i', 'i can meoat i', 'shet i', 'stirp  i', 'write i', 'down   i', 'tairp i', 'no i', 'left i', 'store i', 'chair i', 'swore i', 'bat  i', 'stouble i', 'trap i', 'she ad your darksut in gitwalh father al year i', 'porn   i', 'shet  i', 'paa i', 'i    i', 'spinke  i', 'car  i', 'knein i', 'no   i', 'yet he stdd trinkstheu  swart ebl i', 'usuly inus several bown i', 'flor  i', 'trave   i', 'boup i', 'we have often  ourgeed jump  wal more and smokegleas i', 'thejuges nonaa left i', 'born i', 'beat   i', 'down    i', 'write i', 'range   i', 'ship i', 'stick i', 'storm i', 'stouble i', 'down i', 'two i', 'glow i', 'form i', 'sorm   i', 'rave i

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-6000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-6000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-6000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-6000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-5000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow  i', 'fed i', 'alpha  i', 'pot  i', 'farm  i', 'sip i', 'dager i', 'range i', 'fair i', 'shet   i', 'ship i', 'park  i', 'godfather liged to be modern aners lankage i', 'spubble   i', 'bit  i', 'wi she as nearly nonciyor to    i', 'trouble  i', 'chair i', 'raid i', 'though  i', 'godget i', 'ban   i', 'corn i', 'gow i', 'swarm i', 'share  i', 'i a tarn i', 'i can meat  i', 'fate  i', 'stirp  i', 'wrighte  i', 'dou  i', 'tair i', 'know  i', 'les i', 'store i', 'chair i', 'swore i', 'bat    i', 'stubble i', 'trap      i', 'she had yor docsut in gdwash fater all year i', 'horn  i', 'shet  i', 'praga    i', 'ssi  i', 'fanke  i', 'car  i', 'kmean  i', 'kno  i', 'yeet hsstthingthes wer  abl i', 'usd reminus several bon i', 'flor   i', 'trane  i', 'wou   i', 'we have often notgeed jhuim more and smoke glas i', 'the jugs muna left i', 'font   i', 'beat  i', 'do  i', 'wrihte    i', 'range  i', 'sip   i', 'stick  i', 'storm i', 'stubble  i', 'down i', 'fwo i', 'glow i', 'form  i', 'florm  

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-6500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-6500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-6500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-6500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-5500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow   i', 'fed i', 'alpha  i', 'pot i', 'farm i', 'sirp i', 'gagger i', 'ranve i', 'fair i', 'shet i', 'ship i', 'park i', 'goad fatherslifet to be my dernanmers lankage i', 'spuble i', 'bit    i', 'wil she is  nearly nocive  yerso i', 'trouble i', 'chair i', 'rave i', 'theugh i', 'godglet i', 'brarn i', 'corn i', 'go i', 'swarm i', 'share i', 'i al ctarn i', 'i can mead i', 'fed   i', 'steirp        i', 'righte i', 'nom   i', 'tair i', 'no i', 'yef i', 'storm i', 'chair i', 'swlore i', 'bat  i', 'stubble i', 'trape i', 'she had your dorksuit in gredswae fater al year i', 'harn  i', 'shet  i', 'praca i', 'sze    i', 'spink   i', 'car  i', 'nonu i', 'no  i', 'yeet he  stil trinks tefss  alyasever i', 'musuly minus several brouwns i', 'flor   i', 'train   i', 'boupe i', 'we have often ure jump fore more and smoke glas i', 'there is jugs nonta left i', 'fborn   i', 'beat   i', 'new  i', 'righte   i', 'range     i', 'sip i', 'stick  i', 'storm i', 'stubble  i', 'down i', 'two i', 'glow 

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-7000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-7000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-7000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-7000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-6000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'fed i', 'alpha  i', 'pat i', 'farm i', 'sirp i', 'daor i', 'rave i', 'fair i', 'shet i', 'sip i', 'par i', 'godfatherslieds to be m jrnaners lankage i', 'sparkle i', 'pit  i', 'wil she yesnearly notc expit yor lo i', 'trouble i', 'chair i', 'rate i', 'thagh i', 'godget i', 'brawn i', 'torn i', 'go i', 'swarm i', 'share i', 'i tron i', 'i can mead i', 'fate      i', 'stirp   i', 'wrighte i', 'dom   i', 'tair i', 'now i', 'lys i', 'store i', 'chair i', 'swore i', 'bat   i', 'stubble i', 'trap i', 'she had your doxsuit and gredware fwater all year i', 'horm  i', 'shet  i', 'paca i', 'selict   i', 'sike   i', 'car  i', 'kning i', 'know  i', 'yeeth stif trink thouhtss swrlyebr i', 'musuly minus several butons i', 'flor i', 'train  i', 'bot i', 'we have often uged jum a more and smoke glas i', 'there juges mun left i', 'forn i', 'beat   i', 'dorm    i', 'right  i', 'range  i', 'sip i', 'stirp i', 'storm i', 'stubble  i', 'down i', 'two i', 'glow i', 'form i', 'florm   i', 'braave

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-7500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-7500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-7500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-7500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-6500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'fed  i', 'alpha  i', 'pat   i', 'farm i', 'si i', 'daggeor i', 'rave i', 'fair i', 'sete    i', 'ship i', 'park i', 'godfatherslicet to be m drnaneurs lankeage i', 'sparkle i', 'bat    i', 'will she etnearly notceth expsel lo i', 'trouble i', 'chair i', 'rave i', 'thaugh   i', 'godglet i', 'bawn i', 'torn i', 'go i', 'swarm i', 'share  i', 'rextearn i', 'i can meate i', 'shate        i', 'starp  i', 'wrigt i', 'no    i', 'tair i', 'no   i', 'lef i', 'store i', 'chair i', 'slore i', 'bat  i', 'stubble i', 'trap       i', 'she had t toxsi and greaswashe father al year i', 'hormn  i', 'shet  i', 'paca i', 'set      i', 'spainke  i', 'car  i', 'knainge i', 'no   i', 'yetthe  stilfying  tectswearlyaleble i', 'musualy minus several brruoton i', 'flor    i', 'train    i', 'bot   i', 'we have often noticged jum to wae more and smoe glas i', 'thejugs muna left i', 'bown i', 'beat  i', 'dorm  i', 'right   i', 'range  i', 'sip   i', 'stick    i', 'storm i', 'doubble  i', 'town i', 'tw

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-8000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-8000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-8000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-8000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-7000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'fed   i', 'alpha  i', 'pot   i', 'farm  i', 'sip i', 'dago i', 'rave i', 'fair i', 'shet  i', 'ship i', 'park i', 'goadfather liget to bad my tran ansers lackgage i', 'spurkle i', 'bit   i', 'will she yst nearly notc expit your tlog i', 'trouble i', 'chair i', 'rate i', 'thegh i', 'godgeet i', 'brawn   i', 'orn i', 'go i', 'swarm i', 'share   i', 'al tant i', 'i can meat i', 'fate    i', 'starp  i', 'right i', 'no  i', 'tair i', 'no   i', 'lef i', 'store i', 'chair i', 'swlor i', 'bat  i', 'stubble i', 'trop      i', 'she had th dcsi and godwae water al year i', 'hoan  i', 'sht  i', 'paca i', 'slipt   i', 'spink  i', 'car  i', 'kneing  i', 'no      i', 'yet he  stilltink thts wrly able i', 'muse onus several btons i', 'flor  i', 'train  i', 'bu   i', 'we have often use jump wal mare and smokelas i', 'ther jugs munal left i', 'boant i', 'beat  i', 'new  i', 'right      i', 'range  i', 'sip   i', 'stic     i', 'storm i', 'sdubble  i', 'down i', 'two i', 'glow i', 'form i', 'f

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-8500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-8500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-8500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-8500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-7500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'sed    i', 'alpha  i', 'pat  i', 'farm  i', 'slip i', 'dago i', 'rave i', 'fair i', 'shet   i', 'slip i', 'park i', 'gadfather liges to beak mo dananswers lackage i', 'purkle    i', 'bet  i', 'wille she yas nearly notces the xpis yolo i', 'trouble i', 'chair i', 'rade  i', 'theugh      i', 'gadget i', 'brawn     i', 'torn i', 'go i', 'swarm i', 'share  i', 'i l tans i', 'i can beate i', 'shate     i', 'steirp  i', 'rought  i', 'no  i', 'tair i', 'no   i', 'yes i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trot     i', 'she had you dxsu in gredwashe water al year i', 'part  i', 'shet  i', 'praca  i', 'sdicpt    i', 'faint  i', 'car  i', 'mane  i', 'no    i', 'yet  hestiilfinkd fts  swarsly aleuble i', 'musu minus several butous i', 'flor  i', 'traince  i', 'bu   i', 'we have often uge jump twa moe and smoke gleas i', 'the juged munt left i', 'bornt i', 'bit  i', 'no  i', 'right  i', 'rage   i', 'sip   i', 'stick  i', 'storm i', 'buble  i', 'down i', 'two i', '

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-9000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-9000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-9000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-9000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-8000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow   i', 'fed  i', 'alpha  i', 'pat  i', 'farm  i', 'sip i', 'daggor i', 'rave i', 'fair i', 'shet   i', 'ship i', 'park i', 'gadfatherligets to bea m ananers lankgage i', 'parkle   i', 'pit  i', 'will sh snearly notcis ti yearsov i', 'trouble  i', 'cchair i', 'rat  i', 'thagh  i', 'gadge i', 'brawn  i', 'torn i', 'go i', 'sworm i', 'share  i', 'i have tarn i', 'i can meoat i', 'shet   i', 'starp    i', 'wright  i', 'knew  i', 'tar i', 'no     i', 'yes i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trot  i', 'she had your dksuit in greaw waterall year i', 'hoat  i', 'shet  i', 'praca  i', 'it     i', 'paint  i', 'car   i', 'knone  i', 'no  i', 'yeetsse frink thots   sworsly aeble i', 'musuay minus several butoon i', 'flor   i', 'train  i', 'bho   i', 'we have often ue jump amoe and smoke gla i', 'thejug mun left i', 'poat     i', 'beat  i', 'knew  i', 'wriht  i', 'range  i', 'sip  i', 'stick  i', 'storm i', 'buble  i', 'town i', 'two i', 'glow i', 'form  i', 'florm  i

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-9500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-9500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-9500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-9500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-8500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'fed  i', 'alpha  i', 'pot   i', 'farm  i', 'sip i', 'daggo i', 'rave i', 'fair i', 'shet i', 'ship i', 'park i', 'god fatherslichts to be my darn ansers lackgage i', 'spurkble i', 'pit  i', 'wll sh ye snearly notcis with yearso i', 'trouble i', 'chair i', 'rat i', 'thegh  i', 'gadget i', 'brawn i', 'torn i', 'go i', 'swarm i', 'share i', 'y i teroncs i', 'i can meat i', 'shate  i', 'starp      i', 'wright i', 'dub   i', 'tair i', 'no i', 'yes i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trop     i', 'she had your doksic in ged swe fwater all year i', 'hoat  i', 'shet  i', 'parca i', 'slect  i', 'fite   i', 'car  i', 'knaing i', 'no   i', 'yetthe still triinks tefs swoarsly as ever i', 'usualy minus several bottouns i', 'flor  i', 'trainc   i', 'bo   i', 'we have often urge jump tfo moe and smoke glas i', 'there jugs munt lef i', 'fornt i', 'bight     i', 'do  i', 'wright   i', 'rage  i', 'sip i', 'stick   i', 'storm i', 'stubble  i', 'torm i', 'two i', 'glow

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-10000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-10000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-10000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-10000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-9000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'feed   i', 'alpha  i', 'pot   i', 'farm  i', 'sip i', 'daggoe i', 'rave i', 'fiar i', 'fate i', 'sip i', 'park i', 'godfathers light to be mo dernaners lackgage i', 'sparkble i', 'pit  i', 'wll sh  snearly notce ith your o i', 'trouble i', 'chair i', 'raid  i', 'thugh            i', 'godget i', 'brawn    i', 'torn i', 'go i', 'swarm i', 'share i', 'i  teront i', 'i can meat i', 'shate  i', 'star     i', 'wrighte  i', 'no    i', 'tar i', 'no    i', 'leef i', 'store i', 'chair i', 'slore i', 'bat  i', 'stubble i', 'trop       i', 'she had your darksiut in ged swse fater all year i', 'hart  i', 'sheet  i', 'paca i', 'sebict    i', 'spainge  i', 'car   i', 'knoing i', 'no     i', 'yeet the stelffing thefht ssworsly thas eval i', 'musualy minus several buttons i', 'flor  i', 'train   i', 'b  i', 'we have often urge jumped a more and smoe glas i', 'ther juges muna left i', 'ban i', 'bight   i', 'neow  i', 'wrihe  i', 'rage   i', 'slip i', 'stick   i', 'storm i', 'stubble  i', 'do

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-10500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-10500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-10500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-10500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-9500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'sheed  i', 'alpha  i', 'pot  i', 'farm  i', 'share i', 'daggor i', 'rave i', 'fair i', 'shet     i', 'ship i', 'par i', 'godfathersliget to be m dan aners lackage i', 'sparble i', 'pit   i', 'will sh y nearly notceis with yor ow i', 'trouble i', 'chair i', 'rav  i', 'thagh      i', 'goadget i', 'barwn  i', 'torn i', 'go i', 'swarm i', 'share i', 'tarn i', 'i can meat i', 'shat   i', 'starp         i', 'right i', 'don         i', 'tair i', 'now   i', 'yes i', 'store i', 'chair i', 'flor i', 'bat  i', 'stubble i', 'trop         i', 'she had your dorksit in grewash water all year i', 'hart  i', 'sheet  i', 'paca i', 'eit  i', 'paine  i', 'car   i', 'knoing   i', 'no   i', 'yeethstel taink thouf sworscialy thas eval i', 'musualy minus several butonss i', 'flor  i', 'train   i', 'bot   i', 'we have often urge jumpoal mo and small glas i', 'ther juges muna left i', 'barn i', 'beat  i', 'now  i', 'right     i', 'range    i', 'sip  i', 'stick      i', 'storm i', 'tuble  i', 'dawm i

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-11000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-11000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-11000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-11000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-10000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'sheed i', 'alpha  i', 'pot i', 'farm i', 'share i', 'dagger i', 'rave i', 'fare i', 'shet i', 'ship i', 'park i', 'gadfathersligkets to be my dern answers lackuage i', 'spurble i', 'bit  i', 'will she yetsnearly notcis it yor o i', 'trouble i', 'chair i', 'raive i', 'thagh i', 'goadget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'all yor tarn i', 'i can meat i', 'sheat i', 'star i', 'right i', 'dom  i', 'tar i', 'no i', 'yes i', 'store i', 'chair i', 'slore i', 'bat   i', 'stubble i', 'trot i', 'she had your doxsoit in greswa fatter all year i', 'horn    i', 'sheet  i', 'paca i', 'it   i', 'stainke   i', 'car  i', 'knotting i', 'no  i', 'yeet he stlf fhink thoufts  socaly thes evel i', 'usually minus several buotton i', 'flor i', 'train i', 'boot i', 'we have often urge jump fo more and smae glas i', 'there jugs muna left i', 'born i', 'bingt  i', 'knew   i', 'right  i', 'range  i', 'sip i', 'stick  i', 'storm i', 'double i', 'dawm i', 'two i', 'glow i', 'form i',

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-11500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-11500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-11500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-11500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-10500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'sheed i', 'alpha  i', 'pat  i', 'farm i', 'share i', 'dagger i', 'rave i', 'far i', 'sheet i', 'ship i', 'park i', 'goadfatherslickes to be my dern answers lackage i', 'spubble i', 'bit   i', 'will she yas nearly notceis fis yearsold i', 'trouble i', 'chair i', 'raid i', 'shagh i', 'goadget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'i tarn i', 'i can mread i', 'shate i', 'starp i', 'right i', 'dom  i', 'tair i', 'no i', 'yes i', 'store i', 'chair i', 'slore i', 'bat  i', 'stubble i', 'trap  i', 'she had your dolksouit in greawash father al year i', 'horn   i', 'sheet   i', 'paca i', 'e  i', 'fainte  i', 'car  i', 'knotting i', 'no  i', 'yet he sstel fhink sefss swoasely thas ever i', 'musually minus several buttoun i', 'flor i', 'train   i', 'bhop i', 'we have often urcge jumpd vall more and smole glas i', 'theur juges nuna left i', 'boarn i', 'beiat  i', 'new    i', 'right    i', 'range  i', 'ship i', 'stick  i', 'storm i', 'double  i', 'dawm i', 'thwo i', 'glo

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-12000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-12000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-12000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-12000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-11000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'seed i', 'alpha  i', 'pot  i', 'farm i', 'ship i', 'dagger i', 'rave i', 'far i', 'sheet i', 'ship i', 'park i', 'goadfatherslights to be m dernanerslackage i', 'bubble i', 'bit    i', 'will she yasnearly notctis pis yearso i', 'trouble i', 'chair i', 'raid i', 'thagh i', 'godget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'a  tarns i', 'i can meat i', 'sheat i', 'starp    i', 'right i', 'no  i', 'tair i', 'know i', 'yes i', 'store i', 'chair i', 'slore i', 'bat    i', 'stubble i', 'trot   i', 'she had you daksouit in greadwash father al year i', 'horn  i', 'sheet  i', 'paca i', 'zere    i', 'painkte   i', 'car  i', 'knoing  i', 'know  i', 'yeetthestl fying thets swarsly haseval i', 'musualy minus several button i', 'flor i', 'trace i', 'bot i', 'we have often  uged jump tova more and small glas i', 'the jugs muna left i', 'born i', 'bigt  i', 'knew         i', 'right i', 'rage  i', 'ship i', 'stick  i', 'storm i', 'double i', 'tam i', 'two i', 'glow i', 'form i',

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-12500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-12500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-12500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-12500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-11500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow i', 'feed  i', 'alpha  i', 'pot  i', 'farm  i', 'ship i', 'dagger i', 'rave i', 'fair i', 'sheet i', 'ship i', 'park i', 'goadfatherslighets to bea my dern answers lackage i', 'spubble i', 'bit  i', 'will she as nearly notcis twit yor o i', 'trouble i', 'chair i', 'rat i', 'though  i', 'goadget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'tarn i', 'i can meat i', 'sheet   i', 'starp  i', 'right i', 'dw  i', 'tair i', 'no i', 'yeeft i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trop        i', 'she had your doksuit in grewash watter all year i', 'horn  i', 'sheet         i', 'paca i', 'sit  i', 'spaint  i', 'car    i', 'knoing  i', 'kno    i', 'yeethe  stl thrink tets swasly has eval i', 'musually minus several bton i', 'flor   i', 'traine  i', 'bhoop   i', 'we have often urged jump fo more and smo gleas i', 'the jugs munal left i', 'forn i', 'binat  i', 'knew  i', 'right    i', 'range    i', 'sip i', 'stick     i', 'storm i', 'bubble  i', 'tarwm i', 'two

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-13000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-13000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-13000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-13000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-12000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow  i', 'seed   i', 'alpha  i', 'pot  i', 'farm  i', 'share i', 'daggor i', 'rave i', 'far i', 'sheet i', 'ship i', 'park i', 'gadfatherslices to be my dern answers lackage i', 'spubble i', 'pit  i', 'well she as nearly notcis it yorsove i', 'trouble i', 'chair i', 'rat i', 'thagh     i', 'goadget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'i  tarn i', 'i can meat i', 'sheat  i', 'star    i', 'right i', 'do     i', 'tair i', 'know  i', 'yeef i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trop      i', 'she had your dksouit in goroeod was fater all year i', 'horn  i', 'sheet      i', 'paca i', 'set  i', 'fate    i', 'car     i', 'knoying   i', 'know  i', 'yeetthe stels tfhink sefs swarsely thas evel i', 'musually minus several buttons i', 'flor  i', 'trane    i', 'whoop   i', 'we have often  urged jumpd o more and smow gleas i', 'tou jugs muna left i', 'borm i', 'beigat  i', 'new  i', 'right  i', 'range  i', 'ship  i', 'stick   i', 'storm i', 'dubble  i', 't

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-13500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-13500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-13500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-13500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-12500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow   i', 'feed   i', 'alpha  i', 'pot  i', 'farm  i', 'sip i', 'daggor i', 'rave i', 'fair i', 'sheet i', 'ship i', 'park i', 'goadfathersligkes to be my dernanswers lackguage i', 'spubble i', 'bit  i', 'will she as nearly notcis sist yearsov i', 'trouble i', 'chair i', 'raid i', 'theugh  i', 'gadget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'tarn i', 'i can meat i', 'sheat  i', 'starp  i', 'rought   i', 'don    i', 'chair i', 'know  i', 'yes i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trop      i', 'she had your darksouit in goroed wash fater all year i', 'horn  i', 'sheet     i', 'paca i', 'set  i', 'spainkte    i', 'car      i', 'knoying   i', 'know      i', 'yeetthe stels thinks selfs swoasly as everl i', 'musually minus several buttons i', 'flor  i', 'traine  i', 'bhop  i', 'we have often uzged jumpd vo more and smow gleas i', 'the jugs smuna left i', 'barn i', 'beigat  i', 'down  i', 'right  i', 'range  i', 'sip  i', 'stick   i', 'storm i', 'doubl

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-14000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-14000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-14000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-14000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-13000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow   i', 'feed    i', 'alpha  i', 'pot  i', 'farm  i', 'shirp i', 'dagor i', 'rave i', 'fare i', 'sheet i', 'ship i', 'par i', 'goadfatherslikets to bea my dern answers lackage i', 'spubble i', 'pit  i', 'will she s snearly notcis tit yearsod i', 'trouble i', 'chair i', 'raid i', 'thagh  i', 'gadget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'tarn i', 'i can meat i', 'sheat  i', 'starp   i', 'rought    i', 'do     i', 'cair i', 'now i', 'yes i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trop         i', 'she had your darksouit in goroed swas fater all year i', 'horn  i', 'sheet         i', 'paca i', 'set  i', 'staint    i', 'car  i', 'knoying  i', 'now      i', 'yeet the stelthink sefs swroarsly as everl i', 'musually minus several buttons i', 'flor  i', 'train  i', 'bhop  i', 'we have often urzged jumpd  more and smow glas i', 'the jugs muna left i', 'born i', 'bigt  i', 'dorw  i', 'right  i', 'range  i', 'sip   i', 'stick   i', 'storm i', 'bubble  i', 't

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-14500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-14500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-14500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-14500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-13500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow   i', 'feed   i', 'alpha  i', 'pot  i', 'farm  i', 'ship i', 'daggeor i', 'rave i', 'far i', 'sheet i', 'ship i', 'park i', 'godfatherslikets to bea my dernanswers lanckgage i', 'spubble i', 'pit  i', 'will she is nearly notcis it yerso i', 'trouble i', 'chair i', 'raid i', 'theugh  i', 'goadget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'tarn i', 'i can meat i', 'sheate    i', 'starp  i', 'rought  i', 'neow    i', 'chair i', 'now i', 'yes i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trop     i', 'she had your doksouit in gored wash water all year i', 'horn  i', 'sheet     i', 'paca i', 'set  i', 'stainte  i', 'car       i', 'knoying  i', 'now    i', 'yeet the stillthink sefs swoasly as everl i', 'musually minus several buttons i', 'flor  i', 'train  i', 'bhop   i', 'we have oftenusze jumpd o more and smowe gleas i', 'the jugs muna left i', 'born i', 'beiat  i', 'new    i', 'right  i', 'range  i', 'ship   i', 'stick   i', 'storm i', 'buble  i', 'tarm i

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-15000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-15000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-15000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-15000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-14000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow   i', 'feed   i', 'alpha  i', 'pot  i', 'farm  i', 'ship i', 'daggor i', 'rave i', 'fair i', 'sheet i', 'ship i', 'park i', 'godfathersligkets to bea my dernanswers lankgage i', 'spubble i', 'bit  i', 'will she yis snearly notcis it yors o i', 'trouble i', 'chair i', 'raid i', 'theugh  i', 'goadget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'i a tarn i', 'i can meat i', 'sheate  i', 'starp    i', 'rought    i', 'no    i', 'tair i', 'know i', 'yes i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trop      i', 'she had your doksouit in gored swash fwater all year i', 'horn  i', 'sheet    i', 'paca i', 'set  i', 'spaine   i', 'car     i', 'knoying   i', 'know   i', 'yeetthe stelthinke sefs swoarsly as eval i', 'musually minus several buttons i', 'flor  i', 'train  i', 'bhoop    i', 'we have oftenuszed jumpd o more and smoe glas i', 'theu jugs smuna left i', 'boarn i', 'beiat  i', 'now   i', 'right  i', 'range   i', 'sip    i', 'stick   i', 'storm i', 'bubble 

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-15500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-15500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-15500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-15500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-14500] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow   i', 'feed   i', 'alpha  i', 'pot  i', 'farm  i', 'sip i', 'dagger i', 'rave i', 'fair i', 'sheet i', 'ship i', 'park i', 'goadfathersligkhts to be my dern answers lankgage i', 'spurgble i', 'bit  i', 'will she yis snearly notcis sit yorso i', 'trouble i', 'chair i', 'raid i', 'thaugh  i', 'goadget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'ig a tarn i', 'i can meat i', 'sheate  i', 'starp     i', 'rought  i', 'no    i', 'tair i', 'know i', 'yes i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trop      i', 'she had your doksouit in gored swash fwater all year i', 'horn  i', 'sheet     i', 'paca i', 'slect  i', 'spaine    i', 'car    i', 'knoying  i', 'know      i', 'yeet the stel think sefs swoasly as ever i', 'musually minus several buttons i', 'flor   i', 'traine  i', 'bhotp   i', 'we have often uszged jumpd o more and smoe gleas i', 'theue jugs smuna left i', 'boarn i', 'beiaht  i', 'nowm  i', 'right  i', 'range   i', 'sip   i', 'stick    i', 'storm 

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-16000
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-16000/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-16000/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-16000/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-15000] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 714
  Batch size = 8


['grow   i', 'feed   i', 'alpha  i', 'pot  i', 'farm  i', 'sip i', 'daggeor i', 'rave i', 'fair i', 'sheet i', 'ship i', 'park i', 'gadfathers ligkhts to beae my dernanswers lankgage i', 'spurbble i', 'bit  i', 'will she yis snearly notcis it yors o i', 'trouble i', 'chair i', 'raid i', 'thaugh  i', 'goadget i', 'barn i', 'torn i', 'go i', 'swarm i', 'share i', 'ig a tarn i', 'i can meat i', 'sheate  i', 'starp    i', 'rought    i', 'no    i', 'tair i', 'know i', 'yes i', 'store i', 'chair i', 'swore i', 'bat  i', 'stubble i', 'trop       i', 'she had your doksouit in gored swash fwater all year i', 'horn  i', 'sheet       i', 'paca i', 'sect  i', 'spaine    i', 'car      i', 'knoying  i', 'know     i', 'yeet the stel think sefs swoasly as ever i', 'musually minus several buttons i', 'flor  i', 'train  i', 'bhop   i', 'we have often uszged jumpd o more and smowe glas i', 'theu jugs smuna left i', 'born i', 'beat  i', 'nowm   i', 'right  i', 'range   i', 'sip   i', 'stick    i', 'storm 

Saving model checkpoint to torgo_xlsr_finetune-M02-2/checkpoint-16500
Configuration saved in torgo_xlsr_finetune-M02-2/checkpoint-16500/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/checkpoint-16500/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/checkpoint-16500/preprocessor_config.json
Deleting older checkpoint [torgo_xlsr_finetune-M02-2/checkpoint-15500] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=16560, training_loss=1.3274605709573497, metrics={'train_runtime': 24071.3345, 'train_samples_per_second': 5.504, 'train_steps_per_second': 0.688, 'total_flos': 1.2378565275038376e+19, 'train_loss': 1.3274605709573497, 'epoch': 30.0})

In [None]:
# push to trained model to huggingface
# trainer.push_to_hub("yip-i/" + repo_name)
trainer.push_to_hub(repo_name)

Cloning https://huggingface.co/yip-i/torgo_xlsr_finetune-M02-2 into local empty directory.
Saving model checkpoint to torgo_xlsr_finetune-M02-2
Configuration saved in torgo_xlsr_finetune-M02-2/config.json
Model weights saved in torgo_xlsr_finetune-M02-2/pytorch_model.bin
Feature extractor saved in torgo_xlsr_finetune-M02-2/preprocessor_config.json


Upload file pytorch_model.bin:   0%|          | 32.0k/1.18G [00:00<?, ?B/s]

Upload file training_args.bin: 100%|##########| 3.43k/3.43k [00:00<?, ?B/s]

remote: Scanning LFS files of refs/heads/main for validity...        
remote: LFS file scan complete.        
To https://huggingface.co/yip-i/torgo_xlsr_finetune-M02-2
   5d18e70..ad97514  main -> main

remote: LFS file scan complete.        
To https://huggingface.co/yip-i/torgo_xlsr_finetune-M02-2
   5d18e70..ad97514  main -> main

Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'metrics': [{'name': 'Wer', 'type': 'wer', 'value': 1.0790568654646324}]}
To https://huggingface.co/yip-i/torgo_xlsr_finetune-M02-2
   ad97514..613a746  main -> main

   ad97514..613a746  main -> main



'https://huggingface.co/yip-i/torgo_xlsr_finetune-M02-2/commit/ad97514361362b3d5d9ea484568eaca59b9fe17e'

# **Evaluate the model and generate WER**

In [None]:
!pip install evaluate
!pip install jiwer

In [None]:
import re
from datasets import load_dataset, DatasetDict, Dataset, Audio
from huggingface_hub import Repository
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
import torch

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
from tqdm import tqdm
from evaluate import load

In [None]:
# Already present in the training section

'''
def remove_special_characters(batch):
    batch["text"] = re.sub(chars_to_ignore_regex, '', batch["text"]).lower() + " "
    return batch

def prepare_dataset(batch):
    audio = batch["audio"]

    # batched output is "un-batched" to ensure mapping is correct
    batch["input_values"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0]
    batch["input_length"] = len(batch["input_values"])

    with processor.as_target_processor():
        # batch["labels"] = processor(batch["text"]).input_ids
        batch["labels"] = batch["text"]
        # print(processor(batch["text"]))
    return batch
'''

In [None]:
# Function to be used for prediction
def map_to_result(batch):
    with torch.no_grad():
      input_values = torch.tensor(batch["input_values"], device="cpu").unsqueeze(0)
      logits = model(input_values).logits

    pred_ids = torch.argmax(logits, dim=-1)
    batch["pred_str"] = processor.batch_decode(pred_ids)[0]
    batch["text"] = batch["labels"]

    return batch

In [None]:
def get_result(torgo_dataset):
    pred_str = []
    actual = []
    for i in range(torgo_dataset.num_rows):
      inputs = processor(torgo_dataset[i]["input_values"], sampling_rate=16_000, return_tensors="pt")
      with torch.no_grad():
        logits = model(**inputs).logits
      transcription = processor.batch_decode(logits.numpy()).text
      pred_str.append(transcription[0].lower())
      actual = processor.decode(torgo_dataset[i]["labels"]).text

    return pred_str, actual

In [None]:
# change speaker and model_name (this is the model that was trained and should be on Huggingface)
target_lang="en"  # change to your target lang
speaker = "M02"
model_name = "yip-i/torgo_xlsr_finetune-" + speaker + "-2"

In [None]:
"""Cloning and uploading of modeling files can be done conveniently with the `huggingface_hub`'s `Repository` class.

More information on how to use the `huggingface_hub` to upload any files, please take a look at the [official docs](https://huggingface.co/docs/hub/how-to-upstream).
"""
repo = Repository(local_dir="model_staging", clone_from=model_name)

In [None]:
"""### Processing Data"""


data = load_dataset('csv', data_files='/content/output_modified.csv')
data = data.cast_column("audio", Audio(sampling_rate=16_000))
timit = data['train'].filter(lambda x: x == speaker, input_columns=['speaker_id'])


processor = Wav2Vec2Processor.from_pretrained("model_staging")
model = Wav2Vec2ForCTC.from_pretrained(model_name)

chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"]'



timit = timit.map(remove_special_characters)


timit = timit.map(prepare_dataset, remove_columns=timit.column_names, num_proc=4)
timit = timit.filter(lambda x: x < 25 * processor.feature_extractor.sampling_rate, input_columns=["input_length"])

timit[0]

In [None]:
# Generate the WER
pred_str = []
actual = []
for i in tqdm(range(timit.num_rows)):
  inputs = processor(timit[i]["input_values"], sampling_rate=16_000, return_tensors="pt")
  with torch.no_grad():
    logits = model(**inputs).logits
  transcription = processor.batch_decode(logits.numpy()).text
  pred_str.append(transcription[0].lower())

  actual.append(timit[i]["labels"])

wer_metric = load("wer")
wer = wer_metric.compute(predictions = pred_str, references = actual)
print("WER LOCATION")
print(wer)