### My Tokenizer

archivos tokados con prints modeling_whisper.py, utils.py

In [1]:

import os
import json
import torch
from transformers import PreTrainedTokenizer

class MyCustomTokenizer(PreTrainedTokenizer):
    def __init__(self, vocab_file, **kwargs):
        """
        Args:
            vocab_list: list of str containing the vocabulary (tokens básicos)
        """
        self.bos_token = "<|startoftranscript|>"
        self.eos_token = "<|endoftext|>" #Se utiliza tambien como padding. Creo que tambien como bos_token pero de momento no lo uso como tal
        self.transcribe_token = "<|transcribe|>"
        self.lang_token = "<|ca|>"
        self.notimestamps_token = "<|notimestamps|>"
        self.start_of_prev_token = "<|startofprev|>"

        self.special_tokens = [
            self.bos_token,
            self.eos_token,
            self.transcribe_token,
            self.lang_token,
            self.notimestamps_token,
            self.start_of_prev_token
        ]
        
        self.model_input_names = ["input_ids"]

        with open(vocab_file, encoding="utf-8") as vocab_handle:
            self.vocab = json.load(vocab_handle)

        offset = len(self.vocab)

        for j, token in enumerate(self.special_tokens):
            self.vocab[token] = offset + j

        self.inv_vocab = {i: word for word, i in self.vocab.items()}

        self.do_lower_case = True

        super().__init__(
            errors="replace",
            unk_token=self.eos_token ,
            bos_token=self.eos_token ,
            eos_token=self.eos_token ,
            pad_token=None,
            add_prefix_space=False,
            **kwargs,
        )

    def _tokenize(self, text: str) -> list:
        """
        Tokeniza el texto.(las etiquetas de los audios)
        """
        if self.do_lower_case:
            text = text.lower()
        tokens = text.split(',')
        return tokens

    def encode(self, text: str, add_special_tokens: bool = True) -> list:
        """
        Convierte un string en una lista de IDs.
        Si add_special_tokens es True, añade los tokens especiales correspondientes.
        """
        tokens = self._tokenize(text)
        token_ids = [self._convert_token_to_id(token) for token in tokens]
        if add_special_tokens:
            token_ids = (
                [self.vocab[self.bos_token], self.vocab[self.lang_token], self.vocab[self.transcribe_token], self.vocab[self.notimestamps_token]]
                + token_ids
                + [self.vocab[self.eos_token]]
            )
        return token_ids

    def decode(self, token_ids: list, skip_special_tokens: bool = False) -> str:
        """
        Convierte una lista de IDs en un string.
        Si skip_special_tokens es True, omite los tokens especiales en la cadena resultante.
        """
        tokens = [self._convert_id_to_token(i) for i in token_ids]
        if skip_special_tokens:
            tokens = [t for t in tokens if t not in self.special_tokens]
        return self.convert_tokens_to_string(tokens)

    def _convert_token_to_id(self, token: str) -> int:
        """
        Convierte un token a su ID.
        """
        return self.vocab.get(token)

    def _convert_id_to_token(self, index: int) -> str:
        """
        Convierte un ID a su token correspondiente.
        """
        return self.inv_vocab.get(index)

    def convert_tokens_to_string(self, tokens: list) -> str:
        """
        Convierte una lista de tokens en una cadena de texto.
        """
        return " ".join(tokens)

    def get_vocab(self) -> dict:
        """
        Devuelve el vocabulario completo.
        """
        return self.vocab

    def save_vocabulary(self, save_directory: str, filename_prefix: str = None) -> tuple:
        """
        Guarda el vocabulario en un archivo JSON.
        """
        if not os.path.isdir(save_directory):
            os.makedirs(save_directory)
        vocab_file = os.path.join(
            save_directory,
            (filename_prefix + "-" if filename_prefix else "") + "vocab.json"
        )
        with open(vocab_file, "w", encoding="utf-8") as f:
            json.dump(self.vocab, f, ensure_ascii=False)
        return (vocab_file,)

    def pad(self, lists_input_ids, return_tensors: bool = False):
        """
        Rellena (paddea) una lista de listas de IDs hasta la longitud máxima de la lista.
        Usa -100 como valor de padding, que en el cálculo de la pérdida se ignora.
        """
        max_length = max(len(ids) for ids in lists_input_ids)
        padded_input_ids = [ids + [-100] * (max_length - len(ids)) for ids in lists_input_ids]
        attention_mask = [[1] * len(ids) + [0] * (max_length - len(ids)) for ids in lists_input_ids]
        if return_tensors:
            padded_input_ids = torch.tensor(padded_input_ids)
            attention_mask = torch.tensor(attention_mask)
        return padded_input_ids, attention_mask

    def batch_encode(self, texts: list) -> list:
        """
        Aplica la función encode a una lista de textos.
        """
        return [self.encode(text) for text in texts]

    def batch_decode(self, batch_ids: list, skip_special_tokens: bool = False) -> list:
        """
        Decodifica una lista (batch) de secuencias de IDs a textos.
        Permite omitir tokens especiales si skip_special_tokens es True.
        """
        return [self.decode(ids, skip_special_tokens=skip_special_tokens) for ids in batch_ids]

    def __call__(self, *args, **kwargs):
        return self.encode(*args, **kwargs)
    
    def __len__(self):
        return len(self.vocab)
    
    @property
    def vocab_size(self):
        """
        Devuelve el tamaño del vocabulario.
        """
        return len(self.vocab)



In [2]:
"""
from transformers import WhisperTokenizer

old_tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="catalan", task="transcribe")
class MyWhisperTokenizer(WhisperTokenizer):
    def __init__(self, old_tokenizer: WhisperTokenizer, **kwargs):
        super().__init__(**kwargs)
        self.add_special_tokens({
        'additional_special_tokens': old_tokenizer.special_tokens_map['additional_special_tokens']
        })
        self.do_lower_case = False

    def bpe(self, token):
        return token
    
    def _tokenize(self, text: str) -> list:

        if self.do_lower_case:
            text = text.lower()
        tokens = text.split(',')
        return tokens
    
myt = MyWhisperTokenizer(old_tokenizer, 
                            vocab_file='cat-vocab.json',
                            merges_file='cat-merges.txt',
                            unk_token= '<|endoftext|>',
                            bos_token= '<|endoftext|>',
                            eos_token= '<|endoftext|>',
                            pad_token= None,
                            model_max_length = 36,
                            language='catalan', task='transcribe')


vocab_dict = myt.get_vocab()
for token, idx in vocab_dict.items():
    print(f"'{token}': {idx}")

texto = "agafar,caixa"
tokens = myt.tokenize(texto)
print("Tokens:", tokens)
"""


'\nfrom transformers import WhisperTokenizer\n\nold_tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="catalan", task="transcribe")\nclass MyWhisperTokenizer(WhisperTokenizer):\n    def __init__(self, old_tokenizer: WhisperTokenizer, **kwargs):\n        super().__init__(**kwargs)\n        self.add_special_tokens({\n        \'additional_special_tokens\': old_tokenizer.special_tokens_map[\'additional_special_tokens\']\n        })\n        self.do_lower_case = False\n\n    def bpe(self, token):\n        return token\n\n    def _tokenize(self, text: str) -> list:\n\n        if self.do_lower_case:\n            text = text.lower()\n        tokens = text.split(\',\')\n        return tokens\n\nmyt = MyWhisperTokenizer(old_tokenizer, \n                            vocab_file=\'cat-vocab.json\',\n                            merges_file=\'cat-merges.txt\',\n                            unk_token= \'<|endoftext|>\',\n                            bos_token= \'<|endoftext|>\

### My Data Collator

In [3]:
from dataclasses import dataclass
from typing import Any, Dict, List, Union
import torch

@dataclass
class MyDataCollator:
    feature_extractor: object
    tokenizer: MyCustomTokenizer

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        input_features = [{"input_features": feature["input_features"]} for feature in features]
        batch = self.feature_extractor.pad(input_features, return_tensors="pt")

        label_features = [feature["labels"] for feature in features]
        labels_batch, attention_mask = self.tokenizer.pad(label_features, return_tensors=True)

        if (labels_batch[:, 0] == self.tokenizer.vocab[self.tokenizer.bos_token]).all().cpu().item():
            labels_batch = labels_batch[:, 1:]

        batch["labels"] = labels_batch
        batch["attention_mask"] = attention_mask
        
        return batch

In [4]:
from transformers import WhisperProcessor

model_id = "openai/whisper-small"
language = "ca"
task = "transcribe"

processor = WhisperProcessor.from_pretrained(model_id, language=language, task=task)

myTokenizer = MyCustomTokenizer(vocab_file="cat-vocab.json")

print(myTokenizer.tokenize("agafar,caixa"))

print(myTokenizer.vocab)

['agafar', 'caixa']
{'agafar': 0, 'llençar': 1, 'deixar': 2, 'caixa': 3, 'pilota': 4, '<|startoftranscript|>': 5, '<|endoftext|>': 6, '<|transcribe|>': 7, '<|ca|>': 8, '<|notimestamps|>': 9, '<|startofprev|>': 10}


### Dades

In [5]:
from datasets import load_from_disk

dataset_dict = load_from_disk("Audios/dataset1")
print(dataset_dict)
print(dataset_dict["train"][0]["tags"])

DatasetDict({
    train: Dataset({
        features: ['audio_sentence', 'audio_rate', 'text_sentence', 'tags'],
        num_rows: 900
    })
    test: Dataset({
        features: ['audio_sentence', 'audio_rate', 'text_sentence', 'tags'],
        num_rows: 100
    })
})
deixar,caixa,llençar,pilota


In [6]:
print(dataset_dict["train"][0])

{'audio_sentence': [-0.0018622098723426461, -0.0026804334484040737, -0.002878882922232151, -0.0030405528377741575, -0.0023102853447198868, -0.0009550179238431156, -0.00019208842422813177, 0.0007619480020366609, 0.0009668144048191607, 4.609959796653129e-05, -0.0013538372004404664, -0.002687052357941866, -0.003382754744961858, -0.0030865101143717766, -0.0024442512076348066, -0.0013947555562481284, 0.0001877207396319136, 0.0010492057772353292, 0.0007689557387493551, -6.646740075666457e-05, -0.0007862519123591483, -0.0019127695122733712, -0.002300682943314314, -0.0012369113974273205, -2.65511389443418e-05, 0.00016243425488937646, 0.00011750951671274379, -7.396738510578871e-05, 0.00045936211245134473, 0.0013620909303426743, 0.00021725607803091407, -0.0014085238799452782, -0.001066819648258388, 0.0005250285612419248, 0.0016032191924750805, 0.0014415960758924484, 0.0011876736534759402, 0.0015043059829622507, 0.0019769794307649136, 0.0014815815957263112, 0.0011155139654874802, 0.00020379698253

In [7]:
def prepare_dataset(batch):
    audio = batch["audio_sentence"]
    sampling_rate = batch["audio_rate"]
    text = batch["tags"]

    batch["input_features"] = processor.feature_extractor(audio, sampling_rate=sampling_rate).input_features[0]
    batch["labels"] = myTokenizer(text)
    return batch

dataset_dict_prepared = dataset_dict.map(prepare_dataset, remove_columns=dataset_dict.column_names["train"], num_proc=4)

Map (num_proc=4):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/100 [00:00<?, ? examples/s]

In [8]:
import numpy as np

print(dataset_dict_prepared)
print(dataset_dict_prepared["train"][0]["labels"])
print(np.array(dataset_dict_prepared["train"][0]["input_features"]).shape)
print(myTokenizer.decode(dataset_dict_prepared["train"][0]["labels"]))
print(myTokenizer.decode(dataset_dict_prepared["train"][0]["labels"], skip_special_tokens=True))

print(dataset_dict_prepared["test"][0]["labels"])
print(np.array(dataset_dict_prepared["test"][0]["input_features"]).shape)
print(myTokenizer.decode(dataset_dict_prepared["test"][0]["labels"]))
print(myTokenizer.decode(dataset_dict_prepared["test"][0]["labels"], skip_special_tokens=True))

DatasetDict({
    train: Dataset({
        features: ['input_features', 'labels'],
        num_rows: 900
    })
    test: Dataset({
        features: ['input_features', 'labels'],
        num_rows: 100
    })
})
[5, 8, 7, 9, 2, 3, 1, 4, 6]
(80, 3000)
<|startoftranscript|> <|ca|> <|transcribe|> <|notimestamps|> deixar caixa llençar pilota <|endoftext|>
deixar caixa llençar pilota
[5, 8, 7, 9, 1, 3, 2, 4, 6]
(80, 3000)
<|startoftranscript|> <|ca|> <|transcribe|> <|notimestamps|> llençar caixa deixar pilota <|endoftext|>
llençar caixa deixar pilota


### Data Collator

In [9]:
my_datacollator = MyDataCollator(feature_extractor=processor.feature_extractor, tokenizer=myTokenizer)

aux = []
for i in range(5):
    aux.append(dataset_dict_prepared["train"][i])
batch = my_datacollator(aux)
print(batch["labels"])
print(batch["input_features"].shape)

tensor([[8, 7, 9, 2, 3, 1, 4, 6],
        [8, 7, 9, 2, 4, 0, 4, 6],
        [8, 7, 9, 1, 4, 2, 3, 6],
        [8, 7, 9, 2, 4, 0, 3, 6],
        [8, 7, 9, 2, 3, 0, 4, 6]])
torch.Size([5, 80, 3000])


### Carregant Model i canviant capes

In [10]:
import torch
from transformers import WhisperForConditionalGeneration

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model = WhisperForConditionalGeneration.from_pretrained(model_id)

max_output_length = len(myTokenizer) * 3

print(max_output_length)

print(model.config)

model.config.bos_token_id = myTokenizer.vocab[myTokenizer.eos_token]
model.config.eos_token_id = myTokenizer.vocab[myTokenizer.eos_token]
model.config.pad_token_id = myTokenizer.vocab[myTokenizer.eos_token]
model.config.decoder_start_token_id = myTokenizer.vocab[myTokenizer.bos_token]
model.config.forced_decoder_ids = None
model.config.begin_suppress_tokens = [myTokenizer.vocab[myTokenizer.eos_token]]
model.config.suppress_tokens = []
model.config.max_length = max_output_length #importante
model.config.max_target_positions = max_output_length 
print(model.config)

model.to("cpu")

model.generation_config.language = "ca"
model.generation_config.task = "transcribe"
model.generation_config.forced_decoder_ids=None


model.generation_config.bos_token_id = model.config.bos_token_id
model.generation_config.eos_token_id = model.config.eos_token_id
model.generation_config.pad_token_id = model.config.pad_token_id
model.generation_config.decoder_start_token_id = model.config.decoder_start_token_id
model.generation_config.begin_suppress_tokens = model.config.begin_suppress_tokens
model.generation_config.suppress_tokens = model.config.suppress_tokens
model.generation_config.no_timestamps_token_id = myTokenizer.vocab[myTokenizer.notimestamps_token]
model.generation_config.prev_sot_token_id = myTokenizer.vocab[myTokenizer.start_of_prev_token]
model.generation_config.lang_to_id[myTokenizer.lang_token] = myTokenizer.vocab[myTokenizer.lang_token]
model.generation_config.task_to_id["transcribe"] = myTokenizer.vocab[myTokenizer.transcribe_token]
model.generation_config.max_length = max_output_length #importante

print(myTokenizer.batch_decode([[5, 8, 7, 9]]))



33
WhisperConfig {
  "_attn_implementation_autoset": true,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "apply_spec_augment": false,
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "classifier_proj_size": 256,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 50257,
  "forced_decoder_ids": [
    [
      1,
      50259
    ],
    [
      2,
      50359
    ],
    [
      3,
      50363
    ]
  ],
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "mask_feature_length": 10,
  "mask_feature_min_masks": 0,
  "mask_feature_prob": 0.0,
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
  "

### Creamos Nueva Capa de embedding y capa lineal final de proyección 

In [11]:
whisper_tokenizer = processor.tokenizer

print(whisper_tokenizer.special_tokens_map)

t_ids = []
for w in myTokenizer.vocab.keys():
    tokenized = whisper_tokenizer.tokenize(w)
    t_ids.append(whisper_tokenizer.convert_tokens_to_ids(tokenized))

print(t_ids)

## embedding
whisper_embedding = model.model.decoder.embed_tokens.weight

my_embedding = []
for i in range(len(t_ids)):
    sum = torch.zeros(whisper_embedding.shape[1])
    for j in t_ids[i]:
        sum += whisper_embedding[j]
    avg = sum / len(t_ids[i])
    my_embedding.append(avg)

my_embedding = torch.stack(my_embedding)


#cambiando la capa de embedding
new_embedding = torch.nn.Embedding(my_embedding.shape[0], my_embedding.shape[1])
new_embedding.weight.data = my_embedding
print(id(model.model.decoder.embed_tokens.weight) == id(model.proj_out.weight)) #weights iguales hay que mantenerlos iguales
model.model.decoder.embed_tokens = new_embedding


#cambiando la capa de proyección
### linear projection
linear_projection = torch.nn.Linear(model.config.d_model, len(myTokenizer), bias=False)
model.proj_out = linear_projection
#Misma referencia de pesos que la capa de embedding
model.proj_out.weight = model.model.decoder.embed_tokens.weight
model.proj_out.weight.requires_grad = False
model.proj_out.weight.requires_grad = True

print(model.proj_out.weight.shape)
print(model.model.decoder.embed_tokens.weight.shape)

#Importante cambiamos el vocab_size de la config del modelo
model.config.vocab_size = len(myTokenizer)

## frezeamos el encoder
model.freeze_encoder()

print(device)
model.to(device)




{'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|endoftext|>', '<|startoftranscript|>', '<|en|>', '<|zh|>', '<|de|>', '<|es|>', '<|ru|>', '<|ko|>', '<|fr|>', '<|ja|>', '<|pt|>', '<|tr|>', '<|pl|>', '<|ca|>', '<|nl|>', '<|ar|>', '<|sv|>', '<|it|>', '<|id|>', '<|hi|>', '<|fi|>', '<|vi|>', '<|he|>', '<|uk|>', '<|el|>', '<|ms|>', '<|cs|>', '<|ro|>', '<|da|>', '<|hu|>', '<|ta|>', '<|no|>', '<|th|>', '<|ur|>', '<|hr|>', '<|bg|>', '<|lt|>', '<|la|>', '<|mi|>', '<|ml|>', '<|cy|>', '<|sk|>', '<|te|>', '<|fa|>', '<|lv|>', '<|bn|>', '<|sr|>', '<|az|>', '<|sl|>', '<|kn|>', '<|et|>', '<|mk|>', '<|br|>', '<|eu|>', '<|is|>', '<|hy|>', '<|ne|>', '<|mn|>', '<|bs|>', '<|kk|>', '<|sq|>', '<|sw|>', '<|gl|>', '<|mr|>', '<|pa|>', '<|si|>', '<|km|>', '<|sn|>', '<|yo|>', '<|so|>', '<|af|>', '<|oc|>', '<|ka|>', '<|be|>', '<|tg|>', '<|sd|>', '<|gu|>', '<|am|>', '<|yi|>', '<|lo|>', '<|uz|>', '<|fo|>', '<|ht|>

WhisperForConditionalGeneration(
  (model): WhisperModel(
    (encoder): WhisperEncoder(
      (conv1): Conv1d(80, 768, kernel_size=(3,), stride=(1,), padding=(1,))
      (conv2): Conv1d(768, 768, kernel_size=(3,), stride=(2,), padding=(1,))
      (embed_positions): Embedding(1500, 768)
      (layers): ModuleList(
        (0-11): 12 x WhisperEncoderLayer(
          (self_attn): WhisperSdpaAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=False)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
        

### Trainer 

In [12]:
import evaluate

metric = evaluate.load("wer")

In [13]:
def compute_metrics(pred):
    pred_ids = pred.predictions
    label_ids = pred.label_ids

    # replace -100 with the pad_token_id
    label_ids[label_ids == -100] = model.config.pad_token_id

    # we do not want to group tokens when computing the metrics
    pred_str = myTokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    label_str = myTokenizer.batch_decode(label_ids, skip_special_tokens=True)

    print("predicted: ", pred_str)
    print("labels: ", label_str)

    wer = 100 * metric.compute(predictions=pred_str, references=label_str)

    print("wer in %: ", wer)

    return {"wer": wer}


In [14]:
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="whisper-small-ca",  # cambia el nombre del repositorio si lo deseas
    per_device_train_batch_size=16,
    gradient_accumulation_steps=1,  # aumenta de 2x por cada 2x de disminución en el batch size
    learning_rate=1e-4,
    warmup_steps=100,
    num_train_epochs=5,             # Entrenamiento durante 10 epochs
    #gradient_checkpointing=True,
    fp16=True,
    eval_strategy="epoch",     # Evalúa al final de cada epoch
    per_device_eval_batch_size=8,
    predict_with_generate=True,
    logging_steps=25,
    report_to=["tensorboard"],
    metric_for_best_model="wer",
    greater_is_better=False,
    save_strategy="no",  # Esto evitará que el modelo se guarde automáticamente
    save_steps=None  # Si no quieres que se guarde en pasos
)




In [15]:
from transformers import Seq2SeqTrainer

trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    train_dataset=dataset_dict_prepared["train"],
    eval_dataset=dataset_dict_prepared["test"],
    data_collator=my_datacollator,
    compute_metrics=compute_metrics,
    processing_class=myTokenizer,
)

### Prueba de evaluación

In [16]:
trainer.train()
print("entrenamiento completado")

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss,Wer
1,0.2232,0.080034,7.837838
2,0.3554,0.029342,2.432432
3,0.0191,0.026362,1.081081
4,0.0022,0.011089,1.351351
5,0.0017,0.003021,0.540541


predicted:  ['llençar caixa deixar caixa', 'agafar caixa llençar caixa', 'llençar caixa deixar caixa', 'llençar pilota', 'agafar caixa agafar pilota', 'llençar caixa', 'agafar pilota', 'deixar pilota llençar caixa', 'agafar caixa', 'deixar pilota llençar pilota', 'agafar pilota agafar caixa', 'llençar caixa', 'llençar pilota', 'llençar pilota agafar pilota', 'deixar caixa llençar pilota', 'llençar pilota', 'llençar pilota agafar caixa', 'llençar pilota deixar caixa', 'deixar pilota', 'agafar caixa agafar pilota', 'agafar caixa llençar pilota', 'deixar caixa llençar pilota', 'agafar pilota deixar pilota', 'llençar caixa llençar caixa', 'llençar caixa agafar pilota', 'deixar pilota llençar pilota', 'llençar pilota', 'agafar caixa deixar pilota', 'agafar pilota', 'deixar pilota llençar pilota', 'deixar caixa llençar pilota', 'llençar pilota', 'llençar caixa deixar caixa', 'agafar caixa deixar caixa', 'agafar pilota deixar caixa', 'llençar pilota agafar caixa', 'deixar caixa llençar caixa'

### Saving model locally

In [17]:
trainer.save_model() 
print("modelo guardado")



modelo guardado


### Saving Model in Huggingface

In [18]:
#token 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
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
trainer.push_to_hub(commit_message="test model", repo_id="whisper-small-ca-intent")
print("modelo subido a huggingface")

NameError: name 'trainer' is not defined