### Ablation Study 1 — Baseline 

This baseline runs plain LoRA fine-tuning on the speech-to-speech translation model.
It includes standard audio → text training, no semantic feedback, no confidence filtering, no context memory, no reinforcement or post-processing.
So it will load model + processor, stream audio–text batches, compute loss and backprop, run validation (Loss / BLEU / WER), save best checkpoints
This serves as the **reference model** for comparing all future ablations.


In [2]:
import sys
import os
import json
import logging
from datetime import datetime
from pathlib import Path
import torch
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from config import config
from dataloader import StreamingDatasetWithContext, collate_fn
from model_setup import load_models
from generation import calculate_wer, calculate_bleu


In [3]:
config.num_epochs = 5
config.checkpoints_dir = "baseline_ablation/checkpoints"
config.logs_dir = "baseline_ablation/logs"

# Ensure target language is set 
if not getattr(config, "target_lang", None):
    config.target_lang = "cym"  # Welsh by default

# Logging setup
for dir_path in [config.checkpoints_dir, config.logs_dir]:
    Path(dir_path).mkdir(parents=True, exist_ok=True)

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = Path(config.logs_dir) / f"baseline_{timestamp}.log"
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.FileHandler(log_file, encoding="utf-8"), logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger("baseline_training")

logger.info("-" * 70)
logger.info("Ablation Study 1 - Baseline Model Training")
logger.info(f"Started: {datetime.now().isoformat()}")
logger.info(f"Target language (tgt_lang): {config.target_lang}")
logger.info("-" * 70)

2025-11-16 11:31:08,065 [INFO] ----------------------------------------------------------------------
2025-11-16 11:31:08,066 [INFO] Ablation Study 1 - Baseline Model Training
2025-11-16 11:31:08,066 [INFO] Started: 2025-11-16T11:31:08.066820
2025-11-16 11:31:08,067 [INFO] Target language (tgt_lang): cym
2025-11-16 11:31:08,068 [INFO] ----------------------------------------------------------------------


In [4]:
class BaselineTrainer:
    def __init__(self):
        logger.info("Loading models...")
        models = load_models()
        self.model = models["s2s_model"]
        self.processor = models["processor"]
        self.model = self.model.to(config.device)
        self.model.eval()
        logger.info("Models loaded and moved to device")

        logger.info("Creating datasets and dataloaders...")
        train_dataset = StreamingDatasetWithContext(split=config.train_split)
        val_dataset = StreamingDatasetWithContext(split=config.val_split)

        # Optionally limit samples if config provides caps
        if getattr(config, "max_train_samples", None):
            logger.info(f"Limiting train samples to {config.max_train_samples}")
        if getattr(config, "max_val_samples", None):
            logger.info(f"Limiting val samples to {config.max_val_samples}")

        self.train_loader = DataLoader(
            train_dataset,
            batch_size=config.batch_size,
            shuffle=True,
            collate_fn=collate_fn,
            drop_last=True,
            num_workers=0,
        )
        self.val_loader = DataLoader(
            val_dataset,
            batch_size=config.batch_size,
            shuffle=False,
            collate_fn=collate_fn,
            num_workers=0,
        )
        logger.info(f"Train batches: {len(self.train_loader)}")
        logger.info(f"Val batches: {len(self.val_loader)}")

        logger.info("Setting up optimizer and scheduler...")
        trainable_params = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = AdamW(trainable_params, lr=config.learning_rate, weight_decay=config.weight_decay)
        total_steps = max(1, len(self.train_loader) * config.num_epochs)
        self.scheduler = get_linear_schedule_with_warmup(self.optimizer, config.warmup_steps, total_steps)
        logger.info(f"  Total steps: {total_steps}")

        self.global_step = 0
        self.best_val_loss = float("inf")
        self.best_bleu = 0.0
        self.best_wer = float("inf")

        logger.info("Initialization complete")

    def train_step(self, batch):
        audio = batch["audio"].to(config.device)
        texts = batch["text"]
        try:
            audio_inputs = self.processor(
                audio=audio.cpu().numpy(),
                return_tensors="pt",
                sampling_rate=config.sample_rate,
            )
            audio_inputs = {k: v.to(config.device) for k, v in audio_inputs.items() if isinstance(v, torch.Tensor)}

            text_inputs = self.processor.tokenizer(
                texts, return_tensors="pt", padding=True, truncation=True, max_length=100
            )
            labels = text_inputs["input_ids"].to(config.device)

            outputs = self.model(**audio_inputs, labels=labels)
            loss = outputs.loss

            loss = loss / config.gradient_accumulation_steps
            loss.backward()

            return float(loss.item() * config.gradient_accumulation_steps)
        except Exception as e:
            logger.error(f"Train step error: {e}", exc_info=True)
            return 0.0

    @torch.no_grad()
    def validate(self):
        logger.info("-" * 70)
        logger.info(f"Validation - Step {self.global_step}")
        self.model.eval()

        losses = []
        all_refs = []
        all_hyps = []
        failed_batches = 0
        successful_batches = 0

        for batch_idx, batch in enumerate(tqdm(self.val_loader, desc="Validating")):
            try:
                audio = batch["audio"].to(config.device)
                texts = batch["text"]

                audio_inputs = self.processor(
                    audio=audio.cpu().numpy(),
                    return_tensors="pt",
                    sampling_rate=config.sample_rate,
                )
                audio_inputs = {k: v.to(config.device) for k, v in audio_inputs.items() if isinstance(v, torch.Tensor)}

                text_inputs = self.processor.tokenizer(
                    texts, return_tensors="pt", padding=True, truncation=True, max_length=100
                )
                labels = text_inputs["input_ids"].to(config.device)

                outputs = self.model(**audio_inputs, labels=labels)
                losses.append(float(outputs.loss.item()))

                # Ensure tgt_lang specified and generate_speech explicitly False
                gen_outputs = None
                try:
                    gen_outputs = self.model.generate(
                        **audio_inputs,
                        tgt_lang=config.target_lang,
                        generate_speech=False,
                        max_new_tokens=50,
                        num_beams=3,
                    )
                except Exception as ge:
                    # Log and continue (do not crash validation loop)
                    logger.warning(f"Generation error on batch {batch_idx}: {ge}")
                    failed_batches += 1
                    continue

                # Extract token ids robustly
                if isinstance(gen_outputs, tuple):
                    generated_ids = gen_outputs[0]
                elif hasattr(gen_outputs, "sequences"):
                    generated_ids = gen_outputs.sequences
                else:
                    generated_ids = gen_outputs

                if isinstance(generated_ids, torch.Tensor):
                    if generated_ids.ndim == 3:
                        generated_ids = generated_ids[:, 0, :]
                else:
                    logger.warning(f"Generated ids unexpected type: {type(generated_ids)}")
                    failed_batches += 1
                    continue

                translations = self.processor.batch_decode(generated_ids, skip_special_tokens=True)
                translations = [t.strip() for t in translations]

                all_refs.extend(texts)
                all_hyps.extend(translations)
                successful_batches += 1

            except Exception as e:
                failed_batches += 1
                logger.warning(f"Val batch {batch_idx} error: {e}", exc_info=True)

        if not losses:
            self.model.train()
            logger.warning("No validation loss recorded")
            return float("inf"), 0.0, 1.0

        avg_loss = sum(losses) / len(losses)
        bleu = calculate_bleu(all_refs, all_hyps)
        wer = calculate_wer(all_refs, all_hyps)

        logger.info(f"Validation results: Loss={avg_loss:.4f}, BLEU={bleu:.2f}, WER={wer:.4f}")
        logger.info(f"Successful batches: {successful_batches}/{len(self.val_loader)}; Failed: {failed_batches}")

        # Save best checkpoints
        improved = False
        if avg_loss < self.best_val_loss:
            self.best_val_loss = avg_loss
            self.save_checkpoint("loss")
            improved = True

        if bleu > self.best_bleu:
            self.best_bleu = bleu
            self.save_checkpoint("bleu")
            improved = True

        if wer < self.best_wer:
            self.best_wer = wer
            self.save_checkpoint("wer")
            improved = True

        self.model.train()
        return avg_loss, bleu, wer

    def save_checkpoint(self, metric):
        save_dir = Path(config.checkpoints_dir) / f"best_{metric}"
        save_dir.mkdir(exist_ok=True, parents=True)
        self.model.save_pretrained(save_dir)
        results = {
            "ablation": "baseline",
            "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC"),
            "best_val_loss": float(self.best_val_loss),
            "best_bleu": float(self.best_bleu),
            "best_wer": float(self.best_wer),
            "global_step": self.global_step,
        }
        with open(save_dir / "results.json", "w") as f:
            json.dump(results, f, indent=2)
        logger.info(f"Saved checkpoint for {metric} at {save_dir}")

    def train(self):
        logger.info("TRAINING START")
        logger.info(f"Epochs={config.num_epochs}, Batch size={config.batch_size}, Gradient accumulation={config.gradient_accumulation_steps}")
        for epoch in range(config.num_epochs):
            logger.info(f"Epoch {epoch+1}/{config.num_epochs}")
            progress = tqdm(self.train_loader, desc=f"Epoch {epoch+1}")
            losses = []
            for step, batch in enumerate(progress):
                loss = self.train_step(batch)
                losses.append(loss)

                if (step + 1) % config.gradient_accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), config.max_grad_norm)
                    self.optimizer.step()
                    self.scheduler.step()
                    self.optimizer.zero_grad()
                    self.global_step += 1
                    if self.global_step % config.clear_cache_steps == 0:
                        torch.cuda.empty_cache()

                avg = sum(losses[-10:]) / min(10, len(losses))
                progress.set_postfix({"loss": f"{avg:.4f}"})

                if self.global_step > 0 and self.global_step % config.eval_steps == 0:
                    self.validate()

            epoch_loss = sum(losses) / len(losses) if losses else 0.0
            logger.info(f"Epoch {epoch+1} complete - Avg Loss: {epoch_loss:.4f}")
            # Run validation at epoch end
            self.validate()

        logger.info("TRAINING COMPLETE")
        logger.info(f"Best Loss: {self.best_val_loss:.4f} | Best BLEU: {self.best_bleu:.2f} | Best WER: {self.best_wer:.4f}")

In [None]:
torch.manual_seed(config.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(config.seed)
    logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    logger.info("Using CPU")

trainer = BaselineTrainer()
trainer.train()

2025-11-16 11:41:20,816 [INFO] Using GPU: NVIDIA GeForce RTX 3060
2025-11-16 11:41:20,816 [INFO] Loading models...
2025-11-16 11:41:20,816 [INFO] 
Loading models...


`torch_dtype` is deprecated! Use `dtype` instead!
Instantiating a decoder SeamlessM4Tv2Attention without passing `layer_idx` is not recommended and will lead to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` when creating this class.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

2025-11-16 11:41:46,311 [INFO]   S2S Trainable: 53,870,592 / 2,363,120,261 (2.28%)
2025-11-16 11:41:46,347 [INFO] Loading ASR model for back-translation...
2025-11-16 11:41:54,812 [INFO] ASR model loaded successfully
2025-11-16 11:41:54,812 [INFO] Loading semantic similarity model...
2025-11-16 11:41:54,828 [INFO] Use pytorch device_name: cuda:0
2025-11-16 11:41:54,828 [INFO] Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-mpnet-base-v2
2025-11-16 11:42:02,695 [INFO] Semantic model loaded successfully
2025-11-16 11:42:02,748 [INFO] Models loaded and moved to device
2025-11-16 11:42:02,748 [INFO] Creating datasets and dataloaders...
2025-11-16 11:42:03,845 [INFO]   1507 files → 3371 chunks (all)
2025-11-16 11:42:03,865 [INFO]   50 files → 98 chunks (dev)
2025-11-16 11:42:03,865 [INFO] Limiting val samples to 50
2025-11-16 11:42:03,865 [INFO] Train batches: 3371
2025-11-16 11:42:03,865 [INFO] Val batches: 98
2025-11-16 11:42:03,865 [INFO] Setting up opt

Epoch 1:   0%|          | 0/3371 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 11:52:05,767 [INFO] ----------------------------------------------------------------------
2025-11-16 11:52:05,768 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 11:53:29,498 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 11:53:29,498 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 11:53:30,660 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 11:53:31,461 [INFO] Saved checkpoint for bleu at baseline_ablation\checkpoints\best_bleu
2025-11-16 11:53:32,261 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 11:53:32,662 [INFO] ----------------------------------------------------------------------
2025-11-16 11:53:32,662 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 11:54:49,315 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 11:54:49,315 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 11:54:49,528 [INFO] ----------------------------------------------------------------------
2025-11-16 11:54:49,528 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 11:56:05,779 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 11:56:05,780 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 11:56:05,994 [INFO] ----------------------------------------------------------------------
2025-11-16 11:56:05,994 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 11:57:23,603 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 11:57:23,603 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 11:57:23,794 [INFO] ----------------------------------------------------------------------
2025-11-16 11:57:23,794 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 11:58:42,063 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 11:58:42,063 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 11:58:42,278 [INFO] ----------------------------------------------------------------------
2025-11-16 11:58:42,278 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 11:59:59,945 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 11:59:59,945 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:00:00,147 [INFO] ----------------------------------------------------------------------
2025-11-16 12:00:00,147 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:01:17,647 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 12:01:17,647 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:01:17,846 [INFO] ----------------------------------------------------------------------
2025-11-16 12:01:17,861 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:02:34,694 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 12:02:34,694 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:02:34,911 [INFO] ----------------------------------------------------------------------
2025-11-16 12:02:34,911 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:03:51,981 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 12:03:51,982 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:03:52,179 [INFO] ----------------------------------------------------------------------
2025-11-16 12:03:52,179 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:05:09,144 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 12:05:09,144 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:05:09,362 [INFO] ----------------------------------------------------------------------
2025-11-16 12:05:09,362 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:06:26,261 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 12:06:26,261 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:06:26,477 [INFO] ----------------------------------------------------------------------
2025-11-16 12:06:26,477 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:07:42,620 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 12:07:42,620 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:07:42,811 [INFO] ----------------------------------------------------------------------
2025-11-16 12:07:42,811 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:08:59,928 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 12:08:59,928 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:09:00,144 [INFO] ----------------------------------------------------------------------
2025-11-16 12:09:00,144 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:10:18,211 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 12:10:18,211 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:10:18,438 [INFO] ----------------------------------------------------------------------
2025-11-16 12:10:18,440 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:11:34,900 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 12:11:34,900 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:11:35,130 [INFO] ----------------------------------------------------------------------
2025-11-16 12:11:35,131 [INFO] Validation - Step 100


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:12:55,624 [INFO] Validation results: Loss=3.4819, BLEU=42.73, WER=0.9911
2025-11-16 12:12:55,625 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:19:39,066 [INFO] ----------------------------------------------------------------------
2025-11-16 12:19:39,066 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:21:00,744 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:21:00,744 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 12:21:02,161 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 12:21:03,144 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:21:03,510 [INFO] ----------------------------------------------------------------------
2025-11-16 12:21:03,510 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:22:18,661 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:22:18,661 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:22:18,859 [INFO] ----------------------------------------------------------------------
2025-11-16 12:22:18,859 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:23:34,228 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:23:34,228 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:23:34,461 [INFO] ----------------------------------------------------------------------
2025-11-16 12:23:34,461 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:24:52,344 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:24:52,344 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:24:52,544 [INFO] ----------------------------------------------------------------------
2025-11-16 12:24:52,544 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:26:08,244 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:26:08,244 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:26:08,444 [INFO] ----------------------------------------------------------------------
2025-11-16 12:26:08,444 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:27:22,944 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:27:22,944 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:27:23,144 [INFO] ----------------------------------------------------------------------
2025-11-16 12:27:23,144 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:28:38,344 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:28:38,344 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:28:38,527 [INFO] ----------------------------------------------------------------------
2025-11-16 12:28:38,527 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:29:51,261 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:29:51,276 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:29:51,444 [INFO] ----------------------------------------------------------------------
2025-11-16 12:29:51,444 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:31:06,214 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:31:06,214 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:31:06,427 [INFO] ----------------------------------------------------------------------
2025-11-16 12:31:06,427 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:32:19,795 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:32:19,796 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:32:20,002 [INFO] ----------------------------------------------------------------------
2025-11-16 12:32:20,003 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:33:35,260 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:33:35,260 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:33:35,444 [INFO] ----------------------------------------------------------------------
2025-11-16 12:33:35,444 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:34:50,810 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:34:50,810 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:34:51,027 [INFO] ----------------------------------------------------------------------
2025-11-16 12:34:51,027 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:36:04,744 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:36:04,744 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:36:04,960 [INFO] ----------------------------------------------------------------------
2025-11-16 12:36:04,960 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:37:18,594 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:37:18,594 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:37:18,777 [INFO] ----------------------------------------------------------------------
2025-11-16 12:37:18,777 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:38:32,742 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:38:32,743 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:38:32,940 [INFO] ----------------------------------------------------------------------
2025-11-16 12:38:32,941 [INFO] Validation - Step 200


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:39:46,634 [INFO] Validation results: Loss=2.7254, BLEU=14.06, WER=0.9554
2025-11-16 12:39:46,635 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:40:16,150 [INFO] Epoch 1 complete - Avg Loss: 5.8731
2025-11-16 12:40:16,152 [INFO] ----------------------------------------------------------------------
2025-11-16 12:40:16,152 [INFO] Validation - Step 210


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:41:28,910 [INFO] Validation results: Loss=2.6897, BLEU=14.06, WER=0.9464
2025-11-16 12:41:28,910 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 12:41:30,033 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 12:41:30,846 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer
2025-11-16 12:41:30,860 [INFO] Epoch 2/5


Epoch 2:   0%|          | 0/3371 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:45:59,560 [INFO] ----------------------------------------------------------------------
2025-11-16 12:45:59,560 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:47:21,543 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:47:21,543 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 12:47:22,623 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 12:47:23,493 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:47:23,893 [INFO] ----------------------------------------------------------------------
2025-11-16 12:47:23,893 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:48:37,977 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:48:37,977 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:48:38,160 [INFO] ----------------------------------------------------------------------
2025-11-16 12:48:38,160 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:49:51,710 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:49:51,710 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:49:51,893 [INFO] ----------------------------------------------------------------------
2025-11-16 12:49:51,893 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:51:06,893 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:51:06,893 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:51:07,110 [INFO] ----------------------------------------------------------------------
2025-11-16 12:51:07,110 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:52:22,544 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:52:22,544 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:52:22,736 [INFO] ----------------------------------------------------------------------
2025-11-16 12:52:22,737 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:53:36,193 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:53:36,209 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:53:36,395 [INFO] ----------------------------------------------------------------------
2025-11-16 12:53:36,395 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:54:50,544 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:54:50,544 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:54:50,735 [INFO] ----------------------------------------------------------------------
2025-11-16 12:54:50,736 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:56:03,959 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:56:03,960 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:56:04,143 [INFO] ----------------------------------------------------------------------
2025-11-16 12:56:04,143 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:57:19,044 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:57:19,044 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:57:19,244 [INFO] ----------------------------------------------------------------------
2025-11-16 12:57:19,244 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:58:34,237 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:58:34,238 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:58:34,436 [INFO] ----------------------------------------------------------------------
2025-11-16 12:58:34,437 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 12:59:47,535 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 12:59:47,535 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 12:59:47,710 [INFO] ----------------------------------------------------------------------
2025-11-16 12:59:47,710 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:01:01,949 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 13:01:01,949 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:01:02,143 [INFO] ----------------------------------------------------------------------
2025-11-16 13:01:02,143 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:02:17,114 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 13:02:17,115 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:02:17,325 [INFO] ----------------------------------------------------------------------
2025-11-16 13:02:17,325 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:03:30,041 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 13:03:30,041 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:03:30,237 [INFO] ----------------------------------------------------------------------
2025-11-16 13:03:30,238 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:04:43,606 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 13:04:43,606 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:04:43,809 [INFO] ----------------------------------------------------------------------
2025-11-16 13:04:43,809 [INFO] Validation - Step 300


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:05:55,974 [INFO] Validation results: Loss=2.4921, BLEU=42.73, WER=0.9018
2025-11-16 13:05:55,975 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:10:56,493 [INFO] ----------------------------------------------------------------------
2025-11-16 13:10:56,493 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:12:15,075 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:12:15,075 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 13:12:16,136 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 13:12:16,955 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:12:17,356 [INFO] ----------------------------------------------------------------------
2025-11-16 13:12:17,357 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:13:28,690 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:13:28,690 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:13:28,886 [INFO] ----------------------------------------------------------------------
2025-11-16 13:13:28,886 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:14:40,297 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:14:40,298 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:14:40,491 [INFO] ----------------------------------------------------------------------
2025-11-16 13:14:40,491 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:15:52,537 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:15:52,537 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:15:52,756 [INFO] ----------------------------------------------------------------------
2025-11-16 13:15:52,757 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:17:06,020 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:17:06,021 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:17:06,190 [INFO] ----------------------------------------------------------------------
2025-11-16 13:17:06,192 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:18:19,073 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:18:19,073 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:18:19,286 [INFO] ----------------------------------------------------------------------
2025-11-16 13:18:19,286 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:19:30,164 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:19:30,165 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:19:30,336 [INFO] ----------------------------------------------------------------------
2025-11-16 13:19:30,337 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:20:42,569 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:20:42,570 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:20:42,746 [INFO] ----------------------------------------------------------------------
2025-11-16 13:20:42,747 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:21:54,258 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:21:54,258 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:21:54,475 [INFO] ----------------------------------------------------------------------
2025-11-16 13:21:54,475 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:23:07,298 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:23:07,299 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:23:07,507 [INFO] ----------------------------------------------------------------------
2025-11-16 13:23:07,508 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:24:20,207 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:24:20,207 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:24:20,405 [INFO] ----------------------------------------------------------------------
2025-11-16 13:24:20,406 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:25:32,523 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:25:32,540 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:25:32,728 [INFO] ----------------------------------------------------------------------
2025-11-16 13:25:32,729 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:26:45,326 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:26:45,326 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:26:45,545 [INFO] ----------------------------------------------------------------------
2025-11-16 13:26:45,545 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:27:59,035 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:27:59,035 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:27:59,205 [INFO] ----------------------------------------------------------------------
2025-11-16 13:27:59,205 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:29:11,583 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:29:11,583 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:29:11,772 [INFO] ----------------------------------------------------------------------
2025-11-16 13:29:11,773 [INFO] Validation - Step 400


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:30:24,784 [INFO] Validation results: Loss=2.3770, BLEU=42.73, WER=0.8542
2025-11-16 13:30:24,785 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:31:23,706 [INFO] Epoch 2 complete - Avg Loss: 3.3206
2025-11-16 13:31:23,706 [INFO] ----------------------------------------------------------------------
2025-11-16 13:31:23,706 [INFO] Validation - Step 420


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:32:36,626 [INFO] Validation results: Loss=2.3771, BLEU=42.73, WER=0.8244
2025-11-16 13:32:36,626 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 13:32:37,691 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer
2025-11-16 13:32:37,707 [INFO] Epoch 3/5


Epoch 3:   0%|          | 0/3371 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:36:34,209 [INFO] ----------------------------------------------------------------------
2025-11-16 13:36:34,209 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:37:50,236 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:37:50,237 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 13:37:51,406 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 13:37:52,251 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:37:52,659 [INFO] ----------------------------------------------------------------------
2025-11-16 13:37:52,659 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:39:05,259 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:39:05,259 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:39:05,494 [INFO] ----------------------------------------------------------------------
2025-11-16 13:39:05,495 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:40:19,484 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:40:19,485 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:40:19,639 [INFO] ----------------------------------------------------------------------
2025-11-16 13:40:19,639 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:41:30,805 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:41:30,805 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:41:31,009 [INFO] ----------------------------------------------------------------------
2025-11-16 13:41:31,009 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:42:43,235 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:42:43,236 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:42:43,425 [INFO] ----------------------------------------------------------------------
2025-11-16 13:42:43,425 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:43:54,372 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:43:54,372 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:43:54,591 [INFO] ----------------------------------------------------------------------
2025-11-16 13:43:54,592 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:45:08,441 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:45:08,441 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:45:08,635 [INFO] ----------------------------------------------------------------------
2025-11-16 13:45:08,635 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:46:22,459 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:46:22,459 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:46:22,694 [INFO] ----------------------------------------------------------------------
2025-11-16 13:46:22,694 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:47:34,139 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:47:34,139 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:47:34,338 [INFO] ----------------------------------------------------------------------
2025-11-16 13:47:34,339 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:48:46,475 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:48:46,475 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:48:46,676 [INFO] ----------------------------------------------------------------------
2025-11-16 13:48:46,677 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:49:57,593 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:49:57,593 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:49:57,791 [INFO] ----------------------------------------------------------------------
2025-11-16 13:49:57,792 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:51:10,140 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:51:10,140 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:51:10,341 [INFO] ----------------------------------------------------------------------
2025-11-16 13:51:10,341 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:52:24,442 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:52:24,442 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:52:24,642 [INFO] ----------------------------------------------------------------------
2025-11-16 13:52:24,642 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:53:34,928 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:53:34,929 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:53:35,103 [INFO] ----------------------------------------------------------------------
2025-11-16 13:53:35,104 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:54:48,235 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:54:48,237 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 13:54:48,405 [INFO] ----------------------------------------------------------------------
2025-11-16 13:54:48,405 [INFO] Validation - Step 500


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 13:55:58,991 [INFO] Validation results: Loss=2.3277, BLEU=42.73, WER=0.8239
2025-11-16 13:55:58,991 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:00:56,970 [INFO] ----------------------------------------------------------------------
2025-11-16 14:00:56,971 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:02:17,057 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:02:17,057 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 14:02:19,878 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 14:02:20,725 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:02:21,252 [INFO] ----------------------------------------------------------------------
2025-11-16 14:02:21,252 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:03:33,688 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:03:33,688 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:03:33,884 [INFO] ----------------------------------------------------------------------
2025-11-16 14:03:33,885 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:04:47,287 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:04:47,288 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:04:47,478 [INFO] ----------------------------------------------------------------------
2025-11-16 14:04:47,479 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:05:59,691 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:05:59,692 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:05:59,914 [INFO] ----------------------------------------------------------------------
2025-11-16 14:05:59,914 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:07:14,559 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:07:14,559 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:07:14,755 [INFO] ----------------------------------------------------------------------
2025-11-16 14:07:14,756 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:08:27,454 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:08:27,455 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:08:27,651 [INFO] ----------------------------------------------------------------------
2025-11-16 14:08:27,653 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:09:39,439 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:09:39,439 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:09:39,646 [INFO] ----------------------------------------------------------------------
2025-11-16 14:09:39,647 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:10:52,853 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:10:52,853 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:10:53,041 [INFO] ----------------------------------------------------------------------
2025-11-16 14:10:53,041 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:12:04,890 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:12:04,892 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:12:05,121 [INFO] ----------------------------------------------------------------------
2025-11-16 14:12:05,121 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:13:17,628 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:13:17,628 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:13:17,835 [INFO] ----------------------------------------------------------------------
2025-11-16 14:13:17,836 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:14:30,723 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:14:30,723 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:14:30,917 [INFO] ----------------------------------------------------------------------
2025-11-16 14:14:30,918 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:15:43,207 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:15:43,207 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:15:43,406 [INFO] ----------------------------------------------------------------------
2025-11-16 14:15:43,407 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:16:56,932 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:16:56,933 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:16:57,156 [INFO] ----------------------------------------------------------------------
2025-11-16 14:16:57,156 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:18:09,572 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:18:09,572 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:18:09,822 [INFO] ----------------------------------------------------------------------
2025-11-16 14:18:09,822 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:19:21,199 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:19:21,200 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:19:21,386 [INFO] ----------------------------------------------------------------------
2025-11-16 14:19:21,387 [INFO] Validation - Step 600


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:20:33,774 [INFO] Validation results: Loss=2.2615, BLEU=42.73, WER=0.8095
2025-11-16 14:20:33,774 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:22:02,857 [INFO] Epoch 3 complete - Avg Loss: 2.8497
2025-11-16 14:22:02,857 [INFO] ----------------------------------------------------------------------
2025-11-16 14:22:02,858 [INFO] Validation - Step 630


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:23:16,123 [INFO] Validation results: Loss=2.2487, BLEU=42.73, WER=0.8006
2025-11-16 14:23:16,123 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 14:23:17,173 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 14:23:17,972 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer
2025-11-16 14:23:18,001 [INFO] Epoch 4/5


Epoch 4:   0%|          | 0/3371 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:26:45,974 [INFO] ----------------------------------------------------------------------
2025-11-16 14:26:45,974 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:28:04,399 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:28:04,399 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 14:28:05,527 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 14:28:06,431 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:28:06,823 [INFO] ----------------------------------------------------------------------
2025-11-16 14:28:06,823 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:29:17,638 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:29:17,638 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:29:17,808 [INFO] ----------------------------------------------------------------------
2025-11-16 14:29:17,808 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:30:29,323 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:30:29,323 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:30:29,524 [INFO] ----------------------------------------------------------------------
2025-11-16 14:30:29,524 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:31:39,933 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:31:39,935 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:31:40,121 [INFO] ----------------------------------------------------------------------
2025-11-16 14:31:40,121 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:32:51,588 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:32:51,588 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:32:51,827 [INFO] ----------------------------------------------------------------------
2025-11-16 14:32:51,828 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:34:03,124 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:34:03,124 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:34:03,336 [INFO] ----------------------------------------------------------------------
2025-11-16 14:34:03,336 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:35:14,052 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:35:14,053 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:35:14,221 [INFO] ----------------------------------------------------------------------
2025-11-16 14:35:14,221 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:36:26,245 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:36:26,246 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:36:26,422 [INFO] ----------------------------------------------------------------------
2025-11-16 14:36:26,422 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:37:35,071 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:37:35,071 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:37:35,253 [INFO] ----------------------------------------------------------------------
2025-11-16 14:37:35,253 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:38:45,639 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:38:45,639 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:38:45,834 [INFO] ----------------------------------------------------------------------
2025-11-16 14:38:45,834 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:39:56,156 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:39:56,156 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:39:56,365 [INFO] ----------------------------------------------------------------------
2025-11-16 14:39:56,365 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:41:07,755 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:41:07,755 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:41:07,955 [INFO] ----------------------------------------------------------------------
2025-11-16 14:41:07,955 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:42:19,378 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:42:19,378 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:42:19,577 [INFO] ----------------------------------------------------------------------
2025-11-16 14:42:19,578 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:43:29,794 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:43:29,795 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:43:29,967 [INFO] ----------------------------------------------------------------------
2025-11-16 14:43:29,967 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:44:42,091 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:44:42,091 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:44:42,267 [INFO] ----------------------------------------------------------------------
2025-11-16 14:44:42,268 [INFO] Validation - Step 700


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:45:52,170 [INFO] Validation results: Loss=2.2063, BLEU=42.73, WER=0.7917
2025-11-16 14:45:52,170 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:50:42,354 [INFO] ----------------------------------------------------------------------
2025-11-16 14:50:42,354 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:51:59,892 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 14:51:59,893 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 14:52:00,958 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 14:52:01,772 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:52:02,191 [INFO] ----------------------------------------------------------------------
2025-11-16 14:52:02,191 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:53:13,071 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 14:53:13,071 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:53:13,286 [INFO] ----------------------------------------------------------------------
2025-11-16 14:53:13,297 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:54:24,223 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 14:54:24,223 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:54:24,471 [INFO] ----------------------------------------------------------------------
2025-11-16 14:54:24,471 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:55:33,257 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 14:55:33,257 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:55:33,505 [INFO] ----------------------------------------------------------------------
2025-11-16 14:55:33,505 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:56:45,254 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 14:56:45,254 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:56:45,495 [INFO] ----------------------------------------------------------------------
2025-11-16 14:56:45,496 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:57:55,719 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 14:57:55,719 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:57:56,016 [INFO] ----------------------------------------------------------------------
2025-11-16 14:57:56,016 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 14:59:07,458 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 14:59:07,459 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 14:59:07,678 [INFO] ----------------------------------------------------------------------
2025-11-16 14:59:07,679 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:00:19,089 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 15:00:19,089 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:00:19,336 [INFO] ----------------------------------------------------------------------
2025-11-16 15:00:19,336 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:01:29,896 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 15:01:29,896 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:01:30,129 [INFO] ----------------------------------------------------------------------
2025-11-16 15:01:30,130 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:02:41,487 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 15:02:41,487 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:02:41,770 [INFO] ----------------------------------------------------------------------
2025-11-16 15:02:41,770 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:03:51,519 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 15:03:51,519 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:03:51,753 [INFO] ----------------------------------------------------------------------
2025-11-16 15:03:51,753 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:05:03,423 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 15:05:03,423 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:05:03,655 [INFO] ----------------------------------------------------------------------
2025-11-16 15:05:03,655 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:06:14,337 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 15:06:14,337 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:06:14,589 [INFO] ----------------------------------------------------------------------
2025-11-16 15:06:14,589 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:07:23,602 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 15:07:23,602 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:07:23,856 [INFO] ----------------------------------------------------------------------
2025-11-16 15:07:23,859 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:08:34,651 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 15:08:34,652 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:08:34,860 [INFO] ----------------------------------------------------------------------
2025-11-16 15:08:34,860 [INFO] Validation - Step 800


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:09:43,736 [INFO] Validation results: Loss=2.1626, BLEU=42.73, WER=0.7827
2025-11-16 15:09:43,752 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:11:37,673 [INFO] Epoch 4 complete - Avg Loss: 2.6347
2025-11-16 15:11:37,673 [INFO] ----------------------------------------------------------------------
2025-11-16 15:11:37,673 [INFO] Validation - Step 840


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:12:50,331 [INFO] Validation results: Loss=2.1507, BLEU=42.73, WER=0.7649
2025-11-16 15:12:50,332 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 15:12:51,387 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 15:12:52,195 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer
2025-11-16 15:12:52,207 [INFO] Epoch 5/5


Epoch 5:   0%|          | 0/3371 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:15:49,007 [INFO] ----------------------------------------------------------------------
2025-11-16 15:15:49,007 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:17:09,171 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:17:09,171 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 15:17:10,270 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:17:10,672 [INFO] ----------------------------------------------------------------------
2025-11-16 15:17:10,672 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:18:24,984 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:18:24,985 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:18:25,169 [INFO] ----------------------------------------------------------------------
2025-11-16 15:18:25,169 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:19:37,857 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:19:37,857 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:19:38,033 [INFO] ----------------------------------------------------------------------
2025-11-16 15:19:38,033 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:20:51,985 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:20:51,985 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:20:52,186 [INFO] ----------------------------------------------------------------------
2025-11-16 15:20:52,186 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:22:04,862 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:22:04,862 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:22:05,044 [INFO] ----------------------------------------------------------------------
2025-11-16 15:22:05,044 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:23:18,353 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:23:18,353 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:23:18,533 [INFO] ----------------------------------------------------------------------
2025-11-16 15:23:18,534 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:24:32,058 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:24:32,059 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:24:32,236 [INFO] ----------------------------------------------------------------------
2025-11-16 15:24:32,236 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:25:43,170 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:25:43,170 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:25:43,367 [INFO] ----------------------------------------------------------------------
2025-11-16 15:25:43,367 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:26:57,071 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:26:57,071 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:26:57,272 [INFO] ----------------------------------------------------------------------
2025-11-16 15:26:57,275 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:28:10,969 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:28:10,969 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:28:11,140 [INFO] ----------------------------------------------------------------------
2025-11-16 15:28:11,140 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:29:23,955 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:29:23,955 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:29:24,129 [INFO] ----------------------------------------------------------------------
2025-11-16 15:29:24,129 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:30:38,357 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:30:38,357 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:30:38,540 [INFO] ----------------------------------------------------------------------
2025-11-16 15:30:38,540 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:31:52,173 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:31:52,173 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:31:52,348 [INFO] ----------------------------------------------------------------------
2025-11-16 15:31:52,348 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:33:07,714 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:33:07,716 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:33:07,890 [INFO] ----------------------------------------------------------------------
2025-11-16 15:33:07,890 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:34:23,141 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:34:23,157 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:34:23,323 [INFO] ----------------------------------------------------------------------
2025-11-16 15:34:23,323 [INFO] Validation - Step 900


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:35:36,407 [INFO] Validation results: Loss=2.1045, BLEU=42.73, WER=0.7857
2025-11-16 15:35:36,407 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:40:34,257 [INFO] ----------------------------------------------------------------------
2025-11-16 15:40:34,257 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:41:53,029 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:41:53,029 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 15:41:54,110 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 15:41:54,957 [INFO] Saved checkpoint for wer at baseline_ablation\checkpoints\best_wer


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:41:55,357 [INFO] ----------------------------------------------------------------------
2025-11-16 15:41:55,357 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:43:08,673 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:43:08,673 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:43:08,890 [INFO] ----------------------------------------------------------------------
2025-11-16 15:43:08,890 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:44:21,457 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:44:21,457 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:44:21,692 [INFO] ----------------------------------------------------------------------
2025-11-16 15:44:21,692 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:45:34,856 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:45:34,857 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:45:35,057 [INFO] ----------------------------------------------------------------------
2025-11-16 15:45:35,057 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:46:49,378 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:46:49,378 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:46:49,572 [INFO] ----------------------------------------------------------------------
2025-11-16 15:46:49,573 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:48:01,092 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:48:01,092 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:48:01,273 [INFO] ----------------------------------------------------------------------
2025-11-16 15:48:01,273 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:49:13,855 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:49:13,857 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:49:14,073 [INFO] ----------------------------------------------------------------------
2025-11-16 15:49:14,073 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:50:26,502 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:50:26,502 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:50:26,709 [INFO] ----------------------------------------------------------------------
2025-11-16 15:50:26,709 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:51:39,256 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:51:39,256 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:51:39,423 [INFO] ----------------------------------------------------------------------
2025-11-16 15:51:39,423 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:52:52,073 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:52:52,073 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:52:52,260 [INFO] ----------------------------------------------------------------------
2025-11-16 15:52:52,260 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:54:04,145 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:54:04,145 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:54:04,341 [INFO] ----------------------------------------------------------------------
2025-11-16 15:54:04,342 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:55:16,755 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:55:16,755 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:55:16,956 [INFO] ----------------------------------------------------------------------
2025-11-16 15:55:16,956 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:56:29,556 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:56:29,556 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:56:29,756 [INFO] ----------------------------------------------------------------------
2025-11-16 15:56:29,756 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:57:42,706 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:57:42,706 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:57:42,906 [INFO] ----------------------------------------------------------------------
2025-11-16 15:57:42,906 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 15:59:17,807 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 15:59:17,807 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.


2025-11-16 15:59:18,359 [INFO] ----------------------------------------------------------------------
2025-11-16 15:59:18,360 [INFO] Validation - Step 1000


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 16:01:14,079 [INFO] Validation results: Loss=2.0583, BLEU=42.73, WER=0.7440
2025-11-16 16:01:14,080 [INFO] Successful batches: 98/98; Failed: 0


The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 16:03:47,906 [INFO] Epoch 5 complete - Avg Loss: 2.5040
2025-11-16 16:03:47,906 [INFO] ----------------------------------------------------------------------
2025-11-16 16:03:47,906 [INFO] Validation - Step 1050


Validating:   0%|          | 0/98 [00:00<?, ?it/s]

The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on the input modality. If you want to generate speech, use the `generate` method.
The `use_cache` argument is changed to `False` since `labels` is provided.
This calls the same method `forward` as `SeamlessM4Tv2ForTextToText` and `SeamlessM4Tv2ForSpeechToText`depending on

2025-11-16 16:05:02,546 [INFO] Validation results: Loss=2.0483, BLEU=42.73, WER=0.7589
2025-11-16 16:05:02,546 [INFO] Successful batches: 98/98; Failed: 0
2025-11-16 16:05:03,684 [INFO] Saved checkpoint for loss at baseline_ablation\checkpoints\best_loss
2025-11-16 16:05:03,709 [INFO] TRAINING COMPLETE
2025-11-16 16:05:03,711 [INFO] Best Loss: 2.0483 | Best BLEU: 42.73 | Best WER: 0.7440


: 