In [1]:
# %pip install jiwer

Collecting jiwer
  Downloading jiwer-4.0.0-py3-none-any.whl.metadata (3.3 kB)
Collecting rapidfuzz>=3.9.7 (from jiwer)
  Downloading rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading jiwer-4.0.0-py3-none-any.whl (23 kB)
Downloading rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m48.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, jiwer
Successfully installed jiwer-4.0.0 rapidfuzz-3.14.3


In [6]:
%pip install --upgrade torchcodec



In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import librosa
from datasets import load_dataset, Audio
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from jiwer import wer, cer
import re
from tqdm.auto import tqdm
import warnings

# Ignore minor warnings for cleaner output
warnings.filterwarnings("ignore")

import sys
import os

# Redirect stderr to devnull to hide the C++ multiprocessing errors
# WARNING: This hides ALL errors, so remove it if your code crashes silently.
sys.stderr = open(os.devnull, 'w')

# ... Run your training loop here ...

# Restore stderr after the loop so you can see future errors
# sys.stderr = sys.__stderr__

# --- 1. CONFIGURATION ---
class Config:
    # Audio Params
    SAMPLE_RATE = 16000
    N_MFCC = 13
    N_FFT = 400
    HOP_LENGTH = 160
    # 39 Features = 13 MFCC + 13 Delta + 13 Delta-Delta
    N_FEATURES = 39

    # Dataset Sizes (Small slices to prevent Colab Crash)
    TRAIN_SIZE = 5000
    TEST_SIZE = 500

    # Model Params
    HIDDEN_SIZE = 256
    NUM_LAYERS = 3
    DROPOUT = 0.3

    # Training Params
    BATCH_SIZE = 32
    LR = 1e-4
    EPOCHS = 30
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    # Special Tokens
    BLANK_TOKEN = "[BLANK]"

config = Config()
print(f"Running on: {config.DEVICE}")

# --- 2. DATA LOADING (OPTIMIZED FOR COLAB) ---
def load_and_prepare_data():
    print("Loading dataset slices...")

    # TRICK: Instead of loading "train" (all 300GB), we load "train[:5500]"
    # This tells Hugging Face to only process the first few rows.
    # We load slightly more to account for filtering.
    train_split_str = f"train[:{config.TRAIN_SIZE + 500}]"
    test_split_str = f"test[:{config.TEST_SIZE + 100}]"

    # 1. Load Slices (This is fast and low-RAM)
    train_ds = load_dataset("hezarai/common-voice-13-fa", split=train_split_str, trust_remote_code=True)
    test_ds = load_dataset("hezarai/common-voice-13-fa", split=test_split_str, trust_remote_code=True)

    print(f"Initial Load: Train={len(train_ds)}, Test={len(test_ds)}")

    # 2. Drop unnecessary columns (Keep only audio/sentence)
    cols_to_keep = ["audio", "sentence"]
    train_ds = train_ds.select_columns(cols_to_keep)
    test_ds = test_ds.select_columns(cols_to_keep)

    # 3. Filter FIRST (Before casting audio)
    # This cleans the text without decoding the heavy MP3 files yet.
    print("Filtering empty sentences...")
    def is_valid(x):
        return x["sentence"] is not None and len(x["sentence"]) > 2

    train_ds = train_ds.filter(is_valid)
    test_ds = test_ds.filter(is_valid)

    # 4. Truncate to exact requested size
    train_ds = train_ds.select(range(min(len(train_ds), config.TRAIN_SIZE)))
    test_ds = test_ds.select(range(min(len(test_ds), config.TEST_SIZE)))

    # 5. Cast Audio SECOND (Now safe)
    # This enables on-the-fly decoding only for our small subset
    print("Casting audio column...")
    train_ds = train_ds.cast_column("audio", Audio(sampling_rate=config.SAMPLE_RATE))
    test_ds = test_ds.cast_column("audio", Audio(sampling_rate=config.SAMPLE_RATE))

    return train_ds, test_ds

train_ds, test_ds = load_and_prepare_data()
print(f"Final Train Size: {len(train_ds)} | Final Test Size: {len(test_ds)}")

# --- 3. VOCABULARY & NORMALIZATION ---

def normalize_text(text):
    text = str(text).lower().strip()
    # Normalize Persian characters
    text = text.replace('ك', 'ک').replace('ي', 'ی').replace('ئ', 'ی').replace('ى', 'ی')
    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def build_vocab(dataset):
    print("Building vocabulary...")
    vocab = set()
    # We iterate over the dataset to find all unique chars
    for item in tqdm(dataset, desc="Scanning Vocab"):
        vocab.update(normalize_text(item["sentence"]))

    sorted_vocab = sorted(list(vocab))

    # 0 is reserved for CTC Blank
    char2id = {c: i + 1 for i, c in enumerate(sorted_vocab)}
    char2id[config.BLANK_TOKEN] = 0

    id2char = {i: c for c, i in char2id.items()}
    return char2id, id2char

char2id, id2char = build_vocab(train_ds)
VOCAB_SIZE = len(char2id)
print(f"Vocabulary Size: {VOCAB_SIZE}")

# --- 4. UPDATE THIS FUNCTION ---
def process_batch(batch):
    # ... (Keep existing setup code) ...
    batch_features = []
    batch_labels = []
    batch_input_lens = []
    batch_label_lens = []

    for audio_data, text in zip(batch["audio"], batch["sentence"]):
        try:
            if audio_data is None: continue
            audio_array = audio_data["array"]
            if isinstance(audio_array, list): audio_array = np.array(audio_array)
            if len(audio_array) < 400: continue

            # Extract MFCC
            mfcc = librosa.feature.mfcc(y=audio_array, sr=config.SAMPLE_RATE, n_mfcc=config.N_MFCC, n_fft=config.N_FFT, hop_length=config.HOP_LENGTH)
            delta = librosa.feature.delta(mfcc)
            delta2 = librosa.feature.delta(mfcc, order=2)
            feats = np.vstack([mfcc, delta, delta2]).T.astype(np.float32)

            # --- PERMANENT INSTANCE NORM FIX ---
            # Normalize each file individually to Mean=0, Std=1
            # This is critical for convergence
            mean = np.mean(feats, axis=0)
            std = np.std(feats, axis=0)
            feats = (feats - mean) / (std + 1e-9)
            # -----------------------------------

            norm_text = normalize_text(text)
            label_ids = [char2id[c] for c in norm_text if c in char2id]

            if len(label_ids) > 0:
                batch_features.append(feats)
                batch_labels.append(label_ids)
                batch_input_lens.append(feats.shape[0])
                batch_label_lens.append(len(label_ids))

        except Exception:
            continue

    return {
        "input_features": batch_features,
        "labels": batch_labels,
        "input_lengths": batch_input_lens,
        "label_lengths": batch_label_lens
    }

# Re-apply transform
train_ds.set_transform(process_batch)
test_ds.set_transform(process_batch)

# --- 5. DATA COLLATOR ---

def data_collator(batch):
    # Filter out placeholder values (invalid samples that returned empty data)
    # An item is considered invalid if its label_lengths is 0
    batch = [x for x in batch if x["label_lengths"] > 0]

    # If the batch is empty after filtering, the rest of the function
    # will naturally produce empty tensors/lists, which DataLoader can handle.

    inputs = []
    labels = []
    in_lens = []
    label_lens = []

    for item in batch:
        inputs.append(torch.tensor(item["input_features"]))
        labels.append(torch.tensor(item["labels"]))
        in_lens.append(item["input_lengths"])
        label_lens.append(item["label_lengths"])

    if not inputs: # Handle case where all samples were filtered out
        return (
            torch.empty(0, config.N_FEATURES),  # inputs_padded
            torch.empty(0, dtype=torch.long),   # labels_padded
            torch.empty(0, dtype=torch.long),   # in_lens
            torch.empty(0, dtype=torch.long)    # label_lens
        )

    # Sort for PackedSequence (Descedning order)
    zipped = zip(inputs, labels, in_lens, label_lens)
    sorted_data = sorted(zipped, key=lambda x: x[2], reverse=True)
    inputs, labels, in_lens, label_lens = zip(*sorted_data)

    inputs = list(inputs)
    labels = list(labels)
    in_lens = torch.tensor(in_lens, dtype=torch.long)
    label_lens = torch.tensor(label_lens, dtype=torch.long)

    # Pad
    inputs_padded = pad_sequence(inputs, batch_first=True)
    labels_padded = pad_sequence(labels, batch_first=True, padding_value=0)

    return inputs_padded, labels_padded, in_lens, label_lens

# Create Loaders
train_loader = DataLoader(train_ds, batch_size=config.BATCH_SIZE, collate_fn=data_collator, shuffle=True, num_workers=2)
test_loader = DataLoader(test_ds, batch_size=config.BATCH_SIZE, collate_fn=data_collator, shuffle=False, num_workers=2)

# --- 6. FINAL FIXED MODEL (2x Subsampling + LogSoftmax) ---

class CNN_BiLSTM_ASR(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, vocab_size):
        super().__init__()

        # CNN: Reduces Time by 2x
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )

        # Calc Input Size (Same as before)
        feat_dim = input_dim
        feat_dim = (feat_dim + 2*1 - 3)//2 + 1
        feat_dim = (feat_dim + 2*1 - 3)//1 + 1
        self.lstm_input_size = 32 * feat_dim

        self.lstm = nn.LSTM(
            input_size=self.lstm_input_size,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
            dropout=config.DROPOUT
        )

        self.classifier = nn.Linear(hidden_dim * 2, vocab_size)

    def forward(self, x, input_lengths):
        x = x.unsqueeze(1)
        x = self.cnn(x)

        B, C, T_new, F_new = x.shape
        x = x.permute(0, 2, 1, 3).reshape(B, T_new, C * F_new)

        new_input_lengths = torch.div(input_lengths, 2, rounding_mode='floor')

        lstm_out, _ = self.lstm(x)
        logits = self.classifier(lstm_out)

        # --- CRITICAL FIX ---
        # Convert Logits to Log-Probabilities for CTC Loss
        log_probs = F.log_softmax(logits, dim=2)

        return log_probs, new_input_lengths

print("Model Fixed: Added LogSoftmax.")

# --- DEBUGGING: OVERFIT (2x Subsampling + Instance Norm) ---

print("--- STARTING 2x SUBSAMPLING TEST ---")

# 1. Grab Batch
batch = next(iter(train_loader))
inputs, labels, in_lens, label_lens = [x.to(config.DEVICE) for x in batch]

# --- INSTANCE NORM (Keep this!) ---
mean = inputs.mean(dim=1, keepdim=True)
std = inputs.std(dim=1, keepdim=True)
inputs = (inputs - mean) / (std + 1e-5)
# ----------------------------------

# 2. Re-init Model (2x Version)
model = CNN_BiLSTM_ASR(config.N_FEATURES, config.HIDDEN_SIZE, config.NUM_LAYERS, VOCAB_SIZE)
model.to(config.DEVICE)
model.train()

# 3. Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CTCLoss(blank=0, zero_infinity=False)

# 4. Loop
pbar = tqdm(range(2001), desc="Rapid Training")

for i in pbar:
    optimizer.zero_grad()

    # Forward
    logits, new_in_lens = model(inputs, in_lens)

    # Loss
    logits_permuted = logits.permute(1, 0, 2)
    loss = criterion(logits_permuted, labels, new_in_lens, label_lens)

    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    optimizer.step()

    pbar.set_postfix({"Loss": f"{loss.item():.4f}"})

    if i % 20 == 0:
        with torch.no_grad():
            pred_text = decode_prediction(logits[0], new_in_lens[0])
            ref_ids = labels[0][:label_lens[0]].cpu().tolist()
            ref_text = "".join([id2char[idx] for idx in ref_ids])

            print(f"\nStep {i} | Loss: {loss.item():.4f}")
            print(f"Pred: {pred_text}")

            if pred_text == ref_text:
                print(f"Ref:  {ref_text}")
                print("\n>>> VICTORY! 2x Subsampling fixed the bottleneck.")
                break
            print("-" * 30)

# --- FIX: UPDATED EVALUATION FUNCTION (TUPLE AWARE) ---

def decode_prediction(logits, input_len):
    """
    Greedy Decoder for CTC.
    logits: (Time, Vocab) - Single sample
    input_len: int - Scalar
    """
    # Argmax over the vocabulary dimension
    tokens = torch.argmax(logits, dim=-1) # Shape: (Time)

    decoded_preds = []
    # Collapse repeats and blanks
    for i in range(input_len):
        token = tokens[i].item()
        if token != 0: # 0 is blank
            if i == 0 or token != tokens[i-1].item():
                decoded_preds.append(id2char[token])
    return "".join(decoded_preds)

def evaluate(model, loader):
    model.eval()
    preds = []
    refs = []

    with torch.no_grad():
        for batch in loader:
            if batch is None or batch[0].shape[0] == 0: continue

            inputs, labels, in_lens, label_lens = [x.to(config.DEVICE) for x in batch]

            # --- FIX: Unpack the tuple correctly ---
            log_probs, new_lens = model(inputs, in_lens)

            for i in range(len(inputs)):
                # Use the 'new_lens' from the model (CNN adjusted length)
                pred_text = decode_prediction(log_probs[i], new_lens[i])

                # Get Reference
                ref_ids = labels[i][:label_lens[i]].cpu().tolist()
                ref_text = "".join([id2char[idx] for idx in ref_ids])

                preds.append(pred_text)
                refs.append(ref_text)

    if len(preds) == 0:
        return 1.0, 1.0, "None", "None"

    # Calculate Metrics
    w = wer(refs, preds)
    c = cer(refs, preds)
    return w, c, preds[0], refs[0]

print("Evaluation function updated. You can now run the training loop.")


# --- 7. FINAL TRAINING EXECUTION ---

model = CNN_BiLSTM_ASR(config.N_FEATURES, config.HIDDEN_SIZE, config.NUM_LAYERS, VOCAB_SIZE)
model.to(config.DEVICE)

# Use a safe Learning Rate
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4) # 0.0003
criterion = nn.CTCLoss(blank=0, zero_infinity=False)

print(f"\n--- Starting Fixed Training ---")

for epoch in range(config.EPOCHS):
    model.train()
    total_loss = 0
    num_batches = 0

    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}")

    for batch in pbar:
        if batch is None or batch[0].shape[0] == 0: continue

        inputs, labels, in_lens, label_lens = [x.to(config.DEVICE) for x in batch]

        optimizer.zero_grad()

        # Returns log_probs now!
        log_probs, new_input_lengths = model(inputs, in_lens)

        # CTC expects (Time, Batch, Vocab)
        log_probs_permuted = log_probs.permute(1, 0, 2)

        loss = criterion(log_probs_permuted, labels, new_input_lengths, label_lens)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1
        pbar.set_postfix({"Loss": f"{loss.item():.4f}"})

    avg_loss = total_loss / num_batches if num_batches > 0 else 0.0
    print(f"\nEpoch {epoch+1} Average Loss: {avg_loss:.4f}")

    # Eval
    test_wer, test_cer, sample_pred, sample_ref = evaluate(model, test_loader)
    print(f"Test WER: {test_wer:.2f}")
    print(f"Test CER: {test_cer:.2f}")
    print(f"Pred: {sample_pred}")
    print("-" * 50)

Running on: cuda
Loading dataset slices...
Initial Load: Train=5500, Test=600
Filtering empty sentences...


Filter:   0%|          | 0/5500 [00:00<?, ? examples/s]

Filter:   0%|          | 0/600 [00:00<?, ? examples/s]

Casting audio column...
Final Train Size: 5000 | Final Test Size: 500
Building vocabulary...


Scanning Vocab:   0%|          | 0/5000 [00:00<?, ?it/s]

Vocabulary Size: 47
Model Fixed: Added LogSoftmax.
--- STARTING 2x SUBSAMPLING TEST ---


Rapid Training:   0%|          | 0/2001 [00:00<?, ?it/s]


Step 0 | Loss: 23.6020
Pred: جیlیlجlیlیlیlیlیصقlجیجیlمlصlقlقآیقlآhlصقآقlآhآhآhقکآlصبیقآقآقجقجیآۀlقۀآlیمقآجقlآhآقlآlقlآقlقآتآlقصhجتlقlیآhیآمlجlمقآقآیآتlصآقlقlتlیlیlیقlیlیجیجتیlجlیج
------------------------------

Step 20 | Loss: 3.0984
Pred: 
------------------------------

Step 40 | Loss: 3.0603
Pred: 
------------------------------

Step 60 | Loss: 3.0382
Pred: 
------------------------------

Step 80 | Loss: 2.9777
Pred: 
------------------------------

Step 100 | Loss: 2.9329
Pred: 
------------------------------

Step 120 | Loss: 2.8796
Pred: ا
------------------------------

Step 140 | Loss: 2.7677
Pred: 
------------------------------

Step 160 | Loss: 2.6179
Pred: 
------------------------------

Step 180 | Loss: 2.3970
Pred: پ
------------------------------

Step 200 | Loss: 2.1259
Pred: پد
------------------------------

Step 220 | Loss: 1.6970
Pred: پاااا د
------------------------------

Step 240 | Loss: 1.2027
Pred: پزن ر ا زنو نکشد
------------------------------

Step 260

Epoch 1:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 1 Average Loss: 4.0422
Test WER: 1.00
Test CER: 1.00
Pred: 
--------------------------------------------------


Epoch 2:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 2 Average Loss: 3.0149
Test WER: 1.00
Test CER: 1.00
Pred: 
--------------------------------------------------


Epoch 3:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 3 Average Loss: 2.9415
Test WER: 1.00
Test CER: 0.97
Pred: ا
--------------------------------------------------


Epoch 4:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 4 Average Loss: 2.5390
Test WER: 1.00
Test CER: 0.77
Pred: ای اب د ی ا کایشای سبادشاششی
--------------------------------------------------


Epoch 5:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 5 Average Loss: 1.9900
Test WER: 1.01
Test CER: 0.57
Pred: مد ب تممنابدر مزتمی ان کرای شامامنمی دسندی مب منا دین بیش راشابشی
--------------------------------------------------


Epoch 6:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 6 Average Loss: 1.6660
Test WER: 0.99
Test CER: 0.56
Pred: مخد بتراب در زتمکامنکایشمهمنمی پسندی وب ملاقدن بیسراشابشی
--------------------------------------------------


Epoch 7:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 7 Average Loss: 1.4533
Test WER: 0.98
Test CER: 0.51
Pred: مفد بت ملاب دار زتمیخامکای شمامن می سندی مب ملاقدن بیسراشا بشیم
--------------------------------------------------


Epoch 8:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 8 Average Loss: 1.3000
Test WER: 0.97
Test CER: 0.50
Pred: مخد بت ملاب دار وزدمیخان کری شمام من میپسندی وب ملاق دن بیسراشا بشیم
--------------------------------------------------


Epoch 9:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 9 Average Loss: 1.1827
Test WER: 0.97
Test CER: 0.47
Pred: مخی بت ملاب دار وزتومیخام کاگی شما ام منمیپسندی وب ملاق دین بیشراشابشیم
--------------------------------------------------


Epoch 10:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 10 Average Loss: 1.0883
Test WER: 0.96
Test CER: 0.46
Pred: مخد بت ملاب دار بزتمخام کرگی شما هم من میپسندی بب ملاق د ان بیسراشا بشیم
--------------------------------------------------


Epoch 11:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 11 Average Loss: 1.0014
Test WER: 0.96
Test CER: 0.45
Pred: مخید مت ملاب دارم و زتمیخام جاگی شما آملمیپسندی وب ملاق د ان بیسراشنا بشیم
--------------------------------------------------


Epoch 12:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 12 Average Loss: 0.9233
Test WER: 0.96
Test CER: 0.45
Pred: مخد بت مراب دارب و ازت میام کاگ شما ام من میپسندی وب ملاقدا ان بیسراشنابشیم
--------------------------------------------------


Epoch 13:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 13 Average Loss: 0.8570
Test WER: 0.96
Test CER: 0.45
Pred: مخد بتو مراب دارم و ازتمهام کاگی شما م مر میپزندی وب ملاقدا ان بیسراشنابشیم
--------------------------------------------------


Epoch 14:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 14 Average Loss: 0.7903
Test WER: 0.95
Test CER: 0.45
Pred: مخید مت منرابه دار و ازت میهام کاگی شما ام من میپسندی وب ملاقدا ان بیسراشنابشیم
--------------------------------------------------


Epoch 15:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 15 Average Loss: 0.7243
Test WER: 0.95
Test CER: 0.45
Pred: مخل متو نلاب دارا و ازت  میهامکاگی شماهم مل میپسندی وب ملاقد انبیسر راش نابشیم
--------------------------------------------------


Epoch 16:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 16 Average Loss: 0.6698
Test WER: 0.93
Test CER: 0.44
Pred: مخل متو نراب دارب و ازتمام جاگ شماهم ملمیپسندی وب ملاقد انبیسراشنابشیم
--------------------------------------------------


Epoch 17:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 17 Average Loss: 0.6145
Test WER: 0.94
Test CER: 0.44
Pred: مخد متو نداب دار و ازت میامگگی شماهمل میپسندی وب ملاق د انبیسراشناب شیم
--------------------------------------------------


Epoch 18:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 18 Average Loss: 0.5626
Test WER: 0.92
Test CER: 0.44
Pred: مخیل متونلاب دار وه ازتمیامجاگ شما امل میپسندی وب ملاقد انبیسراشنا بشیم
--------------------------------------------------


Epoch 19:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 19 Average Loss: 0.5204
Test WER: 0.92
Test CER: 0.44
Pred: مخیل متو منراق دارب و ازت میخامکاگ شما هم من میپسندی وب ملاقد انبیسراشنابشیم
--------------------------------------------------


Epoch 20:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 20 Average Loss: 0.4747
Test WER: 0.94
Test CER: 0.44
Pred: مخیل متو نراب دار و اعزد میهام کاگی شما هآمن میپسندی وب ملاقد انبیسر راشنا بشیم
--------------------------------------------------


Epoch 21:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 21 Average Loss: 0.4330
Test WER: 0.92
Test CER: 0.43
Pred: مخیل مطو نراق دارب و عزد میکامکاک شما همل میپسندی وب ملاقد انبیسراش نا بشیم
--------------------------------------------------


Epoch 22:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 22 Average Loss: 0.3993
Test WER: 0.91
Test CER: 0.43
Pred: مخل مت منرابه دارب و عزد میکامکگ شمامن میپسندی وب ملاقد ان بیسراشنا بشیم
--------------------------------------------------


Epoch 23:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 23 Average Loss: 0.3648
Test WER: 0.92
Test CER: 0.44
Pred: مخل متومنراب دارب و ازتمهامکگی شما امل میپسندی وب ملاقد انبیسراشنا بشیم
--------------------------------------------------


Epoch 24:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 24 Average Loss: 0.3329
Test WER: 0.94
Test CER: 0.44
Pred: مخیل متونعراقه دارب و عزت مکامکگ شماهمل میپذندی وب ملاقد انبیشراش نا بشیم
--------------------------------------------------


Epoch 25:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 25 Average Loss: 0.3108
Test WER: 0.92
Test CER: 0.44
Pred: مخل متونعراق دار و عزت مکامکگ شما همل میپسندی وب معلاقد ان بیسراشنابشیم
--------------------------------------------------


Epoch 26:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 26 Average Loss: 0.2806
Test WER: 0.92
Test CER: 0.43
Pred: م خل متو نعرا به دارب و عزت میهام کاگ شمها آمل میپسندی وب معلاقد ان بیسراشتنا بشیم
--------------------------------------------------


Epoch 27:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 27 Average Loss: 0.2584
Test WER: 0.93
Test CER: 0.44
Pred: مخل مطو منععراقه دارب وه اعزت مکام کاگ شماامل میپسندهی وب معلاقد انبیشراش نابشیم
--------------------------------------------------


Epoch 28:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 28 Average Loss: 0.2363
Test WER: 0.93
Test CER: 0.43
Pred: مخل متو منعرابه دار وه اعزت میکام کاکگ شماهمر میپسندهی وب معلاقد انبیسراشنا بشیم
--------------------------------------------------


Epoch 29:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 29 Average Loss: 0.2154
Test WER: 0.91
Test CER: 0.43
Pred: مخل متو مرابه دارب و عزت میکامکعگ شما امن میپسندهی وب معلاقد ان بیسراش نا بشیم
--------------------------------------------------


Epoch 30:   0%|          | 0/157 [00:00<?, ?it/s]


Epoch 30 Average Loss: 0.2006
Test WER: 0.95
Test CER: 0.43
Pred: مخل مطو معراقه دارم وه عزت میهامکاگی شما همل میپسندهی وب ملاقد ان بیسراش نو بشیم
--------------------------------------------------


In [8]:
%pip install --upgrade datasets

Collecting datasets
  Downloading datasets-4.4.1-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.2 kB)
Downloading datasets-4.4.1-py3-none-any.whl (511 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.6/511.6 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl (47.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyarrow, datasets
  Attempting uninstall: pyarrow
    Found existing installation: pyarrow 18.1.0
    Uninstalling pyarrow-18.1.0:
      Successfully uninstalled pyarrow-18.1.0
  Attempting uninstall: datasets
    Found existing installation: datasets 4.0.0
    Uninstalling datasets-4.0.0:
      Successfully uninstalled datasets-4.0.0
Successfully installed datasets-4.4.1 pya