In [None]:
from datasets import load_dataset

# Load the English–Persian parallel corpus from the Hugging Face Hub.
ds = load_dataset("shenasa/English-Persian-Parallel-Dataset")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

dataset.tsv:   0%|          | 0.00/872M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3960172 [00:00<?, ? examples/s]

In [None]:
# Normalise the two column names to "en" / "fa".
# NOTE(review): assumes the train split has exactly two columns, ordered
# (English, Persian) — confirm against the dataset card.
col_en, col_fa = ds["train"].column_names

# Rename only when needed so that re-running this cell is a no-op instead of
# attempting to rename a column onto its own name.
if col_en != "en":
    ds = ds.rename_column(col_en, "en")
if col_fa != "fa":
    ds = ds.rename_column(col_fa, "fa")

print(ds["train"].column_names)


['en', 'fa']


In [None]:
from datasets import DatasetDict

# Shuffle the train split once with a fixed seed so the subset below is reproducible
ds_shuffled = ds["train"].shuffle(seed=42)

# Keep only the first 50,000 shuffled pairs as the working subset
small_ds = ds_shuffled.select(range(50000))

print("Subset size:", len(small_ds))


Subset size: 50000


In [None]:
import re

# Patterns are compiled once because the filter is evaluated for every sentence pair.
_URL_LIKE_RE = re.compile(r"http|www\.|\.com|\.org|\.net")
_DIGIT_RE = re.compile(r"\d")
_ARABIC_SCRIPT_RE = re.compile(r"[\u0600-\u06FF]")
_LATIN_LETTER_RE = re.compile(r"[a-zA-Z]")


def is_good_pair(en, fa, min_tokens=4, max_tokens=50, max_digits=6):
    """Return True when an (English, Persian) pair passes basic quality checks.

    A pair is rejected when any of the following holds:
      * either side is not a string (e.g. a missing value), or is empty after
        stripping, or both sides are identical;
      * either side contains a URL-like fragment (http, www., .com, .org, .net);
      * either side contains more than ``max_digits`` digit characters;
      * either side has fewer than ``min_tokens`` or more than ``max_tokens``
        whitespace-separated tokens;
      * the Persian side has no character in U+0600–U+06FF, or the English
        side has no ASCII letter.

    Args:
        en: English sentence.
        fa: Persian sentence.
        min_tokens: Minimum whitespace-token count allowed on each side.
        max_tokens: Maximum whitespace-token count allowed on each side.
        max_digits: Maximum number of digit characters allowed on each side.

    Returns:
        bool: True if the pair should be kept, False otherwise.
    """
    # Missing cells would otherwise crash on .strip(); treat them as bad pairs.
    if not isinstance(en, str) or not isinstance(fa, str):
        return False

    en = en.strip()
    fa = fa.strip()

    # 1. empty or identical
    if not en or not fa or en == fa:
        return False

    for text in (en, fa):
        # 2. URL-like fragments
        if _URL_LIKE_RE.search(text.lower()):
            return False
        # 3. too many digits (dates, IDs, timestamps)
        if len(_DIGIT_RE.findall(text)) > max_digits:
            return False

    # 4. token length
    en_len = len(en.split())
    fa_len = len(fa.split())
    if en_len < min_tokens or fa_len < min_tokens:
        return False
    if en_len > max_tokens or fa_len > max_tokens:
        return False

    # 5. language sanity: each side must contain characters of its own script
    if not _ARABIC_SCRIPT_RE.search(fa):
        return False
    if not _LATIN_LETTER_RE.search(en):
        return False

    return True


In [None]:
# Apply the basic quality filter to the subset and report how many pairs survive.
print("Before cleaning:", len(small_ds))

cleaned_ds = small_ds.filter(
    lambda x: is_good_pair(x["en"], x["fa"])
)

print("After cleaning:", len(cleaned_ds))
print("Removed:", len(small_ds) - len(cleaned_ds))
print("Remaining %:", round(len(cleaned_ds) / len(small_ds) * 100, 2))


Before cleaning: 50000


Filter:   0%|          | 0/50000 [00:00<?, ? examples/s]

After cleaning: 35893
Removed: 14107
Remaining %: 71.79


In [None]:
def is_better_pair(en, fa):
    """Stricter pair filter: `is_good_pair` plus a minimum share of script letters.

    On top of the basic checks, at least 40% of the characters of `en` must be
    ASCII letters and at least 40% of the characters of `fa` must fall in
    U+0600–U+06FF. The shares are computed over the strings exactly as passed
    in (not stripped).

    Returns:
        bool: True if the pair passes both the basic and the ratio checks.
    """
    if is_good_pair(en, fa):
        latin_share = len(re.findall(r"[a-zA-Z]", en)) / max(len(en), 1)
        arabic_share = len(re.findall(r"[\u0600-\u06FF]", fa)) / max(len(fa), 1)
        # Keep the pair only if neither side is dominated by non-letter characters.
        return not (latin_share < 0.4 or arabic_share < 0.4)
    return False


# Apply the stricter letter-ratio filter on top of the already cleaned subset.
print("Before extra cleaning:", len(cleaned_ds))

cleaned_ds_v2 = cleaned_ds.filter(
    lambda x: is_better_pair(x["en"], x["fa"])
)

print("After extra cleaning:", len(cleaned_ds_v2))
print("Removed:", len(cleaned_ds) - len(cleaned_ds_v2))
print("Remaining %:", round(len(cleaned_ds_v2) / len(cleaned_ds) * 100, 2))


Before extra cleaning: 35893


Filter:   0%|          | 0/35893 [00:00<?, ? examples/s]

After extra cleaning: 33991
Removed: 1902
Remaining %: 94.7


In [None]:
# First split: hold out 2,000 pairs as the test set (fixed seed for reproducibility)
train_val_test = cleaned_ds_v2.train_test_split(test_size=2000, seed=42)

temp_train_val = train_val_test["train"]
test_set = train_val_test["test"]

# Second split: carve another 2,000 pairs out of the remainder for validation
train_val = temp_train_val.train_test_split(
    test_size=2000,
    seed=42
)

train_set = train_val["train"]
validation_set = train_val["test"]

print("Train size:", len(train_set))
print("Validation size:", len(validation_set))
print("Test size:", len(test_set))


Train size: 29991
Validation size: 2000
Test size: 2000


In [None]:
import os

# Output directory for the plain-text split files; exist_ok makes the cell re-runnable.
os.makedirs("spm_data", exist_ok=True)

def write_parallel(dataset, src_path, tgt_path):
    """Write a parallel corpus as two line-aligned UTF-8 text files.

    Line i of `src_path` holds the English side and line i of `tgt_path` the
    Persian side of the i-th example.

    Args:
        dataset: Iterable of mappings with string values under "en" and "fa".
        src_path: Destination path for the English sentences.
        tgt_path: Destination path for the Persian sentences.
    """
    with open(src_path, "w", encoding="utf-8") as fsrc, \
         open(tgt_path, "w", encoding="utf-8") as ftgt:
        for ex in dataset:
            # str.split() without arguments splits on every whitespace run
            # (including embedded "\n" / "\r") and drops leading/trailing
            # whitespace, so each example occupies exactly one line per file
            # and the two files cannot drift out of alignment.
            fsrc.write(" ".join(ex["en"].split()) + "\n")
            ftgt.write(" ".join(ex["fa"].split()) + "\n")

# Dump each split to a pair of line-aligned raw-text files under spm_data/.
write_parallel(train_set, "spm_data/train.en", "spm_data/train.fa")
write_parallel(validation_set, "spm_data/valid.en", "spm_data/valid.fa")
write_parallel(test_set, "spm_data/test.en", "spm_data/test.fa")

print("Raw text files written.")


Raw text files written.


In [None]:
!pip install sentencepiece




In [None]:
import sentencepiece as spm

# Train one shared BPE model on the English and Persian training text.
# Special-token ids are set explicitly: by default SentencePiece assigns id 0 to
# <unk> and defines no <pad> piece, which would make the notebook's hard-coded
# PAD_ID = 0 collide with <unk> (padding_idx / ignore_index would then also
# swallow unknown tokens).
spm.SentencePieceTrainer.train(
    input="spm_data/train.en,spm_data/train.fa",
    model_prefix="spm_en_fa",
    vocab_size=8000,
    character_coverage=1.0,
    model_type="bpe",
    pad_id=0,
    unk_id=1,
    bos_id=2,
    eos_id=3,
)


In [None]:
# Load the trained SentencePiece model from the file written by the trainer.
sp = spm.SentencePieceProcessor()
sp.load("spm_en_fa.model")

def encode_file(in_path, out_path):
    """Segment a text file into SentencePiece pieces, one output line per input line.

    Each line of `in_path` is stripped, encoded with the global `sp` processor
    into string pieces, and written to `out_path` as space-separated pieces.
    """
    with open(in_path, encoding="utf-8") as reader, \
         open(out_path, "w", encoding="utf-8") as writer:
        writer.writelines(
            " ".join(sp.encode(raw_line.strip(), out_type=str)) + "\n"
            for raw_line in reader
        )

# Write a piece-segmented (*.bpe.*) copy of every raw split file.
encode_file("spm_data/train.en", "spm_data/train.bpe.en")
encode_file("spm_data/train.fa", "spm_data/train.bpe.fa")
encode_file("spm_data/valid.en", "spm_data/valid.bpe.en")
encode_file("spm_data/valid.fa", "spm_data/valid.bpe.fa")
encode_file("spm_data/test.en",  "spm_data/test.bpe.en")
encode_file("spm_data/test.fa",  "spm_data/test.bpe.fa")

print("BPE encoding done.")


BPE encoding done.


In [None]:
# Show the first three raw training lines next to their piece-segmented versions.
!head -n 3 spm_data/train.en
!head -n 3 spm_data/train.bpe.en


Then he gave them an exact answer to their question mentioned in (21:5) about when and how Jerusalem would be destroyed.
Had Wautier led the revolt, it might well have succeeded;
Look, I know what you think about me,you know, and about what happened,
▁Then ▁he ▁gave ▁them ▁an ▁exact ▁answer ▁to ▁their ▁question ▁mention ed ▁in ▁( 21 : 5 ) ▁about ▁when ▁and ▁how ▁Jerusalem ▁would ▁be ▁destroy ed .
▁H ad ▁W aut ier ▁l ed ▁the ▁rev ol t , ▁it ▁might ▁well ▁have ▁succeed ed ;
▁L ook , ▁I ▁know ▁what ▁you ▁think ▁about ▁me , y ou ▁know , ▁and ▁about ▁what ▁happened ,


In [None]:
import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.load("spm_en_fa.model")

# NOTE(review): PAD_ID is hard-coded rather than read from the model. If the
# model was trained without a dedicated <pad> piece, id 0 belongs to another
# piece (SentencePiece's default puts <unk> there) — check sp.id_to_piece(0)
# before relying on it.
PAD_ID = 0
# BOS / EOS ids are taken from the loaded model.
BOS_ID = sp.bos_id()
EOS_ID = sp.eos_id()

print("Vocab size:", sp.get_piece_size())
print("BOS:", BOS_ID, "EOS:", EOS_ID)


Vocab size: 8000
BOS: 1 EOS: 2


In [None]:
def encode_ids(line):
    """Encode one line of text to SentencePiece ids wrapped in BOS ... EOS.

    The line is stripped before encoding with the global `sp` processor.
    """
    return [BOS_ID, *sp.encode(line.strip(), out_type=int), EOS_ID]


In [None]:
def load_parallel(src_path, tgt_path, max_len=80):
    """Read two line-aligned text files and return encoded (source, target) pairs.

    Lines are paired positionally and each side is encoded with `encode_ids`.
    A pair is kept only when both id sequences (BOS/EOS included) have at most
    `max_len` ids.

    Returns:
        list[tuple[list[int], list[int]]]: the retained (src_ids, tgt_ids) pairs.
    """
    with open(src_path, encoding="utf-8") as src_file, \
         open(tgt_path, encoding="utf-8") as tgt_file:
        encoded_pairs = (
            (encode_ids(src_line), encode_ids(tgt_line))
            for src_line, tgt_line in zip(src_file, tgt_file)
        )
        return [
            (src_ids, tgt_ids)
            for src_ids, tgt_ids in encoded_pairs
            if max(len(src_ids), len(tgt_ids)) <= max_len
        ]

# Load the RAW text files: encode_ids() already applies SentencePiece, so feeding
# it the pre-segmented *.bpe.* files would tokenize the text twice and no longer
# match what translate_sentence() produces from a raw sentence at inference time.
train_data = load_parallel(
    "spm_data/train.en",
    "spm_data/train.fa"
)

valid_data = load_parallel(
    "spm_data/valid.en",
    "spm_data/valid.fa"
)

test_data = load_parallel(
    "spm_data/test.en",
    "spm_data/test.fa"
)

print("Train:", len(train_data))
print("Valid:", len(valid_data))
print("Test:", len(test_data))


Train: 29526
Valid: 1976
Test: 1973


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class MTDataset(Dataset):
    """Thin `Dataset` wrapper around an in-memory, indexable collection of examples."""

    def __init__(self, data):
        """Store `data`; items are returned as-is by `__getitem__`."""
        self.data = data

    def __len__(self):
        """Number of stored examples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the example stored at position `idx`."""
        return self.data[idx]

def collate_fn(batch):
    """Pad a list of (src_ids, tgt_ids) pairs into two tensors.

    Each side is padded with PAD_ID to the longest sequence of that side in
    the batch, using `pad_sequence`'s default layout.

    Returns:
        tuple[Tensor, Tensor]: padded (src, tgt) tensors.
    """
    def _pad(sequences):
        # One tensor per sequence, stacked and right-padded with PAD_ID.
        return torch.nn.utils.rnn.pad_sequence(
            [torch.tensor(seq) for seq in sequences],
            padding_value=PAD_ID
        )

    sources, targets = zip(*batch)
    return _pad(sources), _pad(targets)


In [None]:
from torch.utils.data import DataLoader

BATCH_SIZE = 64

# All three loaders share BATCH_SIZE, the MTDataset wrapper and collate_fn;
# only the training loader shuffles.
train_loader = DataLoader(
    MTDataset(train_data),
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn
)

validation_loader = DataLoader(
    MTDataset(valid_data),
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn
)
test_loader = DataLoader(
    MTDataset(test_data),
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn
)
print("train_loader ready")


train_loader ready


In [None]:
import torch
import torch.nn as nn

class Encoder(nn.Module):
    """Bidirectional GRU encoder over embedded source tokens."""

    def __init__(
        self,
        vocab_size,
        embed_size=256,
        hidden_size=256,
        num_layers=1,
        dropout=0.1
    ):
        """Build the embedding table and the bidirectional GRU.

        `dropout` is only passed to the GRU when there is more than one layer;
        with a single layer it is replaced by 0.
        """
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=PAD_ID)

        inter_layer_dropout = dropout if num_layers > 1 else 0
        self.rnn = nn.GRU(
            embed_size,
            hidden_size,
            num_layers=num_layers,
            bidirectional=True,
            dropout=inter_layer_dropout
        )

    def forward(self, src):
        """Encode a batch of source id sequences.

        Args:
            src: (seq_len, batch) tensor of token ids.

        Returns:
            (outputs, hidden) exactly as produced by the GRU:
            outputs is (seq_len, batch, hidden*2) and hidden is
            (num_layers*2, batch, hidden).
        """
        return self.rnn(self.embedding(src))


In [None]:
VOCAB_SIZE = sp.get_piece_size()

# Pick the device at runtime instead of calling .cuda() unconditionally, so the
# cell also runs on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

encoder = Encoder(
    vocab_size=VOCAB_SIZE,
    embed_size=256,      # 256 → 512
    hidden_size=256,     # 256 → 512
    num_layers=1,
).to(device)

print("Encoder initialized")


Encoder initialized


In [None]:
# Smoke test: run one training batch through the encoder and inspect the shapes.
src_batch, tgt_batch = next(iter(train_loader))

# Move the batch to wherever the encoder's weights live rather than assuming CUDA.
src_batch = src_batch.to(next(encoder.parameters()).device)

enc_outputs, enc_hidden = encoder(src_batch)

print("Encoder outputs:", enc_outputs.shape)
print("Encoder hidden:", enc_hidden.shape)


Encoder outputs: torch.Size([70, 64, 512])
Encoder hidden: torch.Size([2, 64, 256])


In [None]:
class Attention(nn.Module):
    """Additive attention scoring each encoder position against a decoder state."""

    def __init__(self, enc_hidden_dim, dec_hidden_dim):
        """Create the scoring layers.

        The encoder feature size is taken to be `enc_hidden_dim * 2`, matching a
        bidirectional encoder.
        """
        super().__init__()

        self.attn = nn.Linear(enc_hidden_dim * 2 + dec_hidden_dim, dec_hidden_dim)
        self.v = nn.Linear(dec_hidden_dim, 1, bias=False)

    def forward(self, decoder_hidden, encoder_outputs):
        """Compute attention weights over source positions.

        Args:
            decoder_hidden: [batch, dec_hidden]
            encoder_outputs: [src_len, batch, enc_hidden*2]

        Returns:
            [batch, src_len] tensor whose rows are softmax-normalised.
        """
        n_positions = encoder_outputs.size(0)

        # Batch-major views: repeat the decoder state once per source position.
        keys = encoder_outputs.permute(1, 0, 2)
        query = decoder_hidden.unsqueeze(1).expand(-1, n_positions, -1)

        energy = torch.tanh(self.attn(torch.cat((query, keys), dim=2)))
        scores = self.v(energy).squeeze(2)

        return torch.softmax(scores, dim=1)


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


cuda


In [None]:
encoder = encoder.to(device)

attention = Attention(enc_hidden_dim=256, dec_hidden_dim=256)
attention = attention.to(device)

src_batch, tgt_batch = next(iter(train_loader))

src_batch = src_batch.to(device)
tgt_batch = tgt_batch.to(device)

# Re-encode the freshly drawn batch so that enc_outputs / enc_hidden used by the
# following smoke tests belong to the same examples as src_batch / tgt_batch.
enc_outputs, enc_hidden = encoder(src_batch)


In [None]:


# Smoke test: use the last entry of the encoder's final hidden state as the
# decoder state and check the shape of the resulting attention weights.
decoder_hidden = enc_hidden[-1]

attn_weights = attention(decoder_hidden, enc_outputs)

print("Attention weights shape:", attn_weights.shape)


Attention weights shape: torch.Size([64, 70])


In [None]:
# Report the device of the attention module's first parameter.
print("Attention module ready on", next(attention.parameters()).device)

Attention module ready on cuda:0


In [None]:
class Decoder(nn.Module):
    """Single-step GRU decoder that attends over the encoder outputs."""

    def __init__(self, output_dim, emb_dim, enc_hidden_dim, dec_hidden_dim, attention):
        """Build the target embedding, the GRU and the output projection.

        Args:
            output_dim: Size of the target vocabulary (number of logits).
            emb_dim: Target embedding size.
            enc_hidden_dim: Encoder hidden size per direction; context vectors
                are taken to have `enc_hidden_dim * 2` features.
            dec_hidden_dim: Decoder GRU hidden size.
            attention: Module called as attention(hidden_state, encoder_outputs)
                and returning [batch, src_len] weights.
        """
        super().__init__()

        self.output_dim = output_dim
        self.attention = attention

        context_dim = enc_hidden_dim * 2

        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim + context_dim, dec_hidden_dim)
        self.fc_out = nn.Linear(emb_dim + context_dim + dec_hidden_dim, output_dim)

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            input: [batch] ids of the previous target tokens.
            hidden: decoder hidden state; its last entry drives the attention.
            encoder_outputs: [src_len, batch, enc_hidden*2]

        Returns:
            (prediction, hidden, attn_weights): logits of shape
            [batch, output_dim], the updated hidden state, and the
            [batch, src_len] attention weights.
        """
        embedded = self.embedding(input.unsqueeze(0))            # [1, batch, emb_dim]

        # Attention is driven by the last entry of the hidden state only.
        weights = self.attention(hidden[-1], encoder_outputs).unsqueeze(1)  # [batch, 1, src_len]

        # Weighted sum of encoder states, returned to time-major layout.
        context = torch.bmm(weights, encoder_outputs.permute(1, 0, 2))      # [batch, 1, enc_hidden*2]
        context = context.permute(1, 0, 2)                                  # [1, batch, enc_hidden*2]

        output, hidden = self.rnn(torch.cat((embedded, context), dim=2), hidden)

        # The projection sees the GRU output, the context and the input embedding.
        features = torch.cat(
            (output.squeeze(0), context.squeeze(0), embedded.squeeze(0)),
            dim=1
        )

        return self.fc_out(features), hidden, weights.squeeze(1)


In [None]:
# Instantiate the decoder around the existing attention module and move it to `device`.
decoder = Decoder(
    output_dim=VOCAB_SIZE,
    emb_dim=256,
    enc_hidden_dim=256,
    dec_hidden_dim=256,
    attention=attention
).to(device)

print("Decoder ready")


Decoder ready


In [None]:
# Smoke test: run a single decoder step, feeding the first target token (<bos>)
# of every sequence in the batch.

decoder_hidden = enc_hidden[-1].unsqueeze(0)
input_token = tgt_batch[0]        # [batch]

prediction, dec_hidden, attn = decoder(
    input_token,
    decoder_hidden,
    enc_outputs
)

print("Prediction shape:", prediction.shape)
print("Decoder hidden shape:", dec_hidden.shape)
print("Attention shape:", attn.shape)


Prediction shape: torch.Size([64, 8000])
Decoder hidden shape: torch.Size([1, 64, 256])
Attention shape: torch.Size([64, 70])


In [None]:
class Seq2Seq(nn.Module):
    """Encoder–decoder wrapper that decodes step by step with optional teacher forcing."""

    def __init__(self, encoder, decoder, device):
        """Keep references to the two sub-modules and the device used for outputs."""
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, tgt, teacher_forcing_ratio=0.5):
        """Produce logits for every target position after the first.

        Args:
            src: [src_len, batch] source ids.
            tgt: [tgt_len, batch] target ids; row 0 is the <bos> row.
            teacher_forcing_ratio: probability, drawn independently at each
                step, of feeding the gold token instead of the model's argmax.

        Returns:
            [tgt_len, batch, vocab] tensor of logits; position 0 is left as zeros.
        """
        tgt_len, batch_size = tgt.shape[0], tgt.shape[1]

        outputs = torch.zeros(
            tgt_len, batch_size, self.decoder.output_dim
        ).to(self.device)

        enc_outputs, enc_hidden = self.encoder(src)

        # The decoder starts from the last entry of the encoder's final hidden state.
        hidden = enc_hidden[-1].unsqueeze(0)

        step_input = tgt[0]  # <bos>
        for t in range(1, tgt_len):
            step_logits, hidden, _ = self.decoder(step_input, hidden, enc_outputs)
            outputs[t] = step_logits

            use_gold = torch.rand(1).item() < teacher_forcing_ratio
            step_input = tgt[t] if use_gold else step_logits.argmax(1)

        return outputs


In [None]:
# Combine the existing encoder and decoder instances into one trainable model.
model = Seq2Seq(encoder, decoder, device).to(device)
print("Seq2Seq model ready")


Seq2Seq model ready


In [None]:
# Target positions equal to PAD_ID do not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index=PAD_ID)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [None]:
# Sanity check: one full optimisation step on a single batch.
# Note that this already updates the model weights before the real training loop.
model.train()

src, tgt = next(iter(train_loader))
src = src.to(device)
tgt = tgt.to(device)

optimizer.zero_grad()

output = model(src, tgt)

# Position 0 (<bos>) is excluded from both the logits and the targets
output_dim = output.shape[-1]

loss = criterion(
    output[1:].reshape(-1, output_dim),
    tgt[1:].reshape(-1)
)

loss.backward()
optimizer.step()

print("Single-batch loss:", loss.item())


Single-batch loss: 9.038698196411133


In [None]:
def train_epoch(model, loader, optimizer, criterion, clip=1.0):
    """Train `model` for one pass over `loader` and return the mean batch loss.

    Gradients are clipped to a total norm of `clip` before every optimizer
    step. Batches are moved to the global `device`.
    """
    model.train()
    running_loss = 0

    for src, tgt in loader:
        src, tgt = src.to(device), tgt.to(device)

        optimizer.zero_grad()
        logits = model(src, tgt)

        # Drop position 0 (<bos>) and flatten time and batch for the criterion.
        vocab_dim = logits.shape[-1]
        flat_logits = logits[1:].reshape(-1, vocab_dim)
        flat_targets = tgt[1:].reshape(-1)
        loss = criterion(flat_logits, flat_targets)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        running_loss += loss.item()

    return running_loss / len(loader)


In [None]:
def evaluate(model, loader, criterion):
    """Return the mean batch loss of `model` over `loader` without updating it.

    The model is put in eval mode and decoded with teacher forcing disabled
    (ratio 0), so each step is fed the model's own previous prediction.
    Batches are moved to the global `device`.
    """
    model.eval()
    running_loss = 0

    with torch.no_grad():
        for src, tgt in loader:
            src, tgt = src.to(device), tgt.to(device)

            logits = model(src, tgt, teacher_forcing_ratio=0)

            # Drop position 0 (<bos>) and flatten time and batch for the criterion.
            vocab_dim = logits.shape[-1]
            flat_logits = logits[1:].reshape(-1, vocab_dim)
            flat_targets = tgt[1:].reshape(-1)

            running_loss += criterion(flat_logits, flat_targets).item()

    return running_loss / len(loader)


In [None]:
# Training with early stopping: stop once the validation loss has failed to
# improve for `patience` consecutive epochs; the best weights are checkpointed.
best_val_loss = float("inf")
patience = 3
counter = 0
MAX_EPOCHS = 10   # upper bound on epochs; early stopping may end training sooner

for epoch in range(MAX_EPOCHS):
    train_loss = train_epoch(
        model, train_loader, optimizer, criterion
    )

    val_loss = evaluate(
        model, validation_loader, criterion
    )

    print(f"Epoch {epoch+1}")
    print(f"  Train Loss: {train_loss:.3f}")
    print(f"  Val Loss:   {val_loss:.3f}")

    if val_loss < best_val_loss:
        # New best: reset the patience counter and overwrite the checkpoint.
        best_val_loss = val_loss
        counter = 0

        torch.save(model.state_dict(), "best_model.pt")
        print("  ✅ Best model saved")

    else:
        counter += 1
        print(f"  ⏸ No improvement ({counter}/{patience})")

    if counter >= patience:
        print("🛑 Early stopping")
        break


Epoch 1
  Train Loss: 5.717
  Val Loss:   5.702
  ✅ Best model saved
Epoch 2
  Train Loss: 4.721
  Val Loss:   5.394
  ✅ Best model saved
Epoch 3
  Train Loss: 4.161
  Val Loss:   5.248
  ✅ Best model saved
Epoch 4
  Train Loss: 3.796
  Val Loss:   5.171
  ✅ Best model saved
Epoch 5
  Train Loss: 3.482
  Val Loss:   5.194
  ⏸ No improvement (1/3)
Epoch 6
  Train Loss: 3.241
  Val Loss:   5.219
  ⏸ No improvement (2/3)
Epoch 7
  Train Loss: 3.047
  Val Loss:   5.261
  ⏸ No improvement (3/3)
🛑 Early stopping


In [None]:
# Restore the best checkpoint. map_location lets a checkpoint saved on GPU be
# loaded when only a CPU is available.
model.load_state_dict(torch.load("best_model.pt", map_location=device))
model.eval()


Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(8000, 256, padding_idx=0)
    (rnn): GRU(256, 256, bidirectional=True)
  )
  (decoder): Decoder(
    (attention): Attention(
      (attn): Linear(in_features=768, out_features=256, bias=True)
      (v): Linear(in_features=256, out_features=1, bias=False)
    )
    (embedding): Embedding(8000, 256)
    (rnn): GRU(768, 256)
    (fc_out): Linear(in_features=1024, out_features=8000, bias=True)
  )
)

In [None]:
def decode_ids_to_text(ids):
    """Decode SentencePiece ids to text after dropping PAD, BOS and EOS ids."""
    special_ids = (PAD_ID, BOS_ID, EOS_ID)
    return sp.decode([token_id for token_id in ids if token_id not in special_ids])


In [None]:
def translate_sentence(sentence, model, max_len=100):
    """Greedily translate one raw source sentence with `model`.

    Args:
        sentence: Raw (un-tokenized) source text.
        model: Seq2Seq-style model exposing `.encoder` and `.decoder`.
        max_len: Maximum number of decoding steps.

    Returns:
        str: The decoded translation with PAD/BOS/EOS ids removed.
    """
    model.eval()

    # Reuse the training-time helper so inference sees the same preprocessing
    # (strip + BOS/EOS wrapping) as the training data.
    src_ids = encode_ids(sentence)
    src_tensor = torch.tensor(src_ids, device=device).unsqueeze(1)  # [src_len, 1]

    trg_ids = [BOS_ID]

    with torch.no_grad():
        enc_outputs, enc_hidden = model.encoder(src_tensor)

        # Initialise the decoder from the last entry of the encoder's final
        # hidden state, mirroring Seq2Seq.forward.
        hidden = enc_hidden[-1].unsqueeze(0)  # [1, 1, hidden]

        for _ in range(max_len):
            trg_token = torch.tensor([trg_ids[-1]], device=device)  # [1]

            output, hidden, _ = model.decoder(trg_token, hidden, enc_outputs)

            next_id = output.argmax(1).item()
            trg_ids.append(next_id)

            # Stop as soon as the model emits end-of-sentence.
            if next_id == EOS_ID:
                break

    # Shared helper strips PAD/BOS/EOS and detokenizes.
    return decode_ids_to_text(trg_ids)

In [None]:
# Translate every test sentence; hypotheses[i] and references[i] belong to the
# same test example.
test_en = [ex["en"] for ex in test_set]
test_fa = [ex["fa"] for ex in test_set]

hypotheses = []
references = []

for en, fa in zip(test_en, test_fa):
    pred = translate_sentence(en, model)
    hypotheses.append(pred)
    references.append(fa)


In [None]:
!pip install sacrebleu

Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/104.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Downloading portalocker-3.2.0-py3-none-any.whl (22 kB)
Installing collected packages: portalocker, colorama, sacre

In [None]:
from sacrebleu import corpus_bleu

# Corpus-level BLEU of the hypotheses against one reference per sentence.
bleu = corpus_bleu(hypotheses, [references])
print("RNN+Attn EN→FA BLEU:", bleu.score)




RNN+Attn EN→FA BLEU: 8.124886347175062


In [None]:
from sacrebleu.metrics import CHRF

# word_order=2 adds word bigrams to the character n-grams (the chrF++ variant).
chrf = CHRF(word_order=2)
# Named chrf_score (not `score`) so the result is not confused with the `score`
# function imported from bert_score later in the notebook.
chrf_score = chrf.corpus_score(hypotheses, [references])
print("RNN+Attn EN→FA chrF++:", chrf_score.score)


RNN+Attn EN→FA chrF++: 25.362831001680785


In [None]:
!pip install -q bert-score


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from bert_score import score

# BERTScore between hypotheses and references; the mean F1 over all sentences is reported.
# NOTE(review): rescale_with_baseline needs a baseline for lang="fa" — confirm
# bert-score provides one, otherwise the scores are presumably left unrescaled.
P, R, F1 = score(
    hypotheses,
    references,
    lang="fa",
    rescale_with_baseline=True
)

print("RNN+Attn EN→FA BERTScore F1:", F1.mean().item())


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

RNN+Attn EN→FA BERTScore F1: 0.7007076144218445




In [None]:
import random

# Show a reproducible random selection of test examples
num_samples = 10
# A dedicated, seeded generator keeps the displayed examples stable across runs
# without touching the global `random` state; min() guards against a test set
# smaller than num_samples (random.sample would raise otherwise).
sample_rng = random.Random(42)
random_indices = sample_rng.sample(
    range(len(test_en)), min(num_samples, len(test_en))
)

print("=" * 80)
print("SAMPLE TRANSLATIONS")
print("=" * 80)

for i, idx in enumerate(random_indices, 1):
    print(f"\n📌 Example {i}:")
    print(f"English:    {test_en[idx]}")
    print(f"Reference:  {test_fa[idx]}")
    print(f"Predicted:  {hypotheses[idx]}")
    print("-" * 80)

SAMPLE TRANSLATIONS

📌 Example 1:
English:    Well when Dantes was arrested Monsieur Morrel hastened to obtain the particulars and they were very sad
Reference:  خوب وقتی دانتس دستگیر شد ، موسیو مورل برای بدست آوردن جزئیات عجله کرد و آن ها بسیار ناراحت شدند
Predicted:  وقتی که D ها را M M ید ید ید ین و سته را برای وب ابی و به من اران و ند و آن ها را می کردند .
--------------------------------------------------------------------------------

📌 Example 2:
English:    VLADIMIR PUTIN: Let's talk later.
Reference:  ولادیمیر پوتین : بیایید بعدا صحبت کنیم .
Predicted:  استف وله ها رال های کنید .
--------------------------------------------------------------------------------

📌 Example 3:
English:    soyou were an artist .
Reference:  سویا تو هنرمند بودی
Predicted:  به به یک هنر به بود .
--------------------------------------------------------------------------------

📌 Example 4:
English:    All the people said to Samuel, Pray for your servants to Yahweh your God, that we not die; for we hav