In [10]:

import os, sys
from pathlib import Path
import torch

# Process-wide environment settings, applied before any model or tokenizer work.
os.environ.update(
    {
        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128",
        "TOKENIZERS_PARALLELISM": "false",
    }
)

# Prefer the GPU when one is visible to PyTorch; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)
if device.type == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# Locate the project root: ~/Desktop/HindiToEnglishMT when it exists,
# otherwise the parent of the current working directory.
home = Path.home()
desktop = home / "Desktop"
base_dir = desktop / "HindiToEnglishMT"
base_dir = base_dir if base_dir.exists() else Path.cwd().resolve().parent
print("Base dir:", base_dir)

# Make the project's utils/ folder importable for the imports that follow.
sys.path.append(str(Path(base_dir, "utils")))
import sentencepiece as spm
from model_utils import Seq2SeqTransformer


Using device: cuda
GPU: NVIDIA GeForce RTX 3050 Laptop GPU
Base dir: C:\Users\ashwi\OneDrive\Desktop\HindiToEnglishMT


In [11]:
# Load tokenizer and model
# The SentencePiece model supplies the vocabulary size and the special-token
# ids used to build the network below.
sp = spm.SentencePieceProcessor(model_file=str(base_dir / "models" / "vocab" / "hi_en_unigram.model"))
pad_id = sp.pad_id(); bos_id = sp.bos_id(); eos_id = sp.eos_id()
vocab_size = sp.vocab_size()

# Fail early with a pointer to the training notebook if no checkpoint exists.
ckpt_path = base_dir / "models" / "from_scratch" / "best_model.pt"
if not ckpt_path.exists():
    raise FileNotFoundError("Best model not found. Train it in 02_train_model.ipynb first.")

# The checkpoint is handed straight to load_state_dict below, so it only needs
# to contain tensors. weights_only=True stops torch.load from unpickling
# arbitrary Python objects out of the file.
ckpt = torch.load(ckpt_path, map_location=device, weights_only=True)

# Build model with same hyperparams used during training
# (load_state_dict below requires the parameter shapes to match the checkpoint).
model = Seq2SeqTransformer(
    vocab_size=vocab_size,
    pad_id=pad_id,
    d_model=64,
    num_heads=2,
    num_encoder_layers=2,
    num_decoder_layers=2,
    d_ff=128,
    dropout=0.2
).to(device)

# Load trained weights
model.load_state_dict(ckpt)
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module summary.
model.eval()


Seq2SeqTransformer(
  (src_embed): Embedding(32000, 64, padding_idx=0)
  (tgt_embed): Embedding(32000, 64, padding_idx=0)
  (pos_enc): PositionalEncoding()
  (encoder): ModuleList(
    (0-1): 2 x EncoderLayer(
      (self_attn): MultiheadAttention(
        (q_proj): Linear(in_features=64, out_features=64, bias=False)
        (k_proj): Linear(in_features=64, out_features=64, bias=False)
        (v_proj): Linear(in_features=64, out_features=64, bias=False)
        (o_proj): Linear(in_features=64, out_features=64, bias=False)
        (dropout): Dropout(p=0.2, inplace=False)
      )
      (ff): FeedForward(
        (net): Sequential(
          (0): Linear(in_features=64, out_features=128, bias=True)
          (1): ReLU(inplace=True)
          (2): Dropout(p=0.2, inplace=False)
          (3): Linear(in_features=128, out_features=64, bias=True)
          (4): Dropout(p=0.2, inplace=False)
        )
      )
      (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (norm2): Lay

In [12]:

# Helper to translate a single Hindi string
def translate(hi_text: str, max_len: int = 128, beam_size: int = 4, length_penalty: float = 0.6) -> str:
    """Translate one Hindi string with the loaded model and return the decoded text.

    Relies on the notebook-level ``sp`` (SentencePiece processor), ``model``
    and ``device`` objects created in earlier cells.

    Args:
        hi_text: Source sentence to translate.
        max_len: Passed to ``model.beam_search`` as ``max_len``.
        beam_size: Passed to ``model.beam_search`` as ``beam_size``.
        length_penalty: Passed to ``model.beam_search`` as ``length_penalty``.

    Returns:
        The decoded first hypothesis, with a leading BOS id removed and
        everything from the first EOS id onward dropped.
    """
    bos, eos = sp.bos_id(), sp.eos_id()

    # Wrap the encoded sentence in BOS/EOS and add a batch dimension of 1.
    src_ids = [bos] + sp.encode(hi_text, out_type=int) + [eos]
    src = torch.tensor([src_ids], dtype=torch.long, device=device)

    # Inference only: make sure no autograd graph is recorded during decoding,
    # regardless of whether beam_search disables gradients itself.
    with torch.no_grad():
        hypotheses = model.beam_search(src, max_len=max_len, bos_id=bos, eos_id=eos,
                                       beam_size=beam_size, length_penalty=length_penalty)
    out_ids = hypotheses[0].tolist()

    # strip BOS and everything after EOS
    if out_ids and out_ids[0] == bos:
        out_ids = out_ids[1:]
    if eos in out_ids:
        out_ids = out_ids[:out_ids.index(eos)]
    return sp.decode(out_ids)


In [13]:
# Interactive input
# Prompt repeatedly for a sentence and print its translation until the user
# types q/quit/exit, interrupts the cell, or the input stream is closed.
while True:
    try:
        s = input("Enter Hindi sentence (or 'quit'): ").strip()
        if not s:
            # Nothing typed: prompt again instead of sending an empty
            # source sentence to the model.
            continue
        if s.lower() in {"q", "quit", "exit"}:
            break
        print("Translation:", translate(s))

    except (KeyboardInterrupt, EOFError):
        # Ctrl-C / kernel interrupt, or end-of-input on stdin: leave quietly.
        break
