In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [2]:
# Local directory (relative to the working directory) from which both the
# tokenizer and the seq2seq model are loaded.
model_path = "./english-tamil-final-model"

# Tokenizer is loaded first, then the model, both from the same directory.
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_path),
    AutoModelForSeq2SeqLM.from_pretrained(model_path),
)

# NOTE(review): this is a plain attribute assignment on the tokenizer object.
# Nothing in this notebook reads `target_language` afterwards — confirm that
# the loaded tokenizer class actually consumes an attribute with this name.
tokenizer.target_language = "ta"




In [3]:

def translate_text(text, model, tokenizer, max_length=128):
    """Translate ``text`` using a seq2seq ``model`` and its ``tokenizer``.

    The model is moved to CUDA when ``torch.cuda.is_available()`` is true,
    otherwise to CPU, and the tokenized inputs are placed on the same device.
    Generation uses beam search (4 beams, length penalty 0.6, early stopping)
    inside ``torch.no_grad()``.

    Args:
        text: A single source string, or a list/tuple of source strings that
            are tokenized and translated together as one padded batch.
        model: Object exposing ``to(device)`` and ``generate(...)``.
        tokenizer: Callable returning a mapping of tensors that contains
            ``"input_ids"`` (and normally ``"attention_mask"``), and exposing
            ``decode(ids, skip_special_tokens=...)``.
        max_length: Limit applied both to tokenizer truncation of the input
            and to ``generate``'s ``max_length``.

    Returns:
        The decoded translation as a ``str`` when ``text`` is a ``str``;
        otherwise a ``list`` with one decoded string per input, in order.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    encoded = tokenizer(
        text,
        return_tensors="pt",
        max_length=max_length,
        truncation=True,
        padding=True,
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        outputs = model.generate(
            encoded["input_ids"],
            # Forward the mask produced by the tokenizer so that padded
            # positions in a batch are ignored instead of attended to.
            # `.get` keeps this working if the tokenizer returns no mask.
            attention_mask=encoded.get("attention_mask"),
            max_length=max_length,
            num_beams=4,
            length_penalty=0.6,
            early_stopping=True,
        )

    # A lone string keeps the original contract: one decoded string back.
    if isinstance(text, str):
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    # For a batch, decode every generated sequence rather than only the first.
    return [tokenizer.decode(seq, skip_special_tokens=True) for seq in outputs]

In [4]:


# Sample English inputs used to exercise translate_text with the loaded
# model and tokenizer.
test_sentences = [
    "Hello, how are you?",
    "What is your name?",
    "I love learning Tamil language.",
]

# Header followed by a 50-dash rule.
print("English to Tamil Translations:")
print("-" * 50)

# For each sentence, print the source, its translation, and a closing rule,
# one per line.
for sentence in test_sentences:
    tamil_translation = translate_text(sentence, model, tokenizer)
    print(
        f"English: {sentence}",
        f"Tamil: {tamil_translation}",
        "-" * 50,
        sep="\n",
    )

English to Tamil Translations:
--------------------------------------------------
English: Hello, how are you?
Tamil: ஹலோ, எப்படி இருக்கிறீர்கள்?
--------------------------------------------------
English: What is your name?
Tamil: உங்கள் பெயர் என்ன?
--------------------------------------------------
English: I love learning Tamil language.
Tamil: தமிழ் மொழியை கற்றுக்கொள்வதை நான் விரும்புகிறேன்.
--------------------------------------------------
