In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [5]:
# Directory that the tokenizer and the seq2seq model are loaded from below.
model_path = "./tamil_english_translation_model"
 

In [6]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Load the tokenizer and the seq2seq model from model_path, then move the
# model onto the selected device.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
model = model.to(device)

Using device: cuda




In [7]:
def translate(text, tokenizer, model, device, max_length=128):
    """
    Produce the English translation of one Tamil text.

    The text is tokenized (padded, and truncated to ``max_length`` tokens),
    the encoded inputs are moved to ``device``, generation runs with 4 beams,
    and the first generated sequence is decoded with special tokens skipped.

    Args:
        text (str): Tamil text to translate
        tokenizer: Loaded tokenizer
        model: Loaded translation model
        device: Torch device the encoded inputs are moved to
        max_length (int): Limit applied both to the tokenized input and to
            the generated output

    Returns:
        str: Decoded English translation
    """
    tokenize_options = {
        "return_tensors": "pt",
        "padding": True,
        "truncation": True,
        "max_length": max_length,
    }
    generation_options = {
        "max_length": max_length,
        "num_beams": 4,
        "early_stopping": True,
    }

    encoded = tokenizer(text, **tokenize_options).to(device)
    generated = model.generate(**encoded, **generation_options)

    # Only the first generated sequence is returned.
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [8]:
def batch_translate(texts, tokenizer, model, device, max_length=128):
    """
    Translate multiple texts from Tamil to English.

    All texts are tokenized together as one padded batch, the batch is moved
    to ``device``, and generation runs with 4 beams.

    Args:
        texts (list): Input Tamil texts. Any iterable of strings is accepted;
            a bare string is treated as a batch containing that one text.
        tokenizer: Loaded tokenizer
        model: Loaded translation model
        device: Torch device
        max_length (int): Maximum length of generated translations; also the
            truncation limit applied to the tokenized inputs

    Returns:
        list: Translated English texts, one per generated sequence. An empty
        list when ``texts`` is empty.
    """
    # Normalise the input to a concrete list. A bare string must not be
    # split into characters, so it becomes a one-element batch.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Nothing to translate: return early instead of handing an empty batch
    # to the tokenizer and to generate().
    if not texts:
        return []

    inputs = tokenizer(
        texts,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length
    ).to(device)

    outputs = model.generate(
        **inputs,
        max_length=max_length,
        num_beams=4,
        early_stopping=True
    )

    return [
        tokenizer.decode(output, skip_special_tokens=True)
        for output in outputs
    ]


In [10]:
# Single-text demo: translate one Tamil sentence and print source and result.
tamil_text = "வணக்கம் உலகம்!"
english_translation = translate(
    text=tamil_text,
    tokenizer=tokenizer,
    model=model,
    device=device,
)
print(f"Tamil: {tamil_text}")
print(f"English: {english_translation}")

Tamil: வணக்கம் உலகம்!
English: We are worldwide!


In [11]:


# Batch translation demo: translate three Tamil sentences in one call and
# print each source text next to its translation.
tamil_texts = [
    "வணக்கம் உலகம்!",
    "நான் ஒரு மொழிபெயர்ப்பு மாதிரி உருவாக்குகிறேன்",
    "தமிழ் மொழி அருமையான மொழி",
]
batch_translations = batch_translate(
    texts=tamil_texts,
    tokenizer=tokenizer,
    model=model,
    device=device,
)

print("\nBatch Translation:")
for tamil, english in zip(tamil_texts, batch_translations):
    print(f"Tamil: {tamil}")
    print(f"English: {english}")
# NOTE(review): the separator is printed once, after the loop, whereas main()
# prints it after every pair — confirm which of the two is intended.
print("---")


Batch Translation:
Tamil: வணக்கம் உலகம்!
English: We are worldwide!
Tamil: நான் ஒரு மொழிபெயர்ப்பு மாதிரி உருவாக்குகிறேன்
English: I am one language model
Tamil: தமிழ் மொழி அருமையான மொழி
English: Tamil language is a true language
---


In [None]:


class TranslationModel:
    """
    Tamil-to-English translator built on a saved seq2seq model and tokenizer.

    Attributes are set in ``__init__``:
        device: Torch device the model and encoded inputs are placed on
        tokenizer: Tokenizer loaded from ``model_path``
        model: Seq2seq model loaded from ``model_path`` and moved to ``device``
    """

    def __init__(self, model_path):
        """
        Load the tokenizer and model stored at ``model_path``.

        Args:
            model_path (str): Path passed to ``from_pretrained`` for both the
                tokenizer and the model
        """
        # Use the GPU when CUDA is available; otherwise fall back to the CPU
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Load saved model and tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to(self.device)

    def _generate(self, text_or_texts, max_length):
        """Tokenize the input, run 4-beam generation, and return the raw output sequences."""
        inputs = self.tokenizer(
            text_or_texts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=max_length
        ).to(self.device)

        return self.model.generate(
            **inputs,
            max_length=max_length,
            num_beams=4,
            early_stopping=True
        )

    def translate(self, text, max_length=128):
        """
        Translate a single text from Tamil to English

        Args:
            text (str): Input Tamil text
            max_length (int): Maximum length of generated translation; also
                the truncation limit applied to the tokenized input

        Returns:
            str: Translated English text
        """
        outputs = self._generate(text, max_length)

        # Only the first generated sequence is decoded and returned
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def batch_translate(self, texts, max_length=128):
        """
        Translate multiple texts from Tamil to English

        Args:
            texts (list): Input Tamil texts. Any iterable of strings is
                accepted; a bare string is treated as a batch of one text.
            max_length (int): Maximum length of generated translations; also
                the truncation limit applied to the tokenized inputs

        Returns:
            list: Translated English texts, one per generated sequence. An
            empty list when ``texts`` is empty.
        """
        # Normalise to a concrete list; a bare string must not be split
        # into characters, so it becomes a one-element batch
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)

        # Nothing to translate: skip the tokenizer and model entirely
        if not texts:
            return []

        outputs = self._generate(texts, max_length)

        # Decode translations
        return [
            self.tokenizer.decode(output, skip_special_tokens=True)
            for output in outputs
        ]

# Example usage
def main():
    """
    Demo: load the saved model, translate one Tamil sentence, then a batch
    of three, printing every source text with its English translation.
    """
    # Build the translator from the directory where the model was saved
    MODEL_PATH = "./tamil_english_translation_model"
    translator = TranslationModel(MODEL_PATH)

    # --- One sentence ---
    single_source = "வணக்கம் உலகம்!"
    single_result = translator.translate(single_source)
    print(f"Tamil: {single_source}")
    print(f"English: {single_result}")

    # --- Several sentences in one call ---
    batch_sources = [
        "வணக்கம் உலகம்!",
        "நான் ஒரு மொழிபெயர்ப்பு மாதிரி உருவாக்குகிறேன்",
        "தமிழ் மொழி அருமையான மொழி",
    ]
    batch_results = translator.batch_translate(batch_sources)

    print("\nBatch Translation:")
    # A separator line follows every source/translation pair
    for source, result in zip(batch_sources, batch_results):
        print(f"Tamil: {source}")
        print(f"English: {result}")
        print("---")

# Run the demo only when this code executes as the main module, not on import.
if __name__ == "__main__":
    main()