In [1]:
from transformers import MarianTokenizer, MarianMTModel

# Load the MarianMT model and tokenizer for English to Hindi
model_name = "Helsinki-NLP/opus-mt-en-hi"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)



In [2]:
# Define a list of words to keep in English
english_words_to_keep = ["definitely", "feedback", "comment", "video", "clearly", "products", "waiting", "bag"]

# Function to translate English to Hindi with word selection
def english_to_hindi_with_selection(text):
    # Split the text into words
    words = text.split()

    # Initialize an empty list for the translated words
    translated_words = []

    # Translate each word to Hindi unless it's in the list of words to keep in English
    for word in words:
        if word.lower() in english_words_to_keep:
            translated_words.append(word)
        else:
            # Tokenize and translate to Hindi
            input_ids = tokenizer.encode(word, return_tensors="pt")
            translation = model.generate(input_ids, max_length=40, num_beams=4, early_stopping=True)
            hindi_translation = tokenizer.decode(translation[0], skip_special_tokens=True)
            translated_words.append(hindi_translation)

    # Reconstruct the text
    hinglish_output = " ".join(translated_words)

    return hinglish_output

In [3]:
# Input sentences
sentences = [
    "Definitely share your feedback in the comment section.",
    "So even if it's a big video, I will clearly mention all the products.",
    "I was waiting for my bag."
]

# Translate each sentence to Hinglish with word selection
for sentence in sentences:
    hinglish_output = english_to_hindi_with_selection(sentence)
    print("Hinglish Output:", hinglish_output)

Hinglish Output: Definitely साझा आपका feedback में वह comment खंड.
Hinglish Output: तो यहां तक यदि यह है एक बड़ा वीडियो, आई होगा clearly उल्लेख सभी वह उत्पाद.
Hinglish Output: आई था waiting के लिए मेरा बैग.
