In [47]:
!pip install transformers -U -q
import torch
import tensorflow as tf

In [48]:
! pip install sentencepiece



In [49]:
!pip freeze | grep transformers

transformers==4.35.0


In [50]:
from transformers import MarianMTModel, MarianTokenizer

model_name = 'Helsinki-NLP/opus-mt-en-hi'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

import nltk
nltk.download('averaged_perceptron_tagger')

def is_noun(text):
    ans = nltk.pos_tag([text])
    val = ans[0][1]

    if val in ('NN', 'NNS', 'NNPS', 'NNP'):
        return True
    else:
        return False

input_text = ["Definitely share your feedback in the comment section.",
              "So even if it's a big video, I will clearly mention all the products.",
              "I was waiting for my bag."]


def translate_and_preserve_nouns(input_text):
    translated_sentences = []

    for text in input_text:
        model_inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        generated_tokens = model.generate(**model_inputs)
        translation = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
        words = text.split()
        noun_list = [word for word in words if is_noun(word)]

        for noun in noun_list:
            noun = noun.replace(',', '')
            model_inputs_noun = tokenizer(noun, return_tensors="pt", padding=True, truncation=True)
            generated_tokens_noun = model.generate(**model_inputs_noun)
            noun_translation = tokenizer.decode(generated_tokens_noun[0], skip_special_tokens=True)
            translation = translation.replace(noun_translation, noun)

        translated_sentences.append(translation)

    return translated_sentences

translated_sentences = translate_and_preserve_nouns(input_text)
for sentence in translated_sentences:
    print(sentence)



[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


comment खण्ड में अपनी प्रतिक्रिया को निश्‍चित ही share करें ।
तो यह एक बड़ा video है, तो भी मैं स्पष्ट रूप से सभी उत्पादों का mention करेंगे।
मैं अपने बैग के लिए इंतजार कर रहा था.


In [51]:
def evaluate_qualitatively(translated_sentences, input_text):
    print("Original English Text:")
    for text in input_text:
        print(text)

    print("\nTranslated Hinglish Text:")
    for sentence in translated_sentences:
        print(sentence)

evaluate_qualitatively(translated_sentences, input_text)


Original English Text:
Definitely share your feedback in the comment section.
So even if it's a big video, I will clearly mention all the products.
I was waiting for my bag.

Translated Hinglish Text:
comment खण्ड में अपनी प्रतिक्रिया को निश्‍चित ही share करें ।
तो यह एक बड़ा video है, तो भी मैं स्पष्ट रूप से सभी उत्पादों का mention करेंगे।
मैं अपने बैग के लिए इंतजार कर रहा था.
