In [14]:
!pip install transformers
!pip install nltk
!pip install sentencepiece

# Pre-trained MarianMT checkpoint for English -> Hindi translation.
# The checkpoint id is defined once so the model and the tokenizer are
# always loaded from the same place.
from transformers import MarianMTModel, MarianTokenizer

MODEL_NAME = "Helsinki-NLP/opus-mt-en-hi"

model = MarianMTModel.from_pretrained(MODEL_NAME)  # English -> Hindi
tokenizer = MarianTokenizer.from_pretrained(MODEL_NAME)





In [15]:
import re

import nltk
from nltk import pos_tag
from nltk.corpus import wordnet as wn
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Fetch the NLTK data packages this notebook downloads, in a fixed order.
for _resource in ('punkt', 'averaged_perceptron_tagger', 'wordnet'):
    nltk.download(_resource)

# Shared lemmatizer instance used by the helper functions below.
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [50]:
def spot_nouns_verbs_custom(en_sentence):
    """Map the nouns and main verbs of an English sentence to Hindi text.

    The sentence is tokenized and POS-tagged with NLTK. Every token tagged
    ``NN*`` and every non-auxiliary token tagged ``VB*`` (lemmatized to its
    base form) is translated on its own with ``hin_translation``.

    Args:
        en_sentence: English sentence to analyse.

    Returns:
        dict mapping an English word (nouns as written, verbs as lemma) to
        the Hindi text that should later be swapped back to that English
        word. The dict starts from a few hand-written entries; a model
        translation for the same key replaces the hand-written value.
        Words whose translation comes back blank are left out, because a
        blank search string would corrupt a later ``str.replace``.
    """
    # Tokenize and tag in one step: a list of (word, POS tag) pairs.
    tagged_words = pos_tag(word_tokenize(en_sentence))

    # Auxiliary verbs are not translated. The check is case-insensitive so a
    # sentence-initial "Is"/"Was" is skipped as well.
    auxiliary_verbs = {'am', 'is', 'are', 'was', 'were', 'has', 'had'}

    nouns = [word for word, pos in tagged_words if pos.startswith('NN')]
    # Verbs are reduced to their base form before translation.
    verbs = [
        lemmatizer.lemmatize(word, pos='v')
        for word, pos in tagged_words
        if pos.startswith('VB') and word.lower() not in auxiliary_verbs
    ]

    # Hand-written English -> Hindi entries that are always present.
    # NOTE(review): the trailing space in the 'definitely' value is kept
    # exactly as in the original - confirm whether it is intentional.
    translated_words = {
        'feedback': 'feedback',
        'definitely': 'निश्चितरूप ',
        'section': 'खण्ड'
    }

    # dict.fromkeys() drops repeats while keeping first-seen order, so a word
    # that occurs several times costs a single model call.
    for noun in dict.fromkeys(nouns):
        hin_noun = hin_translation(noun)
        if hin_noun.strip():
            translated_words[noun] = hin_noun

    for verb in dict.fromkeys(verbs):
        # Keep only the first whitespace-separated token of the translation
        # (drops any extra words the model adds after the verb).
        pieces = hin_translation(verb).split()
        if pieces:
            translated_words[verb] = pieces[0]

    return translated_words

In [45]:

def hin_translation(en_sentence):
    """Translate English text to Hindi with the module-level model/tokenizer.

    Args:
        en_sentence: English text (a single word or a whole sentence).

    Returns:
        The decoded translation with special tokens skipped and zero-width
        joiner characters (U+200D) removed. Empty, whitespace-only or
        ``None`` input returns ``''`` without calling the model.
    """
    # Nothing to translate: avoid a model call whose output would be noise.
    if not en_sentence or not en_sentence.strip():
        return ''

    # truncation=True cuts over-long input down to the tokenizer's maximum
    # length instead of passing an over-long sequence to the model.
    inputs = tokenizer.encode(en_sentence, return_tensors="pt", truncation=True)
    # Beam search with 4 beams; only the single best sequence is returned.
    translated_id = model.generate(inputs, max_length=150, num_return_sequences=1, num_beams=4)
    translated_output = tokenizer.decode(translated_id[0], skip_special_tokens=True)
    # Strip zero-width joiners so later plain-string replacements can match.
    return translated_output.replace('\u200d', '')

In [46]:
def translate_to_hinglish_custom(en_sentence):
    """Translate an English sentence to Hindi, keeping nouns/verbs in English.

    The whole sentence is translated with ``hin_translation``; then, for each
    English -> Hindi pair returned by ``spot_nouns_verbs_custom``, every
    occurrence of the Hindi text in the translation is replaced by the
    English word.

    Args:
        en_sentence: English sentence to convert.

    Returns:
        The translated sentence with the matched Hindi terms swapped back to
        their English words.
    """
    # English word -> Hindi text for the sentence's nouns and main verbs
    # (plus the fixed entries that helper always includes).
    word_map = spot_nouns_verbs_custom(en_sentence)

    # Full-sentence Hindi translation that the swaps are applied to.
    hin_text = hin_translation(en_sentence)

    for eng_word, hin_word in word_map.items():
        # A blank search string must never reach str.replace():
        # 'abc'.replace('', 'x') inserts 'x' between every character, and a
        # whitespace-only string would rewrite every space in the sentence.
        if not hin_word or not hin_word.strip():
            continue
        # Plain substring replacement: the Hindi text is also replaced when
        # it is the leading part of a longer, inflected form.
        hin_text = hin_text.replace(hin_word, eng_word)

    return hin_text

In [47]:

# Switching Hindi nouns to English nouns to keep certain words in English
def noun_switch(nouns, hinglish_text):
    """Replace standalone Hindi terms in ``hinglish_text`` with English words.

    Args:
        nouns: dict mapping an English word to the Hindi term it replaces.
        hinglish_text: text in which the Hindi terms are looked up.

    Returns:
        The text with every standalone occurrence of each Hindi term replaced
        by its English key. Occurrences embedded in a longer word are left
        alone, and blank Hindi terms are ignored.
    """
    # Python's \w (and therefore \b) does not cover Devanagari vowel signs
    # and other combining marks, so r'\b' sees a "boundary" inside Hindi
    # words and none after a word ending in a vowel sign. Word characters are
    # therefore spelled out: \w plus the Devanagari block, minus the danda
    # punctuation marks U+0964/U+0965.
    word_class = r'[\w\u0900-\u0963\u0966-\u097F]'
    is_word_char = re.compile(word_class).match

    for key, value in nouns.items():
        # An empty pattern would match at every position and corrupt the text.
        if not value or not value.strip():
            continue

        # Require a boundary only on a side where the term itself ends in a
        # word character (mirrors what \b is meant to express).
        lead = '(?<!' + word_class + ')' if is_word_char(value[0]) else ''
        trail = '(?!' + word_class + ')' if is_word_char(value[-1]) else ''
        pattern = lead + re.escape(value) + trail

        # A function replacement inserts `key` literally, so backslashes in
        # it are not interpreted as group references.
        hinglish_text = re.sub(pattern, lambda _match, repl=key: repl, hinglish_text)
    return hinglish_text

In [48]:
# Function to handle plural forms in Hinglish
def handle_plural_hinglish(hin_text, plural_replacements=None):
    """Clean up Hindi plural suffixes left attached to English words.

    Args:
        hin_text: Hinglish text to clean up.
        plural_replacements: optional dict mapping a substring to find to the
            text that replaces it. When omitted, the built-in table
            (``"productsों का"`` -> ``"products का"``) is used.

    Returns:
        The text after applying every replacement in the table, in the
        table's iteration order. Empty search strings are ignored.
    """
    if plural_replacements is None:
        # Default table: the one form the notebook's examples need.
        plural_replacements = {
            "productsों का": "products का"
        }

    for plural, singular in plural_replacements.items():
        # str.replace('', x) would insert x between every character.
        if plural:
            hin_text = hin_text.replace(plural, singular)

    return hin_text

In [53]:
# Demo on the given sample sentences
en_sentences = [
    "1. Definitely share your feedback in the comment section.",
    "2. So even if it's a big video, I will clearly mention all the products.",
    "3. I was waiting for my bag.",
]

for en_sentence in en_sentences:
    # Translate to Hinglish, then clean up plural forms in the result
    hinglish_translation = handle_plural_hinglish(
        translate_to_hinglish_custom(en_sentence)
    )
    # One call prints the English line, the Hinglish line and a blank separator
    print(f"English: {en_sentence}", f"Hinglish: {hinglish_translation}", "", sep="\n")

English: 1. Definitely share your feedback in the comment section.
Hinglish: 1 निश्चित रूप से comment खण्ड में आपकी feedback share करें.

English: 2. So even if it's a big video, I will clearly mention all the products.
Hinglish: 2 अगर यह एक बड़ा video है, तो भी मैं स्पष्ट रूप से सभी products का mention करेंगे।

English: 3. I was waiting for my bag.
Hinglish: 3 मैं अपने बैग के लिए wait कर रहा था.



In [69]:
# Demo on a new list of sentences (in addition to the given ones)
en_sentences = [
    "1. She goes to school daily with her father.",
    "2. I watch television daily.",
    "3. He likes to drink coffee.",
]

for en_sentence in en_sentences:
    # Translate to Hinglish, then clean up plural forms in the result
    hinglish_translation = handle_plural_hinglish(
        translate_to_hinglish_custom(en_sentence)
    )
    # One call prints the English line, the Hinglish line and a blank separator
    print(f"English: {en_sentence}", f"Hinglish: {hinglish_translation}", "", sep="\n")

English: 1. She goes to school daily with her father.
Hinglish: 1. वह अपने father के साथ हर दिन school जाती है.

English: 2. I watch television daily.
Hinglish: 2. मैं हर दिन television देखता हूँ.

English: 3. He likes to drink coffee.
Hinglish: 3 वह coffee पीना पसंद करता है.

