In [7]:
from transformers import MarianMTModel, MarianTokenizer
import nltk

# Fetch every NLTK resource this cell depends on so it runs on a fresh
# environment. Both the legacy and the newer resource names are requested:
#   - 'punkt' / 'punkt_tab': tokenizer data used by nltk.word_tokenize
#     (recent NLTK releases load 'punkt_tab' instead of 'punkt').
#   - 'averaged_perceptron_tagger' / 'averaged_perceptron_tagger_eng':
#     tagger model used by nltk.pos_tag (recent releases load the '_eng' one).
# nltk.download() reports packages that are already up to date and moves on,
# so repeating this on every run is cheap.
for resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)


# Load translation model and tokenizer (Hindi to English)
model_name = 'Helsinki-NLP/opus-mt-hi-en'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Hindi input sentence
hindi_text = "तकनीक दुनिया को तेजी से बदल रही है। युवा लोग कृत्रिम बुद्धिमत्ता और मशीन लर्निंग के बारे में उत्साहित हैं।"

# Translate.
# The text is wrapped in a list because the tokenizer is called in batch form;
# padding=True keeps the batch rectangular, and truncation=True clips input
# that exceeds the tokenizer's maximum length instead of passing an
# over-long sequence to the model.
inputs = tokenizer(
    [hindi_text],
    return_tensors="pt",
    padding=True,
    truncation=True,
)
translated = model.generate(**inputs)
# Only one text was submitted, so the first generated sequence is the result.
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)

print("🔁 Translated Text:\n", translated_text)

# Perform POS tagging on the English translation using NLTK
words = nltk.word_tokenize(translated_text)
tags = nltk.pos_tag(words)

print("\n🧠 POS Tags:")
for word, tag in tags:
    print(f"{word} → {tag}")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


🔁 Translated Text:
 The technology is changing the world fast. Young people are excited about artificial intelligence and machine pressure.

🧠 POS Tags:
The → DT
technology → NN
is → VBZ
changing → VBG
the → DT
world → NN
fast → NN
. → .
Young → CC
people → NNS
are → VBP
excited → VBN
about → IN
artificial → JJ
intelligence → NN
and → CC
machine → NN
pressure → NN
. → .
