In [1]:
pip install transformers sentencepiece torch



In [2]:
from transformers import MarianMTModel, MarianTokenizer



In [3]:
def translate_english_to_hindi(text_list):
    """Translate English sentences to Hindi with a pre-trained MarianMT model.

    Loads the 'Helsinki-NLP/opus-mt-en-hi' tokenizer and model, translates all
    sentences as one padded batch, prints every English/Hindi pair, and
    returns the translations.

    Note: the tokenizer and model are loaded on every call.

    Args:
        text_list: A single English string, or an iterable of English strings.
            ``None`` is treated as "no sentences".

    Returns:
        list[str]: The Hindi translations, in the same order as the input.
        An empty list is returned (and nothing is loaded or printed) when
        there is nothing to translate.
    """
    # Normalise the input to a concrete list:
    #  - a bare string would be tokenized as ONE sentence but zipped character
    #    by character in the display loop, mis-pairing the output;
    #  - a one-shot iterable would be exhausted before the display loop.
    if text_list is None:
        sentences = []
    elif isinstance(text_list, str):
        sentences = [text_list]
    else:
        sentences = list(text_list)

    # Nothing to translate: skip the expensive model load entirely.
    if not sentences:
        return []

    # 1. Load the pre-trained Model and Tokenizer
    # 'Helsinki-NLP/opus-mt-en-hi' is a popular model for English to Hindi
    model_name = 'Helsinki-NLP/opus-mt-en-hi'
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    print("--- Machine Translation (English -> Hindi) ---")
    print(f"Model used: {model_name}\n")

    # 2. Tokenize and Prepare Input
    # Padding aligns the batch to a common length; truncation cuts sentences
    # that exceed the model's maximum input size (over-long text is shortened).
    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True)

    # 3. Perform Translation (Generation)
    # The model 'generates' the translated tokens
    translated_tokens = model.generate(**inputs)

    # 4. Decode the Output (Convert tokens back to strings)
    translations = [tokenizer.decode(t, skip_special_tokens=True) for t in translated_tokens]

    # 5. Display Results
    for original, translated in zip(sentences, translations):
        print(f"English: {original}")
        print(f"Hindi:   {translated}")
        print("-" * 40)

    # Hand the translations back so callers can reuse them programmatically.
    return translations

In [4]:
if __name__ == "__main__":
    # Sample public-service announcements used to demonstrate the translator.
    public_info = [
        "Please wear a mask in crowded public places.",
        "The nearest metro station is closed for maintenance.",
        "A heavy rainfall warning has been issued for the next 24 hours.",
        "Government offices will remain closed on Sunday.",
    ]

    # Translate the whole batch; each English/Hindi pair is printed by the call.
    translate_english_to_hindi(text_list=public_info)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/812k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]



pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

--- Machine Translation (English -> Hindi) ---
Model used: Helsinki-NLP/opus-mt-en-hi



model.safetensors:   0%|          | 0.00/306M [00:00<?, ?B/s]

English: Please wear a mask in crowded public places.
Hindi:   भीड़ के सार्वजनिक स्थानों में एक मास्क पहनो.
----------------------------------------
English: The nearest metro station is closed for maintenance.
Hindi:   सबसे नज़दीकी प्रेस स्टेशन की देखभाल के लिए बंद कर दिया गया है.
----------------------------------------
English: A heavy rainfall warning has been issued for the next 24 hours.
Hindi:   अगले 24 घंटों से भारी वर्षा की चेतावनी जारी की गयी है ।
----------------------------------------
English: Government offices will remain closed on Sunday.
Hindi:   रविवार को सरकारी कार्यालय बंद कर दिया जाएगा.
----------------------------------------
