<a href="https://colab.research.google.com/github/Smarth2005/Hugging-Face-Projects/blob/main/Language%20Translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Language Translation using Hugging Face Transformers

In [None]:
from transformers import MarianTokenizer, MarianMTModel, AutoTokenizer, AutoModelForCausalLM
import torch

# Registry of supported target languages.
# Each menu key maps to (display name, Hugging Face model id, model family);
# the family tag ("marian" or "llama") decides how the checkpoint is loaded
# and which generation path is used for it.
models_info = dict(
    [
        ("1", ("Hindi", "Helsinki-NLP/opus-mt-en-hi", "marian")),
        ("2", ("Punjabi", "partex-nv/Llama-3.1-8B-VaaniSetu-EN2PA", "llama")),
        ("3", ("French", "Helsinki-NLP/opus-mt-en-fr", "marian")),
        ("4", ("German", "Helsinki-NLP/opus-mt-en-de", "marian")),
    ]
)

# Preload tokenizers and models
# Every entry of models_info is loaded once up front, so translate() only has
# to look the objects up in loaded_models.
loaded_models = {}
for key, (lang, model_name, model_type) in models_info.items():
    print(f"Loading {lang} model...")
    if model_type == "marian":
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
    elif model_type == "llama":
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            device_map="auto"  # weights are placed on the available device(s); not necessarily a GPU
        )
    else:
        # Fail loudly: without this branch an unrecognised tag would register
        # the previous iteration's tokenizer/model under the wrong language
        # (or hit a NameError on the very first entry).
        raise ValueError(f"Unsupported model type {model_type!r} for {lang}")
    # Stored as (display name, tokenizer, model, family) under the menu key.
    loaded_models[key] = (lang, tokenizer, model, model_type)
print("All models loaded!\n")

# Translation function
def translate(text, lang_choice):
    """Translate English ``text`` with the model registered under ``lang_choice``.

    Args:
        text: English source text.
        lang_choice: Key into ``loaded_models`` (the menu number as a string).

    Returns:
        The decoded translation. For "llama" entries this is whatever follows
        the last "Response:" marker in the decoded output, stripped of
        surrounding whitespace (possibly an empty string).

    Raises:
        KeyError: If ``lang_choice`` is not a key of ``loaded_models``.
        ValueError: If the entry's model type is neither "marian" nor "llama".
    """
    lang, tokenizer, model, model_type = loaded_models[lang_choice]

    if model_type == "marian":
        # Keep the input tensors on whatever device the model lives on.
        inputs = tokenizer(text, return_tensors="pt", padding=True).to(model.device)
        translated_tokens = model.generate(**inputs)
        return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)

    if model_type == "llama":
        translate_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are given the english text, read it and understand it. After reading translate the english text to Punjabi and provide the output strictly

### Input:
{}

### Response:
"""
        formatted_input = translate_prompt.format(text)
        # The model is loaded with device_map="auto", so it is not guaranteed
        # to sit on CUDA; follow the model's device instead of hard-coding one.
        inputs = tokenizer([formatted_input], return_tensors="pt").to(model.device)
        output_ids = model.generate(**inputs, max_new_tokens=500)
        decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # The decoded sequence echoes the prompt; keep only the answer part.
        return decoded.split("Response:")[-1].strip()

    raise ValueError(f"Unsupported model type {model_type!r} for {lang}")

# Menu-driven translator
def my_translator(text):
    """Interactively translate ``text`` into languages chosen from a menu.

    Shows the language menu and reads a choice from stdin in a loop. A choice
    that is a key of ``loaded_models`` prints the translation, "5" ends the
    loop, and anything else prints an error and asks again.

    Args:
        text: English text to translate. If it is empty, None or only
            whitespace, a message is printed and the function returns
            without showing the menu.

    Returns:
        None.
    """
    # Validate once, up front. `text` never changes inside the loop, so
    # checking it there and `continue`-ing would spin forever on empty input.
    if not text or (isinstance(text, str) and not text.strip()):
        print("Please enter some text!")
        return

    while True:
        print("\nSelect the target language:")
        print("1. Hindi\n2. Punjabi\n3. French\n4. German\n5. Exit")
        choice = input("Enter your choice: ").strip()

        if choice == "5":
            print("Exiting translator. Goodbye!")
            break

        if choice in loaded_models:
            translation = translate(text, choice)
            lang_name = loaded_models[choice][0]
            print(f"\nTranslated ({lang_name}): {translation}")
        else:
            print("Invalid choice! Please select 1, 2, 3, 4, or 5.")


Loading Hindi model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/812k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]



pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Loading Punjabi model...


model.safetensors:   0%|          | 0.00/306M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/935 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]



Loading French model...


tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Loading German model...


tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/768k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/797k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/298M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

All models loaded!



In [None]:
# Interactive demo: the menu is shown again after every translation until "5" is entered.
my_translator("The weather is very pleasant today, so I'll go for a walk")


Select the target language:
1. Hindi
2. Punjabi
3. French
4. German
5. Exit
Enter your choice: 1

Translated (Hindi): आज मौसम बहुत सुखद है, तो मैं एक चलने के लिए जाना होगा

Select the target language:
1. Hindi
2. Punjabi
3. French
4. German
5. Exit
Enter your choice: 2

Translated (Punjabi): ਹੁਣ ਮੈਂ ਪਹਿਲਾਂ ਹੀ ਨਿਕਲ ਜਾਵਾਂਗਾ।

Select the target language:
1. Hindi
2. Punjabi
3. French
4. German
5. Exit
Enter your choice: 3

Translated (French): Le temps est très agréable aujourd'hui, donc je vais faire une promenade

Select the target language:
1. Hindi
2. Punjabi
3. French
4. German
5. Exit
Enter your choice: 4

Translated (German): Das Wetter ist heute sehr angenehm, also gehe ich spazieren

Select the target language:
1. Hindi
2. Punjabi
3. French
4. German
5. Exit
Enter your choice: 5
Exiting translator. Goodbye!


In [None]:
# Second interactive run with a different sentence; enter "5" at the prompt to stop.
my_translator("We always sit together in the cafeteria and share our lunch")


Select the target language:
1. Hindi
2. Punjabi
3. French
4. German
5. Exit
Enter your choice: 1

Translated (Hindi): हम हमेशा साथ बैठकर दोपहर का खाना खाते हैं

Select the target language:
1. Hindi
2. Punjabi
3. French
4. German
5. Exit
Enter your choice: 2

Translated (Punjabi): ਹਮੇਸ਼ਾ ਹੀ ਅਮਲ ਕੇਂਦਰ ਵਿੱਚ ਬੈਠਦੇ ਹਾਂ ਅਤੇ ਆਪਸ ਵਿੱਚ ਆਪਣੀ ਭੋਜਨ ਸ਼ਾਰਾਰ ਕਰਦੇ ਹਾਂ

Select the target language:
1. Hindi
2. Punjabi
3. French
4. German
5. Exit
Enter your choice: 3

Translated (French): On s'assoit toujours à la cafétéria et on partage notre déjeuner.

Select the target language:
1. Hindi
2. Punjabi
3. French
4. German
5. Exit
Enter your choice: 4

Translated (German): Wir sitzen immer zusammen in der Cafeteria und teilen unser Mittagessen

Select the target language:
1. Hindi
2. Punjabi
3. French
4. German
5. Exit
Enter your choice: 5
Exiting translator. Goodbye!


Language Translation using `googletrans`

In [None]:
!pip uninstall -y googletrans
!pip install googletrans==4.0.0-rc1

Found existing installation: googletrans 4.0.2
Uninstalling googletrans-4.0.2:
  Successfully uninstalled googletrans-4.0.2
Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloa

  for x in reversed(rel.split(sep)):


In [None]:
from googletrans import Translator

# One client instance, reused by the cells below.
translator = Translator()
# Left as the cell's last expression, the call displays the returned object's repr,
# not the translated string.
translator.translate("How are you", dest="pa")

<googletrans.models.Translated at 0x78b76c7fe350>

In [None]:
# The translated string is read from the result's .text attribute.
res = translator.translate("How are you", dest="pa")
print(res.text)

ਤੁਸੀ ਕਿਵੇਂ ਹੋ


In [None]:
# Another short sentence with the same destination code ("pa").
res = translator.translate("I am working",dest="pa")
print(res.text)

ਮੈਂ ਕੰਮ ਕਰ ਰਿਹਾ ਹਾਂ


In [None]:
# A longer, two-clause sentence, still with dest="pa".
res = translator.translate("The weather is pleasant today, so I'll go for a walk", dest="pa")
print(res.text)

ਮੌਸਮ ਅੱਜ ਸੁਹਾਵਣਾ ਹੈ, ਇਸ ਲਈ ਮੈਂ ਸੈਰ ਲਈ ਜਾਵਾਂਗਾ


In [None]:
# Same sentence again; only the destination code changes, to "hi".
res = translator.translate("The weather is pleasant today, so I'll go for a walk", dest="hi")
print(res.text)

मौसम आज सुखद है, इसलिए मैं टहलने जाऊंगा


For the full list of supported language codes (`googletrans.LANGUAGES`), see the `googletrans` documentation: https://py-googletrans.readthedocs.io/en/latest/