**Installing and importing dependencies:**

In [1]:
!pip install torch



In [2]:
!pip install transformers ipywidgets gradio --upgrade

Collecting ipywidgets
  Downloading ipywidgets-8.1.2-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.4/139.4 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gradio
  Downloading gradio-4.21.0-py3-none-any.whl (17.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.0/17.0 MB[0m [31m46.5 MB/s[0m eta [36m0:00:00[0m
Collecting comm>=0.1.3 (from ipywidgets)
  Downloading comm-0.2.1-py3-none-any.whl (7.2 kB)
Collecting widgetsnbextension~=4.0.10 (from ipywidgets)
  Downloading widgetsnbextension-4.0.10-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m71.0 MB/s[0m eta [36m0:00:00[0m
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.110.0-py3-none-any.whl (92 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.1/92.1 kB

In [3]:
!pip install accelerate sentencepiece

Collecting accelerate
  Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/280.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m153.6/280.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.27.2


In [4]:
import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer

In [5]:
# Enable Colab's custom widget manager so third-party widgets can render.
# The `google.colab` package only exists on Colab runtimes, so guard the import
# to keep the notebook runnable on a local Jupyter kernel as well.
try:
    from google.colab import output
except ImportError:
    # Not running on Colab: nothing to enable.
    output = None
else:
    output.enable_custom_widget_manager()

**Building a model:**

The model we're using is madlad400-3b-mt (loaded from the `jbochi/madlad400-3b-mt` checkpoint on the Hugging Face Hub), which was developed by Google and trained on 419 languages.

In [6]:
# Hugging Face Hub id of the translation checkpoint to load.
model_name = 'jbochi/madlad400-3b-mt'

# device_map='auto' delegates placement of the weights to the library
# (this relies on the `accelerate` package installed earlier).
model = T5ForConditionalGeneration.from_pretrained(
    model_name,
    device_map='auto',
)

# The tokenizer comes from the same checkpoint so its vocabulary matches the model.
tokenizer = T5Tokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/11.8G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/830 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/4.43M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/4.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.6M [00:00<?, ?B/s]

In [7]:
# Smoke test: translate one German sentence to English.
# The "<2en>" prefix tells the model that the target language is English.
test_text = '<2en>' + 'Ich liebe Eiscreme.'

# Tokenize to PyTorch tensors and move the ids to the device the model is on.
encoded_test = tokenizer(test_text, return_tensors='pt')
input_ids = encoded_test.input_ids.to(model.device)

# Generate the translation and decode the first (only) sequence to plain text.
output_ids = model.generate(input_ids=input_ids)
test_output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print('Translated text: {}'.format(test_output_text))



Translated text: I love ice cream.


In [8]:
def translate_anylang_to_en( translation_text, max_new_tokens = 256 ):
  """Translate text written in any supported source language to English.

  Relies on the notebook-level `model` and `tokenizer` objects.

  Args:
    translation_text: Text to translate. `None`, an empty string or a
      whitespace-only string yields an empty result without calling the model.
    max_new_tokens: Upper bound on the number of tokens generated for the
      translation. Without an explicit limit, `generate` falls back to the
      generation config's default length (20 tokens in the transformers
      releases current when this notebook was written), which cuts longer
      translations short.

  Returns:
    The English translation as a string, with special tokens stripped.
  """
  # An empty Gradio textbox would otherwise send only the "<2en>" tag to the
  # model, which has nothing to translate.
  if translation_text is None or not translation_text.strip():
    return ''

  # The "<2en>" prefix tells the model that the target language is English.
  prompt = '<2en>' + translation_text
  input_ids = tokenizer( prompt, return_tensors = 'pt' ).input_ids.to( model.device )
  output_ids = model.generate( input_ids = input_ids, max_new_tokens = max_new_tokens )
  return tokenizer.decode( output_ids[ 0 ], skip_special_tokens = True )

In [9]:
# Second check: run a Hindi sentence through the helper function.
test_text_2 = "मेरा नाम राहुल है।"
test_output_text_2 = translate_anylang_to_en(test_text_2)

message_2 = 'Translated text: {}'.format(test_output_text_2)
print(message_2)

Translated text: My name is Rahul.


**Creating a Gradio UI:**

In [10]:
# Minimal web UI: a two-line textbox in, the translated text out.
interface = gr.Interface(
    fn=translate_anylang_to_en,
    inputs=gr.Textbox(
        lines=2,
        placeholder='Enter text to translate to English',
    ),
    outputs='text',
)

In [11]:
# debug=True keeps this cell running so errors and logs stay visible in the
# notebook; interrupt the cell to shut the server down.
interface.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://a94ad41a13d29032e6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://a94ad41a13d29032e6.gradio.live


