## Installing the python library, that is wrapper around IndicXlit model

In [5]:
# installing library
# for thorough documentation: https://pypi.org/project/ai4bharat-transliteration/
!pip install ai4bharat-transliteration --upgrade --no-cache-dir



In [8]:
!pip install fairseq --upgrade

Collecting fairseq
  Using cached fairseq-0.12.2.tar.gz (9.6 MB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting hydra-core<1.1,>=1.0.7 (from fairseq)
  Using cached hydra_core-1.0.7-py3-none-any.whl.metadata (3.7 kB)
Collecting omegaconf<2.1 (from fairseq)
  Using cached omegaconf-2.0.6-py3-none-any.whl.metadata (3.0 kB)
Requested omegaconf<2.1 from https://files.pythonhosted.org/packages/d0/eb/9d63ce09dd8aa85767c65668d5414958ea29648a0eec80a4a7d311ec2684/omegaconf-2.0.6-py3-none-any.whl (from fairseq) has invalid metadata: .* suffix can only be used with `==` or `!=` operators
    PyYAML (>=5.1.*)
            ~~~~~~^
Please use pip<24.1 if you need to use this version.[0m[33m
[0m  Using cached omegaconf-2.0.5-py3-none-any.whl.metadata (3.0 kB)
Requested omegaconf<2.1 from https://files.pythonhosted.org

## Import the module for transliteration engine

In [1]:
# model support the following languages : [as, bn, brx, gom, gu, hi, kn, ks, mai, ml, mni, mr, ne, or, pa, sa, sd, si, ta, te, ur]
# importing ai4bharat transliteration module
from ai4bharat.transliteration import XlitEngine

## Using word Transliteration

- beam_width increases beam search size, resulting in improved accuracy but increases time/compute. (Default: 4)
- topk returns only specified number of top results. (Default: 4)
- rescore returns the reranked suggestions after using a dictionary. (Default: True)

#### En-Indic conversion

In [2]:
# intializing the en-indic multilingual model and dictionaries (if rerank option is True)
e = XlitEngine("hi", beam_width=4, rescore=True, src_script_type = "en")

# transliterate word
out = e.translit_word("one", topk=1)
print(out)

Initializing Multilingual model for transliteration


ImportError: cannot import name 'convert_namespace_to_omegaconf' from 'fairseq.dataclass.utils' (/home/hemanth/anaconda3/envs/t/lib/python3.9/site-packages/fairseq/dataclass/utils.py)

#### Indic-En conversion

In [None]:
# intializing the indic-en multilingual model and dictionaries (if rerank option is True)
e = XlitEngine( beam_width=4, rescore=False, src_script_type = "indic")

# transliterate Hindi word
out = e.translit_word("भारत", 'hi', topk=5)
print(out)

# transliterate Gujarati word
out = e.translit_word("ગુજરાત", 'gu', topk=5)
print(out)

Downloading Multilingual model for transliteration


MB100% (119.0 of 119.0) |################| Elapsed Time: 0:00:01 Time:  0:00:01


Succefully Downloaded to: /usr/local/lib/python3.8/dist-packages/ai4bharat/transliteration/transformer/models/indic2en/v1.0/model.zip
Models downloaded to: /usr/local/lib/python3.8/dist-packages/ai4bharat/transliteration/transformer/models/indic2en/v1.0
NOTE: When uninstalling this library, REMEMBER to delete the models manually
Initializing Multilingual model for transliteration
['bhaarat', 'bharat', 'bharath', 'bhart']
['gujaraat', 'gujarat', 'goojarat', 'gujraat']


## word Transliteration without rescoring

#### En-Indic conversion

In [None]:
e = XlitEngine("hi", beam_width=10, rescore=False, src_script_type = "en")
out = e.translit_word("one", topk=5)
print(out)

Initializing Multilingual model for transliteration
{'hi': ['ओने', 'ओन', 'ओनी', 'ओणे', 'ओना']}


#### Indic-En conversion

In [None]:
# intializing the indic-en multilingual model and dictionaries (if rerank option is True)
e = XlitEngine( beam_width=10, rescore=False, src_script_type = "indic")

# transliterate Hindi word
out = e.translit_word("भारत", 'hi', topk=5)
print(out)

Initializing Multilingual model for transliteration
['bhaarat', 'bharat', 'bharath', 'bharata', 'bhaarut']


## Using Sentence Transliteration

- Only single top most prediction is returned for each word in sentence.

#### En-Indic conversion

In [None]:
e = XlitEngine(["te", 'mr'], beam_width=10, src_script_type = "en")
out = e.translit_sentence("102 VAnakkam ulagam")
print(out)

Initializing Multilingual model for transliteration


Loading dicts into RAM: 100%|██████████| 2/2 [00:14<00:00,  7.25s/it]


{'mr': '१०२ वणक्कम उलगम', 'te': '౧౦౨ వణక్కం ఉలగం'}


#### Indic-En conversion

In [None]:
e = XlitEngine( beam_width=10, src_script_type = "indic")
out = e.translit_sentence("వణక్కం ఉలగం", 'te')
print(out)

Initializing Multilingual model for transliteration


Loading dicts into RAM: 100%|██████████| 1/1 [00:00<00:00, 11.80it/s]


vanakkam ulagam


## Using Multiple language Transliteration

In [None]:
# Pass list of languages for multile language transliteration
e = XlitEngine(["ta", "ml"], beam_width=6, src_script_type = "en")
# leave empty or use "all" to load all available languages
# e = XlitEngine("all)

out = e.translit_word("amma", topk=3)
print(out)

out = e.translit_sentence("hello world")
print(out)

## Specify language name to get only specific language result
out = e.translit_word("amma", topk=5)
print(out)

Initializing Multilingual model for transliteration


Loading dicts into RAM: 100%|██████████| 2/2 [01:11<00:00, 35.53s/it]


{'ml': ['അമ്മ', 'അമ്മാ', 'ആമ്മ'], 'ta': ['அம்மா', 'ஆம்மா', 'அம்ம']}
{'ml': 'ഹെല്ലോ വർൾഡ്', 'ta': 'ஹெல்லோ வர்ல்ட்'}
{'ml': ['അമ്മ', 'അമ്മാ', 'ആമ്മ', 'എഎമ്എ', 'അംമ'], 'ta': ['அம்மா', 'ஆம்மா', 'அம்ம', 'அம்மை', 'அமா']}


## Transliteration for all available languages

In [None]:
# loading all the language dictionaries would require 8-10 gb of space in RAM
e = XlitEngine(beam_width=10, src_script_type = "en")
out = e.translit_sentence("Hello World")
print(out)