<a href="https://colab.research.google.com/github/ManasviEmmadi/ML_AI_Projects/blob/main/STT%2C_TTS_Translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Vakyansh + Hugging Face : Hindi Speech To text Demo

## Install requirements

In [1]:
%%capture

!apt-get -y install sox ffmpeg
!pip install transformers ffmpeg-python sox

!wget https://raw.githubusercontent.com/harveenchadha/bol/main/demos/colab/record.py

## Load Hindi Model

In [2]:
import soundfile as sf
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

def load_model():
    # load pretrained model
    # Harveenchadha/vakyansh-wav2vec2-hindi-him-4200
    processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
    model = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
    return processor, model

processor, model = load_model()


Downloading (…)rocessor_config.json:   0%|          | 0.00/261 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/729 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.66k [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/378M [00:00<?, ?B/s]

In [3]:
def parse_transcription(wav_file):
    # load audio    
    audio_input, sample_rate = sf.read(wav_file)

    # pad input values and return pt tensor
    input_values = processor(audio_input, sampling_rate=16_000, return_tensors="pt").input_values

    # INFERENCE
    # retrieve logits & take argmax
    logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)

    # transcribe
    transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
    return transcription

## Record file using colab

In [4]:
from record import record_audio

record_audio('test')

## Run Model on recorded file

In [5]:
result_of_hindii = parse_transcription('/content/test.wav')
result_of_hindii

'मेरा नाम क्या है'

In [6]:
%store result_of_hindii

Stored 'result_of_hindii' (str)


# **Indic to english translation**

In [7]:
# clone the repo for running evaluation
!git clone https://github.com/AI4Bharat/indicTrans.git
%cd indicTrans
# clone requirements repositories
!git clone https://github.com/anoopkunchukuttan/indic_nlp_library.git
!git clone https://github.com/anoopkunchukuttan/indic_nlp_resources.git
!git clone https://github.com/rsennrich/subword-nmt.git
%cd ..

Cloning into 'indicTrans'...
remote: Enumerating objects: 694, done.[K
remote: Counting objects: 100% (397/397), done.[K
remote: Compressing objects: 100% (201/201), done.[K
remote: Total 694 (delta 273), reused 272 (delta 194), pack-reused 297[K
Receiving objects: 100% (694/694), 2.65 MiB | 5.78 MiB/s, done.
Resolving deltas: 100% (400/400), done.
/content/indicTrans
Cloning into 'indic_nlp_library'...
remote: Enumerating objects: 1325, done.[K
remote: Counting objects: 100% (107/107), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 1325 (delta 91), reused 82 (delta 82), pack-reused 1218[K
Receiving objects: 100% (1325/1325), 9.55 MiB | 7.67 MiB/s, done.
Resolving deltas: 100% (701/701), done.
Cloning into 'indic_nlp_resources'...
remote: Enumerating objects: 139, done.[K
remote: Counting objects: 100% (13/13), done.[K
remote: Compressing objects: 100% (13/13), done.[K
remote: Total 139 (delta 2), reused 2 (delta 0), pack-reused 126[K
Receiving obj

In [8]:
# Install the necessary libraries
!pip install sacremoses pandas mock sacrebleu tensorboardX pyarrow indic-nlp-library
! pip install mosestokenizer subword-nmt
# Install fairseq from source
!git clone https://github.com/pytorch/fairseq.git
%cd fairseq
# !git checkout da9eaba12d82b9bfc1442f0e2c6fc1b895f4d35d
!pip install ./
! pip install xformers
%cd ..

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m880.6/880.6 KB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mock
  Downloading mock-5.0.1-py3-none-any.whl (30 kB)
Collecting sacrebleu
  Downloading sacrebleu-2.3.1-py3-none-any.whl (118 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.9/118.9 KB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorboardX
  Downloading tensorboardX-2.5.1-py2.py3-none-any.whl (125 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.4/125.4 KB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
Collecting indic-nlp-library
  Downloading indic_nlp_library-0.81-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 KB[0m 

In [9]:
# ! pip install --use-feature=in-tree-build ./

[31mERROR: Directory './' is not installable. Neither 'setup.py' nor 'pyproject.toml' found.[0m[31m
[0m

In [17]:
!pip install triton==2.0.0.dev20221120

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting triton==2.0.0.dev20221120
  Downloading triton-2.0.0.dev20221120-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.7/18.7 MB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: triton
Successfully installed triton-2.0.0.dev20221120


In [11]:
import triton

In [15]:
!pip install setup.py 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[31mERROR: Could not find a version that satisfies the requirement setup.py (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for setup.py[0m[31m
[0m

In [12]:
import fairseq

Need to compile C++ extensions to get sparse attention suport. Please run python setup.py build develop
Error caught was: module 'triton.language' has no attribute 'constexpr'


/usr/local/lib/python3.8/dist-packages/xformers/_C.so: undefined symbol: _ZNK3c104impl13OperatorEntry20reportSignatureErrorENS0_12CppSignatureE


In [None]:
!pip install fairseq

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# add fairseq folder to python path
import os
os.environ['PYTHONPATH'] += ":/content/fairseq/"
# sanity check to see if fairseq is installed
from fairseq import checkpoint_utils
from fairseq.distributed import utils as distributed_utils


TypeError: ignored

In [None]:
!git clone https://github.com/pytorch/fairseqh
os.chdir('/content/fairseq')
!pip install ./

Cloning into 'fairseqh'...
fatal: could not read Username for 'https://github.com': No such device or address
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Processing /content/fairseq
[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: fairseq
  Building wheel for fairseq (PEP 517) ... [?25l[?25hdone
  Created wheel for fairseq: file

In [None]:
# download the indictrans model


# downloading the indic-en model
!wget https://storage.googleapis.com/samanantar-public/V0.3/models/indic-en.zip
!unzip indic-en.zip

# downloading the en-indic model
# !wget https://storage.googleapis.com/samanantar-public/V0.3/models/en-indic.zip
# !unzip en-indic.zip

# # downloading the indic-indic model
# !wget https://storage.googleapis.com/samanantar-public/V0.3/models/m2m.zip
# !unzip m2m.zip

%cd indicTrans

--2022-07-27 11:22:25--  https://storage.googleapis.com/samanantar-public/V0.3/models/indic-en.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.24.128, 142.250.4.128, 172.217.194.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.24.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4759117228 (4.4G) [application/zip]
Saving to: ‘indic-en.zip’


2022-07-27 11:22:51 (177 MB/s) - ‘indic-en.zip’ saved [4759117228/4759117228]

Archive:  indic-en.zip
   creating: indic-en/
   creating: indic-en/vocab/
  inflating: indic-en/vocab/bpe_codes.32k.SRC  
  inflating: indic-en/vocab/vocab.SRC  
  inflating: indic-en/vocab/vocab.TGT  
  inflating: indic-en/vocab/bpe_codes.32k.TGT  
   creating: indic-en/final_bin/
  inflating: indic-en/final_bin/preprocess.log  
  inflating: indic-en/final_bin/dict.TGT.txt  
  inflating: indic-en/final_bin/test.SRC-TGT.SRC.idx  
  inflating: indic-en/final_bin/test.SRC-TGT.TGT.idx  
  infl

In [None]:
from indicTrans.inference.engine import Model

indic2en_model = Model(expdir='../indic-en')

ModuleNotFoundError: ignored

In [13]:
result_of_hindii

'मेरा नाम क्या है'

In [None]:
indic2en_model.batch_translate(result_of_hindii, 'hi', 'en')
# indic2en_model.batch_translate(result, 'hi', 'en')

NameError: ignored

# **English to Indic Translation**

In [None]:
!wget https://storage.googleapis.com/samanantar-public/V0.3/models/en-indic.zip
!unzip en-indic.zip

%cd indicTrans

--2022-08-02 03:24:53--  https://storage.googleapis.com/samanantar-public/V0.3/models/en-indic.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.193.128, 172.217.204.128, 172.217.203.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.193.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4811880516 (4.5G) [application/zip]
Saving to: ‘en-indic.zip’

Archive:  en-indic.zip
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of en-indic.zip or
        en-indic.zip.zip, and cannot find en-indic.zip.ZIP, period.
/content/indicTrans


In [None]:
from indicTrans.inference.engine import Model

en2indic_model = Model(expdir='../en-indic')

2022-07-21 04:06:02 | INFO | numexpr.utils | NumExpr defaulting to 2 threads.


Initializing vocab and bpe
Initializing model for translation


2022-07-21 04:06:05 | INFO | fairseq.tasks.translation | [SRC] dictionary: 32104 types
2022-07-21 04:06:05 | INFO | fairseq.tasks.translation | [TGT] dictionary: 35888 types


In [None]:
result_from_server = 'the train arrives in the afternoon.'

In [None]:
en2indic_model.batch_translate(result_from_server, 'en', 'hi')

AssertionError: ignored

# **Text to Speech (tts)**

In [None]:
!pip install aksharamukha

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install omegaconf

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import torch
from aksharamukha import transliterate

# Loading model
model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models',
                                     model='silero_tts',
                                     language='indic',
                                     speaker='v3_indic')

orig_text = "प्रसिद्द कबीर अध्येता, पुरुषोत्तम अग्रवाल का यह शोध आलेख, उस रामानंद की खोज करता है"
roman_text = transliterate.process('Devanagari', 'ISO', orig_text)
print(roman_text)

audioss = model.apply_tts(roman_text,
                        speaker='hindi_male')

Using cache found in /root/.cache/torch/hub/snakers4_silero-models_master


prasidda kabīra adhyētā, puruṣōttama agravāla kā yaha śōdha ālēkha, usa rāmānaṁda kī khōja karatā hai


In [None]:
audioss

tensor([ 0.0008,  0.0008,  0.0006,  ..., -0.0161, -0.0477,  0.0374])

In [None]:
from IPython.display import Audio
Audio(audioss.numpy(), rate=24000) 