In [1]:
!pip install -U pip
!pip install coqui_stt_training
!apt-get install libopusfile0 libopus-dev libopusfile-dev

Collecting pip
  Downloading pip-21.3.1-py3-none-any.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 5.2 MB/s 
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 21.1.3
    Uninstalling pip-21.1.3:
      Successfully uninstalled pip-21.1.3
Successfully installed pip-21.3.1
Collecting coqui_stt_training
  Downloading coqui_stt_training-1.1.0-py3-none-any.whl (81 kB)
     |████████████████████████████████| 81 kB 4.0 MB/s             
[?25hCollecting pyxdg
  Downloading pyxdg-0.27-py2.py3-none-any.whl (49 kB)
     |████████████████████████████████| 49 kB 6.6 MB/s             
Collecting attrdict
  Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)
Collecting opuslib==2.0.0
  Downloading opuslib-2.0.0.tar.gz (7.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sox
  Downloading sox-1.4.1-py2.py3-none-any.whl (39 kB)
Collecting pyogg>=0.6.14a1
  Downloading PyOgg-0.6.14a1.tar.gz (35 kB)
  Prepar

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  libopus-dev libopusfile-dev libopusfile0
0 upgraded, 3 newly installed, 0 to remove and 37 not upgraded.
Need to get 293 kB of archives.
After this operation, 1,055 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 libopus-dev amd64 1.1.2-1ubuntu1 [197 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libopusfile0 amd64 0.9+20170913-1build1 [38.8 kB]
Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libopusfile-dev amd64 0.9+20170913-1build1 [57.7 kB]
Fetched 293 kB in 1s (317 kB/s)
Selecting previously unselected package libopus-dev:amd64.
(Reading database ... 155222 files and directories currently installed.)
Preparing to unpack .../libopus-dev_1.1.2-1ubuntu1_amd64.deb ...
Unpacking libopus-dev:amd64 (1.1.2-1ubuntu1) ...
Selecting previously unselected package libopus

Run the cells below to check GPU availability.

In [1]:
# Import TensorFlow and ask it whether a GPU is visible to this runtime.
import tensorflow as tf
tf.test.is_gpu_available()

True

In [4]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    """Return the names of all local devices whose ``device_type`` is ``'GPU'``."""
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

get_available_gpus()

['/device:GPU:0']

Run the cell below only if the GPU was not initialized.

In [3]:
# Install the GPU build of TensorFlow, pinned to 1.15.
!pip install tensorflow-gpu==1.15

Collecting tensorflow-gpu==1.15
  Downloading tensorflow_gpu-1.15.0-cp37-cp37m-manylinux2010_x86_64.whl (411.5 MB)
     |████████████████████████████████| 411.5 MB 7.4 kB/s            
Installing collected packages: tensorflow-gpu
Successfully installed tensorflow-gpu-1.15.0


In [5]:
tf.version.VERSION

'1.15.0'

### 1. Loading Data

In [6]:
# Mount Google Drive at /content/drive; later cells read data from and write results to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
import os
import zipfile
import pandas as pd
import librosa
from coqui_stt_training.util.downloader import maybe_download

#### 1.1 Loading data from source
**Skip this step if the transformed audio data (.wav files, mono, 16 kHz) is already loaded.**

In [None]:
def download_performatted_data():
  """Download the Sinhala ASR archive (openslr.org resource 52) and unpack it.

  The archive is fetched to ``sinhala/sinhala.zip`` via ``maybe_download`` and
  extracted into ``sinhala/``. Nothing is downloaded or extracted when the
  extracted corpus directory already exists.
  """
  # The cells that follow read from sinhala/asr_sinhala, so that directory is
  # the marker for "already extracted". The previous check looked for
  # 'sinhala/sinhala', which extraction never creates, so it re-ran every time.
  if not os.path.exists('sinhala/asr_sinhala'):
    maybe_download("sinhala.zip", "sinhala/", "https://www.openslr.org/resources/52/asr_sinhala_0.zip")
    print("\nExtracting Data...")
    with zipfile.ZipFile('sinhala/sinhala.zip', 'r') as zip_ref:
      zip_ref.extractall('sinhala/')
    print('\nFinished extracting data')
  else:
    print('Found data - not extracting')


download_performatted_data()

In [None]:
!ls sinhala/asr_sinhala/
!wc -l sinhala/asr_sinhala/*.tsv
!mkdir exported-model
!mkdir checkpoints

#### 1.2 Preprocessing Audio files

**Skip this step if the transformed audio data (.wav files, mono, 16 kHz) is already loaded.**

In [None]:
# pydub provides the AudioSegment class used for the audio conversion below.
!pip install pydub

In [None]:
from pathlib import PurePath
from pydub import AudioSegment

path = '/content/sinhala/asr_sinhala/data'

def convert_audio(path, output_dir=None, sample_rate=16000):
  """Convert every file found under ``path`` to a mono WAV file.

  Args:
    path: Root directory; it is walked recursively with ``os.walk``.
    output_dir: Directory that receives the ``.wav`` files. ``None`` (default)
      keeps the earlier behaviour of writing into the current working
      directory. The directory is created when it does not exist.
    sample_rate: Target sample rate in Hz. Defaults to 16000, the rate this
      notebook requires for training audio. Pass ``None`` to leave the rate
      of each file unchanged.

  Each file is decoded using its extension (without the dot) as the format
  hint and exported as ``<stem>.wav``.
  """
  target_dir = '' if output_dir is None else output_dir
  if target_dir:
    os.makedirs(target_dir, exist_ok=True)

  for root, _dirs, files in os.walk(path, topdown=True):
    for file_name in files:
      file_path = PurePath(root, file_name)
      segment = AudioSegment.from_file(file_path, file_path.suffix[1:]).set_channels(1)
      if sample_rate is not None:
        # Resample so the output really is 16 kHz mono, not just mono.
        segment = segment.set_frame_rate(sample_rate)
      # stem drops only the final extension; str.replace on the suffix could
      # also remove an earlier occurrence of the same text in the name.
      segment.export(os.path.join(target_dir, file_path.stem + ".wav"), format="wav")

convert_audio(path)

### 2. Loading pretrained model

Load the pretrained Tamil model for transfer learning.

In [9]:
def download_pretrained_model(
    archive_path='/content/drive/MyDrive/pretrained/Tamil STT v0.1.0 (ITML).zip',
    model_dir="tamil/",
    extracted_name="Tamil STT v0.1.0 (ITML)",
):
    """Extract the pre-trained model archive into ``model_dir`` once.

    Args:
        archive_path: Zip archive holding the pre-trained checkpoint.
        model_dir: Directory the archive is extracted into.
        extracted_name: Name of the folder expected inside ``model_dir`` after
            extraction; its presence means "already extracted".

    Raises:
        FileNotFoundError: if extraction is needed and ``archive_path`` does
            not exist (raised by ``zipfile.ZipFile``).
    """
    # NOTE(review): assumes the archive unpacks to a folder named after the
    # archive, matching the load_checkpoint_dir used by the training config -
    # confirm. The previous check looked for "tamil/coqui-yesno-checkpoints",
    # which this archive never creates, so extraction was repeated every run.
    extracted_dir = os.path.join(model_dir, extracted_name)
    if not os.path.exists(extracted_dir):
        print('\nNo extracted pre-trained model found. Extracting now...')
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extractall(model_dir)
        print('\nFinished extracting data')
    else:
        print('Found "{}" - not extracting.'.format(extracted_dir))

# Extract the pre-trained Tamil model
download_pretrained_model()


No extracted pre-trained model found. Extracting now...
n\Finished extrcting data


### 3. Transforming tabular data into required formats

In [None]:
def file_meta(path):
    """Collect name, size and duration for every file under ``path``.

    Args:
        path: Root directory; it is walked recursively with ``os.walk``.

    Returns:
        A DataFrame with one row per file and the columns ``wav_filename``
        (file name without its extension), ``wav_filesize`` (value of
        ``os.path.getsize``) and ``durations`` (value of
        ``librosa.get_duration``). The same frame is also stored in the
        module-level name ``df``.

    Raises:
        FileNotFoundError: if ``path`` is not an existing directory.
    """
    # Later cells read the module-level `df` instead of the return value, so
    # the global assignment is kept for compatibility.
    global df

    # os.walk yields nothing for a missing directory; previously `df` was then
    # never assigned and the call failed with NameError (or returned a stale
    # frame from an earlier run).
    if not os.path.isdir(path):
        raise FileNotFoundError("Audio directory not found: {}".format(path))

    records = []
    for root, _dirs, files in os.walk(path, topdown=True):
        for name in files:
            full_path = os.path.join(root, name)
            records.append({
                'wav_filename': os.path.splitext(name)[0],
                'wav_filesize': os.path.getsize(full_path),
                'durations': librosa.get_duration(filename=full_path),
            })

    # Build the frame once after the walk instead of once per visited directory.
    df = pd.DataFrame(records, columns=['wav_filename', 'wav_filesize', 'durations'])
    return df

path = 'asr_sinhala_0/asr_sinhala/data'

file_meta(path)

df.head()

Loading transcript data

In [None]:
# Read the tab-separated transcript file; column names are supplied explicitly via `names`.
df2 = pd.read_csv('asr_sinhala_0/asr_sinhala/utt_spk_text.tsv', names=['wav_filename', 'id', 'transcript'], delimiter='\t')

df2.head()

Merging transcript data and metadata

In [None]:
# Left join on wav_filename: every row of `df` is kept, and rows without a
# matching transcript get NaN in the columns coming from `df2`.
df_main = df.merge(df2, on='wav_filename', how='left')

df_main.head()

In [None]:
df_main.shape

In [None]:
# Select the needed columns, drop rows with any missing value and force the
# transcript column to str, all in one chain.
df_final = (
    df_main[['wav_filename', 'wav_filesize', 'transcript', 'durations']]
    .dropna(axis=0)
    .astype({'transcript': str})
)

# Transcript length in characters, used later for outlier filtering.
df_final['len_trasn'] = df_final['transcript'].str.len()

df_final.head()

In [None]:
# Replace embedded newlines with spaces so each transcript stays on one line.
df_final['transcript'] = df_final['transcript'].replace(r'\n', ' ', regex=True)

# Append the extension only where it is missing, so re-running this cell does
# not turn "clip.wav" into "clip.wav.wav".
wav_names = df_final['wav_filename'].astype(str)
df_final['wav_filename'] = wav_names.where(wav_names.str.endswith('.wav'), wav_names + '.wav')

# Remove outliers based on previous EDA: file size, transcript length, duration.
df_final = df_final[
    (df_final['wav_filesize'] < 200000)
    & (df_final['len_trasn'] < 5000)
    & (df_final['durations'] < 15)
]

df_final.shape

In [None]:
# Keep only wav_filename, wav_filesize and transcript; the helper columns used
# for filtering are dropped here.
df_final = df_final[['wav_filename', 'wav_filesize', 'transcript']]

df_final.head()

In [None]:
# The frame built in the cells above is `df_final`; `df_final3` was never
# defined and raised NameError. index=False keeps the row index out of the
# file so it does not come back as an "Unnamed: 0" column when re-read.
df_final.to_csv('samples.csv', index=False)

In [29]:
# Alternative starting point: load an already preprocessed CSV from Drive
# instead of rebuilding it with the cells above.
data = pd.read_csv('/content/drive/MyDrive/Data/new_preprocessed.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,wav_filename,wav_filesize,transcript
0,0,015f827f2f.wav,82655,බුරුල් කොට අතහරින්ට සැරසෙත්ම
1,1,01dc481e00.wav,64248,ජාතික දේශපාලනයේ දී
2,2,01a9f2b678.wav,49246,කියවලා බලන්නකෝ.
3,3,015fe72ff7.wav,121655,ආදරය කරපු වසර අටක කාලයේ අතරතුර අපි දුරස් වුණා.
4,4,016fb6f0e8.wav,93070,වෙනත් බොලිවුඩ් කතා වලට බෑ.


Splitting data into train/dev/test sets

In [30]:
# Positional split, no shuffling: rows 0-7499 -> train, 7500-9499 -> dev,
# 9500-9999 -> test. Rows from 10000 onwards are not used.
# The *_df names avoid a clash with the `train` and `test` functions that
# later cells import from coqui_stt_training.
train_df = data.iloc[:7500, :]
dev_df = data.iloc[7500:9500, :]
test_df = data.iloc[9500:10000, :]

# index=False stops pandas from adding another unnamed index column to each file.
train_df.to_csv('/content/drive/MyDrive/Data/train6.csv', index=False)
dev_df.to_csv('/content/drive/MyDrive/Data/dev6.csv', index=False)
test_df.to_csv('/content/drive/MyDrive/Data/test6.csv', index=False)

### 4. Training Model

In [31]:
from coqui_stt_training.util.config import initialize_globals_from_args

In [32]:
# Configure Coqui STT training: data manifests, checkpoint locations,
# model size, schedule and batch sizes.
initialize_globals_from_args(
    train_files=['/content/drive/MyDrive/Data/train6.csv'],
    dev_files=['/content/drive/MyDrive/Data/dev6.csv'],
    test_files=['/content/drive/MyDrive/Data/test6.csv'],
    alphabet_config_path='/content/drive/MyDrive/Data/alphabet.txt',
    checkpoint_dir='/content/checkpoints',
    load_checkpoint_dir="/content/tamil/Tamil STT v0.1.0 (ITML)",
    # Absolute path: the earlier relative "content/checkpoints" was resolved
    # against the working directory and disagreed with checkpoint_dir above.
    save_checkpoint_dir="/content/checkpoints",
    # NOTE(review): 'init' presumably starts from freshly initialised weights,
    # so the Tamil checkpoint in load_checkpoint_dir would not be loaded.
    # Confirm whether transfer learning is intended here; it would also need a
    # compatible n_hidden and handling of the different output alphabet.
    load_train='init',
    n_hidden=512,
    epochs=100,
    beam_width=1,
    export_dir='/content/exported-model',
    early_stop=True,
    es_epochs=20,
    load_cudnn=True,
    export_tflite=False,
    #train_cudnn=True,
    reduce_lr_on_plateau=True,
    train_batch_size=16, #16 batch_size per gpu
    dev_batch_size=16,
    test_batch_size=16,
)

In [33]:
# Print the training configuration as JSON for inspection.
from coqui_stt_training.util.config import Config

print(Config.to_json())

{
    "train_files": [
        "/content/drive/MyDrive/Data/train6.csv"
    ],
    "dev_files": [
        "/content/drive/MyDrive/Data/dev6.csv"
    ],
    "test_files": [
        "/content/drive/MyDrive/Data/test6.csv"
    ],
    "metrics_files": [],
    "auto_input_dataset": "",
    "vocab_file": "",
    "read_buffer": 1048576,
    "feature_cache": "",
    "cache_for_epochs": 0,
    "shuffle_batches": false,
    "shuffle_start": 1,
    "shuffle_buffer": 1000,
    "feature_win_len": 32,
    "feature_win_step": 20,
    "audio_sample_rate": 16000,
    "normalize_sample_rate": true,
    "augment": null,
    "epochs": 100,
    "dropout_rate": 0.05,
    "dropout_rate2": 0.05,
    "dropout_rate3": 0.05,
    "dropout_rate4": 0.0,
    "dropout_rate5": 0.0,
    "dropout_rate6": 0.05,
    "relu_clip": 20.0,
    "beta1": 0.9,
    "beta2": 0.999,
    "epsilon": 1e-08,
    "learning_rate": 0.001,
    "train_batch_size": 16,
    "dev_batch_size": 16,
    "test_batch_size": 16,
    "export_batch_siz

In [None]:
# Start training. train() takes no arguments here; it presumably reads the
# global configuration set by initialize_globals_from_args - confirm.
from coqui_stt_training.train import train

train()

I Initializing all variables.
I STARTING Optimization
Epoch 0 |   Training | Elapsed Time: 0:02:07 | Steps: 468 | Loss: 91.885155    
Epoch 0 | Validation | Elapsed Time: 0:00:14 | Steps: 125 | Loss: 92.638279 | Dataset: /content/drive/MyDrive/Data/dev6.csv
I Saved new best validating model with loss 92.638279 to: /content/checkpoints/best_dev-468
--------------------------------------------------------------------------------
Epoch 1 |   Training | Elapsed Time: 0:02:06 | Steps: 468 | Loss: 73.799614    
Epoch 1 | Validation | Elapsed Time: 0:00:14 | Steps: 125 | Loss: 69.182551 | Dataset: /content/drive/MyDrive/Data/dev6.csv
I Saved new best validating model with loss 69.182551 to: /content/checkpoints/best_dev-936
--------------------------------------------------------------------------------
Epoch 2 |   Training | Elapsed Time: 0:02:06 | Steps: 468 | Loss: 56.409645    
Epoch 2 | Validation | Elapsed Time: 0:00:14 | Steps: 125 | Loss: 53.986411 | Dataset: /content/drive/MyDrive/Da

In [25]:
# Evaluate the trained model. Note that this import rebinds the name `test`
# in the notebook namespace.
from coqui_stt_training.evaluate import test

test()

I Loading best validating checkpoint from /content/checkpoints/best_dev-5681
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel
I Loading variable from checkpoint: global_step
I Loading variable from checkpoint: layer_1/bias
I Loading variable from checkpoint: layer_1/weights
I Loading variable from checkpoint: layer_2/bias
I Loading variable from checkpoint: layer_2/weights
I Loading variable from checkpoint: layer_3/bias
I Loading variable from checkpoint: layer_3/weights
I Loading variable from checkpoint: layer_5/bias
I Loading variable from checkpoint: layer_5/weights
I Loading variable from checkpoint: layer_6/bias
I Loading variable from checkpoint: layer_6/weights
Testing model on /content/drive/MyDrive/Data/test4.csv
Test epoch | Steps: 63 | Elapsed Time: 0:51:18                                 
Test on /content/drive/

In [26]:
# Export the trained model; the configuration sets export_dir to /content/exported-model.
from coqui_stt_training.export import export

export()

I Exporting the model...
I Loading best validating checkpoint from /content/checkpoints/best_dev-5681
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel
I Loading variable from checkpoint: layer_1/bias
I Loading variable from checkpoint: layer_1/weights
I Loading variable from checkpoint: layer_2/bias
I Loading variable from checkpoint: layer_2/weights
I Loading variable from checkpoint: layer_3/bias
I Loading variable from checkpoint: layer_3/weights
I Loading variable from checkpoint: layer_5/bias
I Loading variable from checkpoint: layer_5/weights
I Loading variable from checkpoint: layer_6/bias
I Loading variable from checkpoint: layer_6/weights
I Models exported at /content/exported-model
I Model metadata file saved to /content/exported-model/author_model_0.0.1.md. Before submitting the exported model for publishing make s

Saving checkpoints and models

In [27]:
# Make sure Drive is mounted before copying results to it.
from google.colab import drive
drive.mount('/content/drive')

# Copy the exported model directory to Drive (-a archive mode, -v verbose).
#%cp -av /content/checkpoints /content/drive/MyDrive/Model/checkpoints
%cp -av /content/exported-model /content/drive/MyDrive/Model/export

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
'/content/exported-model' -> '/content/drive/MyDrive/Model/export/exported-model'
'/content/exported-model/output_graph.pb' -> '/content/drive/MyDrive/Model/export/exported-model/output_graph.pb'
'/content/exported-model/author_model_0.0.1.md' -> '/content/drive/MyDrive/Model/export/exported-model/author_model_0.0.1.md'


In [28]:
# Copy the whole checkpoints directory to the root of Drive.
%cp -av /content/checkpoints /content/drive/MyDrive

'/content/checkpoints' -> '/content/drive/MyDrive/checkpoints'
'/content/checkpoints/best_dev-5000.meta' -> '/content/drive/MyDrive/checkpoints/best_dev-5000.meta'
'/content/checkpoints/flags.txt' -> '/content/drive/MyDrive/checkpoints/flags.txt'
'/content/checkpoints/alphabet.txt' -> '/content/drive/MyDrive/checkpoints/alphabet.txt'
'/content/checkpoints/train-8500.meta' -> '/content/drive/MyDrive/checkpoints/train-8500.meta'
'/content/checkpoints/train-9500.meta' -> '/content/drive/MyDrive/checkpoints/train-9500.meta'
'/content/checkpoints/train-10000.meta' -> '/content/drive/MyDrive/checkpoints/train-10000.meta'
'/content/checkpoints/train-8000.meta' -> '/content/drive/MyDrive/checkpoints/train-8000.meta'
'/content/checkpoints/train-9000.meta' -> '/content/drive/MyDrive/checkpoints/train-9000.meta'
'/content/checkpoints/train-10000.data-00000-of-00001' -> '/content/drive/MyDrive/checkpoints/train-10000.data-00000-of-00001'
'/content/checkpoints/train-8000.data-00000-of-00001' -> '/c