In [1]:
# Environment setup (Colab): upgrade pip, install the Coqui STT training
# package, and install the Opus system libraries.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -U pip
# Pinned to the release this notebook was last run with, for reproducibility.
%pip install coqui_stt_training==1.1.0
# -y answers apt's confirmation prompt so the cell cannot stall waiting for input.
# NOTE(review): the Opus libraries are presumably required by the PyOgg dependency - confirm.
!apt-get install -y libopusfile0 libopus-dev libopusfile-dev

Collecting pip
  Downloading pip-21.3.1-py3-none-any.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 4.1 MB/s 
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 21.1.3
    Uninstalling pip-21.1.3:
      Successfully uninstalled pip-21.1.3
Successfully installed pip-21.3.1
Collecting coqui_stt_training
  Downloading coqui_stt_training-1.1.0-py3-none-any.whl (81 kB)
     |████████████████████████████████| 81 kB 3.4 MB/s             
[?25hCollecting pyxdg
  Downloading pyxdg-0.27-py2.py3-none-any.whl (49 kB)
     |████████████████████████████████| 49 kB 5.9 MB/s             
[?25hCollecting coqui-stt-ctcdecoder==1.1.0
  Downloading coqui_stt_ctcdecoder-1.1.0-cp37-cp37m-manylinux_2_24_x86_64.whl (2.6 MB)
     |████████████████████████████████| 2.6 MB 24.1 MB/s            
Collecting pyogg>=0.6.14a1
  Downloading PyOgg-0.6.14a1.tar.gz (35 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sox
  Downloa

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  libopus-dev libopusfile-dev libopusfile0
0 upgraded, 3 newly installed, 0 to remove and 37 not upgraded.
Need to get 293 kB of archives.
After this operation, 1,055 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 libopus-dev amd64 1.1.2-1ubuntu1 [197 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libopusfile0 amd64 0.9+20170913-1build1 [38.8 kB]
Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libopusfile-dev amd64 0.9+20170913-1build1 [57.7 kB]
Fetched 293 kB in 2s (150 kB/s)
Selecting previously unselected package libopus-dev:amd64.
(Reading database ... 155229 files and directories currently installed.)
Preparing to unpack .../libopus-dev_1.1.2-1ubuntu1_amd64.deb ...
Unpacking libopus-dev:amd64 (1.1.2-1ubuntu1) ...
Selecting previously unselected package libopus

Run the cells below to check GPU availability.

In [1]:
# Ask TensorFlow whether a GPU is usable; the boolean result is shown as the
# cell output.
import tensorflow as tf
tf.test.is_gpu_available()

True

In [2]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    """Return the names of all local devices whose device type is 'GPU'."""
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

# Display the detected GPU device names as the cell output.
get_available_gpus()

['/device:GPU:0']

If the GPU was not initialized, install the GPU build of TensorFlow.

In [3]:
# Install the GPU build of TensorFlow, pinned to the 1.15 release series.
# Only needed when the GPU checks in the previous cells fail.
!pip install tensorflow-gpu==1.15

Collecting tensorflow-gpu==1.15
  Downloading tensorflow_gpu-1.15.0-cp37-cp37m-manylinux2010_x86_64.whl (411.5 MB)
     |████████████████████████████████| 411.5 MB 8.4 kB/s             
Installing collected packages: tensorflow-gpu
Successfully installed tensorflow-gpu-1.15.0


In [None]:
# Show the version of the TensorFlow build that is actually loaded.
tf.version.VERSION

'1.15.4'

### 1. Loading Data

In [10]:
# Mount Google Drive at /content/drive; the dataset CSVs, alphabet file,
# pre-trained archive and checkpoints used below all live under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import os
import zipfile
import pandas as pd
import librosa
from coqui_stt_training.util.downloader import maybe_download

### 2. Loading pretrained model

Load the pretrained Tamil model for transfer learning.

In [5]:
def download_pretrained_model(
    archive_path='/content/drive/MyDrive/pretrained/Tamil STT v0.1.0 (ITML).zip',
    model_dir="tamil/",
    marker_dir="tamil/coqui-yesno-checkpoints",
):
    """Extract the pre-trained model archive unless it was already extracted.

    Despite the name, nothing is downloaded: the archive is read from
    ``archive_path`` (by default a zip file on the mounted Google Drive).

    Args:
        archive_path: Path of the zip archive holding the pre-trained model.
        model_dir: Directory the archive is extracted into.
        marker_dir: Path whose existence means "already extracted"; when it
            exists the extraction is skipped.
            NOTE(review): the default marker name looks inherited from another
            example and may not match what this archive contains - confirm,
            otherwise the archive is re-extracted on every call.

    Raises:
        FileNotFoundError: If ``archive_path`` does not exist (e.g. Drive is
            not mounted).
        zipfile.BadZipFile: If ``archive_path`` is not a valid zip archive.
    """
    if os.path.exists(marker_dir):
        print(f'Found "{marker_dir}" - not extracting.')
        return

    print('\nNo extracted pre-trained model found. Extracting now...')
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(model_dir)
    # The original message had the backslash misplaced ('n\F...'), which
    # printed a literal "n\" instead of a newline.
    print('\nFinished extracting data')

# Extract the pre-trained Tamil model from the archive on Google Drive
download_pretrained_model()


No extracted pre-trained model found. Extracting now...
n\Finished extrcting data


### 3. Training Model

In [11]:
from coqui_stt_training.util.config import initialize_globals_from_args

In [12]:
# Training configuration, grouped by purpose (same keyword arguments and
# values as before; only their order and the comments changed).
initialize_globals_from_args(
    # --- Data sets and alphabet -------------------------------------------
    train_files=['/content/drive/MyDrive/Data3/train_main.csv'],
    dev_files=['/content/drive/MyDrive/Data3/dev_main2.csv'],
    test_files=['/content/drive/MyDrive/Data3/test_main.csv'],
    alphabet_config_path='/content/drive/MyDrive/alphabet.txt',

    # --- Checkpoints --------------------------------------------------------
    # Loading and saving both use the same Drive folder, so a run resumes
    # from the checkpoints written by the previous run.
    # Alternatives tried earlier (kept for reference):
    #   load_checkpoint_dir="/content/tamil/Tamil STT v0.1.0 (ITML)"
    #   load_checkpoint_dir='/content/drive/MyDrive/Model/checkpoints/checkpoints11/checkpoints'
    #   save_checkpoint_dir="content/checkpoints"
    checkpoint_dir='/content/drive/MyDrive/checkpoints',
    load_checkpoint_dir='/content/drive/MyDrive/checkpoints',
    save_checkpoint_dir='/content/drive/MyDrive/checkpoints',
    load_train='best',
    load_cudnn=True,
    # train_cudnn=True  (currently disabled)

    # --- Model size, batch sizes and schedule -----------------------------
    n_hidden=512,
    epochs=100,
    train_batch_size=128,
    dev_batch_size=128,
    test_batch_size=64,
    early_stop=True,
    es_epochs=20,
    reduce_lr_on_plateau=True,
    plateau_epochs=5,

    # --- Decoding and export ----------------------------------------------
    beam_width=1,
    export_dir='/content/exported-model',
    export_tflite=True,
    export_model_version='0.0.5',
)

In [13]:
# Dump the effective configuration as JSON to verify the values set above
# together with the defaults for everything that was not overridden.
from coqui_stt_training.util.config import Config

print(Config.to_json())

{
    "train_files": [
        "/content/drive/MyDrive/Data3/train_main.csv"
    ],
    "dev_files": [
        "/content/drive/MyDrive/Data3/dev_main2.csv"
    ],
    "test_files": [
        "/content/drive/MyDrive/Data3/test_main.csv"
    ],
    "metrics_files": [],
    "auto_input_dataset": "",
    "vocab_file": "",
    "read_buffer": 1048576,
    "feature_cache": "",
    "cache_for_epochs": 0,
    "shuffle_batches": false,
    "shuffle_start": 1,
    "shuffle_buffer": 1000,
    "feature_win_len": 32,
    "feature_win_step": 20,
    "audio_sample_rate": 16000,
    "normalize_sample_rate": true,
    "augment": null,
    "epochs": 100,
    "dropout_rate": 0.05,
    "dropout_rate2": 0.05,
    "dropout_rate3": 0.05,
    "dropout_rate4": 0.0,
    "dropout_rate5": 0.0,
    "dropout_rate6": 0.05,
    "relu_clip": 20.0,
    "beta1": 0.9,
    "beta2": 0.999,
    "epsilon": 1e-08,
    "learning_rate": 0.001,
    "train_batch_size": 128,
    "dev_batch_size": 128,
    "test_batch_size": 64,
   

In [None]:
# Run training. No arguments are passed; the run is presumably driven by the
# global configuration initialised earlier in the notebook.
from coqui_stt_training.train import train

train()

I Loading best validating checkpoint from /content/drive/MyDrive/checkpoints/best_dev-2618
I Loading variable from checkpoint: beta1_power
I Loading variable from checkpoint: beta2_power
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias/Adam
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias/Adam_1
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel/Adam
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel/Adam_1
I Loading variable from checkpoint: global_step
I Loading variable from checkpoint: layer_1/bias
I Loading variable from checkpoint: layer_1/bias/Adam
I 

In [None]:
# Evaluate the best validating checkpoint on the configured test files.
# NOTE(review): the recorded run took about three hours for 36 steps.
from coqui_stt_training.evaluate import test

test()

I Loading best validating checkpoint from /content/drive/MyDrive/checkpoints/best_dev-2992
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel
I Loading variable from checkpoint: global_step
I Loading variable from checkpoint: layer_1/bias
I Loading variable from checkpoint: layer_1/weights
I Loading variable from checkpoint: layer_2/bias
I Loading variable from checkpoint: layer_2/weights
I Loading variable from checkpoint: layer_3/bias
I Loading variable from checkpoint: layer_3/weights
I Loading variable from checkpoint: layer_5/bias
I Loading variable from checkpoint: layer_5/weights
I Loading variable from checkpoint: layer_6/bias
I Loading variable from checkpoint: layer_6/weights
Testing model on /content/drive/MyDrive/Data3/test_main.csv
Test epoch | Steps: 36 | Elapsed Time: 2:58:15                                 
Test

In [None]:
# Export the best validating checkpoint as a deployable model; the recorded
# run wrote it to /content/exported-model.
from coqui_stt_training.export import export

export()

I Exporting the model...
I Loading best validating checkpoint from /content/drive/MyDrive/checkpoints/best_dev-2244
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel
I Loading variable from checkpoint: layer_1/bias
I Loading variable from checkpoint: layer_1/weights
I Loading variable from checkpoint: layer_2/bias
I Loading variable from checkpoint: layer_2/weights
I Loading variable from checkpoint: layer_3/bias
I Loading variable from checkpoint: layer_3/weights
I Loading variable from checkpoint: layer_5/bias
I Loading variable from checkpoint: layer_5/weights
I Loading variable from checkpoint: layer_6/bias
I Loading variable from checkpoint: layer_6/weights
I Models exported at /content/exported-model
I Model metadata file saved to /content/exported-model/author_model_0.0.4.md. Before submitting the exported model for pub

Save the checkpoints and the exported model to Google Drive.

In [None]:
# Make sure Google Drive is mounted, then copy the exported model to it.
from google.colab import drive
drive.mount('/content/drive')

# Earlier variant that copied local checkpoints instead (kept for reference):
#%cp -av /content/checkpoints /content/drive/MyDrive/Work/checkpoints
# -a copies recursively and preserves file attributes; -v lists each file.
# If the destination directory already exists, cp nests the source inside it.
%cp -av /content/exported-model /content/drive/MyDrive/export

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
'/content/exported-model' -> '/content/drive/MyDrive/export'
'/content/exported-model/output_graph.tflite' -> '/content/drive/MyDrive/export/output_graph.tflite'
'/content/exported-model/author_model_0.0.4.md' -> '/content/drive/MyDrive/export/author_model_0.0.4.md'


In [None]:
# Copy locally written checkpoints to Google Drive.
# NOTE(review): the training configuration in this notebook already saves
# checkpoints to /content/drive/MyDrive/checkpoints, so this cell only applies
# when save_checkpoint_dir points at /content/checkpoints - confirm it is
# still needed.
%cp -av /content/checkpoints /content/drive/MyDrive

'/content/checkpoints' -> '/content/drive/MyDrive/checkpoints'
'/content/checkpoints/best_dev-5000.meta' -> '/content/drive/MyDrive/checkpoints/best_dev-5000.meta'
'/content/checkpoints/flags.txt' -> '/content/drive/MyDrive/checkpoints/flags.txt'
'/content/checkpoints/alphabet.txt' -> '/content/drive/MyDrive/checkpoints/alphabet.txt'
'/content/checkpoints/train-8500.meta' -> '/content/drive/MyDrive/checkpoints/train-8500.meta'
'/content/checkpoints/train-9500.meta' -> '/content/drive/MyDrive/checkpoints/train-9500.meta'
'/content/checkpoints/train-10000.meta' -> '/content/drive/MyDrive/checkpoints/train-10000.meta'
'/content/checkpoints/train-8000.meta' -> '/content/drive/MyDrive/checkpoints/train-8000.meta'
'/content/checkpoints/train-9000.meta' -> '/content/drive/MyDrive/checkpoints/train-9000.meta'
'/content/checkpoints/train-10000.data-00000-of-00001' -> '/content/drive/MyDrive/checkpoints/train-10000.data-00000-of-00001'
'/content/checkpoints/train-8000.data-00000-of-00001' -> '/c