# Testing Coqui STT APIs

## Sample Data Acquisition

In [1]:
### Download sample data
import os
import pandas
from coqui_stt_training.util.downloader import maybe_download

def download_sample_data(data_dir="english/"):
    """Download the LDC93S1 smoke-test sample and write a one-row dataset CSV.

    Fetches the sample WAV, its transcript and the alphabet file into
    ``data_dir`` through ``maybe_download``, then writes ``ldc93s1.csv`` into
    the same directory with the columns ``wav_filename``, ``wav_filesize``
    and ``transcript``.

    Args:
        data_dir: Directory that receives the downloaded files and the CSV.
            Defaults to ``"english/"``, the location the other cells of this
            notebook read from.

    Returns:
        None. The results are the files written under ``data_dir``.
    """
    # Download data + alphabet
    audio_file = maybe_download("LDC93S1.wav", data_dir, "https://raw.githubusercontent.com/coqui-ai/STT/main/data/smoke_test/LDC93S1.wav")
    transcript_file = maybe_download("LDC93S1.txt", data_dir, "https://raw.githubusercontent.com/coqui-ai/STT/main/data/smoke_test/LDC93S1.txt")
    # The alphabet is only needed on disk (later cells open it by path), so the
    # returned value is not kept.
    maybe_download("alphabet.txt", data_dir, "https://raw.githubusercontent.com/coqui-ai/STT/main/data/alphabet.txt")

    # Format data: lowercase the text, drop the first two space-separated
    # fields (presumably sample offsets -- verify against the raw file) and
    # remove periods.
    with open(transcript_file, "r", encoding="utf-8") as fin:
        words = fin.read().strip().lower().split(" ")[2:]
    transcript = " ".join(words).replace(".", "")

    df = pandas.DataFrame(
        data=[(os.path.abspath(audio_file), os.path.getsize(audio_file), transcript)],
        columns=["wav_filename", "wav_filesize", "transcript"],
    )
    # Save formatted CSV
    df.to_csv(os.path.join(data_dir, "ldc93s1.csv"), index=False)

# Fetch the sample audio, transcript and alphabet into english/ and write
# english/ldc93s1.csv, the dataset file the later cells point at.
download_sample_data()

Found archive "english/LDC93S1.wav" - not downloading.
Found archive "english/LDC93S1.txt" - not downloading.
Found archive "english/alphabet.txt" - not downloading.


In [2]:
# Print the generated dataset CSV. The context manager closes the file handle
# as soon as the contents have been read.
with open("english/ldc93s1.csv", "r", encoding="utf-8") as csv_file:
    print(csv_file.read())

wav_filename,wav_filesize,transcript
/home/josh/Desktop/proj/mcv-stt-hackathon/english/LDC93S1.wav,93638,she had your dark suit in greasy wash water all year



In [3]:
# Print the alphabet file (one label per line). The context manager closes the
# file handle as soon as the contents have been read.
with open("english/alphabet.txt", "r", encoding="utf-8") as alphabet_file:
    print(alphabet_file.read())

# Each line in this file represents the Unicode codepoint (UTF-8 encoded)
# associated with a numeric label.
# A line that starts with # is a comment. You can escape it with \# if you wish
# to use '#' as a label.
 
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
'
# The last (non-comment) line needs to end with a newline.



## Initialize Hyperparameters

In [5]:
from coqui_stt_training.util.config import initialize_globals_from_args

# Gather every override in one mapping so the run's settings can be read at a
# glance, then pass them to the config initializer as keyword arguments.
# The same single-sample CSV serves as the train, dev and test set.
config_overrides = {
    "alphabet_config_path": "english/alphabet.txt",
    "checkpoint_dir": "ckpt_dir",
    "train_files": ["english/ldc93s1.csv"],
    "dev_files": ["english/ldc93s1.csv"],
    "test_files": ["english/ldc93s1.csv"],
    "load_train": "init",
    "n_hidden": 200,
    "epochs": 100,
}

initialize_globals_from_args(**config_overrides)

In [6]:
# Inspect the complete training configuration, including every field that was
# left at its default value
from coqui_stt_training.util.config import Config

# Dump the whole Config as JSON so the values set above can be checked
print(Config.to_json())

{
    "train_files": [
        "english/ldc93s1.csv"
    ],
    "dev_files": [
        "english/ldc93s1.csv"
    ],
    "test_files": [
        "english/ldc93s1.csv"
    ],
    "metrics_files": [],
    "auto_input_dataset": "",
    "vocab_file": "",
    "read_buffer": 1048576,
    "feature_cache": "",
    "cache_for_epochs": 0,
    "shuffle_batches": false,
    "shuffle_start": 1,
    "shuffle_buffer": 1000,
    "feature_win_len": 32,
    "feature_win_step": 20,
    "audio_sample_rate": 16000,
    "normalize_sample_rate": true,
    "augment": null,
    "epochs": 500,
    "dropout_rate": 0.05,
    "dropout_rate2": 0.05,
    "dropout_rate3": 0.05,
    "dropout_rate4": 0.0,
    "dropout_rate5": 0.0,
    "dropout_rate6": 0.05,
    "relu_clip": 20.0,
    "beta1": 0.9,
    "beta2": 0.999,
    "epsilon": 1e-08,
    "learning_rate": 0.001,
    "train_batch_size": 1,
    "dev_batch_size": 1,
    "test_batch_size": 1,
    "export_batch_size": 1,
    "skip_batch_test": false,
    "inter_op_parall

In [7]:
# Kick off the training job, restricted to at most one GPU.
# The device mask is exported before the training module is imported so that
# nothing pulled in by that import can observe the unrestricted device list.
# NOTE(review): the config was already initialized in an earlier cell; if that
# step enumerated GPUs, this mask may take effect too late -- confirm, and move
# the assignment above the config cell if so.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from coqui_stt_training.train import train

train()

I Performing dummy training to check for memory problems.
I If the following process crashes, you likely have batch sizes that are too big for your available system memory (or GPU memory).
I Initializing all variables.
I STARTING Optimization
Epoch 0 |   Training | Elapsed Time: 0:00:00 | Steps: 0 | Loss: 0.000000
Epoch 0 |   Training | Elapsed Time: 0:00:01 | Steps: 1 | Loss: 318.357086
Epoch 0 |   Training | Elapsed Time: 0:00:01 | Steps: 1 | Loss: 318.357086
Epoch 0 | Validation | Elapsed Time: 0:00:00 | Steps: 0 | Loss: 0.000000 | Dataset: english/ldc93s1.csv
Epoch 0 | Validation | Elapsed Time: 0:00:00 | Steps: 1 | Loss: 192.964005 | Dataset: english/ldc93s1.csv
Epoch 0 | Validation | Elapsed Time: 0:00:00 | Steps: 1 | Loss: 192.964005 | Dataset: english/ldc93s1.csv
--------------------------------------------------------------------------------
I FINISHED optimization in 0:00:02.059964
I Dummy run finished without problems, now starting real training process.
I STARTING Optimizat

In [10]:
# Evaluate the trained model on the configured test files.
# NOTE: in this notebook the test set is the same CSV that was used for
# training. That is acceptable only for this smoke test; evaluating on
# training data must never be done for real experiments or in production.
from coqui_stt_training.evaluate import test

test()

I Loading best validating checkpoint from ckpt_dir/best_dev-100
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel
I Loading variable from checkpoint: global_step
I Loading variable from checkpoint: layer_1/bias
I Loading variable from checkpoint: layer_1/weights
I Loading variable from checkpoint: layer_2/bias
I Loading variable from checkpoint: layer_2/weights
I Loading variable from checkpoint: layer_3/bias
I Loading variable from checkpoint: layer_3/weights
I Loading variable from checkpoint: layer_5/bias
I Loading variable from checkpoint: layer_5/weights
I Loading variable from checkpoint: layer_6/bias
I Loading variable from checkpoint: layer_6/weights
Testing model on english/ldc93s1.csv
Test epoch | Steps: 0 | Elapsed Time: 0:00:00
Test epoch | Steps: 1 | Elapsed Time: 0:00:00
Test epoch | Steps: 1 | Elapsed Time: 0:00

# END