In [1]:
import sys

# Local checkout of TensorFlowASR; it is added to the import path so that the
# `tensorflow_asr` imports in the training cell can be resolved.
TENSORFLOW_ASR_ROOT = '/mydata/hassan/TensorFlowASR'

# Guard the append so that re-running this cell does not keep stacking
# duplicate entries onto sys.path.
if TENSORFLOW_ASR_ROOT not in sys.path:
    sys.path.append(TENSORFLOW_ASR_ROOT)

In [2]:
config = {
    "speech_config": {
        "sample_rate": 16000,
        "frame_ms": 25,
        "stride_ms": 10,
        "num_feature_bins": 80,
        "feature_type": "log_mel_spectrogram",
        "preemphasis": 0.97,
        "normalize_signal": True,
        "normalize_feature": True,
        "normalize_per_frame": False,
    },
    "decoder_config": {
        "vocabulary": None,
        "target_vocab_size": 1000,
        "max_subword_length": 10,
        "blank_at_zero": True,
        "beam_width": 0,
        "norm_score": True,
        "corpus_files": None,
    },
    "model_config": {
        "name": "conformer",
        "encoder_subsampling": {
            "type": "conv2d",
            "filters": 144,
            "kernel_size": 3,
            "strides": 2,
        },
        "encoder_positional_encoding": "sinusoid_concat",
        "encoder_dmodel": 144,
        "encoder_num_blocks": 16,
        "encoder_head_size": 36,
        "encoder_num_heads": 4,
        "encoder_mha_type": "relmha",
        "encoder_kernel_size": 32,
        "encoder_fc_factor": 0.5,
        "encoder_dropout": 0.1,
        "prediction_embed_dim": 320,
        "prediction_embed_dropout": 0,
        "prediction_num_rnns": 1,
        "prediction_rnn_units": 320,
        "prediction_rnn_type": "lstm",
        "prediction_rnn_implementation": 2,
        "prediction_layer_norm": True,
        "prediction_projection_units": 0,
        "joint_dim": 320,
        "prejoint_linear": True,
        "joint_activation": "tanh",
        "joint_mode": "add",
    },
    "learning_config": {
        "train_dataset_config": {
            "use_tf": True,
            "augmentation_config": {
                "feature_augment": {
                    "time_masking": {
                        "num_masks": 10,
                        "mask_factor": 100,
                        "p_upperbound": 0.05,
                    },
                    "freq_masking": {"num_masks": 1, "mask_factor": 27},
                }
            },
            "data_paths": [
                "/mydata/hassan/data/LibriSpeech/train-clean-100/transcript.tsv"
            ],
            "tfrecords_dir": None,
            "shuffle": True,
            "cache": True,
            "buffer_size": 100,
            "drop_remainder": True,
            "stage": "train",
        },
        "eval_dataset_config": {
            "use_tf": True,
            "data_paths": None,
            "tfrecords_dir": None,
            "shuffle": False,
            "cache": True,
            "buffer_size": 100,
            "drop_remainder": True,
            "stage": "eval",
        },
        "test_dataset_config": {
            "use_tf": True,
            "data_paths": None,
            "tfrecords_dir": None,
            "shuffle": False,
            "cache": True,
            "buffer_size": 100,
            "drop_remainder": True,
            "stage": "test",
        },
        "optimizer_config": {
            "warmup_steps": 40000,
            "beta_1": 0.9,
            "beta_2": 0.98,
            "epsilon": 1e-09,
        },
        "running_config": {
            "batch_size": 4,
            "num_epochs": 15,
            "checkpoint": {
                "filepath": "/mydata/hassan/conformer2/checkpoints/{epoch:02d}.h5",
                "save_best_only": False,
                "save_weights_only": True,
                "save_freq": "epoch",
            },
            "states_dir": "/mydata/hassan/conformer2/states",
            "tensorboard": {
                "log_dir": "/mydata/hassan/conformer2/tensorboard",
                "histogram_freq": 1,
                "write_graph": True,
                "write_images": True,
                "update_freq": "epoch",
                "profile_batch": 2,
            },
        },
    },
}

In [3]:
# Dataset statistics handed to `load_metadata` on both the train and the eval
# dataset in the training cell, keyed by dataset stage.
# NOTE(review): the captured run log in this notebook reports a source dataset
# cardinality of 28539 and 7134 steps per epoch, which does not line up with
# the train `num_entries` below -- these figures may have been computed for a
# different data split; confirm before relying on them.
metadata = {
    "train": {
        "max_input_length": 2974,
        "max_label_length": 194,
        "num_entries": 281241,
    },
    "eval": {
        "max_input_length": 3516,
        "max_label_length": 186,
        "num_entries": 5567,
    },
}

In [4]:
import os
import math
import argparse
from tensorflow_asr.utils import env_util

# The library's environment setup is invoked before TensorFlow is imported in
# this cell; that ordering is kept exactly as the notebook had it.
env_util.setup_environment()
import tensorflow as tf

# Start from a clean Keras state so that re-running the cell does not build on
# layers/graphs left over from a previous run.
tf.keras.backend.clear_session()
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
strategy = env_util.setup_strategy([0])

from tensorflow_asr.configs.config import Config
from tensorflow_asr.datasets import asr_dataset
from tensorflow_asr.featurizers import speech_featurizers, text_featurizers
from tensorflow_asr.models.transducer.conformer import Conformer
from tensorflow_asr.optimizers.schedules import TransformerSchedule

# `config` is the raw dict from the configuration cell on the first run, but
# this cell rebinds the same name to a Config object. Only convert when it is
# not already a Config, so the cell can be re-run without first re-running the
# configuration cell.
if not isinstance(config, Config):
    config = Config(config)

# ---- Featurizers -----------------------------------------------------------
speech_featurizer = speech_featurizers.TFSpeechFeaturizer(config.speech_config)

text_featurizer = text_featurizers.CharFeaturizer(config.decoder_config)

# ---- Datasets --------------------------------------------------------------
train_dataset = asr_dataset.ASRSliceDataset(
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    **vars(config.learning_config.train_dataset_config),
    indefinite=True
)
# NOTE(review): the eval section of the configuration has `data_paths` set to
# None -- confirm that validation is really expected to run without data.
eval_dataset = asr_dataset.ASRSliceDataset(
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    **vars(config.learning_config.eval_dataset_config),
    indefinite=True
)

train_dataset.load_metadata(metadata)
eval_dataset.load_metadata(metadata)
speech_featurizer.reset_length()
text_featurizer.reset_length()

# The configured batch size is per replica; scale it by the replica count to
# get the global batch size used for the data loaders and the loss.
global_batch_size = config.learning_config.running_config.batch_size
global_batch_size *= strategy.num_replicas_in_sync

train_data_loader = train_dataset.create(global_batch_size)
eval_data_loader = eval_dataset.create(global_batch_size)

# ---- Model and optimizer ---------------------------------------------------
with strategy.scope():
    # build model
    conformer = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
    conformer.make(speech_featurizer.shape)
    conformer.summary(line_length=100)

    # Work on a copy of the optimizer settings: popping `warmup_steps` from the
    # shared config would make a second run of this cell silently fall back to
    # the 10000-step default instead of the configured value.
    optimizer_kwargs = dict(config.learning_config.optimizer_config)
    warmup_steps = optimizer_kwargs.pop("warmup_steps", 10000)

    optimizer = tf.keras.optimizers.Adam(
        TransformerSchedule(
            d_model=conformer.dmodel,
            warmup_steps=warmup_steps,
            max_lr=(0.05 / math.sqrt(conformer.dmodel))
        ),
        **optimizer_kwargs
    )

    # NOTE(review): `experimental_steps_per_execution` is kept as-is because it
    # works with the TensorFlow version this notebook was run on; newer Keras
    # releases name this argument `steps_per_execution` -- confirm on upgrade.
    conformer.compile(
        optimizer=optimizer,
        experimental_steps_per_execution=10,
        global_batch_size=global_batch_size,
        blank=text_featurizer.blank
    )

# ---- Training --------------------------------------------------------------
# Checkpointing, resumable training state and TensorBoard logging, each
# configured from the corresponding entry of the running config.
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(**config.learning_config.running_config.checkpoint),
    tf.keras.callbacks.experimental.BackupAndRestore(config.learning_config.running_config.states_dir),
    tf.keras.callbacks.TensorBoard(**config.learning_config.running_config.tensorboard)
]

# Both datasets are created with indefinite=True, so the number of steps per
# epoch is passed explicitly for training and validation.
conformer.fit(
    train_data_loader,
    epochs=config.learning_config.running_config.num_epochs,
    validation_data=eval_data_loader,
    callbacks=callbacks,
    steps_per_epoch=train_dataset.total_steps,
    validation_steps=eval_dataset.total_steps
)

2023-05-17 10:23:05.841327: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-17 10:23:05.841352: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


2023-05-17 10:23:07.099610: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-05-17 10:23:07.099636: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2023-05-17 10:23:07.099669: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (node0.tempnode.dsdm-pg0.clemson.cloudlab.us): /proc/driver/nvidia/version does not exist
2023-05-17 10:23:07.099930: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


INFO:tensorflow:Use RNNT loss in TensorFlow
{' ': 1, 'a': 2, 'b': 3, 'c': 4, 'd': 5, 'e': 6, 'f': 7, 'g': 8, 'h': 9, 'i': 10, 'j': 11, 'k': 12, 'l': 13, 'm': 14, 'n': 15, 'o': 16, 'p': 17, 'q': 18, 'r': 19, 's': 20, 't': 21, 'u': 22, 'v': 23, 'w': 24, 'x': 25, 'y': 26, 'z': 27, "'": 28}
INFO:tensorflow:Reading /mydata/hassan/data/LibriSpeech/train-clean-100/transcript.tsv ...
{' ': 1, 'a': 2, 'b': 3, 'c': 4, 'd': 5, 'e': 6, 'f': 7, 'g': 8, 'h': 9, 'i': 10, 'j': 11, 'k': 12, 'l': 13, 'm': 14, 'n': 15, 'o': 16, 'p': 17, 'q': 18, 'r': 19, 's': 20, 't': 21, 'u': 22, 'v': 23, 'w': 24, 'x': 25, 'y': 26, 'z': 27, "'": 28} ['c', 'h', 'a', 'r', 'l', 'e', 's', ' ', 'g', 'a', 'v', 'e', ' ', 't', 'h', 'e', 'm', ' ', 'a', ' ', 'g', 'r', 'a', 'c', 'i', 'o', 'u', 's', ' ', 'a', 'n', 'd', ' ', 'a', ' ', 'c', 'o', 'm', 'p', 'l', 'i', 'a', 'n', 't', ' ', 'a', 'n', 's', 'w', 'e', 'r', ' ', 't', 'o', ' ', 'a', 'l', 'l', ' ', 't', 'h', 'e', 'i', 'r', ' ', 'r', 'e', 'm', 'o', 'n', 's', 't', 'r', 'a', 'n', '

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



{' ': 1, 'a': 2, 'b': 3, 'c': 4, 'd': 5, 'e': 6, 'f': 7, 'g': 8, 'h': 9, 'i': 10, 'j': 11, 'k': 12, 'l': 13, 'm': 14, 'n': 15, 'o': 16, 'p': 17, 'q': 18, 'r': 19, 's': 20, 't': 21, 'u': 22, 'v': 23, 'w': 24, 'x': 25, 'y': 26, 'z': 27, "'": 28} ['m', 'o', 's', 't', ' ', 'o', 'f', ' ', 't', 'h', 'e', 'm', ' ', 'i', 'n', ' ', 't', 'h', 'e', ' ', 'u', 'n', 'i', 't', 'e', 'd', ' ', 's', 't', 'a', 't', 'e', 's', ' ', 'i', 'n', 'd', 'i', 'a', 'n', ' ', 's', 'e', 'r', 'v', 'i', 'c', 'e', ' ', 'i', 't', ' ', 'i', 's', ' ', 't', 'h', 'e', ' ', 'e', 'x', 'p', 'r', 'e', 's', 's', ' ', 'p', 'o', 'l', 'i', 'c', 'y', ' ', 'o', 'f', ' ', 't', 'h', 'e', ' ', 'g', 'o', 'v', 'e', 'r', 'n', 'm', 'e', 'n', 't', ' ', 't', 'o', ' ', 'u', 's', 'e', ' ', 't', 'h', 'e', ' ', 'e', 'd', 'u', 'c', 'a', 't', 'e', 'd', ' ', 'i', 'n', 'd', 'i', 'a', 'n', 's', ' ', 'w', 'h', 'e', 'n', 'e', 'v', 'e', 'r', ' ', 'p', 'o', 's', 's', 'i', 'b', 'l', 'e', ' ', 'i', 'n', ' ', 'p', 'r', 'o', 'm', 'o', 't', 'i', 'n', 'g', ' ', 

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Tensor("ones:0", shape=(None, 80, 1), dtype=float32)
Tensor("ones_3:0", shape=(None, 80, 1), dtype=float32)
Tensor("ones_6:0", shape=(None, 80, 1), dtype=float32)
Tensor("ones_9:0", shape=(None, 80, 1), dtype=float32)
Tensor("ones_12:0", shape=(None, 80, 1), dtype=float32)
Tensor("ones_15:0", shape=(None, 80, 1), dtype=float32)
Tensor("ones_18:0", shape=(None, 80, 1), dtype=float32)
Tensor("ones_21:0", shape=(None, 80, 1), dtype=float32)
Tensor("ones_24:0", shape=(None, 80, 1), dtype=float32)
Tensor("ones_27:0", shape=(None, 80, 1), dtype=float32)
Model: "conformer_encoder"
____________________________________________________________________________________________________
 Layer (type)                                Output Shape                            Param #        
 conformer_encoder_subsampling (Conv2dSubsam  multiple                               188208         
 pling)                                                                                             
               

2023-05-17 10:23:20.560138: I tensorflow/core/profiler/lib/profiler_session.cc:110] Profiler session initializing.
2023-05-17 10:23:20.560177: I tensorflow/core/profiler/lib/profiler_session.cc:125] Profiler session started.
2023-05-17 10:23:20.560439: I tensorflow/core/profiler/lib/profiler_session.cc:143] Profiler session tear down.
2023-05-17 10:23:20.602304: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 28539
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:0"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
   

Epoch 1/15


2023-05-17 10:23:20.786055: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.
2023-05-17 10:23:20.816733: W tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:2218] No (suitable) GPUs detected, skipping auto_mixed_precision graph optimizer
2023-05-17 10:23:20.830687: W tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:2218] No (suitable) GPUs detected, skipping auto_mixed_precision graph optimizer
2023-05-17 10:23:20.832437: W tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:2218] No (suitable) GPUs detected, skipping auto_mixed_precision graph optimizer
2023-05-17 10:24:22.221142: W tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:2218] No (suitable) GPUs detected, skipping auto_mixed_precision graph optimizer
2023-05-17 10:24:33.085909: W tensorflow/core/grappler/optimizers/auto_mixed_precis

  10/7134 [..............................] - ETA: 18:03:08 - loss: 1274.9646

2023-05-17 10:24:52.070612: I tensorflow/core/profiler/lib/profiler_session.cc:110] Profiler session initializing.
2023-05-17 10:24:52.070657: I tensorflow/core/profiler/lib/profiler_session.cc:125] Profiler session started.
2023-05-17 10:25:08.336578: I tensorflow/core/profiler/lib/profiler_session.cc:67] Profiler session collecting data.
2023-05-17 10:25:09.937041: I tensorflow/core/profiler/lib/profiler_session.cc:143] Profiler session tear down.


 210/7134 [..............................] - ETA: 3:31:39 - loss: 1055.1592

KeyboardInterrupt: 