13 changes: 7 additions & 6 deletions examples/conformer/config.yml
@@ -31,9 +31,7 @@ decoder_config:
beam_width: 5
norm_score: True
corpus_files:
-  - /media/nlhuy/Data/ML/ASR/Raw/LibriSpeech/LibriSpeech/train-clean-100/transcripts.tsv
-  - /media/nlhuy/Data/ML/ASR/Raw/LibriSpeech/LibriSpeech/train-clean-360/transcripts.tsv
-  - /media/nlhuy/Data/ML/ASR/Raw/LibriSpeech/LibriSpeech/train-other-500/transcripts.tsv
+  - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/train-clean-100/transcripts.tsv

model_config:
name: conformer
@@ -77,32 +75,35 @@ learning_config:
mask_factor: 27
data_paths:
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/train-clean-100/transcripts.tsv
-  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords-test
+  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
shuffle: True
cache: True
buffer_size: 100
drop_remainder: True
stage: train

eval_dataset_config:
use_tf: True
data_paths:
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-clean/transcripts.tsv
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-other/transcripts.tsv
-  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords-test
+  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
shuffle: False
cache: True
buffer_size: 100
drop_remainder: True
stage: eval

test_dataset_config:
use_tf: True
data_paths:
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
-  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords-test
+  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
shuffle: False
cache: True
buffer_size: 100
drop_remainder: True
stage: test

optimizer_config:
warmup_steps: 40000
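
Editor's note on the dataset keys above (shuffle, cache, buffer_size, drop_remainder, stage): a minimal sketch of how such keys typically map onto a tf.data pipeline, assuming standard semantics. The helper name make_pipeline is illustrative only; TensorFlowASR's actual pipeline lives in tensorflow_asr.datasets.

import tensorflow as tf

def make_pipeline(dataset: tf.data.Dataset, batch_size: int, shuffle: bool,
                  cache: bool, buffer_size: int, drop_remainder: bool) -> tf.data.Dataset:
    # Hypothetical sketch of how the YAML dataset keys plausibly drive tf.data.
    if cache:
        dataset = dataset.cache()                # cache: True
    if shuffle:
        dataset = dataset.shuffle(buffer_size)   # shuffle: True, buffer_size: 100
    # drop_remainder: True keeps every batch statically shaped, which TPUs require
    dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
    return dataset.prefetch(tf.data.experimental.AUTOTUNE)
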
128 changes: 128 additions & 0 deletions examples/conformer/train_tpu_keras_subword_conformer.py
@@ -0,0 +1,128 @@
# Copyright 2020 Huy Le Nguyen (@usimarit)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import math
import argparse
from tensorflow_asr.utils import setup_environment, setup_tpu

setup_environment()
import tensorflow as tf

DEFAULT_YAML = os.path.join(os.path.abspath(os.path.dirname(__file__)), "config.yml")

tf.keras.backend.clear_session()

parser = argparse.ArgumentParser(prog="Conformer Training")

parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file")

parser.add_argument("--sentence_piece", default=False, action="store_true", help="Whether to use `SentencePiece` model")

parser.add_argument("--bs", type=int, default=None, help="Batch size per replica")

parser.add_argument("--tpu_address", type=str, default=None, help="TPU address. Leave None on Colab")

parser.add_argument("--max_lengths_prefix", type=str, default=None, help="Path to file containing max lengths")

parser.add_argument("--compute_lengths", default=False, action="store_true", help="Whether to compute lengths")

parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision")

parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords")

parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[], help="Transcript files for generating subwords")

args = parser.parse_args()

tf.config.optimizer.set_experimental_options({"auto_mixed_precision": args.mxp})

strategy = setup_tpu(args.tpu_address)

from tensorflow_asr.configs.config import Config
from tensorflow_asr.datasets.keras import ASRTFRecordDatasetKeras
from tensorflow_asr.featurizers.speech_featurizers import TFSpeechFeaturizer
from tensorflow_asr.featurizers.text_featurizers import SubwordFeaturizer, SentencePieceFeaturizer
from tensorflow_asr.models.keras.conformer import Conformer
from tensorflow_asr.optimizers.schedules import TransformerSchedule

config = Config(args.config)
speech_featurizer = TFSpeechFeaturizer(config.speech_config)

if args.sentence_piece:
    print("Loading SentencePiece model ...")
    text_featurizer = SentencePieceFeaturizer.load_from_file(config.decoder_config, args.subwords)
elif args.subwords and os.path.exists(args.subwords):
    print("Loading subwords ...")
    text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config, args.subwords)
else:
    print("Generating subwords ...")
    text_featurizer = SubwordFeaturizer.build_from_corpus(
        config.decoder_config,
        corpus_files=args.subwords_corpus
    )
    text_featurizer.save_to_file(args.subwords)

train_dataset = ASRTFRecordDatasetKeras(
    speech_featurizer=speech_featurizer, text_featurizer=text_featurizer,
    **vars(config.learning_config.train_dataset_config)
)
eval_dataset = ASRTFRecordDatasetKeras(
    speech_featurizer=speech_featurizer, text_featurizer=text_featurizer,
    **vars(config.learning_config.eval_dataset_config)
)

if args.compute_lengths:
    train_dataset.update_lengths(args.max_lengths_prefix)
    eval_dataset.update_lengths(args.max_lengths_prefix)

# Update max lengths calculated from both train and eval datasets
train_dataset.load_max_lengths(args.max_lengths_prefix)
eval_dataset.load_max_lengths(args.max_lengths_prefix)

with strategy.scope():
    batch_size = args.bs if args.bs is not None else config.learning_config.running_config.batch_size
    global_batch_size = batch_size
    global_batch_size *= strategy.num_replicas_in_sync
    # build model
    conformer = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
    conformer._build(speech_featurizer.shape, prediction_shape=text_featurizer.prepand_shape, batch_size=global_batch_size)
    conformer.summary(line_length=120)

    optimizer = tf.keras.optimizers.Adam(
        TransformerSchedule(
            d_model=conformer.dmodel,
            warmup_steps=config.learning_config.optimizer_config["warmup_steps"],
            max_lr=(0.05 / math.sqrt(conformer.dmodel))
        ),
        beta_1=config.learning_config.optimizer_config["beta1"],
        beta_2=config.learning_config.optimizer_config["beta2"],
        epsilon=config.learning_config.optimizer_config["epsilon"]
    )

    conformer.compile(optimizer=optimizer, global_batch_size=global_batch_size, blank=text_featurizer.blank)

    train_data_loader = train_dataset.create(global_batch_size)
    eval_data_loader = eval_dataset.create(global_batch_size)

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(**config.learning_config.running_config.checkpoint),
        tf.keras.callbacks.experimental.BackupAndRestore(config.learning_config.running_config.states_dir),
        tf.keras.callbacks.TensorBoard(**config.learning_config.running_config.tensorboard)
    ]

    conformer.fit(
        train_data_loader, epochs=config.learning_config.running_config.num_epochs,
        validation_data=eval_data_loader, callbacks=callbacks,
    )
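
Editor's note: both scripts depend on setup_tpu returning a distribution strategy before any model code runs; the real helper lives in tensorflow_asr.utils and is not part of this diff. A rough sketch of what such a helper typically wraps, assuming the stock tf.distribute TPU API (the function name here is illustrative):

import tensorflow as tf

def setup_tpu_sketch(tpu_address=None):
    # Resolve the TPU cluster; an empty string lets Colab auto-discover its TPU.
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address or "")
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    # e.g. 8 replicas on a v3-8; the script above multiplies the per-replica
    # batch size by this value to get global_batch_size.
    print("Number of replicas:", strategy.num_replicas_in_sync)
    return strategy
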
123 changes: 123 additions & 0 deletions examples/conformer/train_tpu_subword_conformer.py
@@ -0,0 +1,123 @@
# Copyright 2021 M. Yusuf Sarıgöz (@monatis) and Huy Le Nguyen (@usimarit)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import math
import argparse
from tensorflow_asr.utils import setup_environment, setup_tpu

setup_environment()
import tensorflow as tf

DEFAULT_YAML = os.path.join(os.path.abspath(os.path.dirname(__file__)), "config.yml")

tf.keras.backend.clear_session()

parser = argparse.ArgumentParser(prog="Conformer Training")

parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file")

parser.add_argument("--max_ckpts", type=int, default=10, help="Max number of checkpoints to keep")

parser.add_argument("--sentence_piece", default=False, action="store_true", help="Whether to use `SentencePiece` model")

parser.add_argument("--bs", type=int, default=None, help="Common training and evaluation batch size per TPU core")

parser.add_argument("--tpu_address", type=str, default=None, help="TPU address. Leave None on Colab")

parser.add_argument("--max_lengths_prefix", type=str, default=None, help="Path to file containing max lengths")

parser.add_argument("--compute_lengths", default=False, action="store_true", help="Whether to compute lengths")

parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision")

parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords")

parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[], help="Transcript files for generating subwords")

args = parser.parse_args()

tf.config.optimizer.set_experimental_options({"auto_mixed_precision": args.mxp})

from tensorflow_asr.configs.config import Config
from tensorflow_asr.datasets.asr_dataset import ASRTFRecordDataset
from tensorflow_asr.featurizers.speech_featurizers import TFSpeechFeaturizer
from tensorflow_asr.featurizers.text_featurizers import SubwordFeaturizer, SentencePieceFeaturizer
from tensorflow_asr.runners.transducer_runners import TransducerTrainer
from tensorflow_asr.models.conformer import Conformer
from tensorflow_asr.optimizers.schedules import TransformerSchedule

config = Config(args.config, learning=True)
speech_featurizer = TFSpeechFeaturizer(config.speech_config)

if args.sentence_piece:
    print("Loading SentencePiece model ...")
    text_featurizer = SentencePieceFeaturizer.load_from_file(config.decoder_config, args.subwords)
elif args.subwords and os.path.exists(args.subwords):
    print("Loading subwords ...")
    text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config, args.subwords)
else:
    print("Generating subwords ...")
    text_featurizer = SubwordFeaturizer.build_from_corpus(
        config.decoder_config,
        corpus_files=args.subwords_corpus
    )
    text_featurizer.save_to_file(args.subwords)

train_dataset = ASRTFRecordDataset(
    speech_featurizer=speech_featurizer, text_featurizer=text_featurizer,
    **vars(config.learning_config.train_dataset_config)
)

eval_dataset = ASRTFRecordDataset(
    speech_featurizer=speech_featurizer, text_featurizer=text_featurizer,
    **vars(config.learning_config.eval_dataset_config)
)

if args.compute_lengths:
    train_dataset.update_lengths(args.max_lengths_prefix)
    eval_dataset.update_lengths(args.max_lengths_prefix)

# Update max lengths calculated from both train and eval datasets
train_dataset.load_max_lengths(args.max_lengths_prefix)
eval_dataset.load_max_lengths(args.max_lengths_prefix)

strategy = setup_tpu(args.tpu_address)

conformer_trainer = TransducerTrainer(
    config=config.learning_config.running_config,
    text_featurizer=text_featurizer, strategy=strategy
)

with conformer_trainer.strategy.scope():
    # build model
    conformer = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
    conformer._build(speech_featurizer.shape, prediction_shape=text_featurizer.prepand_shape,
                     batch_size=args.bs if args.bs is not None else config.learning_config.running_config.batch_size)
    conformer.summary(line_length=120)

    optimizer = tf.keras.optimizers.Adam(
        TransformerSchedule(
            d_model=conformer.dmodel,
            warmup_steps=config.learning_config.optimizer_config["warmup_steps"],
            max_lr=(0.05 / math.sqrt(conformer.dmodel))
        ),
        beta_1=config.learning_config.optimizer_config["beta1"],
        beta_2=config.learning_config.optimizer_config["beta2"],
        epsilon=config.learning_config.optimizer_config["epsilon"]
    )

conformer_trainer.compile(model=conformer, optimizer=optimizer, max_to_keep=args.max_ckpts)

conformer_trainer.fit(train_dataset, eval_dataset, train_bs=args.bs, eval_bs=args.bs)
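
Editor's note: TransformerSchedule(d_model, warmup_steps, max_lr), used by both scripts, is not shown in this diff; it lives in tensorflow_asr.optimizers.schedules. Assuming it follows the classic Transformer ("Noam") schedule that its arguments suggest, a minimal sketch:

import tensorflow as tf

class NoamScheduleSketch(tf.keras.optimizers.schedules.LearningRateSchedule):
    # lr(step) = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5),
    # optionally capped at max_lr (the scripts pass 0.05 / sqrt(d_model)).
    def __init__(self, d_model, warmup_steps=4000, max_lr=None):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps
        self.max_lr = max_lr

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        lr = tf.math.rsqrt(self.d_model) * tf.math.minimum(
            tf.math.rsqrt(step), step * (self.warmup_steps ** -1.5))
        if self.max_lr is not None:
            lr = tf.math.minimum(lr, self.max_lr)
        return lr

Under this schedule the learning rate rises linearly for warmup_steps (40000 in these configs) and then decays as 1/sqrt(step).
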
9 changes: 6 additions & 3 deletions examples/contextnet/config.yml
@@ -208,32 +208,35 @@ learning_config:
mask_factor: 27
data_paths:
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/train-clean-100/transcripts.tsv
-  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords-test
+  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
shuffle: True
cache: True
buffer_size: 100
drop_remainder: True
stage: train

eval_dataset_config:
use_tf: True
data_paths:
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-clean/transcripts.tsv
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-other/transcripts.tsv
-  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords-test
+  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
shuffle: False
cache: True
buffer_size: 100
drop_remainder: True
stage: eval

test_dataset_config:
use_tf: True
data_paths:
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
-  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords-test
+  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
shuffle: False
cache: True
buffer_size: 100
drop_remainder: True
stage: test

optimizer_config:
warmup_steps: 40000
9 changes: 6 additions & 3 deletions examples/deepspeech2/config.yml
@@ -53,32 +53,35 @@ learning_config:
use_tf: True
data_paths:
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/train-clean-100/transcripts.tsv
-  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords-test
+  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
shuffle: True
cache: True
buffer_size: 100
drop_remainder: True
stage: train

eval_dataset_config:
use_tf: True
data_paths:
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-clean/transcripts.tsv
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-other/transcripts.tsv
-  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords-test
+  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
shuffle: False
cache: True
buffer_size: 100
drop_remainder: True
stage: eval

test_dataset_config:
use_tf: True
data_paths:
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
-  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords-test
+  tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
shuffle: False
cache: True
buffer_size: 100
drop_remainder: True
stage: test

optimizer_config:
class_name: adam
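
Editor's note: unlike the conformer and contextnet examples, whose optimizer_config feeds the custom TransformerSchedule sketched above, deepspeech2's optimizer_config reads like a standard Keras optimizer spec (class_name: adam). A hedged sketch of deserializing such a spec with stock Keras; the learning_rate value is illustrative and TensorFlowASR's actual loading code may differ:

import tensorflow as tf

# Build an optimizer from a Keras-style spec like the YAML above.
opt_config = {"class_name": "adam", "config": {"learning_rate": 1e-4}}
optimizer = tf.keras.optimizers.get(opt_config)  # case-insensitive for built-ins
print(type(optimizer).__name__)  # -> Adam
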