diff --git a/examples/conformer/train_conformer.py b/examples/conformer/train_conformer.py index 32e17a8974..5697130463 100644 --- a/examples/conformer/train_conformer.py +++ b/examples/conformer/train_conformer.py @@ -26,29 +26,25 @@ parser = argparse.ArgumentParser(prog="Conformer Training") -parser.add_argument("--config", type=str, default=DEFAULT_YAML, - help="The file path of model configuration file") +parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file") -parser.add_argument("--max_ckpts", type=int, default=10, - help="Max number of checkpoints to keep") +parser.add_argument("--max_ckpts", type=int, default=10, help="Max number of checkpoints to keep") -parser.add_argument("--tfrecords", default=False, action="store_true", - help="Whether to use tfrecords") +parser.add_argument("--tfrecords", default=False, action="store_true", help="Whether to use tfrecords") -parser.add_argument("--tbs", type=int, default=None, - help="Train batch size per replica") +parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards") -parser.add_argument("--ebs", type=int, default=None, - help="Evaluation batch size per replica") +parser.add_argument("--tbs", type=int, default=None, help="Train batch size per replica") -parser.add_argument("--devices", type=int, nargs="*", default=[0], - help="Devices' ids to apply distributed training") +parser.add_argument("--ebs", type=int, default=None, help="Evaluation batch size per replica") -parser.add_argument("--mxp", default=False, action="store_true", - help="Enable mixed precision") +parser.add_argument("--devices", type=int, nargs="*", default=[0], help="Devices' ids to apply distributed training") -parser.add_argument("--cache", default=False, action="store_true", - help="Enable caching for dataset") +parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision") + +parser.add_argument("--cache", default=False, action="store_true", help="Enable caching for dataset") + +parser.add_argument("--bfs", type=int, default=100, help="Buffer size for shuffling") args = parser.parse_args() @@ -75,14 +71,18 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + tfrecords_shards=args.tfrecords_shards, + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRTFRecordDataset( data_paths=config.learning_config.dataset_config.eval_paths, tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir, + tfrecords_shards=args.tfrecords_shards, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) else: train_dataset = ASRSliceDataset( @@ -90,13 +90,15 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRSliceDataset( data_paths=config.learning_config.dataset_config.eval_paths, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) conformer_trainer = TransducerTrainer( diff --git 
a/examples/conformer/train_ga_conformer.py b/examples/conformer/train_ga_conformer.py index 9e29ac29ee..d5be194c4d 100644 --- a/examples/conformer/train_ga_conformer.py +++ b/examples/conformer/train_ga_conformer.py @@ -26,32 +26,27 @@ parser = argparse.ArgumentParser(prog="Conformer Training") -parser.add_argument("--config", type=str, default=DEFAULT_YAML, - help="The file path of model configuration file") +parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file") -parser.add_argument("--max_ckpts", type=int, default=10, - help="Max number of checkpoints to keep") +parser.add_argument("--max_ckpts", type=int, default=10, help="Max number of checkpoints to keep") -parser.add_argument("--tfrecords", default=False, action="store_true", - help="Whether to use tfrecords") +parser.add_argument("--tfrecords", default=False, action="store_true", help="Whether to use tfrecords") -parser.add_argument("--tbs", type=int, default=None, - help="Train batch size per replica") +parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards") -parser.add_argument("--ebs", type=int, default=None, - help="Evaluation batch size per replica") +parser.add_argument("--tbs", type=int, default=None, help="Train batch size per replica") -parser.add_argument("--acs", type=int, default=None, - help="Train accumulation steps") +parser.add_argument("--ebs", type=int, default=None, help="Evaluation batch size per replica") -parser.add_argument("--devices", type=int, nargs="*", default=[0], - help="Devices' ids to apply distributed training") +parser.add_argument("--acs", type=int, default=None, help="Train accumulation steps") -parser.add_argument("--mxp", default=False, action="store_true", - help="Enable mixed precision") +parser.add_argument("--devices", type=int, nargs="*", default=[0], help="Devices' ids to apply distributed training") -parser.add_argument("--cache", default=False, action="store_true", - help="Enable caching for dataset") +parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision") + +parser.add_argument("--cache", default=False, action="store_true", help="Enable caching for dataset") + +parser.add_argument("--bfs", type=int, default=100, help="Buffer size for shuffling") args = parser.parse_args() @@ -78,14 +73,18 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + tfrecords_shards=args.tfrecords_shards, + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRTFRecordDataset( data_paths=config.learning_config.dataset_config.eval_paths, tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir, + tfrecords_shards=args.tfrecords_shards, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) else: train_dataset = ASRSliceDataset( @@ -93,13 +92,15 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRSliceDataset( data_paths=config.learning_config.dataset_config.eval_paths, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", 
cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) conformer_trainer = TransducerTrainerGA( diff --git a/examples/conformer/train_ga_subword_conformer.py b/examples/conformer/train_ga_subword_conformer.py index 1896f8cede..609c27e6ad 100644 --- a/examples/conformer/train_ga_subword_conformer.py +++ b/examples/conformer/train_ga_subword_conformer.py @@ -26,41 +26,33 @@ parser = argparse.ArgumentParser(prog="Conformer Training") -parser.add_argument("--config", type=str, default=DEFAULT_YAML, - help="The file path of model configuration file") +parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file") -parser.add_argument("--max_ckpts", type=int, default=10, - help="Max number of checkpoints to keep") +parser.add_argument("--max_ckpts", type=int, default=10, help="Max number of checkpoints to keep") -parser.add_argument("--tfrecords", default=False, action="store_true", - help="Whether to use tfrecords") +parser.add_argument("--tfrecords", default=False, action="store_true", help="Whether to use tfrecords") -parser.add_argument("--tbs", type=int, default=None, - help="Train batch size per replica") +parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards") -parser.add_argument("--ebs", type=int, default=None, - help="Evaluation batch size per replica") +parser.add_argument("--tbs", type=int, default=None, help="Train batch size per replica") -parser.add_argument("--acs", type=int, default=None, - help="Train accumulation steps") +parser.add_argument("--ebs", type=int, default=None, help="Evaluation batch size per replica") -parser.add_argument("--sentence_piece", default=False, action="store_true", - help="Whether to use `SentencePiece` model") +parser.add_argument("--acs", type=int, default=None, help="Train accumulation steps") -parser.add_argument("--devices", type=int, nargs="*", default=[0], - help="Devices' ids to apply distributed training") +parser.add_argument("--sentence_piece", default=False, action="store_true", help="Whether to use `SentencePiece` model") -parser.add_argument("--mxp", default=False, action="store_true", - help="Enable mixed precision") +parser.add_argument("--devices", type=int, nargs="*", default=[0], help="Devices' ids to apply distributed training") -parser.add_argument("--cache", default=False, action="store_true", - help="Enable caching for dataset") +parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision") -parser.add_argument("--subwords", type=str, default=None, - help="Path to file that stores generated subwords") +parser.add_argument("--cache", default=False, action="store_true", help="Enable caching for dataset") -parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[], - help="Transcript files for generating subwords") +parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords") + +parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[], help="Transcript files for generating subwords") + +parser.add_argument("--bfs", type=int, default=100, help="Buffer size for shuffling") args = parser.parse_args() @@ -100,14 +92,18 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + tfrecords_shards=args.tfrecords_shards, + stage="train", cache=args.cache, + shuffle=True, 
buffer_size=args.bfs, ) eval_dataset = ASRTFRecordDataset( data_paths=config.learning_config.dataset_config.eval_paths, tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir, + tfrecords_shards=args.tfrecords_shards, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) else: train_dataset = ASRSliceDataset( @@ -115,13 +111,15 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRSliceDataset( data_paths=config.learning_config.dataset_config.eval_paths, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) conformer_trainer = TransducerTrainerGA( diff --git a/examples/conformer/train_subword_conformer.py b/examples/conformer/train_subword_conformer.py index 528e437dee..07abf8c8cd 100644 --- a/examples/conformer/train_subword_conformer.py +++ b/examples/conformer/train_subword_conformer.py @@ -26,38 +26,31 @@ parser = argparse.ArgumentParser(prog="Conformer Training") -parser.add_argument("--config", type=str, default=DEFAULT_YAML, - help="The file path of model configuration file") +parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file") -parser.add_argument("--max_ckpts", type=int, default=10, - help="Max number of checkpoints to keep") +parser.add_argument("--max_ckpts", type=int, default=10, help="Max number of checkpoints to keep") -parser.add_argument("--tfrecords", default=False, action="store_true", - help="Whether to use tfrecords") +parser.add_argument("--tfrecords", default=False, action="store_true", help="Whether to use tfrecords") -parser.add_argument("--sentence_piece", default=False, action="store_true", - help="Whether to use `SentencePiece` model") +parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards") -parser.add_argument("--tbs", type=int, default=None, - help="Train batch size per replica") +parser.add_argument("--sentence_piece", default=False, action="store_true", help="Whether to use `SentencePiece` model") -parser.add_argument("--ebs", type=int, default=None, - help="Evaluation batch size per replica") +parser.add_argument("--tbs", type=int, default=None, help="Train batch size per replica") -parser.add_argument("--devices", type=int, nargs="*", default=[0], - help="Devices' ids to apply distributed training") +parser.add_argument("--ebs", type=int, default=None, help="Evaluation batch size per replica") -parser.add_argument("--mxp", default=False, action="store_true", - help="Enable mixed precision") +parser.add_argument("--devices", type=int, nargs="*", default=[0], help="Devices' ids to apply distributed training") -parser.add_argument("--cache", default=False, action="store_true", - help="Enable caching for dataset") +parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision") -parser.add_argument("--subwords", type=str, default=None, - help="Path to file that stores generated subwords") +parser.add_argument("--cache", default=False, action="store_true", help="Enable caching for dataset") -parser.add_argument("--subwords_corpus", 
nargs="*", type=str, default=[], - help="Transcript files for generating subwords") +parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords") + +parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[], help="Transcript files for generating subwords") + +parser.add_argument("--bfs", type=int, default=100, help="Buffer size for shuffling") args = parser.parse_args() @@ -97,14 +90,18 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + tfrecords_shards=args.tfrecords_shards, + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRTFRecordDataset( data_paths=config.learning_config.dataset_config.eval_paths, tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir, + tfrecords_shards=args.tfrecords_shards, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) else: train_dataset = ASRSliceDataset( @@ -112,13 +109,15 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRSliceDataset( data_paths=config.learning_config.dataset_config.eval_paths, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) conformer_trainer = TransducerTrainer( diff --git a/examples/contextnet/train_contextnet.py b/examples/contextnet/train_contextnet.py index 6cc9be72b5..e52ab642b0 100644 --- a/examples/contextnet/train_contextnet.py +++ b/examples/contextnet/train_contextnet.py @@ -26,29 +26,25 @@ parser = argparse.ArgumentParser(prog="ContextNet Training") -parser.add_argument("--config", type=str, default=DEFAULT_YAML, - help="The file path of model configuration file") +parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file") -parser.add_argument("--max_ckpts", type=int, default=10, - help="Max number of checkpoints to keep") +parser.add_argument("--max_ckpts", type=int, default=10, help="Max number of checkpoints to keep") -parser.add_argument("--tfrecords", default=False, action="store_true", - help="Whether to use tfrecords") +parser.add_argument("--tfrecords", default=False, action="store_true", help="Whether to use tfrecords") -parser.add_argument("--tbs", type=int, default=None, - help="Train batch size per replica") +parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards") -parser.add_argument("--ebs", type=int, default=None, - help="Evaluation batch size per replica") +parser.add_argument("--tbs", type=int, default=None, help="Train batch size per replica") -parser.add_argument("--devices", type=int, nargs="*", default=[0], - help="Devices' ids to apply distributed training") +parser.add_argument("--ebs", type=int, default=None, help="Evaluation batch size per replica") -parser.add_argument("--mxp", default=False, action="store_true", - help="Enable mixed precision") +parser.add_argument("--devices", type=int, nargs="*", default=[0], help="Devices' ids to apply distributed training") 
-parser.add_argument("--cache", default=False, action="store_true", - help="Enable caching for dataset") +parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision") + +parser.add_argument("--cache", default=False, action="store_true", help="Enable caching for dataset") + +parser.add_argument("--bfs", type=int, default=100, help="Buffer size for shuffling") args = parser.parse_args() @@ -75,14 +71,18 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + tfrecords_shards=args.tfrecords_shards, + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRTFRecordDataset( data_paths=config.learning_config.dataset_config.eval_paths, tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir, + tfrecords_shards=args.tfrecords_shards, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) else: train_dataset = ASRSliceDataset( @@ -90,13 +90,15 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRSliceDataset( data_paths=config.learning_config.dataset_config.eval_paths, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) contextnet_trainer = TransducerTrainer( diff --git a/examples/contextnet/train_ga_contextnet.py b/examples/contextnet/train_ga_contextnet.py index 45361181b1..d5e969ae54 100644 --- a/examples/contextnet/train_ga_contextnet.py +++ b/examples/contextnet/train_ga_contextnet.py @@ -26,32 +26,27 @@ parser = argparse.ArgumentParser(prog="ContextNet Training") -parser.add_argument("--config", type=str, default=DEFAULT_YAML, - help="The file path of model configuration file") +parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file") -parser.add_argument("--max_ckpts", type=int, default=10, - help="Max number of checkpoints to keep") +parser.add_argument("--max_ckpts", type=int, default=10, help="Max number of checkpoints to keep") -parser.add_argument("--tfrecords", default=False, action="store_true", - help="Whether to use tfrecords") +parser.add_argument("--tfrecords", default=False, action="store_true", help="Whether to use tfrecords") -parser.add_argument("--tbs", type=int, default=None, - help="Train batch size per replica") +parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards") -parser.add_argument("--ebs", type=int, default=None, - help="Evaluation batch size per replica") +parser.add_argument("--tbs", type=int, default=None, help="Train batch size per replica") -parser.add_argument("--acs", type=int, default=None, - help="Train accumulation steps") +parser.add_argument("--ebs", type=int, default=None, help="Evaluation batch size per replica") -parser.add_argument("--devices", type=int, nargs="*", default=[0], - help="Devices' ids to apply distributed training") +parser.add_argument("--acs", type=int, default=None, help="Train accumulation steps") -parser.add_argument("--mxp", default=False, 
action="store_true", - help="Enable mixed precision") +parser.add_argument("--devices", type=int, nargs="*", default=[0], help="Devices' ids to apply distributed training") -parser.add_argument("--cache", default=False, action="store_true", - help="Enable caching for dataset") +parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision") + +parser.add_argument("--cache", default=False, action="store_true", help="Enable caching for dataset") + +parser.add_argument("--bfs", type=int, default=100, help="Buffer size for shuffling") args = parser.parse_args() @@ -78,14 +73,18 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + tfrecords_shards=args.tfrecords_shards, + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRTFRecordDataset( data_paths=config.learning_config.dataset_config.eval_paths, tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir, + tfrecords_shards=args.tfrecords_shards, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) else: train_dataset = ASRSliceDataset( @@ -93,13 +92,15 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRSliceDataset( data_paths=config.learning_config.dataset_config.eval_paths, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) contextnet_trainer = TransducerTrainerGA( diff --git a/examples/contextnet/train_ga_subword_contextnet.py b/examples/contextnet/train_ga_subword_contextnet.py index 7a33956b42..32cf6a75df 100644 --- a/examples/contextnet/train_ga_subword_contextnet.py +++ b/examples/contextnet/train_ga_subword_contextnet.py @@ -26,38 +26,31 @@ parser = argparse.ArgumentParser(prog="ContextNet Training") -parser.add_argument("--config", type=str, default=DEFAULT_YAML, - help="The file path of model configuration file") +parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file") -parser.add_argument("--max_ckpts", type=int, default=10, - help="Max number of checkpoints to keep") +parser.add_argument("--max_ckpts", type=int, default=10, help="Max number of checkpoints to keep") -parser.add_argument("--tfrecords", default=False, action="store_true", - help="Whether to use tfrecords") +parser.add_argument("--tfrecords", default=False, action="store_true", help="Whether to use tfrecords") -parser.add_argument("--tbs", type=int, default=None, - help="Train batch size per replica") +parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards") -parser.add_argument("--ebs", type=int, default=None, - help="Evaluation batch size per replica") +parser.add_argument("--tbs", type=int, default=None, help="Train batch size per replica") -parser.add_argument("--acs", type=int, default=None, - help="Train accumulation steps") +parser.add_argument("--ebs", type=int, default=None, help="Evaluation batch size per replica") -parser.add_argument("--devices", type=int, nargs="*", 
default=[0], - help="Devices' ids to apply distributed training") +parser.add_argument("--acs", type=int, default=None, help="Train accumulation steps") -parser.add_argument("--mxp", default=False, action="store_true", - help="Enable mixed precision") +parser.add_argument("--devices", type=int, nargs="*", default=[0], help="Devices' ids to apply distributed training") -parser.add_argument("--cache", default=False, action="store_true", - help="Enable caching for dataset") +parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision") -parser.add_argument("--subwords", type=str, default=None, - help="Path to file that stores generated subwords") +parser.add_argument("--cache", default=False, action="store_true", help="Enable caching for dataset") -parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[], - help="Transcript files for generating subwords") +parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords") + +parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[], help="Transcript files for generating subwords") + +parser.add_argument("--bfs", type=int, default=100, help="Buffer size for shuffling") args = parser.parse_args() @@ -94,14 +87,18 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + tfrecords_shards=args.tfrecords_shards, + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRTFRecordDataset( data_paths=config.learning_config.dataset_config.eval_paths, tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir, + tfrecords_shards=args.tfrecords_shards, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) else: train_dataset = ASRSliceDataset( @@ -109,13 +106,15 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRSliceDataset( data_paths=config.learning_config.dataset_config.eval_paths, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) contextnet_trainer = TransducerTrainerGA( diff --git a/examples/contextnet/train_subword_contextnet.py b/examples/contextnet/train_subword_contextnet.py index 50f90892ba..2d186fbe42 100644 --- a/examples/contextnet/train_subword_contextnet.py +++ b/examples/contextnet/train_subword_contextnet.py @@ -26,35 +26,29 @@ parser = argparse.ArgumentParser(prog="ContextNet Training") -parser.add_argument("--config", type=str, default=DEFAULT_YAML, - help="The file path of model configuration file") +parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file") -parser.add_argument("--max_ckpts", type=int, default=10, - help="Max number of checkpoints to keep") +parser.add_argument("--max_ckpts", type=int, default=10, help="Max number of checkpoints to keep") -parser.add_argument("--tfrecords", default=False, action="store_true", - help="Whether to use tfrecords") +parser.add_argument("--tfrecords", default=False, action="store_true", 
help="Whether to use tfrecords") -parser.add_argument("--tbs", type=int, default=None, - help="Train batch size per replica") +parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards") -parser.add_argument("--ebs", type=int, default=None, - help="Evaluation batch size per replica") +parser.add_argument("--tbs", type=int, default=None, help="Train batch size per replica") -parser.add_argument("--devices", type=int, nargs="*", default=[0], - help="Devices' ids to apply distributed training") +parser.add_argument("--ebs", type=int, default=None, help="Evaluation batch size per replica") -parser.add_argument("--mxp", default=False, action="store_true", - help="Enable mixed precision") +parser.add_argument("--devices", type=int, nargs="*", default=[0], help="Devices' ids to apply distributed training") -parser.add_argument("--cache", default=False, action="store_true", - help="Enable caching for dataset") +parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision") -parser.add_argument("--subwords", type=str, default=None, - help="Path to file that stores generated subwords") +parser.add_argument("--cache", default=False, action="store_true", help="Enable caching for dataset") -parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[], - help="Transcript files for generating subwords") +parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords") + +parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[], help="Transcript files for generating subwords") + +parser.add_argument("--bfs", type=int, default=100, help="Buffer size for shuffling") args = parser.parse_args() @@ -91,14 +85,18 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + tfrecords_shards=args.tfrecords_shards, + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRTFRecordDataset( data_paths=config.learning_config.dataset_config.eval_paths, tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir, + tfrecords_shards=args.tfrecords_shards, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) else: train_dataset = ASRSliceDataset( @@ -106,13 +104,15 @@ speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, - stage="train", cache=args.cache, shuffle=True + stage="train", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) eval_dataset = ASRSliceDataset( data_paths=config.learning_config.dataset_config.eval_paths, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, - stage="eval", cache=args.cache, shuffle=True + stage="eval", cache=args.cache, + shuffle=True, buffer_size=args.bfs, ) contextnet_trainer = TransducerTrainer( diff --git a/scripts/create_tfrecords.py b/scripts/create_tfrecords.py index 7a787589a3..509d6b891b 100644 --- a/scripts/create_tfrecords.py +++ b/scripts/create_tfrecords.py @@ -20,14 +20,15 @@ parser = argparse.ArgumentParser(prog="TFRecords Creation") -parser.add_argument("--mode", "-m", type=str, - default=None, help=f"Mode in {modes}") +parser.add_argument("--mode", "-m", type=str, default=None, help=f"Mode in {modes}") -parser.add_argument("--tfrecords_dir", 
type=str, default=None, - help="Directory to tfrecords") +parser.add_argument("--tfrecords_dir", type=str, default=None, help="Directory to tfrecords") -parser.add_argument("transcripts", nargs="+", type=str, - default=None, help="Paths to transcript files") +parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards") + +parser.add_argument("--shuffle", default=False, action="store_true", help="Shuffle data or not") + +parser.add_argument("transcripts", nargs="+", type=str, default=None, help="Paths to transcript files") args = parser.parse_args() @@ -36,9 +37,8 @@ transcripts = preprocess_paths(args.transcripts) tfrecords_dir = preprocess_paths(args.tfrecords_dir) -if args.mode == "train": - ASRTFRecordDataset(transcripts, tfrecords_dir, None, None, - args.mode, shuffle=True).create_tfrecords() -else: - ASRTFRecordDataset(transcripts, tfrecords_dir, None, None, - args.mode, shuffle=False).create_tfrecords() +ASRTFRecordDataset( + data_paths=transcripts, tfrecords_dir=tfrecords_dir, + speech_featurizer=None, text_featurizer=None, + stage=args.mode, shuffle=args.shuffle, tfrecords_shards=args.tfrecords_shards +).create_tfrecords() diff --git a/tensorflow_asr/datasets/asr_dataset.py b/tensorflow_asr/datasets/asr_dataset.py index 7ef06a1196..ab2e8690fc 100755 --- a/tensorflow_asr/datasets/asr_dataset.py +++ b/tensorflow_asr/datasets/asr_dataset.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import abc -import glob import multiprocessing import os @@ -20,7 +19,7 @@ import tensorflow as tf from ..augmentations.augments import Augmentation -from .base_dataset import BaseDataset +from .base_dataset import BaseDataset, BUFFER_SIZE from ..featurizers.speech_featurizers import read_raw_audio, SpeechFeaturizer from ..featurizers.text_featurizers import TextFeaturizer from ..utils.utils import bytestring_feature, print_one_line, get_num_batches @@ -58,8 +57,12 @@ def __init__(self, data_paths: list, augmentations: Augmentation = Augmentation(None), cache: bool = False, - shuffle: bool = False): - super(ASRDataset, self).__init__(data_paths, augmentations, cache, shuffle, stage) + shuffle: bool = False, + buffer_size: int = BUFFER_SIZE): + super(ASRDataset, self).__init__( + data_paths=data_paths, augmentations=augmentations, + cache=cache, shuffle=shuffle, stage=stage, buffer_size=buffer_size + ) self.speech_featurizer = speech_featurizer self.text_featurizer = text_featurizer @@ -105,7 +108,7 @@ def process(self, dataset, batch_size): dataset = dataset.cache() if self.shuffle: - dataset = dataset.shuffle(TFRECORD_SHARDS, reshuffle_each_iteration=True) + dataset = dataset.shuffle(self.buffer_size, reshuffle_each_iteration=True) # PADDED BATCH the dataset dataset = dataset.padded_batch( @@ -150,13 +153,17 @@ def __init__(self, text_featurizer: TextFeaturizer, stage: str, augmentations: Augmentation = Augmentation(None), + tfrecords_shards: int = TFRECORD_SHARDS, cache: bool = False, - shuffle: bool = False): + shuffle: bool = False, + buffer_size: int = BUFFER_SIZE): super(ASRTFRecordDataset, self).__init__( - stage, speech_featurizer, text_featurizer, - data_paths, augmentations, cache, shuffle + stage=stage, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, + data_paths=data_paths, augmentations=augmentations, cache=cache, shuffle=shuffle, buffer_size=buffer_size ) self.tfrecords_dir = tfrecords_dir + if tfrecords_shards <= 0: raise ValueError("tfrecords_shards must 
be positive") + self.tfrecords_shards = tfrecords_shards if not tf.io.gfile.exists(self.tfrecords_dir): tf.io.gfile.makedirs(self.tfrecords_dir) @@ -177,10 +184,10 @@ def create_tfrecords(self): def get_shard_path(shard_id): return os.path.join(self.tfrecords_dir, f"{self.stage}_{shard_id}.tfrecord") - shards = [get_shard_path(idx) for idx in range(1, TFRECORD_SHARDS + 1)] + shards = [get_shard_path(idx) for idx in range(1, self.tfrecords_shards + 1)] - splitted_entries = np.array_split(entries, TFRECORD_SHARDS) - with multiprocessing.Pool(TFRECORD_SHARDS) as pool: + splitted_entries = np.array_split(entries, self.tfrecords_shards) + with multiprocessing.Pool(self.tfrecords_shards) as pool: pool.map(write_tfrecord_file, zip(shards, splitted_entries)) return True @@ -273,7 +280,7 @@ def process(self, dataset, batch_size): dataset = dataset.cache() if self.shuffle: - dataset = dataset.shuffle(TFRECORD_SHARDS, reshuffle_each_iteration=True) + dataset = dataset.shuffle(self.buffer_size, reshuffle_each_iteration=True) # PADDED BATCH the dataset dataset = dataset.padded_batch( @@ -337,7 +344,7 @@ def process(self, dataset, batch_size): dataset = dataset.cache() if self.shuffle: - dataset = dataset.shuffle(TFRECORD_SHARDS, reshuffle_each_iteration=True) + dataset = dataset.shuffle(self.buffer_size, reshuffle_each_iteration=True) # PADDED BATCH the dataset dataset = dataset.padded_batch( diff --git a/tensorflow_asr/datasets/base_dataset.py b/tensorflow_asr/datasets/base_dataset.py index 864eee8864..d5e018de1a 100644 --- a/tensorflow_asr/datasets/base_dataset.py +++ b/tensorflow_asr/datasets/base_dataset.py @@ -15,6 +15,8 @@ from ..augmentations.augments import Augmentation +BUFFER_SIZE = 100 + class BaseDataset(metaclass=abc.ABCMeta): """ Based dataset for all models """ @@ -24,11 +26,15 @@ def __init__(self, augmentations: Augmentation = Augmentation(None), cache: bool = False, shuffle: bool = False, + buffer_size: int = BUFFER_SIZE, stage: str = "train"): self.data_paths = data_paths self.augmentations = augmentations # apply augmentation self.cache = cache # whether to cache WHOLE transformed dataset to memory self.shuffle = shuffle # whether to shuffle tf.data.Dataset + if buffer_size <= 0 and shuffle: + raise ValueError("buffer_size must be positive when shuffle is on") + self.buffer_size = buffer_size # shuffle buffer size self.stage = stage # for defining tfrecords files self.total_steps = None # for better training visualization diff --git a/tensorflow_asr/featurizers/text_featurizers.py b/tensorflow_asr/featurizers/text_featurizers.py index df32d44089..9c78746d51 100755 --- a/tensorflow_asr/featurizers/text_featurizers.py +++ b/tensorflow_asr/featurizers/text_featurizers.py @@ -337,6 +337,7 @@ def build_from_corpus(cls, decoder_config: dict): The input sentence must be pretokenized when using word type.""" decoder_cfg = DecoderConfig(decoder_config) # Train SentencePiece Model + def corpus_iterator(): for file in decoder_cfg.corpus_files: with open(file, "r", encoding="utf-8") as f: @@ -349,7 +350,7 @@ def corpus_iterator(): sp.SentencePieceTrainer.Train( sentence_iterator=corpus_iterator(), model_prefix=decoder_cfg.output_path_prefix, - model_type=decoder_cfg.model_type, + model_type=decoder_cfg.model_type, vocab_size=decoder_cfg.target_vocab_size, num_threads=cpu_count(), unk_id=cls.UNK_TOKEN_ID, @@ -357,7 +358,7 @@ def corpus_iterator(): eos_id=cls.EOS_TOKEN_ID, pad_id=cls.PAD_TOKEN_ID, unk_surface='__UNKNOWN__' # change default unk surface U+2047("⁇") by "__UNKNOWN__" - ) + ) # Export 
fairseq dictionary processor = sp.SentencePieceProcessor() processor.Load(decoder_cfg.output_path_prefix + ".model") @@ -399,7 +400,7 @@ def extract(self, text: str) -> tf.Tensor: Returns: sequence of ints in tf.Tensor - """ + """ text = self.preprocess_text(text) text = text.strip() # remove trailing space indices = self.model.encode_as_ids(text) @@ -449,4 +450,3 @@ def indices2upoints(self, indices: tf.Tensor) -> tf.Tensor: indices = self.normalize_indices(indices) upoints = tf.gather_nd(self.upoints, tf.expand_dims(indices, axis=-1)) return tf.gather_nd(upoints, tf.where(tf.not_equal(upoints, 0))) - diff --git a/tensorflow_asr/losses/rnnt_losses.py b/tensorflow_asr/losses/rnnt_losses.py index 14b8cea9af..bebdc5b536 100644 --- a/tensorflow_asr/losses/rnnt_losses.py +++ b/tensorflow_asr/losses/rnnt_losses.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huy Le Nguyen (@usimarit) +# Copyright 2020 Huy Le Nguyen (@usimarit) and M. Yusuf Sarıgöz (@monatis) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,16 +19,19 @@ use_warprnnt = True except ImportError: print("Cannot import RNNT loss in warprnnt. Falls back to RNNT in TensorFlow") + print("Note: The RNNT in Tensorflow is not supported for CPU yet") from tensorflow.python.ops.gen_array_ops import matrix_diag_part_v2 use_warprnnt = False def rnnt_loss(logits, labels, label_length, logit_length, blank=0, name=None): if use_warprnnt: - return rnnt_loss_warprnnt(logits=logits, labels=labels, label_length=label_length, logit_length=logit_length, blank=blank) + return rnnt_loss_warprnnt(logits=logits, labels=labels, + label_length=label_length, logit_length=logit_length, blank=blank) else: return rnnt_loss_tf(logits=logits, labels=labels, label_length=label_length, logit_length=logit_length, name=name) + @tf.function def rnnt_loss_warprnnt(logits, labels, label_length, logit_length, blank=0): if not tf.config.list_physical_devices('GPU'): @@ -120,7 +123,8 @@ def next_state(x, mask_and_trans_probs): beta_t = tf.concat([x[:, :-1] + truth_probs, LOG_0 * tf.ones(shape=[batch_size, 1])], axis=1) beta_next = reduce_logsumexp(tf.stack([beta_b, beta_t], axis=0), axis=0) - masked_beta_next = nan_to_zero(beta_next * tf.expand_dims(mask_s, axis=1)) + nan_to_zero(x * tf.expand_dims((1.0 - mask_s), axis=1)) + masked_beta_next = (nan_to_zero(beta_next * tf.expand_dims(mask_s, axis=1)) + + nan_to_zero(x * tf.expand_dims((1.0 - mask_s), axis=1))) return masked_beta_next # Initial beta for batches. 
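
# --- Reviewer note (illustrative, not part of the patch): the two print() calls added near the top
# --- of rnnt_losses.py belong to its optional-import fallback. The import line itself is outside
# --- this hunk, so the module name and alias below (warprnnt_tensorflow / warp_rnnt_loss) are
# --- assumptions used only to show the pattern: use the native warp-rnnt kernel when it is
# --- installed, otherwise fall back to the pure-TensorFlow implementation (GPU-only per the note).
try:
    from warprnnt_tensorflow import rnnt_loss as warp_rnnt_loss  # assumed package/alias
    use_warprnnt = True
except ImportError:
    print("Cannot import RNNT loss in warprnnt. Falls back to RNNT in TensorFlow")
    print("Note: The RNNT in Tensorflow is not supported for CPU yet")
    use_warprnnt = False

def pick_rnnt_impl():
    # Mirrors the dispatch in rnnt_loss(): native kernel when available, TF fallback otherwise.
    return "warp-rnnt kernel" if use_warprnnt else "pure TensorFlow fallback"
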
@@ -155,14 +159,10 @@ def compute_rnnt_loss_and_grad_helper(logits, labels, label_length, logit_length bp_diags = extract_diagonals(blank_probs) tp_diags = extract_diagonals(truth_probs) - label_mask = tf.expand_dims(tf.sequence_mask( - label_length + 1, maxlen=target_max_len, dtype=tf.float32), axis=1) - small_label_mask = tf.expand_dims(tf.sequence_mask( - label_length, maxlen=target_max_len, dtype=tf.float32), axis=1) - input_mask = tf.expand_dims(tf.sequence_mask( - logit_length, maxlen=input_max_len, dtype=tf.float32), axis=2) - small_input_mask = tf.expand_dims(tf.sequence_mask( - logit_length - 1, maxlen=input_max_len, dtype=tf.float32), axis=2) + label_mask = tf.expand_dims(tf.sequence_mask(label_length + 1, maxlen=target_max_len, dtype=tf.float32), axis=1) + small_label_mask = tf.expand_dims(tf.sequence_mask(label_length, maxlen=target_max_len, dtype=tf.float32), axis=1) + input_mask = tf.expand_dims(tf.sequence_mask(logit_length, maxlen=input_max_len, dtype=tf.float32), axis=2) + small_input_mask = tf.expand_dims(tf.sequence_mask(logit_length - 1, maxlen=input_max_len, dtype=tf.float32), axis=2) mask = label_mask * input_mask grad_blank_mask = (label_mask * small_input_mask)[:, :-1, :] grad_truth_mask = (small_label_mask * input_mask)[:, :, :-1] @@ -172,30 +172,43 @@ def compute_rnnt_loss_and_grad_helper(logits, labels, label_length, logit_length indices = tf.stack([logit_length - 1, label_length], axis=1) blank_sl = tf.gather_nd(blank_probs, indices, batch_dims=1) - beta = backward_dp(bp_diags, tp_diags, batch_size, input_max_len, target_max_len, label_length, logit_length, - blank_sl) * mask + beta = backward_dp(bp_diags, tp_diags, batch_size, input_max_len, + target_max_len, label_length, logit_length, blank_sl) * mask beta = tf.where(tf.math.is_nan(beta), tf.zeros_like(beta), beta) final_state_probs = beta[:, 0, 0] # Compute gradients of loss w.r.t. blank log-probabilities. - grads_blank = -tf.exp((alpha[:, :-1, :] + beta[:, 1:, :] - tf.reshape(final_state_probs, - shape=[batch_size, 1, 1]) + blank_probs[:, - :-1, - :]) * grad_blank_mask) * grad_blank_mask + grads_blank = -tf.exp( + ( + alpha[:, :-1, :] + beta[:, 1:, :] + - tf.reshape(final_state_probs, shape=[batch_size, 1, 1]) + + blank_probs[:, :-1, :] + ) * grad_blank_mask + ) * grad_blank_mask grads_blank = tf.concat([grads_blank, tf.zeros(shape=(batch_size, 1, target_max_len))], axis=1) last_grads_blank = -1 * tf.scatter_nd( - tf.concat([tf.reshape(tf.range(batch_size, dtype=tf.int64), shape=[batch_size, 1]), tf.cast(indices, dtype=tf.int64)], axis=1), - tf.ones(batch_size, dtype=tf.float32), [batch_size, input_max_len, target_max_len]) + tf.concat([tf.reshape(tf.range(batch_size, dtype=tf.int64), shape=[batch_size, 1]), + tf.cast(indices, dtype=tf.int64)], axis=1), + tf.ones(batch_size, dtype=tf.float32), + [batch_size, input_max_len, target_max_len] + ) grads_blank = grads_blank + last_grads_blank # Compute gradients of loss w.r.t. truth log-probabilities. - grads_truth = -tf.exp((alpha[:, :, :-1] + beta[:, :, 1:] - tf.reshape(final_state_probs, shape=[batch_size, 1, - 1]) + truth_probs) * grad_truth_mask) * grad_truth_mask + grads_truth = -tf.exp( + ( + alpha[:, :, :-1] + beta[:, :, 1:] + - tf.reshape(final_state_probs, shape=[batch_size, 1, 1]) + + truth_probs + ) + * grad_truth_mask + ) * grad_truth_mask # Compute gradients of loss w.r.t. activations. 
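
# --- Reviewer note (illustrative, not part of the patch): a quick shape check for the masks
# --- rebuilt a few lines above, with toy sizes chosen here (batch 2, input_max_len 5,
# --- target_max_len 4). label_mask broadcasts over time and input_mask over labels, so their
# --- product is the (batch, T, U) validity mask applied to the loss lattice below.
import tensorflow as tf

label_length = tf.constant([2, 3])   # label count per utterance
logit_length = tf.constant([4, 5])   # frame count per utterance
target_max_len, input_max_len = 4, 5

label_mask = tf.expand_dims(tf.sequence_mask(label_length + 1, maxlen=target_max_len, dtype=tf.float32), axis=1)
input_mask = tf.expand_dims(tf.sequence_mask(logit_length, maxlen=input_max_len, dtype=tf.float32), axis=2)
print(label_mask.shape, input_mask.shape, (label_mask * input_mask).shape)  # (2, 1, 4) (2, 5, 1) (2, 5, 4)
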
a = tf.tile(tf.reshape(tf.range(target_max_len - 1, dtype=tf.int64), shape=(1, 1, target_max_len - 1, 1)), multiples=[batch_size, 1, 1, 1]) b = tf.cast(tf.reshape(labels - 1, shape=(batch_size, 1, target_max_len - 1, 1)), dtype=tf.int64) + # b = tf.where(tf.equal(b, -1), tf.zeros_like(b), b) # for cpu testing (index -1 on cpu will raise errors) c = tf.concat([a, b], axis=3) d = tf.tile(c, multiples=(1, input_max_len, 1, 1)) e = tf.tile(tf.reshape(tf.range(input_max_len, dtype=tf.int64), shape=(1, input_max_len, 1, 1)), @@ -208,8 +221,8 @@ def compute_rnnt_loss_and_grad_helper(logits, labels, label_length, logit_length probs = tf.exp(log_probs) grads_truth_scatter = tf.scatter_nd(scatter_idx, grads_truth, [batch_size, input_max_len, target_max_len, vocab_size - 1]) - grads = tf.concat( - [tf.reshape(grads_blank, shape=(batch_size, input_max_len, target_max_len, -1)), grads_truth_scatter], axis=3) + grads = tf.concat([tf.reshape(grads_blank, shape=(batch_size, input_max_len, target_max_len, -1)), + grads_truth_scatter], axis=3) grads_logits = grads - probs * (tf.reduce_sum(grads, axis=3, keepdims=True)) loss = -final_state_probs @@ -244,5 +257,4 @@ def grad(grad_loss): return result[0], grad - - return compute_rnnt_loss_and_grad(*args) \ No newline at end of file + return compute_rnnt_loss_and_grad(*args) diff --git a/tensorflow_asr/utils/__init__.py b/tensorflow_asr/utils/__init__.py index 8fc814dbec..e7becd8f27 100644 --- a/tensorflow_asr/utils/__init__.py +++ b/tensorflow_asr/utils/__init__.py @@ -64,7 +64,10 @@ def setup_strategy(devices): def setup_tpu(tpu_address=None): import tensorflow as tf - resolver = tf.distribute.cluster_resolver.TPUClusterResolver() if tpu_address is None else tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + tpu_address) + if tpu_address is None: + resolver = tf.distribute.cluster_resolver.TPUClusterResolver() + else: + resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + tpu_address) tf.config.experimental_connect_to_cluster(resolver) tf.tpu.experimental.initialize_tpu_system(resolver) print("All TPUs: ", tf.config.list_logical_devices('TPU')) diff --git a/tests/featurizer/test_sentencepiece.py b/tests/featurizer/test_sentencepiece.py index 4042eb6c8b..e289576915 100644 --- a/tests/featurizer/test_sentencepiece.py +++ b/tests/featurizer/test_sentencepiece.py @@ -48,13 +48,13 @@ def test_featurizer(): "normalize_per_feature": False} text_featurizer_sentencepiece = SentencePieceFeaturizer.load_from_file(config, None) - subwords_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), + subwords_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), os.pardir, os.pardir, "vocabularies", - "librispeech_train_4_1030.subwords") + "librispeech_train_4_1030.subwords") text_featurizer_subwords = SubwordFeaturizer.load_from_file(config, subwords_path) - speech_featurizer = TFSpeechFeaturizer(config_speech) + speech_featurizer = TFSpeechFeaturizer(config_speech) data_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "transcripts_librispeech_train_clean_100.tsv") def get_data(featurizer: TextFeaturizer):
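
Note (illustrative, not part of the patch): with the scripts/create_tfrecords.py change above, shard count and shuffling are now controlled by the new "--tfrecords_shards" and "--shuffle" flags instead of being hard-coded per mode, e.g. `python scripts/create_tfrecords.py --mode train --tfrecords_dir /data/tfrecords --tfrecords_shards 32 --shuffle train.tsv`. The programmatic equivalent of that command, with placeholder paths, looks like this:

from tensorflow_asr.datasets.asr_dataset import ASRTFRecordDataset

ASRTFRecordDataset(
    data_paths=["train.tsv"],            # placeholder transcript file(s)
    tfrecords_dir="/data/tfrecords",     # placeholder output directory
    speech_featurizer=None, text_featurizer=None,  # featurizers are not needed just to write records
    stage="train", shuffle=True, tfrecords_shards=32,
).create_tfrecords()
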
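
Closing note (illustrative, not part of the patch): the thread running through the dataset changes above is the shuffle buffer. Previously every ASRDataset shuffled with dataset.shuffle(TFRECORD_SHARDS, ...), i.e. a buffer of only 16 elements; now it uses the configurable buffer_size (BUFFER_SIZE = 100 by default, exposed to the scripts as --bfs, and validated to be positive whenever shuffle is enabled). A tiny tf.data demo of why the buffer size matters:

import tensorflow as tf

data = tf.data.Dataset.range(1000)
small = list(data.shuffle(16, reshuffle_each_iteration=True).take(10).as_numpy_iterator())
large = list(data.shuffle(100, reshuffle_each_iteration=True).take(10).as_numpy_iterator())
print("buffer=16 :", small)   # samples come from roughly the first dozen-plus elements only
print("buffer=100:", large)   # samples are drawn from a much wider window, i.e. better mixing
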