Skip WER calculation when the manifest has no ground-truth text

Signed-off-by: fayejf <fayejf07@gmail.com>
NVIDIA · May 9, 2023 · ef5d91a · ef5d91a
1 parent 69ac874
commit ef5d91a
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 9 deletions.
diff --git a/examples/asr/asr_chunked_inference/ctc/speech_to_text_buffered_infer_ctc.py b/examples/asr/asr_chunked_inference/ctc/speech_to_text_buffered_infer_ctc.py
@@ -215,8 +215,9 @@ def autocast():
             use_cer=cfg.use_cer,
             output_filename=None,
         )
-        logging.info(f"Writing prediction and error rate of each sample to {output_manifest_w_wer}!")
-        logging.info(f"{total_res}")
+        if output_manifest_w_wer:
+            logging.info(f"Writing prediction and error rate of each sample to {output_manifest_w_wer}!")
+            logging.info(f"{total_res}")
 
     return cfg
 

diff --git a/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py b/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py
@@ -269,8 +269,9 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
             use_cer=cfg.use_cer,
             output_filename=None,
         )
-        logging.info(f"Writing prediction and error rate of each sample to {output_manifest_w_wer}!")
-        logging.info(f"{total_res}")
+        if output_manifest_w_wer:
+            logging.info(f"Writing prediction and error rate of each sample to {output_manifest_w_wer}!")
+            logging.info(f"{total_res}")
 
     return cfg
 

diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py
@@ -355,8 +355,9 @@ def autocast():
             use_cer=cfg.use_cer,
             output_filename=None,
         )
-        logging.info(f"Writing prediction and error rate of each sample to {output_manifest_w_wer}!")
-        logging.info(f"{total_res}")
+        if output_manifest_w_wer:
+            logging.info(f"Writing prediction and error rate of each sample to {output_manifest_w_wer}!")
+            logging.info(f"{total_res}")
 
     return cfg
 

diff --git a/nemo/collections/asr/parts/utils/eval_utils.py b/nemo/collections/asr/parts/utils/eval_utils.py
@@ -88,7 +88,7 @@ def cal_write_wer(
     langid: str = 'en',
     use_cer: bool = False,
     output_filename: str = None,
-) -> Tuple[str, dict]:
+) -> Tuple[str, dict, str]:
     """ 
     Calculate wer, inserion, deletion and substitution rate based on groundtruth text and pred_text_attr_name (pred_text) 
     We use WER in function name as a convention, but Error Rate (ER) currently support Word Error Rate (WER) and Character Error Rate (CER)
@@ -103,9 +103,10 @@ def cal_write_wer(
             sample = json.loads(line)
 
             if 'text' not in sample:
-                raise ValueError(
-                    "ground-truth text is not present in manifest! Cannot calculate Word Error Rate. Exiting!"
+                logging.info(
+                    "ground-truth text is not present in manifest! Cannot calculate Word Error Rate. Returning!"
                 )
+                return None, None, eval_metric
 
             hyp = sample[pred_text_attr_name]
             ref = sample['text']