From 102e8997220bd077386b9fbe721a7e027cb60423 Mon Sep 17 00:00:00 2001 From: Evelina <10428420+ekmb@users.noreply.github.com> Date: Mon, 14 Jun 2021 18:26:22 -0700 Subject: [PATCH] Text Normalization Update (#2356) * upper cased date support Signed-off-by: ekmb * update whitelist, change roman weights Signed-off-by: ekmb * docstrings, space fix, init file Signed-off-by: ekmb * lgtm Signed-off-by: ekmb * fraction with measure class Signed-off-by: ekmb Signed-off-by: mchrzanowski --- .../text_normalization/data/months/abbr.tsv | 4 +- .../text_normalization/data/roman/__init__.py | 13 +++ .../data/roman/digit_teen.tsv | 49 ++++++++++ .../data/roman/hundreds.tsv | 9 ++ .../text_normalization/data/roman/ties.tsv | 5 + .../text_normalization/data/whitelist.tsv | 3 + .../data/whitelist_alternatives.tsv | 5 +- .../text_normalization/data_loader_utils.py | 21 ++++ .../text_normalization/normalize.py | 23 ++++- .../normalize_with_audio.py | 98 +++++++++---------- .../text_normalization/taggers/cardinal.py | 2 +- .../text_normalization/taggers/date.py | 18 +++- .../text_normalization/taggers/fraction.py | 24 ++++- .../text_normalization/taggers/measure.py | 8 +- .../text_normalization/taggers/ordinal.py | 5 +- .../text_normalization/taggers/roman.py | 61 ++++++++++++ .../taggers/tokenize_and_classify.py | 12 ++- .../text_normalization/verbalizers/date.py | 16 +-- .../verbalizers/fraction.py | 45 ++++++++- .../text_normalization/verbalizers/measure.py | 10 +- .../text_normalization/verbalizers/roman.py | 46 +++++++++ .../verbalizers/verbalize.py | 12 ++- .../test_cases_cardinal.txt | 1 + .../test_cases_date.txt | 2 + .../test_cases_fraction.txt | 11 +++ .../test_cases_measure.txt | 1 + .../test_cases_normalize_with_audio.txt | 11 ++- tests/nemo_text_processing/test_boundary.py | 2 +- tests/nemo_text_processing/test_fraction.py | 38 +++++++ 29 files changed, 474 insertions(+), 81 deletions(-) create mode 100644 nemo_text_processing/text_normalization/data/roman/__init__.py 
create mode 100644 nemo_text_processing/text_normalization/data/roman/digit_teen.tsv create mode 100644 nemo_text_processing/text_normalization/data/roman/hundreds.tsv create mode 100644 nemo_text_processing/text_normalization/data/roman/ties.tsv create mode 100644 nemo_text_processing/text_normalization/taggers/roman.py create mode 100644 nemo_text_processing/text_normalization/verbalizers/roman.py create mode 100644 tests/nemo_text_processing/data_text_normalization/test_cases_fraction.txt create mode 100644 tests/nemo_text_processing/test_fraction.py diff --git a/nemo_text_processing/text_normalization/data/months/abbr.tsv b/nemo_text_processing/text_normalization/data/months/abbr.tsv index fb1f5c70309a..5609e211d60a 100644 --- a/nemo_text_processing/text_normalization/data/months/abbr.tsv +++ b/nemo_text_processing/text_normalization/data/months/abbr.tsv @@ -2,11 +2,11 @@ jan january feb february mar march apr april -jun june +jun june jul july aug august sep september sept september oct october nov november -dec december +dec december \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/data/roman/__init__.py b/nemo_text_processing/text_normalization/data/roman/__init__.py new file mode 100644 index 000000000000..bc443be41c4c --- /dev/null +++ b/nemo_text_processing/text_normalization/data/roman/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/nemo_text_processing/text_normalization/data/roman/digit_teen.tsv b/nemo_text_processing/text_normalization/data/roman/digit_teen.tsv new file mode 100644 index 000000000000..cd0991331f38 --- /dev/null +++ b/nemo_text_processing/text_normalization/data/roman/digit_teen.tsv @@ -0,0 +1,49 @@ +i 1 +ii 2 +iii 3 +iv 4 +v 5 +vi 6 +vii 7 +viii 8 +ix 9 +x 10 +xi 11 +xii 12 +xiii 13 +xiv 14 +xv 15 +xvi 16 +xvii 17 +xviii 18 +xix 19 +xx 20 +xxi 21 +xxii 22 +xxiii 23 +xxiv 24 +xxv 25 +xxvi 26 +xxvii 27 +xxviii 28 +xxix 29 +xxx 30 +xxxi 31 +xxxii 32 +xxxiii 33 +xxxiv 34 +xxxv 35 +xxxvi 36 +xxxvii 37 +xxxviii 38 +xxxix 39 +xl 40 +xli 41 +xlii 42 +xliii 43 +xliv 44 +xlv 45 +xlvi 46 +xlvii 47 +xlviii 48 +xlix 49 \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/data/roman/hundreds.tsv b/nemo_text_processing/text_normalization/data/roman/hundreds.tsv new file mode 100644 index 000000000000..0aafad3049cd --- /dev/null +++ b/nemo_text_processing/text_normalization/data/roman/hundreds.tsv @@ -0,0 +1,9 @@ +c 100 +cc 200 +ccc 300 +cd 400 +d 500 +dc 600 +dcc 700 +dccc 800 +cm 900 \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/data/roman/ties.tsv b/nemo_text_processing/text_normalization/data/roman/ties.tsv new file mode 100644 index 000000000000..5516676f4be3 --- /dev/null +++ b/nemo_text_processing/text_normalization/data/roman/ties.tsv @@ -0,0 +1,5 @@ +l 50 +lx 60 +lxx 70 +lxxx 80 +xc 90 \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/data/whitelist.tsv b/nemo_text_processing/text_normalization/data/whitelist.tsv index 641a15560079..853993190452 100644 --- a/nemo_text_processing/text_normalization/data/whitelist.tsv +++ b/nemo_text_processing/text_normalization/data/whitelist.tsv @@ -1,6 +1,9 @@ Ph.D. p h d Hon. honorable & and +Mt. Mount +Maj. Major +Rev. Reverend # hash Gov. 
governor 7-eleven seven eleven diff --git a/nemo_text_processing/text_normalization/data/whitelist_alternatives.tsv b/nemo_text_processing/text_normalization/data/whitelist_alternatives.tsv index d3c878e34b43..8cea774c58f0 100644 --- a/nemo_text_processing/text_normalization/data/whitelist_alternatives.tsv +++ b/nemo_text_processing/text_normalization/data/whitelist_alternatives.tsv @@ -11,4 +11,7 @@ Mrs. Misses Ms. Miss Mr Mister Mrs Misses -Ms Miss \ No newline at end of file +Ms Miss +&Co. and Co. +§ section += equals \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/data_loader_utils.py b/nemo_text_processing/text_normalization/data_loader_utils.py index b361ab7d729e..1aaac7a76246 100644 --- a/nemo_text_processing/text_normalization/data_loader_utils.py +++ b/nemo_text_processing/text_normalization/data_loader_utils.py @@ -16,6 +16,7 @@ import csv import json import os +import re from collections import defaultdict, namedtuple from typing import Dict, List, Optional, Set, Tuple @@ -241,8 +242,28 @@ def post_process_punctuation(text: str) -> str: .replace('“', '"') .replace("‘", "'") .replace('`', "'") + .replace('- -', "--") ) for punct in "!,.:;?": text = text.replace(f' {punct}', punct) return text.strip() + + +def pre_process(text: str) -> str: + """ + Adds space around punctuation marks + + Args: + text: string that may include semiotic classes + + Returns: text with spaces around punctuation marks + """ + space_both = '*<=>^[]{}' + for punct in space_both: + text = text.replace(punct, ' ' + punct + ' ') + + text = text.replace('--', ' ' + '--' + ' ') + # remove extra space + text = re.sub(r' +', ' ', text) + return text diff --git a/nemo_text_processing/text_normalization/normalize.py b/nemo_text_processing/text_normalization/normalize.py index 1ff591c3876b..353025a3a989 100644 --- a/nemo_text_processing/text_normalization/normalize.py +++ b/nemo_text_processing/text_normalization/normalize.py @@ -17,7 +17,7 @@ from 
collections import OrderedDict from typing import List -from nemo_text_processing.text_normalization.data_loader_utils import post_process_punctuation +from nemo_text_processing.text_normalization.data_loader_utils import post_process_punctuation, pre_process from nemo_text_processing.text_normalization.taggers.tokenize_and_classify import ClassifyFst from nemo_text_processing.text_normalization.token_parser import PRESERVE_ORDER_KEY, TokenParser from nemo_text_processing.text_normalization.verbalizers.verbalize_final import VerbalizeFinalFst @@ -67,7 +67,9 @@ def normalize_list(self, texts: List[str], verbose=False) -> List[str]: res.append(text) return res - def normalize(self, text: str, verbose: bool, punct_post_process: bool = False) -> str: + def normalize( + self, text: str, verbose: bool, punct_pre_process: bool = False, punct_post_process: bool = False + ) -> str: """ Main function. Normalizes tokens from written to spoken form e.g. 12 kg -> twelve kilograms @@ -75,10 +77,13 @@ def normalize(self, text: str, verbose: bool, punct_post_process: bool = False) Args: text: string that may include semiotic classes verbose: whether to print intermediate meta information - punct_post_process: set to True to normalize punctuation + punct_pre_process: whether to perform punctuation pre-processing, for example, [25] -> [ 25 ] + punct_post_process: whether to normalize punctuation Returns: spoken form """ + if punct_pre_process: + text = pre_process(text) text = text.strip() if not text: if verbose: @@ -222,10 +227,20 @@ def parse_args(): parser.add_argument( "--punct_post_process", help="set to True to enable punctuation post processing", action="store_true" ) + parser.add_argument( + "--punct_pre_process", help="set to True to enable punctuation pre processing", action="store_true" + ) return parser.parse_args() if __name__ == "__main__": args = parse_args() normalizer = Normalizer(input_case=args.input_case) - print(normalizer.normalize(args.input_string, 
verbose=args.verbose, punct_post_process=args.punct_post_process)) + print( + normalizer.normalize( + args.input_string, + verbose=args.verbose, + punct_pre_process=args.punct_pre_process, + punct_post_process=args.punct_post_process, + ) + ) diff --git a/nemo_text_processing/text_normalization/normalize_with_audio.py b/nemo_text_processing/text_normalization/normalize_with_audio.py index 46d34afc1ee9..4df248f2d401 100644 --- a/nemo_text_processing/text_normalization/normalize_with_audio.py +++ b/nemo_text_processing/text_normalization/normalize_with_audio.py @@ -14,16 +14,15 @@ import json import os -import re import time from argparse import ArgumentParser from typing import List, Tuple -from nemo_text_processing.text_normalization.data_loader_utils import post_process_punctuation +from joblib import Parallel, delayed +from nemo_text_processing.text_normalization.data_loader_utils import post_process_punctuation, pre_process from nemo_text_processing.text_normalization.normalize import Normalizer from nemo_text_processing.text_normalization.taggers.tokenize_and_classify import ClassifyFst from nemo_text_processing.text_normalization.verbalizers.verbalize_final import VerbalizeFinalFst -from tqdm import tqdm from nemo.collections.asr.metrics.wer import word_error_rate from nemo.collections.asr.models import ASRModel @@ -79,7 +78,14 @@ def __init__(self, input_case: str): self.tagger = ClassifyFst(input_case=input_case, deterministic=False) self.verbalizer = VerbalizeFinalFst(deterministic=False) - def normalize(self, text: str, n_tagged: int, punct_post_process: bool = True, verbose: bool = False) -> str: + def normalize( + self, + text: str, + n_tagged: int, + punct_pre_process: bool = True, + punct_post_process: bool = True, + verbose: bool = False, + ) -> str: """ Main function. Normalizes tokens from written to spoken form e.g. 
12 kg -> twelve kilograms @@ -87,12 +93,15 @@ def normalize(self, text: str, n_tagged: int, punct_post_process: bool = True, v Args: text: string that may include semiotic classes n_tagged: number of tagged options to consider, -1 - to get all possible tagged options + punct_pre_process: whether to perform punctuation pre-processing, for example, [25] -> [ 25 ] punct_post_process: whether to normalize punctuation verbose: whether to print intermediate meta information Returns: normalized text options (usually there are multiple ways of normalizing a given semiotic class) """ + if punct_pre_process: + text = pre_process(text) text = text.strip() if not text: if verbose: @@ -108,7 +117,6 @@ def normalize(self, text: str, n_tagged: int, punct_post_process: bool = True, v normalized_texts = [] for tagged_text in tagged_texts: self._verbalize(tagged_text, normalized_texts) - if len(normalized_texts) == 0: raise ValueError() if punct_post_process: @@ -183,36 +191,12 @@ def calculate_cer(normalized_texts: List[str], transcript: str, remove_punct=Fal text_clean = text.replace('-', ' ').lower() if remove_punct: for punct in "!?:;,.-()*+-/<=>@^_": - text_clean = text_clean.replace(punct, " ") - text_clean = re.sub(r' +', ' ', text_clean) + text_clean = text_clean.replace(punct, "") cer = round(word_error_rate([transcript], [text_clean], use_cer=True) * 100, 2) normalized_options.append((text, cer)) return normalized_options -def pre_process(text: str) -> str: - """ - Adds space around punctuation marks - - Args: - text: string that may include semiotic classes - - Returns: text with spaces around punctuation marks - """ - text = text.replace('--', '-') - space_right = '!?:;,.-()*+-/<=>@^_' - space_both = '-()*+-/<=>@^_' - - for punct in space_right: - text = text.replace(punct, punct + ' ') - for punct in space_both: - text = text.replace(punct, ' ' + punct + ' ') - - # remove extra space - text = re.sub(r' +', ' ', text) - return text - - def get_asr_model(asr_model: 
ASRModel): """ Returns ASR Model @@ -249,12 +233,36 @@ def parse_args(): ) parser.add_argument("--verbose", help="print info for debugging", action="store_true") parser.add_argument("--remove_punct", help="remove punctuation before calculating cer", action="store_true") + parser.add_argument( + "--no_punct_pre_process", help="set to True to disable punctuation pre processing", action="store_true" + ) parser.add_argument( "--no_punct_post_process", help="set to True to disable punctuation post processing", action="store_true" ) return parser.parse_args() +def _normalize_line(normalizer: NormalizerWithAudio, line: str, asr_model: ASRModel = None): + line = json.loads(line) + audio = line['audio_filepath'] + if 'transcript' in line: + transcript = line['transcript'] + else: + transcript = asr_model.transcribe([audio])[0] + + normalized_texts = normalizer.normalize( + text=line['text'], + verbose=args.verbose, + n_tagged=args.n_tagged, + punct_pre_process=not args.no_punct_pre_process, + punct_post_process=not args.no_punct_post_process, + ) + normalized_text, cer = normalizer.select_best_match(normalized_texts, transcript, args.verbose, args.remove_punct) + line['nemo_normalized'] = normalized_text + line['CER_nemo_normalized'] = cer + return line + + def normalize_manifest(args): """ Args: @@ -265,26 +273,15 @@ def normalize_manifest(args): asr_model = None with open(args.audio_data, 'r') as f: with open(manifest_out, 'w') as f_out: - for line in tqdm(f): - line = json.loads(line) - audio = line['audio_filepath'] - if 'transcript' in line: - transcript = line['transcript'] - else: - if asr_model is None: - asr_model = get_asr_model(args.model) - transcript = asr_model.transcribe([audio])[0] - normalized_texts = normalizer.normalize( - text=line['text'], - verbose=args.verbose, - n_tagged=args.n_tagged, - punct_post_process=not args.no_punct_post_process, - ) - normalized_text, cer = normalizer.select_best_match( - normalized_texts, transcript, args.verbose, 
args.remove_punct - ) - line['nemo_normalized'] = normalized_text - line['CER_nemo_normalized'] = cer + lines = f.readlines() + first_line = json.loads(lines[0]) + if 'transcript' not in first_line: + asr_model = get_asr_model(args.model) + normalized_lines = Parallel(n_jobs=-1)( + delayed(_normalize_line)(normalizer, line, asr_model) for line in lines + ) + + for line in normalized_lines: f_out.write(json.dumps(line, ensure_ascii=False) + '\n') print(f'Normalized version saved at {manifest_out}') @@ -302,6 +299,7 @@ def normalize_manifest(args): text=args.text, verbose=args.verbose, n_tagged=args.n_tagged, + punct_pre_process=not args.no_punct_pre_process, punct_post_process=not args.no_punct_post_process, ) if args.audio_data: diff --git a/nemo_text_processing/text_normalization/taggers/cardinal.py b/nemo_text_processing/text_normalization/taggers/cardinal.py index e578b0d31752..2d01a49357e3 100644 --- a/nemo_text_processing/text_normalization/taggers/cardinal.py +++ b/nemo_text_processing/text_normalization/taggers/cardinal.py @@ -104,7 +104,7 @@ def get_serial_graph(self): letter_pronunciation = pynini.string_map(load_labels(get_abs_path("data/letter_pronunciation.tsv"))) alpha |= letter_pronunciation - delimiter = insert_space | pynini.cross("-", " ") + delimiter = insert_space | pynini.cross("-", " ") | pynini.cross("/", " ") letter_num = pynini.closure(alpha + delimiter, 1) + num_graph num_letter = pynini.closure(num_graph + delimiter, 1) + alpha next_alpha_or_num = pynini.closure(delimiter + (alpha | num_graph)) diff --git a/nemo_text_processing/text_normalization/taggers/date.py b/nemo_text_processing/text_normalization/taggers/date.py index 96bba2c25b66..8c7c6a713afe 100644 --- a/nemo_text_processing/text_normalization/taggers/date.py +++ b/nemo_text_processing/text_normalization/taggers/date.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo_text_processing.text_normalization.data_loader_utils import get_abs_path +from nemo_text_processing.text_normalization.data_loader_utils import get_abs_path, load_labels from nemo_text_processing.text_normalization.graph_utils import ( NEMO_CHAR, NEMO_DIGIT, @@ -96,7 +96,13 @@ def _get_year_graph(deterministic: bool = True): 2000 - 2009 will be verbalized as two thousand. """ graph = get_hundreds_graph(deterministic) - graph = (pynini.union("1", "2") + NEMO_DIGIT + NEMO_DIGIT + NEMO_DIGIT + pynini.closure("s", 0, 1)) @ graph + graph = ( + pynini.union("1", "2") + + NEMO_DIGIT + + NEMO_DIGIT + + NEMO_DIGIT + + pynini.closure(pynini.cross(" s", "s") | "s", 0, 1) + ) @ graph return graph @@ -128,8 +134,14 @@ def __init__(self, cardinal: GraphFst, deterministic: bool): ) + pynini.closure(pynutil.delete("."), 0, 1) month_graph |= month_abbr_graph - month_numbers_graph = pynini.string_file(get_abs_path("data/months/numbers.tsv")).optimize() + # to support all caps names + names_all_caps = [[x[0].upper()] for x in load_labels(get_abs_path("data/months/names.tsv"))] + abbr_all_caps = [(x.upper(), y) for x, y in load_labels(get_abs_path("data/months/abbr.tsv"))] + month_graph |= pynini.string_map(names_all_caps) | ( + pynini.string_map(abbr_all_caps) + pynini.closure(pynutil.delete("."), 0, 1) + ) + month_numbers_graph = pynini.string_file(get_abs_path("data/months/numbers.tsv")).optimize() cardinal_graph = cardinal.graph_hundred_component_at_least_one_none_zero_digit year_graph = _get_year_graph(deterministic) diff --git a/nemo_text_processing/text_normalization/taggers/fraction.py b/nemo_text_processing/text_normalization/taggers/fraction.py index 9524d74228b9..0feb5ce1dcb9 100644 --- a/nemo_text_processing/text_normalization/taggers/fraction.py +++ b/nemo_text_processing/text_normalization/taggers/fraction.py @@ -15,16 +15,36 @@ from nemo_text_processing.text_normalization.graph_utils import GraphFst +try: + import pynini + from pynini.lib import pynutil + + 
PYNINI_AVAILABLE = True +except (ModuleNotFoundError, ImportError): + PYNINI_AVAILABLE = False + class FractionFst(GraphFst): """ Finite state transducer for classifying fraction + "23 4/5" -> + tokens { fraction { integer: "twenty three" numerator: "four" denominator: "five" } } Args: deterministic: if True will provide a single transduction option, for False multiple transduction are generated (used for audio-based normalization) """ - def __init__(self, deterministic: bool = True): + def __init__(self, cardinal, deterministic: bool = True): super().__init__(name="fraction", kind="classify", deterministic=deterministic) - # integer_part # numerator # denominator + cardinal_graph = cardinal.graph + + integer = pynutil.insert("integer: \"") + cardinal_graph + pynutil.insert("\"") + pynini.accep(" ") + numerator = ( + pynutil.insert("numerator: \"") + cardinal_graph + (pynini.cross("/", "\" ") | pynini.cross(" / ", "\" ")) + ) + denominator = pynutil.insert("denominator: \"") + cardinal_graph + pynutil.insert("\"") + + self.graph = pynini.closure(integer, 0, 1) + numerator + denominator + final_graph = self.add_tokens(self.graph) + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/text_normalization/taggers/measure.py b/nemo_text_processing/text_normalization/taggers/measure.py index b32fd27dc554..a0475c5b10cb 100644 --- a/nemo_text_processing/text_normalization/taggers/measure.py +++ b/nemo_text_processing/text_normalization/taggers/measure.py @@ -43,11 +43,12 @@ class MeasureFst(GraphFst): Args: cardinal: CardinalFst decimal: DecimalFst + fraction: FractionFst deterministic: if True will provide a single transduction option, for False multiple transduction are generated (used for audio-based normalization) """ - def __init__(self, cardinal: GraphFst, decimal: GraphFst, deterministic: bool = True): + def __init__(self, cardinal: GraphFst, decimal: GraphFst, fraction: GraphFst, deterministic: bool = True): super().__init__(name="measure", 
kind="classify", deterministic=deterministic) cardinal_graph = cardinal.graph @@ -144,6 +145,10 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst, deterministic: bool = + pynutil.insert(" } preserve_order: true") ) + subgraph_fraction = ( + pynutil.insert("fraction { ") + fraction.graph + delete_space + pynutil.insert(" } ") + unit_plural + ) + final_graph = ( subgraph_decimal | subgraph_cardinal @@ -151,6 +156,7 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst, deterministic: bool = | alpha_dash_cardinal | decimal_dash_alpha | alpha_dash_decimal + | subgraph_fraction ) final_graph = self.add_tokens(final_graph) self.fst = final_graph.optimize() diff --git a/nemo_text_processing/text_normalization/taggers/ordinal.py b/nemo_text_processing/text_normalization/taggers/ordinal.py index 2df87d3b5c90..49e452729dcc 100644 --- a/nemo_text_processing/text_normalization/taggers/ordinal.py +++ b/nemo_text_processing/text_normalization/taggers/ordinal.py @@ -39,9 +39,10 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True): super().__init__(name="ordinal", kind="classify", deterministic=deterministic) cardinal_graph = cardinal.graph + endings = ["rd", "th", "st", "nd"] + endings += [x.upper() for x in endings] self.graph = ( - (pynini.closure(NEMO_DIGIT | pynini.accep(",")) + pynutil.delete(pynini.union("rd", "th", "st", "nd"))) - @ cardinal_graph + (pynini.closure(NEMO_DIGIT | pynini.accep(",")) + pynutil.delete(pynini.union(*endings))) @ cardinal_graph ).optimize() final_graph = pynutil.insert("integer: \"") + self.graph + pynutil.insert("\"") final_graph = self.add_tokens(final_graph) diff --git a/nemo_text_processing/text_normalization/taggers/roman.py b/nemo_text_processing/text_normalization/taggers/roman.py new file mode 100644 index 000000000000..136d0fb52063 --- /dev/null +++ b/nemo_text_processing/text_normalization/taggers/roman.py @@ -0,0 +1,61 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from nemo_text_processing.text_normalization.data_loader_utils import load_labels +from nemo_text_processing.text_normalization.graph_utils import GraphFst, get_abs_path, insert_space +from nemo_text_processing.text_normalization.taggers.cardinal import CardinalFst + +try: + import pynini + from pynini.lib import pynutil + + PYNINI_AVAILABLE = True +except (ModuleNotFoundError, ImportError): + PYNINI_AVAILABLE = False + + +class RomanFst(GraphFst): + """ + Finite state transducer for classifying roman numerals + e.g. 
xvii -> tokens { roman { integer: "seventeen" } } + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="roman", kind="classify", deterministic=deterministic) + + def _load_roman(file: str): + roman = load_labels(get_abs_path(file)) + roman_numerals = [(x, y) for x, y in roman] + [(x.upper(), y) for x, y in roman] + return pynini.string_map(roman_numerals) + + cardinal_graph = CardinalFst(deterministic=True).graph + digit_teen = _load_roman("data/roman/digit_teen.tsv") @ cardinal_graph + ties = _load_roman("data/roman/ties.tsv") @ cardinal_graph + hundreds = _load_roman("data/roman/hundreds.tsv") @ cardinal_graph + + graph = ( + (ties | digit_teen | hundreds) + | (ties + insert_space + digit_teen) + | (hundreds + pynini.closure(insert_space + ties, 0, 1) + pynini.closure(insert_space + digit_teen, 0, 1)) + ).optimize() + + graph = pynutil.insert("integer: \"") + graph + pynutil.insert("\"") + graph = self.add_tokens(graph) + self.fst = graph.optimize() diff --git a/nemo_text_processing/text_normalization/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/taggers/tokenize_and_classify.py index 9e15f12ad937..1bef666a11f5 100644 --- a/nemo_text_processing/text_normalization/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/text_normalization/taggers/tokenize_and_classify.py @@ -18,10 +18,12 @@ from nemo_text_processing.text_normalization.taggers.date import DateFst from nemo_text_processing.text_normalization.taggers.decimal import DecimalFst from nemo_text_processing.text_normalization.taggers.electronic import ElectronicFst +from nemo_text_processing.text_normalization.taggers.fraction import FractionFst from nemo_text_processing.text_normalization.taggers.measure import MeasureFst from 
nemo_text_processing.text_normalization.taggers.money import MoneyFst from nemo_text_processing.text_normalization.taggers.ordinal import OrdinalFst from nemo_text_processing.text_normalization.taggers.punctuation import PunctuationFst +from nemo_text_processing.text_normalization.taggers.roman import RomanFst from nemo_text_processing.text_normalization.taggers.telephone import TelephoneFst from nemo_text_processing.text_normalization.taggers.time import TimeFst from nemo_text_processing.text_normalization.taggers.whitelist import WhiteListFst @@ -59,8 +61,10 @@ def __init__(self, input_case: str, deterministic: bool = True): decimal = DecimalFst(cardinal=cardinal, deterministic=deterministic) decimal_graph = decimal.fst + fraction = FractionFst(deterministic=deterministic, cardinal=cardinal) + fraction_graph = fraction.fst - measure = MeasureFst(cardinal=cardinal, decimal=decimal, deterministic=deterministic) + measure = MeasureFst(cardinal=cardinal, decimal=decimal, fraction=fraction, deterministic=deterministic) measure_graph = measure.fst date_graph = DateFst(cardinal=cardinal, deterministic=deterministic).fst word_graph = WordFst(deterministic=deterministic).fst @@ -82,9 +86,15 @@ def __init__(self, input_case: str, deterministic: bool = True): | pynutil.add_weight(money_graph, 1.1) | pynutil.add_weight(telephone_graph, 1.1) | pynutil.add_weight(electonic_graph, 1.1) + | pynutil.add_weight(fraction_graph, 1.1) | pynutil.add_weight(word_graph, 100) ) + if not deterministic: + roman_graph = RomanFst(deterministic=deterministic).fst + # the weight matches the word_graph weight for "I" cases in long sentences with multiple semiotic tokens + classify |= pynutil.add_weight(roman_graph, 100) + punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=1.1) + pynutil.insert(" }") token = pynutil.insert("tokens { ") + classify + pynutil.insert(" }") token_plus_punct = ( diff --git a/nemo_text_processing/text_normalization/verbalizers/date.py 
b/nemo_text_processing/text_normalization/verbalizers/date.py index 7d7f0479f929..08195927df5e 100644 --- a/nemo_text_processing/text_normalization/verbalizers/date.py +++ b/nemo_text_processing/text_normalization/verbalizers/date.py @@ -45,19 +45,21 @@ def __init__(self, ordinal: GraphFst, deterministic: bool = True): super().__init__(name="date", kind="verbalize", deterministic=deterministic) month = pynini.closure(NEMO_NOT_QUOTE, 1) + day_cardinal = ( + pynutil.delete("day:") + + delete_space + + pynutil.delete("\"") + + pynini.closure(NEMO_NOT_QUOTE, 1) + + pynutil.delete("\"") + ) + day = day_cardinal @ ordinal.suffix if not deterministic: month |= pynutil.insert(" of ") + month + day |= day_cardinal month = pynutil.delete("month:") + delete_space + pynutil.delete("\"") + month + pynutil.delete("\"") - day = ( - pynutil.delete("day:") - + delete_space - + pynutil.delete("\"") - + pynini.closure(NEMO_NOT_QUOTE, 1) - + pynutil.delete("\"") - ) @ ordinal.suffix year = ( pynutil.delete("year:") + delete_space diff --git a/nemo_text_processing/text_normalization/verbalizers/fraction.py b/nemo_text_processing/text_normalization/verbalizers/fraction.py index 74a1a844d12b..d49bd4bf5fc0 100644 --- a/nemo_text_processing/text_normalization/verbalizers/fraction.py +++ b/nemo_text_processing/text_normalization/verbalizers/fraction.py @@ -13,12 +13,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo_text_processing.text_normalization.graph_utils import GraphFst +from nemo_text_processing.text_normalization.graph_utils import NEMO_NOT_QUOTE, GraphFst, insert_space +from nemo_text_processing.text_normalization.verbalizers.ordinal import OrdinalFst + +try: + import pynini + from pynini.lib import pynutil + + PYNINI_AVAILABLE = True +except (ModuleNotFoundError, ImportError): + PYNINI_AVAILABLE = False class FractionFst(GraphFst): """ Finite state transducer for verbalizing fraction + e.g. 
tokens { fraction { integer: "twenty three" numerator: "four" denominator: "five" } } -> + twenty three and four fifths Args: deterministic: if True will provide a single transduction option, @@ -27,3 +38,35 @@ class FractionFst(GraphFst): def __init__(self, deterministic: bool = True): super().__init__(name="fraction", kind="verbalize", deterministic=deterministic) + suffix = OrdinalFst().suffix + + integer = pynutil.delete("integer: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\" ") + numerator = pynutil.delete("numerator: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\" ") + numerator_one = pynutil.delete("numerator: \"") + pynini.accep("one") + pynutil.delete("\" ") + denominator = pynutil.delete("denominator: \"") + ( + pynini.closure(NEMO_NOT_QUOTE) @ suffix | pynini.cross('four', 'quarter') + ) + conjunction = pynutil.insert("and ") + if not deterministic: + conjunction = pynini.closure(conjunction, 0, 1) + + integer = pynini.closure(integer + insert_space + conjunction, 0, 1) + + denominator_half = pynini.cross("numerator: \"one\" denominator: \"two\"", "a half") + denominator_one_two = pynini.cross("denominator: \"one\"", "over one") | pynini.cross( + "denominator: \"two\"", "halves" + ) + fraction_default = pynutil.add_weight( + numerator + insert_space + denominator + pynutil.insert("s") + pynutil.delete("\""), 0.001 + ) + fraction_with_one = pynutil.add_weight( + numerator_one + insert_space + denominator + pynutil.delete("\""), 0.0001 + ) + + graph = integer + denominator_half | (fraction_with_one | fraction_default) + graph |= pynini.cross("numerator: \"one\" denominator: \"two\"", "one half") + graph |= (numerator | numerator_one) + insert_space + denominator_one_two + + self.graph = graph + delete_tokens = self.delete_tokens(self.graph) + self.fst = delete_tokens.optimize() diff --git a/nemo_text_processing/text_normalization/verbalizers/measure.py b/nemo_text_processing/text_normalization/verbalizers/measure.py index 
0718f42374b8..3937efe26c48 100644 --- a/nemo_text_processing/text_normalization/verbalizers/measure.py +++ b/nemo_text_processing/text_normalization/verbalizers/measure.py @@ -34,11 +34,12 @@ class MeasureFst(GraphFst): Args: decimal: DecimalFst cardinal: CardinalFst + fraction: FractionFst deterministic: if True will provide a single transduction option, for False multiple transduction are generated (used for audio-based normalization) """ - def __init__(self, decimal: GraphFst, cardinal: GraphFst, deterministic: bool = True): + def __init__(self, decimal: GraphFst, cardinal: GraphFst, fraction: GraphFst, deterministic: bool = True): super().__init__(name="measure", kind="verbalize", deterministic=deterministic) optional_sign = cardinal.optional_sign unit = pynutil.delete("units: \"") + pynini.closure(NEMO_CHAR - " ", 1) + pynutil.delete("\"") + delete_space @@ -61,7 +62,12 @@ def __init__(self, decimal: GraphFst, cardinal: GraphFst, deterministic: bool = + delete_space + pynutil.delete("}") ) - graph = (graph_cardinal | graph_decimal) + delete_space + insert_space + unit + + graph_fraction = ( + pynutil.delete("fraction {") + delete_space + fraction.graph + delete_space + pynutil.delete("}") + ) + + graph = (graph_cardinal | graph_decimal | graph_fraction) + delete_space + insert_space + unit # SH adds "preserve_order: true" by default preserve_order = pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space diff --git a/nemo_text_processing/text_normalization/verbalizers/roman.py b/nemo_text_processing/text_normalization/verbalizers/roman.py new file mode 100644 index 000000000000..bb42f3c52294 --- /dev/null +++ b/nemo_text_processing/text_normalization/verbalizers/roman.py @@ -0,0 +1,46 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo_text_processing.text_normalization.graph_utils import NEMO_NOT_QUOTE, GraphFst +from nemo_text_processing.text_normalization.verbalizers.ordinal import OrdinalFst + +try: + import pynini + from pynini.lib import pynutil + + PYNINI_AVAILABLE = True +except (ModuleNotFoundError, ImportError): + PYNINI_AVAILABLE = False + + +class RomanFst(GraphFst): + """ + Finite state transducer for verbalizing roman numerals + e.g. tokens { roman { integer: "one" } } -> one + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="roman", kind="verbalize", deterministic=deterministic) + suffix = OrdinalFst().suffix + + integer = pynini.closure(NEMO_NOT_QUOTE) + integer |= pynini.closure(pynutil.insert("the "), 0, 1) + integer @ suffix + graph = pynutil.delete("integer: \"") + integer + pynutil.delete("\"") + delete_tokens = self.delete_tokens(graph) + self.fst = delete_tokens.optimize() diff --git a/nemo_text_processing/text_normalization/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/verbalizers/verbalize.py index 04e01e8a0d5a..b14468a8f088 100644 --- a/nemo_text_processing/text_normalization/verbalizers/verbalize.py +++ b/nemo_text_processing/text_normalization/verbalizers/verbalize.py @@ -18,9 +18,11 @@ from nemo_text_processing.text_normalization.verbalizers.date import DateFst from nemo_text_processing.text_normalization.verbalizers.decimal import 
DecimalFst from nemo_text_processing.text_normalization.verbalizers.electronic import ElectronicFst +from nemo_text_processing.text_normalization.verbalizers.fraction import FractionFst from nemo_text_processing.text_normalization.verbalizers.measure import MeasureFst from nemo_text_processing.text_normalization.verbalizers.money import MoneyFst from nemo_text_processing.text_normalization.verbalizers.ordinal import OrdinalFst +from nemo_text_processing.text_normalization.verbalizers.roman import RomanFst from nemo_text_processing.text_normalization.verbalizers.telephone import TelephoneFst from nemo_text_processing.text_normalization.verbalizers.time import TimeFst from nemo_text_processing.text_normalization.verbalizers.whitelist import WhiteListFst @@ -45,14 +47,17 @@ def __init__(self, deterministic: bool = True): decimal_graph = decimal.fst ordinal = OrdinalFst(deterministic=deterministic) ordinal_graph = ordinal.fst + fraction = FractionFst(deterministic=deterministic) + fraction_graph = fraction.fst telephone_graph = TelephoneFst(deterministic=deterministic).fst electronic_graph = ElectronicFst(deterministic=deterministic).fst - measure = MeasureFst(decimal=decimal, cardinal=cardinal, deterministic=deterministic) + measure = MeasureFst(decimal=decimal, cardinal=cardinal, fraction=fraction, deterministic=deterministic) measure_graph = measure.fst time_graph = TimeFst(deterministic=deterministic).fst date_graph = DateFst(ordinal=ordinal, deterministic=deterministic).fst money_graph = MoneyFst(decimal=decimal, deterministic=deterministic).fst whitelist_graph = WhiteListFst(deterministic=deterministic).fst + graph = ( time_graph | date_graph @@ -63,7 +68,12 @@ def __init__(self, deterministic: bool = True): | cardinal_graph | telephone_graph | electronic_graph + | fraction_graph | whitelist_graph ) + if not deterministic: + roman_graph = RomanFst(deterministic=deterministic).fst + graph |= roman_graph + self.fst = graph diff --git 
a/tests/nemo_text_processing/data_text_normalization/test_cases_cardinal.txt b/tests/nemo_text_processing/data_text_normalization/test_cases_cardinal.txt index 9dcb6805db0f..3d505b2ce447 100644 --- a/tests/nemo_text_processing/data_text_normalization/test_cases_cardinal.txt +++ b/tests/nemo_text_processing/data_text_normalization/test_cases_cardinal.txt @@ -15,3 +15,4 @@ C24~C two four W2s~W two s 1-4-a-b-1-5~one four a b one five b-c-1-5-b-s-b~b c one five b s b +1/f-4s~one f four s diff --git a/tests/nemo_text_processing/data_text_normalization/test_cases_date.txt b/tests/nemo_text_processing/data_text_normalization/test_cases_date.txt index 35b33cd1a04d..b5f95f466959 100644 --- a/tests/nemo_text_processing/data_text_normalization/test_cases_date.txt +++ b/tests/nemo_text_processing/data_text_normalization/test_cases_date.txt @@ -1,6 +1,7 @@ july 25 2012~july twenty fifth twenty twelve jul 25 2012~july twenty fifth twenty twelve 1980s~nineteen eighties +1980 s~nineteen eighties 25 july 2012~the twenty fifth of july twenty twelve 25 jul 2012~the twenty fifth of july twenty twelve 22 july 2012~the twenty second of july twenty twelve @@ -28,3 +29,4 @@ august 23, 2002~august twenty third two thousand two 1910s~nineteen tens 25 sept.~the twenty fifth of september 1000~one thousand +SEPT. 
15TH~september fifteenth diff --git a/tests/nemo_text_processing/data_text_normalization/test_cases_fraction.txt b/tests/nemo_text_processing/data_text_normalization/test_cases_fraction.txt new file mode 100644 index 000000000000..15d32caa9b55 --- /dev/null +++ b/tests/nemo_text_processing/data_text_normalization/test_cases_fraction.txt @@ -0,0 +1,11 @@ +1/2007~one two thousand seventh +12639/12640~twelve thousand six hundred thirty nine twelve thousand six hundred fortieths +2/4~two quarters +1/4~one quarter +31/32~thirty one thirty seconds +22/3~twenty two thirds +1/3~one third +142/1~one hundred forty two over one +1/2~one half +2 1/2~two and a half +1795 / 1805~one thousand seven hundred ninety five one thousand eight hundred fifths \ No newline at end of file diff --git a/tests/nemo_text_processing/data_text_normalization/test_cases_measure.txt b/tests/nemo_text_processing/data_text_normalization/test_cases_measure.txt index 6f395da8679f..3279e6863cc0 100644 --- a/tests/nemo_text_processing/data_text_normalization/test_cases_measure.txt +++ b/tests/nemo_text_processing/data_text_normalization/test_cases_measure.txt @@ -12,3 +12,4 @@ covid-19.5~covid nineteen point five covid-19~covid nineteen a 4-kilogram bag~a four kilogram bag 7.2-millimeter bullet~seven point two millimeter bullet +4 1/2 lbs~four and a half pounds diff --git a/tests/nemo_text_processing/data_text_normalization/test_cases_normalize_with_audio.txt b/tests/nemo_text_processing/data_text_normalization/test_cases_normalize_with_audio.txt index 800a46038d0f..436bff19205f 100644 --- a/tests/nemo_text_processing/data_text_normalization/test_cases_normalize_with_audio.txt +++ b/tests/nemo_text_processing/data_text_normalization/test_cases_normalize_with_audio.txt @@ -49,7 +49,7 @@ It seemed to her that the jacket Oswald wore was darker than Commission Exhibit It seemed to her that the jacket Oswald wore was darker than Commission Exhibit number one hundred and sixty two. 
It seemed to her that the jacket Oswald wore was darker than Commission Exhibit number one six two. ~"Father, let this cup pass." He prayed--was heard. What cup was it that passed away from him? Sure not the death-cup, now filled to the brim! There was no quailing in the awful word; He still was king of kings, of lords the lord:-- He feared lest, in the suffering waste and grim, His faith might grow too faint and sickly dim." -"Father, let this cup pass." He prayed--was heard. What cup was it that passed away from him? Sure not the death-cup, now filled to the brim! There was no quailing in the awful word; He still was king of kings, of lords the lord:-- He feared lest, in the suffering waste and grim, His faith might grow too faint and sickly dim." +"Father, let this cup pass." He prayed -- was heard. What cup was it that passed away from him? Sure not the death-cup, now filled to the brim! There was no quailing in the awful word; He still was king of kings, of lords the lord: -- He feared lest, in the suffering waste and grim, His faith might grow too faint and sickly dim." ~1970-2010 nineteen seventy to twenty ten one thousand nine seventy to two thousand ten @@ -97,4 +97,11 @@ one dollar and zero one cents ~$17.31 seventeen dollars and thirty one cent seventeen dollars and thirty one cents -seventeen point three one dollars \ No newline at end of file +seventeen point three one dollars +~25.] +two five.] +twenty five.] 
+~Francis I--test +Francis the first -- test +Francis one -- test +Francis first -- test \ No newline at end of file diff --git a/tests/nemo_text_processing/test_boundary.py b/tests/nemo_text_processing/test_boundary.py index 75760fd9ebcf..907423005091 100644 --- a/tests/nemo_text_processing/test_boundary.py +++ b/tests/nemo_text_processing/test_boundary.py @@ -34,6 +34,6 @@ def test_norm(self, test_input, expected): pred = self.normalizer.normalize(test_input, verbose=False) assert pred == expected pred_non_deterministic = self.normalizer_with_audio.normalize( - test_input, n_tagged=100, punct_post_process=False + test_input, n_tagged=100, punct_pre_process=False, punct_post_process=False ) assert expected in pred_non_deterministic diff --git a/tests/nemo_text_processing/test_fraction.py b/tests/nemo_text_processing/test_fraction.py new file mode 100644 index 000000000000..24006c43cce5 --- /dev/null +++ b/tests/nemo_text_processing/test_fraction.py @@ -0,0 +1,38 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import pytest +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer +from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio +from parameterized import parameterized +from utils import PYNINI_AVAILABLE, parse_test_case_file + + +class TestFraction: + normalizer = Normalizer(input_case="cased") if PYNINI_AVAILABLE else None + normalizer_with_audio = NormalizerWithAudio(input_case='cased') if PYNINI_AVAILABLE else None + + @parameterized.expand(parse_test_case_file('data_text_normalization/test_cases_fraction.txt')) + @pytest.mark.skipif( + not PYNINI_AVAILABLE, reason="`pynini` not installed, please install via nemo_text_processing/setup.sh" + ) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_norm(self, test_input, expected): + pred = self.normalizer.normalize(test_input, verbose=False) + assert pred == expected + pred_non_deterministic = self.normalizer_with_audio.normalize(test_input, n_tagged=100) + assert expected in pred_non_deterministic