Merge pull request RVC-Boss#812 from KamioRinn/Optimize-English-G2P

修复中文标点在英文中的问题，添加所有格匹配
KakaruHayate · Mar 19, 2024 · b451372
2 parents 06ec491 + e3d3c32
commit b451372
Showing 1 changed file with 19 additions and 5 deletions.
diff --git a/GPT_SoVITS/text/english.py b/GPT_SoVITS/text/english.py
@@ -90,7 +90,7 @@
 
 
 def replace_phs(phs):
-    rep_map = {";": ",", ":": ",", "'": "-", '"': "-"}
+    rep_map = {"'": "-"}
     phs_new = []
     for ph in phs:
         if ph in symbols:
@@ -193,8 +193,18 @@ def get_dict():
 
 def text_normalize(text):
     # todo: eng text normalize
-    # 适配 g2p_en 标点
-    return text.replace(";", ",").replace(":", ",").replace('"', "'")
+    # 适配中文及 g2p_en 标点
+    rep_map = {
+        "[;:：，；]": ",",
+        '["’]': "'",
+        "。": ".",
+        "！": "!",
+        "？": "?",
+    }
+    for p, r in rep_map.items():
+        text = re.sub(p, r, text)
+
+    return text
 
 
 class en_G2p(G2p):
@@ -219,6 +229,12 @@ def predict(self, word):
         if (len(word) <= 3):
             return [phone for w in word for phone in self(w)]
 
+        # 尝试分离所有格
+        if re.match(r"^([a-z]+)('s)$", word):
+            phone = self(word[:-2])
+            phone.extend(['Z'])
+            return phone
+
         # 尝试进行分词，应对复合词
         comps = wordsegment.segment(word.lower())
 
@@ -234,8 +250,6 @@ def predict(self, word):
 
 
 def g2p(text):
-    text = text_normalize(text)
-
     # g2p_en 整段推理，剔除不存在的arpa返回
     phone_list = _g2p(text)
     phones = [ph if ph != "<unk>" else "UNK" for ph in phone_list if ph not in [" ", "<pad>", "UW", "</s>", "<s>"]]