Skip to content

Commit

Permalink
Merge pull request RVC-Boss#812 from KamioRinn/Optimize-English-G2P
Browse files Browse the repository at this point in the history
修复中文标点在英文中的问题,添加所有格匹配
  • Loading branch information
RVC-Boss committed Mar 19, 2024
2 parents 06ec491 + e3d3c32 commit b451372
Showing 1 changed file with 19 additions and 5 deletions.
24 changes: 19 additions & 5 deletions GPT_SoVITS/text/english.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@


def replace_phs(phs):
rep_map = {";": ",", ":": ",", "'": "-", '"': "-"}
rep_map = {"'": "-"}
phs_new = []
for ph in phs:
if ph in symbols:
Expand Down Expand Up @@ -193,8 +193,18 @@ def get_dict():

def text_normalize(text: str) -> str:
    """Map Chinese and unsupported punctuation onto the marks g2p_en handles.

    Semicolons, colons and the fullwidth comma become ",", double quotes
    and the right single quotation mark become "'", and the Chinese full
    stop, exclamation mark and question mark become their ASCII forms.
    All other characters are left untouched.

    Args:
        text: Raw input text, possibly containing Chinese punctuation.

    Returns:
        The text with the punctuation above replaced one-for-one.
    """
    # todo: eng text normalize
    # A literal translation table is used rather than regex substitution so
    # that characters such as "?" are never interpreted as regex syntax.
    # Non-ASCII keys are written as escapes so they survive re-encoding.
    punctuation_table = str.maketrans(
        {
            ";": ",",
            ":": ",",
            "\uff1b": ",",  # fullwidth semicolon
            "\uff1a": ",",  # fullwidth colon
            "\uff0c": ",",  # fullwidth comma
            '"': "'",
            "\u2019": "'",  # right single quotation mark
            "\u3002": ".",  # ideographic full stop
            "\uff01": "!",  # fullwidth exclamation mark
            "\uff1f": "?",  # fullwidth question mark
        }
    )
    return text.translate(punctuation_table)


class en_G2p(G2p):
Expand All @@ -219,6 +229,12 @@ def predict(self, word):
if (len(word) <= 3):
return [phone for w in word for phone in self(w)]

# 尝试分离所有格
if re.match(r"^([a-z]+)('s)$", word):
phone = self(word[:-2])
phone.extend(['Z'])
return phone

# 尝试进行分词,应对复合词
comps = wordsegment.segment(word.lower())

Expand All @@ -234,8 +250,6 @@ def predict(self, word):


def g2p(text):
text = text_normalize(text)

# g2p_en 整段推理,剔除不存在的arpa返回
phone_list = _g2p(text)
phones = [ph if ph != "<unk>" else "UNK" for ph in phone_list if ph not in [" ", "<pad>", "UW", "</s>", "<s>"]]
Expand Down

0 comments on commit b451372

Please sign in to comment.