Skip to content

Commit

Permalink
Merge branch 'r1.0.0rc1' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
okuchaiev committed Feb 26, 2021
2 parents b43a20a + 1f51f83 commit efaab9e
Showing 1 changed file with 3 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def eval_epoch_end(self, outputs, mode):
translations = list(itertools.chain(*[x['translations'] for x in outputs]))
ground_truths = list(itertools.chain(*[x['ground_truths'] for x in outputs]))

- detokenizer = self.get_detokenizer(self.tgt_language)
+ detokenizer = self.get_detokenizer(self.src_language, self.tgt_language)

translations = [detokenizer.detokenize(sent.split()) for sent in translations]
ground_truths = [detokenizer.detokenize(sent.split()) for sent in ground_truths]
Expand Down Expand Up @@ -357,7 +357,7 @@ def get_normalizer_and_tokenizer(self, source_lang, target_lang):
"""
Returns a normalizer and tokenizer for the source language.
"""
- if source_lang in ['en', 'ja'] and target_lang in ['en', 'ja']:
+ if (source_lang == 'en' and target_lang == 'ja') or (source_lang == 'ja' and target_lang == 'en'):
normalizer = MosesPunctNormalizer(
lang=source_lang, pre_replace_unicode_punct=True, post_remove_control_chars=True
)
Expand All @@ -375,7 +375,7 @@ def get_detokenizer(self, source_lang, target_lang):
"""
Returns a detokenizer for a specific target language.
"""
- if source_lang in ['en', 'ja'] and target_lang in ['en', 'ja']:
+ if (source_lang == 'en' and target_lang == 'ja') or (source_lang == 'ja' and target_lang == 'en'):
detokenizer = EnJaDetokenizer(target_lang)
elif target_lang == 'zh':
detokenizer = ChineseDetokenizer()
Expand Down

0 comments on commit efaab9e

Please sign in to comment.