# Patch summary (free text ahead of the first "diff --git" header).
#
#   * pinyin/model.py: visitTonedCharacter now returns the result of
#     visitText instead of discarding it.  Adds unformatreadingfordisplay()
#     and UnformatReadingForDisplayVisitor, which strips Text tokens, drops
#     those that are empty after stripping, and passes Pinyin and
#     TonedCharacter tokens through unchanged.
#   * pinyin/tests/model.py: adds testSimpleTonedCharacter and the
#     UnformatReadingForDisplayTest case.
#   * pinyin/tests/updatergraph.py: TestUpdaterGraphUpdaters now derives from
#     object rather than unittest.TestCase, and gains a generator-style test
#     (it yields a callable plus its argument) for colouring from a reading.
#   * pinyin/updatergraph.py: adds unpreparetokens(); reading2dictreading now
#     goes through unpreparetokens() and model.unformatreadingfordisplay();
#     the two "expression" updater entries use tuples instead of lists.
#
# NOTE(review): this copy was rebuilt from a whitespace-collapsed rendering.
# Hunk line counts were checked against the @@ headers, but indentation, the
# split point of wrapped context lines (e.g. the "config = dict(" context in
# pinyin/tests/updatergraph.py) and the exact contents of string literals
# (runs of whitespace, possibly markup in the expected "color" value) are
# assumptions -- verify against the original commit before applying.

diff --git a/pinyin/model.py b/pinyin/model.py
index 572b0de..0ad30db 100644
--- a/pinyin/model.py
+++ b/pinyin/model.py
@@ -558,7 +558,26 @@ def visitPinyin(self, pinyin):
 
     def visitTonedCharacter(self, tonedcharacter):
         # Treat characters like normal text
-        self.visitText(tonedcharacter)
+        return self.visitText(tonedcharacter)
+
+"""
+Attempts to invert formatreadingfordisplay. For use when recovering
+a clean set of tokens from user input.
+"""
+def unformatreadingfordisplay(words):
+    visitor = UnformatReadingForDisplayVisitor()
+    return [word.concatmap(visitor) for word in words]
+
+class UnformatReadingForDisplayVisitor(TokenVisitor):
+    def visitText(self, text):
+        stripped = text.strip()
+        return len(stripped) > 0 and [Text(stripped)] or []
+
+    def visitPinyin(self, pinyin):
+        return [pinyin]
+
+    def visitTonedCharacter(self, tonedcharacter):
+        return [tonedcharacter]
 
 """
 Makes some tokens that faithfully represent the given characters
diff --git a/pinyin/tests/model.py b/pinyin/tests/model.py
index 1a60f33..6f0755e 100644
--- a/pinyin/tests/model.py
+++ b/pinyin/tests/model.py
@@ -355,6 +355,9 @@ def testSimpleErhuaSingleton(self):
     def testSimpleErhua(self):
         self.assertEquals(self.format([Word(Pinyin.parse(u"hen3"), Pinyin.parse(u"ma5"), Pinyin.parse("r5"))]), u"hen3 mar")
 
+    def testSimpleTonedCharacter(self):
+        self.assertEquals(self.format([Word(TonedCharacter(u"塊", 1))]), u"塊")
+
     def testErhuaNextToText(self):
         self.assertEquals(self.format([Word(Text("not pinyin"), Pinyin.parse(u"r5"))]), u"not pinyin r")
 
@@ -368,6 +371,17 @@ def format(self, what):
     def reading(self, what):
         return self.format(englishdict.reading(what))
 
+class UnformatReadingForDisplayTest(unittest.TestCase):
+    def testNoUnformatting(self):
+        self.assertEquals(self.unformat([Word(Text("not pinyin"), Pinyin.parse(u"ni3"), TonedCharacter(u"一", 1))]), u"not pinyinni3一")
+
+    def testUnformatting(self):
+        self.assertEquals(self.unformat([Word(Pinyin.parse(u"ni3"), Text(" "), Pinyin.parse(u"hao3"), Text("\ttons more junk!! "), )]), u"ni3hao3tons more junk!!")
+
+    # Test helpers
+    def unformat(self, what):
+        return flatten(unformatreadingfordisplay(what))
+
 class PinyinTonifierTest(unittest.TestCase):
     def testEasy(self):
         self.assertEquals(PinyinTonifier().tonify(u"Han4zi4 bu4 mie4, Zhong1guo2 bi4 wang2!"),
diff --git a/pinyin/tests/updatergraph.py b/pinyin/tests/updatergraph.py
index eaec521..87ba129 100644
--- a/pinyin/tests/updatergraph.py
+++ b/pinyin/tests/updatergraph.py
@@ -61,7 +61,7 @@ def testPreferUpdatersWhichUseChangedField(self):
             graph = filledgraphforupdaters(updaters, { field : "", other_field : "present!", "output" : "" }, { field : "go" })
             yield assert_equal, graph["output"][1](), "from " + field
 
-class TestUpdaterGraphUpdaters(unittest.TestCase):
+class TestUpdaterGraphUpdaters(object):
     def testEverythingEnglish(self):
         config = dict(prefersimptrad = "simp", forceexpressiontobesimptrad = False, tonedisplay = "tonified", hanzimasking = False,
                       emphasisemainmeaning = False, meaningnumbering = "circledChinese", colormeaningnumbers = False, meaningseperator = "lines",
@@ -236,6 +236,15 @@ def testUpdateReadingAndColoredHanziAndAudioWithSandhi(self):
     def testUpdateSimplifiedTraditionalDoesNothingIfSimpTradIdentical(self):
         self.assertProduces({ "expression" : u"鼠" }, {}, { "simp" : u"", "trad" : u"" })
 
+    def testUpdateColoredCharactersFromReading(self):
+        config = dict(colorizedcharactergeneration = True, tonecolors = [u"#ff0000", u"#ffaa00", u"#00aa00", u"#0000ff", u"#545454"])
+
+        for reading in [u"chi1 fan1", u"chī fān", u'chī fān']:
+            yield (lambda reading: self.assertProduces({ "reading" : reading, "expression" : u"吃饭" }, config, {
+                    "reading" : reading,
+                    "color" : u''
+                }), reading)
+
     def assertProduces(self, known, configdict, expected, mediapacks=None, notifierassertion=None):
         if mediapacks == None:
             mediapacks = [media.MediaPack("Test", { "shu1.mp3" : "shu1.mp3", "shu1.ogg" : "shu1.ogg",
diff --git a/pinyin/updatergraph.py b/pinyin/updatergraph.py
index dfa1536..0653aa0 100644
--- a/pinyin/updatergraph.py
+++ b/pinyin/updatergraph.py
@@ -25,6 +25,9 @@
 def preparetokens(config, tokens):
     return model.flatten(tokens, tonify=config.shouldtonify)
 
+def unpreparetokens(flat):
+    return [model.Word(*model.tokenize(striphtml(flat)))]
+
 def generateaudio(notifier, mediamanager, config, dictreading):
     mediapacks = mediamanager.discovermediapacks()
     if len(mediapacks) == 0:
@@ -109,8 +112,8 @@ def __init__(self, notifier, mediamanager, config):
             ("simptrad", self.expression2simptrad, ("expression",)),
             ("trad", lambda x: x["simp"] != x["trad"] and x["trad"] or "", ("simptrad",)),
             ("simp", lambda x: x["simp"] != x["trad"] and x["simp"] or "", ("simptrad",)),
-            ("expression", lambda x: x, ["simp"]),
-            ("expression", lambda x: x, ["trad"]),
+            ("expression", lambda x: x, ("simp",)),
+            ("expression", lambda x: x, ("trad",)),
 
             ("dictmeaningsmwssource", self.expression2dictmeaningsmwssource, ("expression",)),
             ("dictmeaningsmws", fst, ("dictmeaningsmwssource",)),
@@ -266,7 +269,7 @@ def dictreading2reading(self, dictreading):
         return preparetokens(self.config, model.formatreadingfordisplay(dictreading)).lower()
 
     def reading2dictreading(self, reading):
-        return [model.Word(*model.tokenize(reading))]
+        return model.unformatreadingfordisplay(unpreparetokens(reading))
 
     def expressiondictreading2color(self, expression, dictreading):
         return model.flatten(transformations.colorize(self.config.tonecolors, model.tonedcharactersfromreading(expression, dictreading)))