diff --git a/pinyin/model.py b/pinyin/model.py
index 572b0de..0ad30db 100644
--- a/pinyin/model.py
+++ b/pinyin/model.py
@@ -558,7 +558,26 @@ def visitPinyin(self, pinyin):
def visitTonedCharacter(self, tonedcharacter):
# Treat characters like normal text
- self.visitText(tonedcharacter)
+ return self.visitText(tonedcharacter)
+
+"""
+Attempts to invert formatreadingfordisplay. For use when recovering
+a clean set of tokens from user input.
+"""
+def unformatreadingfordisplay(words):
+    # Run every word through one shared visitor and collect the results in order
+    stripper = UnformatReadingForDisplayVisitor()
+    cleaned = []
+    for word in words:
+        cleaned.append(word.concatmap(stripper))
+    return cleaned
+
+class UnformatReadingForDisplayVisitor(TokenVisitor):
+    # Each visit method returns the list of tokens that should replace the
+    # visited token: text is trimmed (or dropped when nothing is left over),
+    # while every other kind of token is kept exactly as it is.
+    def visitText(self, text):
+        trimmed = text.strip()
+        if len(trimmed) == 0:
+            # Whitespace-only text contributes no tokens at all
+            return []
+        return [Text(trimmed)]
+
+    def visitPinyin(self, pinyin):
+        # Pinyin passes through untouched
+        return [pinyin]
+
+    def visitTonedCharacter(self, tonedcharacter):
+        # Toned characters pass through untouched
+        return [tonedcharacter]
"""
Makes some tokens that faithfully represent the given characters
diff --git a/pinyin/tests/model.py b/pinyin/tests/model.py
index 1a60f33..6f0755e 100644
--- a/pinyin/tests/model.py
+++ b/pinyin/tests/model.py
@@ -355,6 +355,9 @@ def testSimpleErhuaSingleton(self):
def testSimpleErhua(self):
self.assertEquals(self.format([Word(Pinyin.parse(u"hen3"), Pinyin.parse(u"ma5"), Pinyin.parse("r5"))]), u"hen3 mar")
+ def testSimpleTonedCharacter(self):
+     # A word holding a single toned character should format to just that character
+     words = [Word(TonedCharacter(u"塊", 1))]
+     self.assertEquals(self.format(words), u"塊")
+
def testErhuaNextToText(self):
self.assertEquals(self.format([Word(Text("not pinyin"), Pinyin.parse(u"r5"))]), u"not pinyin r")
@@ -368,6 +371,17 @@ def format(self, what):
def reading(self, what):
return self.format(englishdict.reading(what))
+class UnformatReadingForDisplayTest(unittest.TestCase):
+    # Exercises unformatreadingfordisplay by comparing the flattened result
+    # of the cleaned-up words against the expected plain string.
+    def testNoUnformatting(self):
+        # No surrounding whitespace anywhere, so all three tokens come back intact
+        word = Word(Text("not pinyin"), Pinyin.parse(u"ni3"), TonedCharacter(u"一", 1))
+        self.assertEquals(self.unformat([word]), u"not pinyinni3一")
+
+    def testUnformatting(self):
+        # The lone space vanishes and the junk text loses its leading tab and trailing space
+        word = Word(Pinyin.parse(u"ni3"), Text(" "), Pinyin.parse(u"hao3"), Text("\ttons more junk!! "))
+        self.assertEquals(self.unformat([word]), u"ni3hao3tons more junk!!")
+
+    # Test helpers
+    def unformat(self, what):
+        cleaned = unformatreadingfordisplay(what)
+        return flatten(cleaned)
+
class PinyinTonifierTest(unittest.TestCase):
def testEasy(self):
self.assertEquals(PinyinTonifier().tonify(u"Han4zi4 bu4 mie4, Zhong1guo2 bi4 wang2!"),
diff --git a/pinyin/tests/updatergraph.py b/pinyin/tests/updatergraph.py
index eaec521..87ba129 100644
--- a/pinyin/tests/updatergraph.py
+++ b/pinyin/tests/updatergraph.py
@@ -61,7 +61,7 @@ def testPreferUpdatersWhichUseChangedField(self):
graph = filledgraphforupdaters(updaters, { field : "", other_field : "present!", "output" : "" }, { field : "go" })
yield assert_equal, graph["output"][1](), "from " + field
-class TestUpdaterGraphUpdaters(unittest.TestCase):
+class TestUpdaterGraphUpdaters(object):
def testEverythingEnglish(self):
config = dict(prefersimptrad = "simp", forceexpressiontobesimptrad = False, tonedisplay = "tonified", hanzimasking = False,
emphasisemainmeaning = False, meaningnumbering = "circledChinese", colormeaningnumbers = False, meaningseperator = "lines",
@@ -236,6 +236,15 @@ def testUpdateReadingAndColoredHanziAndAudioWithSandhi(self):
def testUpdateSimplifiedTraditionalDoesNothingIfSimpTradIdentical(self):
self.assertProduces({ "expression" : u"鼠" }, {}, { "simp" : u"", "trad" : u"" })
+ def testUpdateColoredCharactersFromReading(self):
+     # Generator-style test: yields one (checker, reading) pair per spelling of the reading
+     config = dict(colorizedcharactergeneration = True, tonecolors = [u"#ff0000", u"#ffaa00", u"#00aa00", u"#0000ff", u"#545454"])
+
+     def check(reading):
+         # The reading field must be left as given while the color field is filled in
+         known = { "reading" : reading, "expression" : u"吃饭" }
+         expected = {
+             "reading" : reading,
+             "color" : u'吃饭'
+         }
+         return self.assertProduces(known, config, expected)
+
+     for reading in [u"chi1 fan1", u"chī fān", u'chī fān']:
+         yield (check, reading)
+
def assertProduces(self, known, configdict, expected, mediapacks=None, notifierassertion=None):
if mediapacks == None:
mediapacks = [media.MediaPack("Test", { "shu1.mp3" : "shu1.mp3", "shu1.ogg" : "shu1.ogg",
diff --git a/pinyin/updatergraph.py b/pinyin/updatergraph.py
index dfa1536..0653aa0 100644
--- a/pinyin/updatergraph.py
+++ b/pinyin/updatergraph.py
@@ -25,6 +25,9 @@ def preparetokens(config, tokens):
return model.flatten(tokens, tonify=config.shouldtonify)
+def unpreparetokens(flat):
+    # Strip the HTML from the flat text, tokenize what remains and wrap
+    # all of the resulting tokens in a single Word
+    plaintext = striphtml(flat)
+    tokens = model.tokenize(plaintext)
+    return [model.Word(*tokens)]
+
def generateaudio(notifier, mediamanager, config, dictreading):
mediapacks = mediamanager.discovermediapacks()
if len(mediapacks) == 0:
@@ -109,8 +112,8 @@ def __init__(self, notifier, mediamanager, config):
("simptrad", self.expression2simptrad, ("expression",)),
("trad", lambda x: x["simp"] != x["trad"] and x["trad"] or "", ("simptrad",)),
("simp", lambda x: x["simp"] != x["trad"] and x["simp"] or "", ("simptrad",)),
- ("expression", lambda x: x, ["simp"]),
- ("expression", lambda x: x, ["trad"]),
+ ("expression", lambda x: x, ("simp",)),
+ ("expression", lambda x: x, ("trad",)),
("dictmeaningsmwssource", self.expression2dictmeaningsmwssource, ("expression",)),
("dictmeaningsmws", fst, ("dictmeaningsmwssource",)),
@@ -266,7 +269,7 @@ def dictreading2reading(self, dictreading):
return preparetokens(self.config, model.formatreadingfordisplay(dictreading)).lower()
def reading2dictreading(self, reading):
- return [model.Word(*model.tokenize(reading))]
+ return model.unformatreadingfordisplay(unpreparetokens(reading))
def expressiondictreading2color(self, expression, dictreading):
return model.flatten(transformations.colorize(self.config.tonecolors, model.tonedcharactersfromreading(expression, dictreading)))