Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fix reading -> dictreading to be much smarter, so now we get really beautiful updates of every dependent generated field by changing the reading
  • Loading branch information
batterseapower committed Dec 6, 2009
1 parent 29e5a20 commit d3559d1
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 5 deletions.
21 changes: 20 additions & 1 deletion pinyin/model.py
Expand Up @@ -558,7 +558,26 @@ def visitPinyin(self, pinyin):


def visitTonedCharacter(self, tonedcharacter): def visitTonedCharacter(self, tonedcharacter):
# Treat characters like normal text # Treat characters like normal text
self.visitText(tonedcharacter) return self.visitText(tonedcharacter)

"""
Attempts to invert formatreadingfordisplay. For use when recovering
a clean set of tokens from user input.
"""
def unformatreadingfordisplay(words):
    # One visitor instance is shared across every word in the input
    visitor = UnformatReadingForDisplayVisitor()

    # Rebuild each word by running its tokens through the visitor,
    # keeping the words in their original order
    cleaned = []
    for word in words:
        cleaned.append(word.concatmap(visitor))
    return cleaned

class UnformatReadingForDisplayVisitor(TokenVisitor):
    # Visitor used by unformatreadingfordisplay. Every visit method returns
    # a list of replacement tokens for the token it was given.

    def visitText(self, text):
        # Trim surrounding whitespace; text that was nothing but
        # whitespace produces no token at all
        stripped = text.strip()
        if len(stripped) > 0:
            return [Text(stripped)]
        return []

    def visitPinyin(self, pinyin):
        # Pinyin tokens are passed through unchanged
        return [pinyin]

    def visitTonedCharacter(self, tonedcharacter):
        # Toned characters are passed through unchanged
        return [tonedcharacter]


""" """
Makes some tokens that faithfully represent the given characters Makes some tokens that faithfully represent the given characters
Expand Down
14 changes: 14 additions & 0 deletions pinyin/tests/model.py
Expand Up @@ -355,6 +355,9 @@ def testSimpleErhuaSingleton(self):
def testSimpleErhua(self): def testSimpleErhua(self):
self.assertEquals(self.format([Word(Pinyin.parse(u"hen3"), Pinyin.parse(u"ma5"), Pinyin.parse("r5"))]), u"hen3 mar") self.assertEquals(self.format([Word(Pinyin.parse(u"hen3"), Pinyin.parse(u"ma5"), Pinyin.parse("r5"))]), u"hen3 mar")


def testSimpleTonedCharacter(self):
    # A word holding a single toned character should format to just that character
    word = Word(TonedCharacter(u"塊", 1))
    self.assertEquals(self.format([word]), u"塊")

def testErhuaNextToText(self): def testErhuaNextToText(self):
self.assertEquals(self.format([Word(Text("not pinyin"), Pinyin.parse(u"r5"))]), u"not pinyin r") self.assertEquals(self.format([Word(Text("not pinyin"), Pinyin.parse(u"r5"))]), u"not pinyin r")


Expand All @@ -368,6 +371,17 @@ def format(self, what):
def reading(self, what): def reading(self, what):
return self.format(englishdict.reading(what)) return self.format(englishdict.reading(what))


class UnformatReadingForDisplayTest(unittest.TestCase):
    def testNoUnformatting(self):
        # No whitespace-only or padded text here: the flattened output is
        # just the three tokens run together
        word = Word(Text("not pinyin"), Pinyin.parse(u"ni3"), TonedCharacter(u"一", 1))
        self.assertEquals(self.unformat([word]), u"not pinyinni3一")

    def testUnformatting(self):
        # The whitespace-only text disappears and the padded text is trimmed
        word = Word(
            Pinyin.parse(u"ni3"),
            Text(" "),
            Pinyin.parse(u"hao3"),
            Text("\ttons more junk!! "),
        )
        self.assertEquals(self.unformat([word]), u"ni3hao3tons more junk!!")

    # Test helpers
    def unformat(self, what):
        # Unformat the words, then flatten the result for comparison
        unformatted = unformatreadingfordisplay(what)
        return flatten(unformatted)

class PinyinTonifierTest(unittest.TestCase): class PinyinTonifierTest(unittest.TestCase):
def testEasy(self): def testEasy(self):
self.assertEquals(PinyinTonifier().tonify(u"Han4zi4 bu4 mie4, Zhong1guo2 bi4 wang2!"), self.assertEquals(PinyinTonifier().tonify(u"Han4zi4 bu4 mie4, Zhong1guo2 bi4 wang2!"),
Expand Down
11 changes: 10 additions & 1 deletion pinyin/tests/updatergraph.py
Expand Up @@ -61,7 +61,7 @@ def testPreferUpdatersWhichUseChangedField(self):
graph = filledgraphforupdaters(updaters, { field : "", other_field : "present!", "output" : "" }, { field : "go" }) graph = filledgraphforupdaters(updaters, { field : "", other_field : "present!", "output" : "" }, { field : "go" })
yield assert_equal, graph["output"][1](), "from " + field yield assert_equal, graph["output"][1](), "from " + field


class TestUpdaterGraphUpdaters(unittest.TestCase): class TestUpdaterGraphUpdaters(object):
def testEverythingEnglish(self): def testEverythingEnglish(self):
config = dict(prefersimptrad = "simp", forceexpressiontobesimptrad = False, tonedisplay = "tonified", hanzimasking = False, config = dict(prefersimptrad = "simp", forceexpressiontobesimptrad = False, tonedisplay = "tonified", hanzimasking = False,
emphasisemainmeaning = False, meaningnumbering = "circledChinese", colormeaningnumbers = False, meaningseperator = "lines", emphasisemainmeaning = False, meaningnumbering = "circledChinese", colormeaningnumbers = False, meaningseperator = "lines",
Expand Down Expand Up @@ -236,6 +236,15 @@ def testUpdateReadingAndColoredHanziAndAudioWithSandhi(self):
def testUpdateSimplifiedTraditionalDoesNothingIfSimpTradIdentical(self): def testUpdateSimplifiedTraditionalDoesNothingIfSimpTradIdentical(self):
self.assertProduces({ "expression" : u"鼠" }, {}, { "simp" : u"", "trad" : u"" }) self.assertProduces({ "expression" : u"鼠" }, {}, { "simp" : u"", "trad" : u"" })


def testUpdateColoredCharactersFromReading(self):
    # Generator test: yields one (callable, argument) pair per reading variant
    config = dict(colorizedcharactergeneration = True, tonecolors = [u"#ff0000", u"#ffaa00", u"#00aa00", u"#0000ff", u"#545454"])
    expectedcolor = u'<span style="color:#ff0000">吃</span><span style="color:#ff0000">饭</span>'

    def check(reading):
        # The reading must come back untouched, and the color field must be
        # the expected colored rendering of the expression
        known = { "reading" : reading, "expression" : u"吃饭" }
        expected = { "reading" : reading, "color" : expectedcolor }
        return self.assertProduces(known, config, expected)

    # The same reading written three ways: numbered, tonified, and tonified with HTML coloring
    readings = [
        u"chi1 fan1",
        u"chī fān",
        u'<span style="color:#ff0000">chī</span> <span style="color:#ff0000">fān</span>',
    ]
    for reading in readings:
        yield check, reading

def assertProduces(self, known, configdict, expected, mediapacks=None, notifierassertion=None): def assertProduces(self, known, configdict, expected, mediapacks=None, notifierassertion=None):
if mediapacks == None: if mediapacks == None:
mediapacks = [media.MediaPack("Test", { "shu1.mp3" : "shu1.mp3", "shu1.ogg" : "shu1.ogg", mediapacks = [media.MediaPack("Test", { "shu1.mp3" : "shu1.mp3", "shu1.ogg" : "shu1.ogg",
Expand Down
9 changes: 6 additions & 3 deletions pinyin/updatergraph.py
Expand Up @@ -25,6 +25,9 @@ def preparetokens(config, tokens):


return model.flatten(tokens, tonify=config.shouldtonify) return model.flatten(tokens, tonify=config.shouldtonify)


def unpreparetokens(flat):
    # Pass the flat text through striphtml, tokenize what remains, and wrap
    # all of the resulting tokens in a single Word. The return value is a
    # one-element list containing that Word.
    plaintext = striphtml(flat)
    tokens = model.tokenize(plaintext)
    return [model.Word(*tokens)]

def generateaudio(notifier, mediamanager, config, dictreading): def generateaudio(notifier, mediamanager, config, dictreading):
mediapacks = mediamanager.discovermediapacks() mediapacks = mediamanager.discovermediapacks()
if len(mediapacks) == 0: if len(mediapacks) == 0:
Expand Down Expand Up @@ -109,8 +112,8 @@ def __init__(self, notifier, mediamanager, config):
("simptrad", self.expression2simptrad, ("expression",)), ("simptrad", self.expression2simptrad, ("expression",)),
("trad", lambda x: x["simp"] != x["trad"] and x["trad"] or "", ("simptrad",)), ("trad", lambda x: x["simp"] != x["trad"] and x["trad"] or "", ("simptrad",)),
("simp", lambda x: x["simp"] != x["trad"] and x["simp"] or "", ("simptrad",)), ("simp", lambda x: x["simp"] != x["trad"] and x["simp"] or "", ("simptrad",)),
("expression", lambda x: x, ["simp"]), ("expression", lambda x: x, ("simp",)),
("expression", lambda x: x, ["trad"]), ("expression", lambda x: x, ("trad",)),


("dictmeaningsmwssource", self.expression2dictmeaningsmwssource, ("expression",)), ("dictmeaningsmwssource", self.expression2dictmeaningsmwssource, ("expression",)),
("dictmeaningsmws", fst, ("dictmeaningsmwssource",)), ("dictmeaningsmws", fst, ("dictmeaningsmwssource",)),
Expand Down Expand Up @@ -266,7 +269,7 @@ def dictreading2reading(self, dictreading):
return preparetokens(self.config, model.formatreadingfordisplay(dictreading)).lower() return preparetokens(self.config, model.formatreadingfordisplay(dictreading)).lower()


def reading2dictreading(self, reading): def reading2dictreading(self, reading):
return [model.Word(*model.tokenize(reading))] return model.unformatreadingfordisplay(unpreparetokens(reading))


def expressiondictreading2color(self, expression, dictreading): def expressiondictreading2color(self, expression, dictreading):
return model.flatten(transformations.colorize(self.config.tonecolors, model.tonedcharactersfromreading(expression, dictreading))) return model.flatten(transformations.colorize(self.config.tonecolors, model.tonedcharactersfromreading(expression, dictreading)))
Expand Down

0 comments on commit d3559d1

Please sign in to comment.