Permalink
Browse files

Use English as a MW source for all languages, ensure that we look for…

… meanings in Google Translate despite finding MWs in the dictionary (stupid bug)
  • Loading branch information...
1 parent 7285f56 commit efaa4e20fe8fcd2b72258c06909385eea8c52fd3 @batterseapower batterseapower committed Feb 15, 2010
Showing with 38 additions and 32 deletions.
  1. +17 −17 pinyin/tests/updatergraph.py
  2. +21 −15 pinyin/updatergraph.py
@@ -82,7 +82,7 @@ def testEverythingGerman(self):
self.assertProduces({ "expression" : u"", "mwfieldinfact" : True }, config, {
"reading" : u'<span style="color:#ff0000">shū</span>',
"meaning" : u'Buch, Geschriebenes (S)',
- "mw" : u'',
+ "mw" : u'<span style="color:#00aa00">本</span> - <span style="color:#00aa00">běn</span>, <span style="color:#0000ff">册</span> - <span style="color:#0000ff">cè</span>, <span style="color:#0000ff">部</span> - <span style="color:#0000ff">bù</span>',
"audio" : u"[sound:" + os.path.join("Test", "shu1.ogg") + "]",
"color" : u'<span style="color:#ff0000">书</span>',
"trad" : u"", "simp" : u""
@@ -91,21 +91,14 @@ def testEverythingGerman(self):
def testEverythingFrench(self):
config = dict(forceexpressiontobesimptrad = False, tonedisplay = "tonified",
dictlanguage = "fr", detectmeasurewords = True,
- audioextensions = [".ogg"], tonecolors = [u"#ff0000", u"#ffaa00", u"#00aa00", u"#0000ff", u"#545454"])
- self.assertProduces({ "expression" : u"", "mwfieldinfact" : True }, config, {
- "reading" : u'<span style="color:#ff0000">shū</span>',
- "meaning" : None,
- "mw" : u'<span style="color:#00aa00">本</span> - <span style="color:#00aa00">běn</span>, <span style="color:#0000ff">册</span> - <span style="color:#0000ff">cè</span>, <span style="color:#0000ff">部</span> - <span style="color:#0000ff">bù</span>',
- "audio" : u"[sound:" + os.path.join("Test", "shu1.ogg") + "]",
- "color" : u'<span style="color:#ff0000">书</span>',
- "trad" : u"", "simp" : u""
- })
-
- def testFrenchMeaningFallbackOnGoogleEvenIfWeHaveMWs(self):
- config = dict(fallbackongoogletranslate = True, dictlanguage = "fr")
- self.assertProduces({ "expression" : u"", "mwfieldinfact" : True }, config, {
- "meaning" : "Livre",
- "mw" : u'<span style="color:#00aa00">本</span> - <span style="color:#00aa00">běn</span>, <span style="color:#0000ff">册</span> - <span style="color:#0000ff">cè</span>, <span style="color:#0000ff">部</span> - <span style="color:#0000ff">bù</span>'
+ audioextensions = [".mp3"], tonecolors = [u"#ff0000", u"#ffaa00", u"#00aa00", u"#0000ff", u"#545454"])
+ self.assertProduces({ "expression" : u"", "mwfieldinfact" : True }, config, {
+ "reading" : u'<span style="color:#00aa00">shuǐ</span>',
+ "meaning" : u'eau (n.v.) (n)',
+ "mw" : u'<span style="color:#ff0000">杯</span> - <span style="color:#ff0000">bēi</span>, <span style="color:#00aa00">筒</span> - <span style="color:#00aa00">tǒng</span>, <span style="color:#ffaa00">瓶</span> - <span style="color:#ffaa00">píng</span>',
+ "audio" : u"[sound:" + os.path.join("Test", "shui3.mp3") + "]",
+ "color" : u'<span style="color:#00aa00">水</span>',
+ "trad" : u'', "simp" : u''
})
def testPreservesWhitespace(self):
@@ -266,6 +259,13 @@ def testFallBackOnGoogleForPhrase(self):
"meaning" : u'Hello, you are my friend do<br /><span style="color:gray"><small>[Google Translate]</small></span><span> </span>'
})
+ def testMeaningFallbackOnGoogleEvenIfWeHaveMWs(self):
+ config = dict(fallbackongoogletranslate = True, dictlanguage = "fr")
+ self.assertProduces({ "expression" : u"", "mwfieldinfact" : True }, config, {
+ "meaning" : u'Livre<br /><span style="color:gray"><small>[Google Translate]</small></span><span> </span>',
+ "mw" : u'<span style="color:#00aa00">本</span> - <span style="color:#00aa00">běn</span>, <span style="color:#0000ff">册</span> - <span style="color:#0000ff">cè</span>, <span style="color:#0000ff">部</span> - <span style="color:#0000ff">bù</span>'
+ })
+
def testUpdateSimplifiedTraditional(self):
config = dict(simpgeneration = True, tradgeneration = True)
self.assertProduces({ "expression" : u"个個" }, config, {
@@ -297,7 +297,7 @@ def testUpdateColoredCharactersFromReading(self):
def assertProduces(self, known, configdict, expected, mediapacks=None, alreadyimported=[], notifierassertion=None):
if mediapacks == None:
- mediapacks = [media.MediaPack("Test", { "shu1.mp3" : "shu1.mp3", "shu1.ogg" : "shu1.ogg",
+ mediapacks = [media.MediaPack("Test", { "shu1.mp3" : "shu1.mp3", "shu1.ogg" : "shu1.ogg", "shui3.mp3" : "shui3.mp3",
"san1.mp3" : "san1.mp3", "qi1.ogg" : "qi1.ogg", "Kai1.mp3" : "location/Kai1.mp3",
"hen3.mp3" : "hen3.mp3", "hen2.mp3" : "hen2.mp3", "hao3.mp3" : "hao3.mp3" })]
View
@@ -123,17 +123,18 @@ def __init__(self, notifier, mediamanager, config):
("expression", lambda x: x, ("simp",)),
("expression", lambda x: x, ("trad",)),
- ("dictmeaningsmwssource", self.expression2dictmeaningsmwssource, ("expression",)),
- ("dictmeaningsmws", fst, ("dictmeaningsmwssource",)),
- ("dictmeaningssource", snd, ("dictmeaningsmwssource",)),
- ("mergeddictmeaningsmws", self.dictmeaningsmws2mergeddictmeaningsmws, ("dictmeaningsmws", "mwfieldinfact")),
+ ("dictmeaningsandsource", self.expression2dictmeaningssource, ("expression",)),
+ ("dictmeanings", fst, ("dictmeaningsandsource",)),
+ ("dictmeaningssource", snd, ("dictmeaningsandsource",)),
+ ("dictmws", self.expression2dictmws, ("expression",)),
+
+ ("mergeddictmeaningsmws", self.dictmeaningsmws2mergeddictmeaningsmws, ("dictmeanings", "dictmws", "mwfieldinfact")),
("mergeddictmeanings", fst, ("mergeddictmeaningsmws",)),
("mergeddictmws", snd, ("mergeddictmeaningsmws",)),
("meaning", self.dictmeaningsmws2meaning, ("expression", "mergeddictmeanings", "dictmeaningssource",)), # Need expression for Hanzi masking
#("mergeddictmeaningsmws", self.meaning2mergeddictmeaningsmws, ["meaning"]),
- ("dictmws", lambda x: x[1], ("dictmeaningsmws",)),
#("dictmws", self.mw2dictmws, ["mw"]), # TODO: think carefully about this and mergeddictmws for the update story here
("mw", self.mergeddictmws2mw, ("mergeddictmws",)),
("mwaudio", self.mergeddictmwdictreading2mwaudio, ("dictmws", "dictreading")), # Need dictreading for the noun
@@ -166,36 +167,39 @@ def expression2simptrad(self, expression):
return result
- def expression2dictmeaningsmwssource(self, expression):
+ def expression2dictmeaningssource(self, expression):
dictmeaningssources = [
# Use CEDICT to get meanings
(u"",
- lambda: self.dictionary.meanings(expression, self.config.prefersimptrad)),
+ lambda: self.dictionary.meanings(expression, self.config.prefersimptrad)[0]),
# Interpret Hanzi as numbers. NB: only consult after CEDICT so that we
# handle curious numbers such as 'liang' using the dictionary
(u"",
- lambda: (numbers.meaningfromnumberlike(expression, self.dictionary), None))
+ lambda: numbers.meaningfromnumberlike(expression, self.dictionary))
] + (self.config.shouldusegoogletranslate and [
# If the dictionary can't answer our question, ask Google Translate.
# If there is a long word followed by another word then this will be treated as a phrase.
# Phrases are also queried using googletranslate rather than the local dictionary.
# This helps deal with small dictionaries (for example French)
(u'<br /><span style="color:gray"><small>[Google Translate]</small></span><span> </span>',
- lambda: (dictionaryonline.gTrans(expression, self.config.dictlanguage), None))
+ lambda: dictionaryonline.gTrans(expression, self.config.dictlanguage))
] or [])
# Find the first source that returns a sensible meaning
for dictmeaningssource, lookup in dictmeaningssources:
- dictmeanings, dictmws = lookup()
- if dictmeanings != None or dictmws != None:
- return (dictmeanings or [], dictmws or []), dictmeaningssource
+ dictmeanings = lookup()
+ if dictmeanings != None:
+ return dictmeanings, dictmeaningssource
# No information available
return None
- def dictmeaningsmws2mergeddictmeaningsmws(self, dictmeaningsmws, mwfieldinfact):
- dictmeanings, dictmws = dictmeaningsmws
-
+ def expression2dictmws(self, expression):
+ # Currently, we only use CEDICT to discover the measure words. Note that we *always*
+ # use the English dictionary, because it has the most comprehensive coverage of MWs.
+ return self.dictionaries('en').meanings(expression, self.config.prefersimptrad)[1]
+
+ def dictmeaningsmws2mergeddictmeaningsmws(self, dictmeanings, dictmws, mwfieldinfact):
# If the user wants the measure words to be folded into the definition or there
# is no MW field for us to split them out into, fold them in there
if not(self.config.detectmeasurewords) or not mwfieldinfact:
@@ -373,6 +377,8 @@ def fillme(field=field, possiblefillers=possiblefillers):
# What if all of the possible updaters failed? Ideally we would not be in the graph at all, but it's too late for that.
# All we can do is return None, and deal with this possibility later on.
+ dirty[field] = False
+ return None
graph[field] = (True, Thunk(fillme))

0 comments on commit efaa4e2

Please sign in to comment.