Permalink
Browse files

Some outdated TODOs are done

  • Loading branch information...
1 parent 5b85233 commit 13fd83801299204198522dee40274b047ab8d199 @batterseapower batterseapower committed Feb 11, 2010
Showing with 1 addition and 4 deletions.
  1. +0 −2 pinyin/model.py
  2. +1 −2 pinyin/tests/model.py
View
@@ -319,8 +319,6 @@ def tokenizetext(text, forcenumeric):
else:
tokens.append(Text(match))
- # TODO: for robustness, we should explicitly parse around HTML tags
-
# TODO: could be much smarter about segmentation here. For example, we could use the
# pinyin regex to split up run-on groups of pinyin-like characters.
return tokens
View
@@ -308,8 +308,7 @@ def testTokenizeHTML(self):
tokenize(u'<span style="color:#123456">tou2</span> <span style="color:#123456">er4</span>'))
def testTokenizeUnrecognisedHTML(self):
- # TODO: enable this test and make it pass somehow... SGMLParser doesn't support self-closing tags :-(
- #self.assertEquals([Text(u'<b />')], tokenize(u'<b />'))
+ self.assertEquals([Text(u'<b>'), Text(u'</b>')], tokenize(u'<b />'))
self.assertEquals([Text(u'<span style="mehhhh!">'), Text("</span>")], tokenize(u'<span style="mehhhh!"></span>'))
def testTokenizeWeirdyRomanCharacters(self):

0 comments on commit 13fd838

Please sign in to comment.