Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge branch 'v0.6-fixes' into updater-refactoring

Conflicts:
	pinyin/dictionaries/handedict-20090912.zip
	pinyin/tests/updater.py
	pinyin/utils.py
  • Loading branch information...
commit 14117dbf7b406c3be424366a2e11862f2f8415f5 2 parents 0d8b730 + f180197
@batterseapower batterseapower authored
View
27 Pinyin Toolkit.txt
@@ -14,6 +14,33 @@ http://github.com/batterseapower/pinyin-toolkit/issues
h1. Changelog
+h2. Version 0.6.3 (22/11/2009)
+
+| Max Bolingbroke | "Email":mailto:batterseapower@hotmail.com | "Website":http://blog.omega-prime.co.uk |
+| Nick Cook | "Email":mailto:nick@n-line.co.uk | "Website":http://www.n-line.co.uk |
+
+* The Toolkit should now work on the latest Anki snapshot (fixes the 'sgmllib' error)
+
+
+h2. Version 0.6.2 (20/11/2009)
+
+| Max Bolingbroke | "Email":mailto:batterseapower@hotmail.com | "Website":http://blog.omega-prime.co.uk |
+| Nick Cook | "Email":mailto:nick@n-line.co.uk | "Website":http://www.n-line.co.uk |
+
+* Fix packaging error in the 0.6.1 release (cjklib was not included). Version 0.6.1 contained the following fixes:
+** Improvements to the parser for Google Translate. This should fix the crashes people are seeing
+** Make field detection case-insensitive
+
+
+h2. Version 0.6.1 (19/11/2009)
+
+| Max Bolingbroke | "Email":mailto:batterseapower@hotmail.com | "Website":http://blog.omega-prime.co.uk |
+| Nick Cook | "Email":mailto:nick@n-line.co.uk | "Website":http://www.n-line.co.uk |
+
+* Improvements to the parser for Google Translate. This should fix the crashes people are seeing
+* Make field detection case-insensitive
+
+
h2. Version 0.6 (13/09/2009)
| Max Bolingbroke | "Email":mailto:batterseapower@hotmail.com | "Website":http://blog.omega-prime.co.uk |
View
9 pinyin/__init__.py
@@ -10,6 +10,13 @@
if sys.version_info[0:2] < (2, 5):
sys.path.append(utils.toolkitdir("pinyin", "vendor", "python25"))
+# Anki's new Python 2.6 distribution omits some standard library modules. If ConfigParser cannot be imported, add our vendored fallback directory to the path. Why God, why?
+try:
+ import ConfigParser
+except ImportError:
+ sys.path.append(utils.toolkitdir("pinyin", "vendor", "perverseness"))
+
+# Sanity prevails from this point:
for vendor_package in ["cjklib"]:
sys.path.append(utils.toolkitdir("pinyin", "vendor", vendor_package))
@@ -32,5 +39,5 @@
# Expose package metadata via the quasi-standard __version__
# attribute: http://www.python.org/dev/peps/pep-0008/
-__version_info__ = ('0', '6')
+__version_info__ = ('0', '6', '3')
__version__ = '.'.join(__version_info__)
View
54 pinyin/db/builder.py
@@ -125,14 +125,32 @@ def go(target):
return inner
+ def findtimestampedfile(pathpattern):
+ path, timestamp = None, None
+ for file in os.listdir(dictionarydir()):
+ # We want the file with the latest timestamp. The shipped filenames embed the date as
+ # YYYYMMDD (e.g. cedict-20091118.txt), so comparing the timestamps as strings orders them chronologically
+ match = re.match(pathpattern % "(.+)", file)
+ if match and match.group(1) > timestamp:
+ path, timestamp = match.group(0), match.group(1)
+
+ return path, timestamp
+
+ def timestampedFileSource(pathpattern):
+ def inner():
+ path, _timestamp = findtimestampedfile(pathpattern)
+
+ if path is None:
+ log.info("Missing file matching the timestamped pattern %s", pathpattern)
+ return None
+
+ return fileSource(path)()
+
+ return inner
+
def timestampedArchiveSource(pathpattern, pathinzippattern):
def inner():
- path, timestamp = None, None
- for file in os.listdir(dictionarydir()):
- match = re.match(pathpattern % "(.+)", file)
- if match:
- path, timestamp = match.group(0), match.group(1)
- break
+ path, timestamp = findtimestampedfile(pathpattern)
if path is None:
log.info("Missing archive matching the timestamped pattern %s", pathpattern)
@@ -142,15 +160,23 @@ def inner():
return inner
+ # NB: because we currently use the first matching source, I've put the timestamped .txt files that
+ # come with the Toolkit at the end of the list. This ensures that if we ever implement dictionary
+ # download, the resulting .zip files will be used in preference to the .txt files.
requirements = {
- "cedict_ts.u8" : [fileSource("cedict_ts.u8"), plainArchiveSource("cedict_1_0_ts_utf-8_mdbg.zip", ["cedict_ts.u8"]),
- timestampedArchiveSource("cedict-%s.zip", ["cedict_ts.u8"]), plainArchiveSource("shipped.zip", ["cedict_ts.u8"])],
- "handedict.u8" : [fileSource("handedict_nb.u8"), timestampedArchiveSource("handedict-%s.zip", ["handedict-%s", "handedict_nb.u8"]),
- plainArchiveSource("shipped.zip", ["handedict_nb.u8"])],
- "cfdict.u8" : [fileSource("cfdict_nb.u8"), timestampedArchiveSource("cfdict-%s.zip", ["cfdict-%s", "cfdict_nb.u8"]),
- plainArchiveSource("shipped.zip", ["cfdict_nb.u8"])],
- "Unihan.txt" : [fileSource("Unihan.txt"), plainArchiveSource("Unihan.zip", ["Unihan.txt"]),
- plainArchiveSource("shipped.zip", ["Unihan.txt"])]
+ "cedict_ts.u8" : [fileSource("cedict_ts.u8"),
+ plainArchiveSource("cedict_1_0_ts_utf-8_mdbg.zip", ["cedict_ts.u8"]),
+ timestampedArchiveSource("cedict-%s.zip", ["cedict_ts.u8"]),
+ timestampedFileSource("cedict-%s.txt")],
+ "handedict.u8" : [fileSource("handedict_nb.u8"),
+ timestampedArchiveSource("handedict-%s.zip", ["handedict-%s", "handedict_nb.u8"]),
+ timestampedFileSource("handedict-%s.txt")],
+ "cfdict.u8" : [fileSource("cfdict_nb.u8"),
+ timestampedArchiveSource("cfdict-%s.zip", ["cfdict-%s", "cfdict_nb.u8"]),
+ plainArchiveSource("shipped.zip", ["cfdict_nb.u8"]),
+ timestampedFileSource("cfdict-%s.txt")],
+ "Unihan.txt" : [fileSource("Unihan.txt"),
+ plainArchiveSource("Unihan.zip", ["Unihan.txt"])]
}
maxtimestamp = 0
View
BIN  pinyin/dictionaries/cedict-20090912.zip
Binary file not shown
View
100,449 pinyin/dictionaries/cedict-20091118.txt
100,449 additions, 0 deletions not shown
View
BIN  pinyin/dictionaries/cfdict-20090706.zip
Binary file not shown
View
389 pinyin/dictionaries/cfdict-20091018.txt
@@ -0,0 +1,389 @@
+# CFDICT Sun Oct 18 01:59:29 2009; Copyright 2009 (see http://www.chinaboard.de/fr/cfdict.php?mode=dl for details); -*- coding: utf-8 -*-
+芭蕾舞 芭蕾舞 [ba1 lei3 wu3] /balet (dance) (n.v.) (n)/
+爸爸 爸爸 [ba4 ba5] /papa (n.v.) (n)/
+杯子 杯子 [bei1 zi5] /verre, gobelet, tasse (n.v.) (n)/
+被 被 [bei4] /par (indicateur de la voix passive) (n.v.)/
+邊 边 [bian1] /côté (n.v.) (n)/
+波士頓 波士顿 [bo1 shi4 dun4] /Boston (n.v.) (prop)/
+不 不 [bu4] /non, ne ... pas (n.v.)/
+才 才 [cai2] /seulement, pas avant (n.v.) (adv)/
+曾經 曾经 [ceng2 jing1] /par le passé, précédemment (n.v.) (adv)/
+唱片行 唱片行 [chang4 pian4 xing2] /discaire, magazin de disques (n.v.) (n)/
+抽象畫 抽象画 [chou1 xiang4 hua4] /peinture abstraite (n.v.) (n)/
+傳播媒體 传播媒体 [chuan2 bo4 mei2 ti3] /media d'information (n.v.) (n)/
+從來不 从来不 [cong2 lai2 bu4] /jamais (habitude) (n.v.)/
+打工 打工 [da3 gong1] /boulot, job, petit travail (n.v.) (n)/
+的 的 [de5] /(particule possessive, équivalent de l'anglais " 's ") (n.v.)/
+地方 地方 [di4 fang5] /place, lieu (n.v.) (n)/
+地精 地精 [di4 jing1] /nain de jardin, gnome (n.v.) (n, bio)/
+地圖 地图 [di4 tu2] /carte, plan (n.v.) (n)/
+電腦 电脑 [dian4 nao3] /ordinateur (n.v.) (n)/
+電梯 电梯 [dian4 ti1] /ascenseur (n.v.) (n)/
+鬥志 斗志 [dou4 zhi4] /battant, esprit combatif (n.v.) (n)/
+二十 二十 [er4 shi2] /vingt, 20 (n.v.) (num)/
+法蘭西共和國 法兰西共和国 [fa3 lan2 xi1 gong4 he2 guo2] /République française (n.v.) (n)/
+飯館兒 饭馆儿 [fan4 guan3 er2] /restaurant (n.v.) (n)/
+仿冒品 仿冒品 [fang3 mao4 pin3] /copie, copie pirate, imitation (n.v.) (n)/
+飛馬 飞马 [fei1 ma3] /Pégase (n.v.) (n)/
+負面 负面 [fu4 mian4] /côté négatif (n.v.) (n)/
+感動 感动 [gan3 dong4] /émouvoir quelqu'un, toucher quelqu'un (n.v.) (v)/être ému, être touché par un geste gentil (n.v.) (adj)/
+高潮 高潮 [gao1 chao2] /apogée, point culminant (n.v.) (n)/
+哥哥 哥哥 [ge1 ge5] /grand frère (n.v.) (n)/
+歌劇 歌剧 [ge1 ju4] /opéra (n.v.) (n, art)/
+歌劇院 歌剧院 [ge1 ju4 yuan4] /Opéra (bâtiment) (n.v.) (n, arch)/
+歌廳 歌厅 [ge1 ting1] /salon (n.v.) (n)/
+個 个 [ge4] /(spécificatif universel et indefinit) (n.v.) (meas)/
+公平 公平 [gong1 ping2] /juste, fairplay (n.v.) (adj)/
+工 工 [gong1] /travail, profession, habiletés (n.v.) (n)/
+恭喜 恭喜 [gong1 xi3] /féliciter (n.v.) (v)/
+狗 狗 [gou3] /chien (n.v.) (n)/
+古代 古代 [gu3 dai4] /antiquité, temps anciens (n.v.) (n)/
+古典 古典 [gu3 dian3] /classique (n.v.) (adj)/
+故障 故障 [gu4 zhang4] /défaillant, cassé (n.v.) (adj)/
+怪 怪 [guai4] /étrange, inabituel (n.v.) (adj)/
+怪獸 怪兽 [guai4 shou4] /monstre, animal imaginaire néfaste (n.v.) (n)/
+關係 关系 [guan1 xi5] /relation, lien (n.v.) (n)/
+關心 关心 [guan1 xin1] /se préoccuper de, se sentir concerné par (n.v.) (v)/
+規矩 规矩 [gui1 ju3] /conventions, politesses, règles de bienséances (n.v.) (n)/
+貴 贵 [gui4] /cher, couteux (n.v.) (adj)/
+國 国 [guo2] /pays, État, nation (n.v.) (n)/
+國家地理雜誌 国家地理杂志 [guo2 jia1 di4 li3 za2 zhi4] /National Geographic (n.v.) (prop, geo)/
+果然 果然 [guo3 ran2] /comme prévu (n.v.)/
+還是 还是 [hai2 shi4] /ou, ou bien (n.v.) (conj)/
+漢 汉 [han4] /peuple Han (n.v.) (n)/
+何必 何必 [he2 bi4] /pourquoi devrait-on (n.v.)/
+河 河 [he2] /fleuve, rivière (n.v.) (n)/
+禾 禾 [he2] /céréale, grain (n.v.) (n)/
+很 很 [hen3] /très, bien (n.v.) (v)/
+轟動 轰动 [hong1 dong4] /faire sensation, thème à la mode (n.v.) (adj)/
+後 后 [hou4] /après, plus tard, derrière, dans le dos de (n.v.)/
+後悔 后悔 [hou4 hui3] /regretter (n.v.) (v)/avoir des regrets (n.v.) (adj)/
+後黎朝 后黎朝 [hou4 li2 chao2] /dynastie des Lê postérieurs (Vietnam) (n.v.) (n, hist)/
+後面 后面 [hou4 mian4] /derrière (n.v.) (n)/
+後天 后天 [hou4 tian1] /après demain (n.v.) (n)/
+糊塗 糊涂 [hu2 tu5] /être embrouillé, confus (n.v.) (adj)/
+花燈 花灯 [hua1 deng1] /statue de papier translucide éclairée par des lumières internes utilisée lors de festivals (n.v.) (n)/
+畫家 画家 [hua4 jia1] /peintre (n.v.) (n)/
+火 火 [huo3] /feu (n.v.)/
+或是 或是 [huo4 shi4] /ou, X ou Y (n.v.) (conj)/
+積極 积极 [ji1 ji2] /enthousiaste, actif (n.v.) (adv)/
+吉利 吉利 [ji2 li4] /porter chance, être de bonne augure (n.v.) (v)/chanceux, de bonne augure (n.v.) (adj)/
+祭祀 祭祀 [ji4 si4] /faire des offrandes religieuses (n.v.) (v)/
+家 家 [jia1] /famille (n.v.) (n)/maison (n.v.) (n)/(spécificatif pour les magasin) (n.v.) (meas)/
+間 间 [jian1] /(spécificatif pour les pièces d'un immeuble) (n.v.) (meas)/
+件 件 [jian4] /spécificatifs pour les habits, des affaires en cours, etc. (n.v.) (meas)/
+江 江 [jiang1] /fleuve (n.v.)/
+讲电话 讲电话 [jiang3 dian4 hua4] /parler au téléhone, être au téléphone (n.v.) (v)/
+階段 阶段 [jie1 duan4] /période, niveau (n.v.)/
+結局 结局 [jie2 ju2] /fin, conclusion, résultat final (n.v.) (n)/
+姐姐 姐姐 [jie3 jie5] /grande sœur (n.v.) (n)/
+巾 巾 [jin1] /serviette, tissus (n.v.) (n)/
+近 近 [jin4] /être proche (n.v.) (adj)/
+進步 进步 [jin4 bu4] /progrès, progression (n.v.) (n)/progresser, ameliorer (n.v.) (v)/
+警察 警察 [jing3 cha2] /police (officier) (n.v.) (n)/
+九 九 [jiu3] /neuf, 9 (n.v.) (num)/
+玖 九 [jiu3] /neuf, 9 (n.v.) (num)/
+就是 就是 [jiu4 shi4] /dans ce cas, dès lors (n.v.) (adv)/
+舉目無親 举目无亲 [ju3 mu4 wu2 qin1] /vivre seul loin de sa famille; n'avoir personne sur qui compter (n.v.)/
+劇本 剧本 [ju4 ben3] /script (d'une pièce de théâtre) (n.v.) (n)/
+卷 卷 [juan3] /(spécificatifs pour les objets roulés, enroulés) (n.v.) (meas)/
+角色 角色 [jue2 se4] /rôle, personnage (théâtre, films) (n.v.) (n)/
+開 开 [kai1] /ouvrir, démarrer (n.v.) (v)/ouvert (n.v.)/
+可憐 可怜 [ke3 lian2] /avoir pitié de, plaindre (n.v.) (v)/misérable, pitoyable (n.v.) (adj)/
+課 课 [ke4] /leçon, cours (n.v.) (n)/
+空氣 空气 [kong1 qi4] /l'air (n.v.) (n)/
+口 口 [kou3] /bouche, ouverture (n.v.) (n)/
+口音 口音 [kou3 yin1] /accent local (n.v.) (adj)/
+快遞 快递 [kuai4 di4] /livraison expresse (n.v.) (n)/
+快樂 快乐 [kuai4 le4] /être joyeux, être heureux (n.v.) (v)/
+愧疚 愧疚 [kui4 jiu4] /culpabiliser, se sentir coupable (n.v.) (v)/
+困擾 困扰 [kun4 rao3] /préoccupation, inquiétude (n.v.) (n)/perturber, troubler, harceler (n.v.) (v)/
+蛞蝓 蛞蝓 [kuo4 yu2] /Limace (n.v.) (n)/
+籃球 篮球 [lan2 qiu2] /basketball (n.v.) (n)/
+嘮叨 唠叨 [lao2 dao5] /radoter, se répéter en boucle des reproches ou conseils, (n.v.) (v)/
+老 老 [lao3] /vieux, vieille (n.v.) (adj)/
+老師 老师 [lao3 shi1] /professeur (n.v.) (n)/
+老鼠 老鼠 [lao3 shu3] /souris, rongeurs (n.v.) (n)/
+離 离 [li2] /est éloigné de, est distant de (n.v.) (v)/
+理論 理论 [li3 lun4] /théorie (n.v.) (n)/
+禮物 礼物 [li3 wu4] /cadeau (n.v.) (n)/
+裡 里 [li3] /dedans, dans, en (n.v.)/
+裡面 里面 [li3 mian4] /à l'intérieur (n.v.)/
+鯉魚 鲤鱼 [li3 yu2] /carpe (n.v.) (n)/
+例子 例子 [li4 zi5] /exemple, cas (n.v.) (n)/
+力 力 [li4] /force (n.v.) (n)/
+立 立 [li4] /être debout, rester debout (n.v.) (v)/
+連續 连续 [lian2 xu4] /continuer (n.v.) (v)/
+臉 脸 [lian3] /visage, face (n.v.) (n)/
+聊 聊 [liao2] /discuter, parler de (n.v.) (v)/
+林 林 [lin2] /forêt, bois, garène (n.v.) (n)/
+流行歌曲 流行歌曲 [liu2 xing2 ge1 qu3] /chanson populaire (n.v.) (n)/
+六 六 [liu4] /six, 6 (n.v.) (num)/
+樓 楼 [lou2] /niveau, étage (n.v.) (n)/
+樓上 楼上 [lou2 shang4] /en haut, à l'étage (n.v.) (n)/
+樓梯 楼梯 [lou2 ti1] /escalier (n.v.) (n)/
+樓下 楼下 [lou2 xia4] /en bas, au rez de chaussé (n.v.) (n)/
+螻蛄 蝼蛄 [lou2 gu1] /Courtilière ou taupe-grillon (insecte de la famille des gryllotalpidés) (n.v.)/
+媽 妈 [ma1] /mère, maman (n.v.) (n)/
+媽媽 妈妈 [ma1 ma5] /maman (n.v.) (n)/
+馬 马 [ma3] /cheval (n.v.) (n)/
+嗎 吗 [ma5] /Est-ce que, marque de l'interrogatif (n.v.)/
+賣座 卖座 [mai4 zuo4] /succès de vente, faire un taba (n.v.) (adj)/
+慢跑 慢跑 [man4 pao3] /jogging (n.v.) (n)/trotter, faire du jogging, faire de la course à pied (n.v.) (v)/
+貓 猫 [mao1] /chat (n.v.) (n)/
+沒關係 没关系 [mei2 guan1 xi5] /ce n'est pas grave, cela n'a pas d'importante, ne vous en faite pas (n.v.)/
+妹妹 妹妹 [mei4 mei5] /petite sœur (n.v.) (n)/
+們 们 [men5] /...s, marque du pluriel (n.v.)/
+迷失 迷失 [mi2 shi1] /perdre ses repères géographique ou moraux, s'égarer, perdre le nord (n.v.) (v)/
+名 名 [ming2] /nom (n.v.) (n)/
+明 明 [ming2] /lumière, lumineux (n.v.)/prochain (n.v.)/
+哪兒 哪儿 [na3 er5] /où ? (n.v.)/
+哪裡 哪里 [na3 li5] /où ? (n.v.)/
+那些 哪些 [na3 xie1] /ces (n.v.)/
+那邊 那边 [na4 bian5] /là bas, de ce côté (n.v.)/
+那兒 那儿 [na4 er5] /là bas (n.v.)/
+那裡 那里 [na4 li5] /là bas (n.v.)/
+那麼 那么 [na4 me5] /Dans ce cas, Si c'est ainsi (n.v.) (adv)/
+耐人尋味 耐人寻味 [nai4 ren2 xun2 wei4] /Fournir matière à réflexion (n.v.) (v)/
+男 男 [nan2] /homme, mâle, garçon (n.v.) (n)/
+男孩子 男孩子 [nan2 hai2 zi5] /garçon (n.v.) (n)/
+男朋友 男朋友 [nan2 peng2 you3] /petit ami, petit copain (n.v.) (n)/
+男人 男人 [nan2 ren2] /homme (masculin) (n.v.) (n)/
+男主角 男主角 [nan2 zhu3 jiao3] /acteur principal (n.v.) (n)/
+難怪 难怪 [nan2 guai4] /sans surprise (n.v.) (adv)/
+難 难 [nan4] /difficile, dur (n.v.) (adj)/
+呢 呢 [ne5] /et toi ?, particule interrogative (n.v.)/
+你好 你好 [ni3 hao3] /bonjour (n.v.) (int)/
+年 年 [nian2] /année (n.v.) (n)/
+嚙齒目 啮齿目 [nie4 chi3 mu4] /Ordre des rongeurs (n.v.) (n)/
+您 您 [nin2] /vous (politesse) (n.v.) (pron)/
+牛仔褲 牛仔裤 [niu2 zai3 ku4] /Blue-jeans, Jeans (n.v.) (n)/
+牛仔 牛仔 [niu2 zi3] /Cowboy (n.v.) (n)/
+紐約州 纽约州 [niu3 yue1 zhou1] /État de New York (n.v.) (prop)/
+女 女 [nü3] /fille, femme, féminin (n.v.)/
+女兒 女儿 [nü3 er2] /fille (des parents) (n.v.) (n)/
+女孩子 女孩子 [nü3 hai2 zi5] /fille (n.v.) (n)/
+女朋友 女朋友 [nü3 peng2 you3] /petite amie, petite copine (n.v.) (n)/
+女人 女人 [nü3 ren2] /femme (n.v.) (n)/
+女詩人 女诗人 [nü3 shi1 ren2] /poétesse (n.v.) (n)/
+女主角 女主角 [nü3 zhu3 jiao3] /actrice principale (n.v.) (n)/
+排球 排球 [pai2 qiu2] /volley-ball (n.v.) (n)/
+旁邊 旁边 [pang2 bian1] /à côté (n.v.)/
+配角 配角 [pei4 jue2] /second rôle, personnage secondaire (n.v.) (n)/
+朋 朋 [peng2] /ami (n.v.) (n)/
+朋友 朋友 [peng2 you5] /ami (n.v.) (n)/
+篇 篇 [pian1] /(spécificatif pour articles, rédactions, essaies,...) (n.v.) (meas)/
+瓶 瓶 [ping2] /bouteille, jarre, recipient, vase (n.v.)/(spécificatif pour les recipients solides pour liquides type bouteilles, vases, etc.) (n.v.) (meas)/
+瓶子 瓶子 [ping2 zi5] /bouteille (n.v.) (n)/
+評論 评论 [ping2 lun4] /commentaire (n.v.) (n)/
+破 破 [po4] /être cassé, endommagé (n.v.) (adj)/
+七 七 [qi1] /sept, 7 (n.v.) (num)/
+奇怪 奇怪 [qi2 guai4] /étrange, inabituel (n.v.) (adj)/
+起初 起初 [qi3 chu1] /au début, à l'origine (n.v.)/
+氣憤 气愤 [qi4 fen4] /révoltant, indignant (n.v.) (adj)/
+氣人 气人 [qi4 ren2] /énervant, ennuyeux, irritant (n.v.) (adj)/
+前 前 [qian2] /devant, avant (n.v.)/
+強調 强调 [qiang2 diao4] /souligner, mettre l'accent sur (n.v.) (v)/
+鍬形蟲 锹形虫 [qiao1 xing2 chong2] /Lucane (terme général désignant les coléoptères de la famille des lucanidés) (n.v.) (n)/
+鞘翅 鞘翅 [qiao4 chi4] /élytre des coléoptères (n.v.)/
+清楚 清楚 [qing1 chu3] /être clair (n.v.) (adj)/
+青 青 [qing1] /vert-bleu (n.v.) (adj)/
+請 请 [qing3] /s'il vous plaît (n.v.) (v)/
+球 球 [qiu2] /balle, ballon (n.v.) (n)/
+糗 糗 [qiu3] /ambarasser, embêter (n.v.) (v)/ambarassant (n.v.) (adj)/
+全國 全国 [quan2 guo2] /tout le pays, la nation entière (n.v.) (n)/
+全家 全家 [quan2 jia1] /toute la famille (n.v.)/
+全校 全校 [quan2 jiao4] /toute l'école (n.v.) (n)/
+全世界 全世界 [quan2 shi4 jie4] /toute la terre, l'humanité entière (n.v.) (n)/
+熱烈 热烈 [re4 lie4] /chaleureusement accueillit (n.v.)/
+熱門 热门 [re4 men2] /être populaire, fréquenté (n.v.) (adj)/
+熱門音樂 热门音乐 [re4 men2 yin1 yue4] /musique populaire (n.v.) (n)/
+人民 人民 [ren2 min2] /peuple (n.v.) (n)/
+忍耐 忍耐 [ren3 nai4] /patience, contrôle (n.v.) (n)/tenir bon, patienter (n.v.) (v)/
+熱鬧 热闹 [ren4 ao5] /vivant, bruyant (n.v.) (adj)/
+認 认 [ren4] /reconnaitre, connaitre, admettre (n.v.) (v)/
+日 日 [ri4] /Japon (n.v.)/jour (n.v.)/lumière (n.v.)/
+鯊魚 鲨鱼 [sha1 yu2] /requin (n.v.) (n)/
+商店 商店 [shang1 dian4] /magasin (n.v.) (n)/
+上 上 [shang4] /sur, dessus, au dessus (n.v.)/monter, s'assoir (n.v.) (v)/
+上面 上面 [shang4 mian5] /sur, dessus (n.v.)/
+設計 设计 [she4 ji4] /design, dessin (n.v.) (n)/
+身 身 [shen1] /corps (n.v.) (n)/
+什麼的 什么的 [shen2 me5 de5] /et etc., et ainsi de suite (n.v.)/
+升天 升天 [sheng1 tian1] /monter aux cieux, mourrir (n.v.) (v)/
+生 生 [sheng1] /créature (n.v.) (n)/naître (n.v.) (v)/
+生氣 生气 [sheng1 qi4] /être énervé, vexé (n.v.) (adj)/
+蝨子 虱子 [shi1 zi5] /Pou (pediculus humanus) (n.v.)/
+什 什 [shi2] /quoi (n.v.)/
+十八 十八 [shi2 ba1] /dix-huit, 18 (n.v.) (num)/
+十二 十二 [shi2 er4] /douze, 12 (n.v.) (num)/
+十九 十九 [shi2 jiu3] /dix-neuf, 19 (n.v.) (num)/
+十六 十六 [shi2 liu4] /seize, 16 (n.v.) (num)/
+十七 十七 [shi2 qi1] /dix-sept, 17 (n.v.) (num)/
+十三 十三 [shi2 san1] /treize, 13 (n.v.) (num)/
+十四 十四 [shi2 si4] /quatorze, 14 (n.v.) (num)/
+十五 十五 [shi2 wu3] /quinze, 15 (n.v.) (num)/
+十一 十一 [shi2 yi1] /onze, 11 (n.v.) (num)/
+實際 实际 [shi2 ji4] /pratique, réaliste (n.v.) (adj)/
+石 石 [shi2] /cailloux, pierre, roche (n.v.)/
+食蟲目 食虫目 [shi2 chong2 mu4] /Ordre des insectivores (ancien ordre de la famille des mammifères aujourd'hui abandonné) (n.v.) (n)/
+事件 事件 [shi4 jian4] /incident (n.v.) (n)/
+事情 事情 [shi4 qing2] /affaire, chose a faire, évènement (n.v.) (n)/
+室友 室友 [shi4 you3] /camarade de chambre (n.v.) (n)/
+是 是 [shi4] /être (n.v.) (v)/oui (n.v.)/
+識 识 [shi4] /connaître, savoir, se souvenir (n.v.) (v)/
+守歲 守岁 [shou3 sui4] /faire nuit blanche pour la nouvelle année (n.v.) (v)/
+受不了 受不了 [shou4 bu5 liao3] /ne pas supporter, en avoir assez (n.v.) (v)/
+書桌 书桌 [shu1 zhuo1] /bureau (meuble) (n.v.) (n)/
+暑假 暑假 [shu3 jia4] /vancances d'été (n.v.) (n)/
+鼠 鼠 [shu3] /rat (n.v.)/
+水 水 [shui3] /eau (n.v.) (n)/
+水牛成 水牛成 [shui3 niu2 cheng2] /Buffalo (n.v.) (prop)/
+水準 水准 [shui3 zhun3] /niveau (n.v.) (n)/
+說 说 [shui4] /dire (n.v.) (v)/
+四 四 [si4] /quatre, 4 (n.v.) (num)/
+宿舍 宿舍 [su4 she4] /dortoir (n.v.) (n)/
+隨便 随便 [sui2 bian4] /comme vous voulez, selon vos souhaits (n.v.)/
+隨時 随时 [sui2 shi2] /n'importe quand, tout moment, lorsque cela vous conviendra (n.v.)/
+碎 碎 [sui4] /casser en pièce de la céramique ou du verre (n.v.) (v)/
+他 他 [ta1] /il, lui (n.v.) (pron)/
+她 她 [ta1] /elle (n.v.)/
+台北國家劇院 台北国家剧院 [tai2 bei3 guo2 jia1 ju4 yuan4] /Le Théâtre National de Taipei (n.v.) (n, geo)/
+貪 贪 [tan1] /convoiter, souhaiter avidement, désirer (n.v.) (v)/détourner, avoir céder à la tentation (n.v.) (v)/être avide, être corrompu (n.v.) (adj)/
+湯圓 汤圆 [tang1 yuan2] /boulette blanche de patte de riz présente dans une soupe et souvent fourée d'un élément sucré (n.v.) (n)/
+躺 躺 [tang3] /s'allonger, être allonger, s'étendre (n.v.) (v)/
+疼 疼 [teng2] /avoir mal, souffrir (n.v.) (adj)/
+梯子 梯子 [ti1 zi5] /échelle, escabot (n.v.) (n)/
+題材 题材 [ti2 cai2] /sujet, thème (n.v.) (n)/
+天 天 [tian1] /ciel, jour, cieux (n.v.)/
+田 田 [tian2] /champ (n.v.) (n)/
+跳蚤 跳蚤 [tiao4 zao3] /Puce (n.v.) (n)/
+同情 同情 [tong2 qing2] /compation (n.v.) (n)/compatir, montrer de la compation (n.v.) (v)/
+同情心 同情心 [tong2 qing2 xin1] /compassion, empathie (n.v.) (n)/
+偷 偷 [tou1] /voler (un objet) (n.v.) (v)/
+圖書館 图书馆 [tu2 shu1 guan3] /bibliothèque (n.v.) (n)/
+土 土 [tu3] /terre (matière) (n.v.) (n)/
+土樓 土楼 [tu3 lou2] /maison communautaire traditionnelle circulaire de la région du Fujian. (n.v.) (n)/
+團圓 团圆 [tuan2 yuan2] /réunion de famille, d'un groupe (n.v.) (n)/
+外面 外面 [wai4 mian4] /dehors, à l'exterieur (n.v.) (n)/
+完 完 [wan2] /finir une tache, achever une action, (n.v.) (v)/
+完全 完全 [wan2 quan2] /tout, totalement, entièrement, l'ensemble de (n.v.) (adv)/
+萬裡長城 万里长城 [wan4 li3 chang2 cheng2] /Grande muraille de Chine (n.v.) (n)/
+萬事如意 万事如意 [wan4 shi4 ru2 yi4] /ayez tout ce que votre coeur désire (n.v.)/
+王 王 [wang2] /roi, nom de famille "Wang" (n.v.)/
+網球 网球 [wang3 qiu2] /tennis (sport) (n.v.) (n)/
+唯一 唯一 [wei2 yi1] /le seul, l'unique (n.v.)/
+維語 维语 [wei2 yu3] /langue ouïgoure (n.v.) (n)/
+文化 文化 [wen2 hua4] /culture (n.v.) (n)/
+我 我 [wo3] /je (n.v.) (pron)/
+握手 握手 [wo4 shou3] /serrer la main (n.v.)/
+烏龍茶 乌龙茶 [wu1 long2 cha2] /Thé Oolong (n.v.)/
+屋子 屋子 [wu1 zi5] /pièce de maison (n.v.) (n)/
+五 五 [wu3] /cinq, 5 (n.v.)/
+午 午 [wu3] /midi (n.v.)/
+午夜 午夜 [wu3 ye4] /minuit (n.v.) (n)/
+舞台劇 舞台剧 [wu3 tai2 ju4] /scène (n.v.) (n)/
+夕 夕 [xi1] /soir (n.v.) (n)/
+希望 希望 [xi1 wang4] /espoir (n.v.) (n)/espérer (n.v.)/
+西方 西方 [xi1 fang1] /l'occident, l'ouest (n.v.) (n)/
+西山朝 西山朝 [xi1 shan1 chao2] /Dynastie des Tây Sơn (Vietnam) (n.v.)/
+習 习 [xi2] /s'exercer, pratiquer (n.v.) (v)/
+喜劇 喜剧 [xi3 ju4] /comédie (n.v.) (n)/
+洗手間 洗手间 [xi3 shou3 jian1] /Toilette (n.v.) (n)/
+洗澡 洗澡 [xi3 zao3] /se doucher, se laver, se baigner (n.v.) (v)/
+洗澡間 洗澡间 [xi3 zao3 jian1] /douche, salle de bain (n.v.) (n)/
+戲 戏 [xi4] /pièce de théâtre (n.v.) (n)/
+戲劇 戏剧 [xi4 ju4] /théâtre (n.v.) (n)/
+系 系 [xi4] /département d'étude (n.v.) (n)/lier, filer (n.v.) (v)/
+下 下 [xia4] /descendre (n.v.) (v)/sous, dessous, suivant (semaines, cours), au dessous (n.v.)/
+下面 下面 [xia4 mian4] /sous, desous, en dessous (n.v.)/
+像片兒 像片儿 [xiang4 pian4 er2] /photographie (n.v.) (n)/
+消息 消息 [xiao1 xi2] /nouvelle, information (n.v.) (n)/
+小 小 [xiao3] /jeune (n.v.)/petit, peu (n.v.)/
+小偷 小偷 [xiao3 tou1] /voleur (n.v.) (n)/
+小學 小学 [xiao3 xue2] /école primaire (n.v.) (n)/
+效率 效率 [xiao4 lü4] /efficacité (n.v.) (n)/
+笑話 笑话 [xiao4 hua4] /blague (n.v.) (n)/se moquer de, rire de (n.v.) (v)/
+寫 写 [xie3] /écrire (n.v.) (v)/
+謝 谢 [xie4] /merci (n.v.) (int)/Xie (n.v.) (n, fam)/
+謝謝 谢谢 [xie4 xie5] /merci (n.v.)/
+心 心 [xin1] /coeur (n.v.) (n)/
+興 兴 [xing1] /florissant, à la mode (n.v.)/
+行為 行为 [xing2 wei2] /comportement, conduite (n.v.) (n)/
+興趣 兴趣 [xing4 qu4] /centre d'intérêt (n.v.) (n)/
+姓 姓 [xing4] /nom de famille (n.v.)/
+幸會 幸会 [xing4 hui4] /enchanté, enchanté de vous connaitre (n.v.)/
+姓名 性命 [xing4 ming4] /nom complet (n.v.) (n)/
+選 选 [xuan3] /choisir, sélectionner (n.v.) (v)/
+學生 学生 [xue2 sheng5] /étudiant (n.v.) (n)/
+壓歲錢 压岁钱 [ya1 sui4 qian2] /enveloppe rouge avec de l'argent neuf offert aux enfants et jeunes de la famille proche lors de la nouvelle année chinoise (n.v.) (n)/
+研究 研究 [yan2 jiu1] /recherche, étude (n.v.) (n)/
+研究所 研究所 [yan2 jiu4 suo3] /Institue, université (n.v.) (n)/
+言 言 [yan2] /parole, discour (n.v.) (n)/dire (n.v.) (v)/
+演唱會 演唱会 [yan3 chang4 hui4] /concert (chant et musique) (n.v.) (n)/
+演技 演技 [yan3 ji4] /capacité d'acteur, talent d'acteur, performance théâtrale (n.v.) (meas)/
+演員 演员 [yan3 yuan2] /acteur (n.v.) (n)/
+眼淚 眼泪 [yan3 lei4] /larme (n.v.) (n)/
+鼴鼠 鼹鼠 [yan3 shu3] /Taupe (n.v.) (n)/
+鑰匙 钥匙 [yao4 shi5] /clef, clé (n.v.) (n)/
+也 也 [ye3] /aussi (n.v.) (adv)/
+一 一 [yi1] /un, une, 1 (n.v.)/
+儀式 仪式 [yi2 shi4] /cérémonie, rite (n.v.) (n)/
+移民 移民 [yi2 min2] /migrant, immigrant (n.v.) (n)/migrer, immigrer (n.v.) (v)/
+椅子 椅子 [yi3 zi5] /Chaise (n.v.) (n)/
+意義 意义 [yi4 yi4] /signification, sens (n.v.) (n)/
+藝術工作者 艺术工作者 [yi4 shu4 gong1 zuo4 zhe3] /personne travaillant dans les arts (n.v.) (n, art)/
+藝術家 艺术家 [yi4 shu4 jia1] /artiste (n.v.) (n)/
+陰曆 阴历 [yin1 li4] /calendrier lunaire (n.v.) (n)/
+音樂 音乐 [yin1 yue4] /musique (n.v.) (n, mus)/
+音樂家 音乐家 [yin1 yue4 jia1] /musicien (n.v.) (n)/
+銀行 银行 [yin2 hang2] /banque (n.v.) (n)/
+迎接 迎接 [ying2 jie5] /saluer, rencontrer, accueillir (n.v.) (v)/
+尤其是 尤其是 [you2 qi2 shi4] /en particulier (n.v.) (adv)/
+游泳 游泳 [you2 yong3] /nager (n.v.) (v)/
+遊 游 [you2] /nager (n.v.) (v)/
+由於 由于 [you2 yu2] /C'est parceque, Du fait que (n.v.) (conj)/
+友 友 [you3] /ami (n.v.) (n)/amical (n.v.)/
+有的 有的 [you3 de5] /certains, il y a des (n.v.) (n)/
+有規矩 有规矩 [you3 gui1 ju3] /avoir de la politesse, être poli (n.v.) (adj)/
+有趣 有趣 [you3 qu4] /intéressant, fascinant, amusant (n.v.) (adj)/
+有興趣 有兴趣 [you3 xing4 qu4] /être intéressé, avoir de la curiosité (n.v.) (v)/
+又 又 [you4] /encode, aussi (n.v.)/
+語 语 [yu3] /langage, langue (n.v.) (n)/
+元寶 元宝 [yuan2 bao3] /ravioli du jour de l'an chinois (n.v.) (n)/
+元宵節 元宵节 [yuan2 xiao1 jie2] /Festival des lanternes (n.v.) (prop)/
+遠 远 [yuan3] /éloigné, lointain, distant (n.v.) (adj)/
+院子 院子 [yuan4 zi5] /jardin (n.v.) (n)/
+月 月 [yue4] /lune, mois (n.v.) (n)/
+運動 运动 [yun4 dong4] /sport, exercice physique (n.v.) (n)/faire du sport, faire de l'exercice (n.v.) (v)/
+雜誌 杂志 [za2 zhi4] /magazine (n.v.) (n)/
+再見 再见 [zai4 jian4] /au revoir (n.v.) (int)/
+再說 再说 [zai4 shuo1] /de plus, mais en plus (n.v.) (adv)/
+在 在 [zai4] /à, sur, dans (n.v.)/
+遭遇 遭遇 [zao1 yu4] /experience très difficile ou malheureuse (n.v.) (n)/rencontrer des malheurs (n.v.) (v)/
+灶神 灶神 [zao4 shen2] /Dieu du fourneau (n.v.) (n)/
+怎麼 怎么 [zen3 me5] /comment ce fait-il que, pourquoi (n.v.)/
+摘 摘 [zhai1] /prendre et poser plus bas, cueillir, arracher; choisir, sélectionner (n.v.) (v)/
+召開 召开 [zhao4 kai1] /Convoquer une réunion (n.v.) (v)/
+照顧 照顾 [zhao4 gu4] /s'occuper de, prendre soin de (n.v.) (v)/
+這兒 这儿 [zhe4 er5] /ici (n.v.)/
+這裡 这里 [zhe4 li3] /ici (n.v.)/
+這些 这些 [zhe4 xie1] /ces (n.v.)/
+真實 真实 [zhen1 shi2] /réel, factuel, vrai (n.v.) (adj)/
+正月 正月 [zheng1 yue4] /permier mois lunaire (n.v.) (n)/
+正規 正规 [zheng4 gui1] /normal, standard (n.v.) (adj)/
+正面 正面 [zheng4 mian4] /côté positif, positif (n.v.)/en face, devant (n.v.)/
+只 只 [zhi3] /juste, seulement (n.v.) (adv)/
+中國菜 中国菜 [zhong1 guo2 cai4] /cuisine chinoise (n.v.) (n)/
+終身 终身 [zhong1 shen1] /toute la vie, pour la vie (n.v.) (n)/
+螽斯 螽斯 [zhong1 si1] /Sauterelle (de la famille des tettigonidés) (n.v.)/
+重要 重要 [zhong4 yao4] /être important, vital (n.v.) (adj)/
+豬 猪 [zhu1] /cochon (n.v.) (n)/
+主角 主角 [zhu3 jue2] /acteur principal (n.v.) (n)/
+住宅區 住宅区 [zhu4 zhai2 qu1] /zone résidentielle (n.v.) (n)/
+捉 捉 [zhuo1] /attraper, capturer (n.v.) (v)/
+桌子 桌子 [zhuo1 zi5] /table (n.v.) (n)/
+字典 字典 [zi4 dian3] /dictionnaire (n.v.) (n)/
+總是 总是 [zong3 shi4] /toujours, systématiquement (n.v.) (adv)/
+足球 足球 [zu2 qiu2] /football (n.v.) (n)/
+作家 作家 [zuo4 jia1] /écrivain, artiste litéraire, poète (n.v.) (n)/
View
42 pinyin/dictionaries/downloader.py
@@ -1,8 +1,10 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
+import os
import re
import urllib
import urlparse
+import zipfile
import pinyin.utils as utils
@@ -12,15 +14,21 @@ def cedictParser(page):
m_page = re.search('<a href="([^"]*cedict_[^"]+_ts_utf-8_mdbg\\.zip)">', page)
return utils.bind_none(m_page, lambda m_page: utils.bind_none(m_date, lambda m_date: (m_page.group(1), (int(m_date.group(1)), int(m_date.group(2)), int(m_date.group(3))))))
+cedictUseful = lambda _: True # The zip contains a single file - the UTF8 dictionary. Perfect.
+
def handedictParser(page):
return utils.bind_none(re.search('<a href="([^"]*handedict-([0-9]{8})\\.zip)">', page), lambda m: (m.group(1), splitRunOnDate(m.group(2))))
+handedictUseful = lambda name: name.endswith("handedict_nb.u8")
+
def cfdictParser(page):
return utils.bind_none(re.search('<a href="([^"]*cfdict-([0-9]{8})\\.zip)">', page), lambda m: (m.group(1), splitRunOnDate(m.group(2))))
-dictionaries = [("CEDICT", "http://usa.mdbg.net/chindict/chindict.php?page=cc-cedict", cedictParser),
- ("HanDeDict", "http://www.chinaboard.de/chinesisch_deutsch.php?mode=dl&w=8", handedictParser),
- ("CFDICT", "http://www.chinaboard.de/fr/cfdict.php?mode=dl&w=8", cfdictParser)]
+cfdictUseful = lambda name: name.endswith("cfdict_nb.u8")
+
+dictionaries = [("CEDICT", "http://usa.mdbg.net/chindict/chindict.php?page=cc-cedict", cedictParser, cedictUseful),
+ ("HanDeDict", "http://www.chinaboard.de/chinesisch_deutsch.php?mode=dl&w=8", handedictParser, handedictUseful),
+ ("CFDICT", "http://www.chinaboard.de/fr/cfdict.php?mode=dl&w=8", cfdictParser, cfdictUseful)]
def splitRunOnDate(date):
@@ -31,7 +39,7 @@ def runOnDate(date_tuple):
if __name__ == '__main__':
- for name, url, parser in dictionaries:
+ for name, url, parser, useful in dictionaries:
print "Querying download page for", name
# Download the contents of the download page itself
@@ -51,5 +59,27 @@ def runOnDate(date_tuple):
# Great - download the dictionary to the well-known location
zip_path = utils.toolkitdir("pinyin", "dictionaries", name.lower() + "-" + runOnDate(date) + ".zip")
- urllib.urlretrieve(urlparse.urljoin(url, zip_relative_url), zip_path)
- print "> Downloaded to", zip_path
+ if os.path.exists(zip_path):
+ print "> Skipping download because it already exists"
+ else:
+ urllib.urlretrieve(urlparse.urljoin(url, zip_relative_url), zip_path)
+ print "> Downloaded to", zip_path
+
+ # Trim the zip files down to size by removing useless dictionaries:
+
+ # a) Gather the information we actually want to keep
+ sourcezip = zipfile.ZipFile(zip_path, "r")
+ target = {}
+ for name in sourcezip.namelist():
+ if not useful(name):
+ print "> Removing", name, "from file"
+ continue
+
+ target[name] = sourcezip.read(name)
+ sourcezip.close()
+
+ # b) Truncate the zip file and write back just that information
+ targetzip = zipfile.ZipFile(zip_path, "w")
+ for name, contents in target.items():
+ targetzip.writestr(name, contents)
+ targetzip.close()
View
BIN  pinyin/dictionaries/handedict-20090912.zip
Binary file not shown
View
143,482 pinyin/dictionaries/handedict-20091119.txt
143,482 additions, 0 deletions not shown
View
146 pinyin/dictionaryonline.py
@@ -37,7 +37,7 @@ def gTrans(query, destlanguage='en', prompterror=True):
return None
except ValueError, e:
# Not an internet problem
- log.exception("Error while parsing Google response: %s" % repr(literal))
+ log.exception("Error while interpreting translation response from Google")
if prompterror:
return [[Word(Text('<span style="color:gray">[Error In Google Translate Response]</span>'))]]
else:
@@ -72,11 +72,11 @@ def lookup(query, destlanguage):
# Parse the response:
try:
- log.info("Parsing response %s from Google", literal)
+ log.info("Parsing response %r from Google", literal)
result = parsegoogleresponse(literal)
except ValueError, e:
# Give the exception a more precise error message for debugging
- raise ValueError("Error while parsing translation response from Google")
+ raise ValueError("Error while parsing translation response from Google: %s" % str(e))
# What sort of result did we get?
if isinstance(result, basestring):
@@ -99,10 +99,37 @@ def lookup(query, destlanguage):
try:
return [[Word(Text(result[0]))]] + [[Word(Text(definition[0].capitalize() + ": " + ", ".join(definition[1:])))] for definition in result[1]]
except IndexError:
- raise ValueError("Result %s from Google Translate looked like a definition but was not in the expected format" % result)
+ raise ValueError("Result %s from Google Translate looked like a definition but was not in the expected list format" % str(result))
+ elif isinstance(result, dict):
+ # Oh dear, they have devised another method of returning results. This time, it looks like this:
+ # {"sentences":[{"trans":"Hello, you are my friend?",
+ # "orig":"你好,你是我的朋友吗?",
+ # "translit":""}],
+ # "src":"zh-CN"}
+ # {"sentences":[{"trans":"Well",
+ # "orig":"好",
+ # "translit":""}],
+ # "dict":[{"pos":"verb",
+ # "terms":["like","love"]},
+ # {"pos":"adjective",
+ # "terms":["good"]},
+ # {"pos":"adverb",
+ # "terms":["fine","OK","okay","okey","okey dokey","well"]},
+ # {"pos":"interjection",
+ # "terms":["OK!","okay!","okey!"]}],
+ # "src":"en"}
+ try:
+ sentences = " ".join([sentence["trans"] for sentence in result["sentences"]])
+ if sentences == query:
+ # The result was, unhelpfully, what we queried. Give up:
+ return None
+ else:
+ return [[Word(Text(sentences))]] + [[Word(Text(definition["pos"].capitalize() + ": " + ", ".join(definition["terms"])))] for definition in result.get("dict", [])]
+ except KeyError:
+ raise ValueError("Result %s from Google Translate looked like a definition but was not in the expected dict format" % str(result))
else:
# Haven't seen any other case in the wild
- raise ValueError("Couldn't deal with the correctly-parsed response %s from Google" % result)
+ raise ValueError("Couldn't deal with the correctly-parsed response %s from Google" % str(result))
def parsegoogleresponse(response):
# This code is basically a hand-rolled (and rather specialised) LL parser
@@ -110,17 +137,15 @@ def parsegoogleresponse(response):
# * String literals (with escaping) of the form "foo\tbar", possibly containing Unicode
# * Numeric literals (returned as longs)
# * List literals
+ # * Dictionary literals
itemseperatorregex = re.compile('\\s*,')
listendregex = re.compile('\\s*\\]')
+ kvpseperatorregex = re.compile('\\s*:')
+ dictendregex = re.compile('\\s*\\}')
- # Utility to consume from the string using the regex and return the new string if successful
- def munch(regex, what):
- match = regex.match(what)
- if match:
- return what[match.end():]
- else:
- return None
+ def literaltoken(match, what):
+ return match, what
def stringtoken(match, what):
# Remove escape characters from the captured string with eval - nasty!
@@ -131,53 +156,90 @@ def numbertoken(match, what):
return long(match.group(0)), what
def listtoken(match, what):
- list = []
+ thelist = []
while True:
# Process this list item
- item, what = parse(what)
- list.append(item)
+ item, what = expraction(what)
+ thelist.append(item)
# End of item - must be followed by a comma or closing bracket
- whataftercomma = munch(itemseperatorregex, what)
+ whataftercomma = makemunchaction(itemseperatorregex)(what)
if whataftercomma is None:
- # No comma - must be list end
- what = munch(listendregex, what)
- if what is None:
- # No list end - very confusing!
- raise ValueError("Unexpected end of list with no closing bracket")
- else:
- # End of list: continue after the closing bracket
- return list, what
+ # End of list: continue after the closing bracket
+ return thelist, unfailing("the end of a list", makemunchaction(listendregex))(what)
else:
# Comma: expect another item, so continue after the comma
what = whataftercomma
- def whitespacetoken(match, what):
- return parse(what)
+ def dicttoken(match, what):
+ thedict = {}
+ while True:
+ # Process this dict key/value pair
+ key, what = expraction(what)
+ _, what = unfailing("a dictionary key-value pair seperator", makeparseaction(kvpseperatorregex, literaltoken))(what)
+ value, what = expraction(what)
+ thedict[key] = value
+
+ # End of item - must be followed by a comma or closing brace
+ whataftercomma = makemunchaction(itemseperatorregex)(what)
+ if whataftercomma is None:
+ # End of dict: continue after the closing brace
+ return thedict, unfailing("the end of a dictionary", makemunchaction(dictendregex))(what)
+ else:
+ # Comma: expect another item, so continue after the comma
+ what = whataftercomma
+
+ # Utility to consume from the string using the regex and return the new string if successful
+ makemunchaction = lambda regex: makeparseaction(regex, lambda match, what: what)
+
+ def makeparseaction(regex, processor):
+ def inner(what):
+ match = regex.match(what)
+
+ # Run processor if the regular expression matches
+ if match:
+ return processor(match, what[match.end():])
+ else:
+ return None
+
+ return inner
+ def makechoiceaction(actions):
+ def inner(what):
+ # Match processors from top to bottom
+ for action in actions:
+ result = action(what)
+ if result:
+ return result
+
+ return None
+
+ return inner
+
+ def unfailing(text, action):
+ def inner(what):
+ result = action(what)
+ if result is None:
+ raise ValueError("Couldn't parse %s when expecting %s" % (repr(what), text))
+ else:
+ return result
+
+ return inner
+
# Action table keyed off regular expressions. Matched top to bottom against
# the current string, with the corresponding token handler fired if the regex
# can deal with it.
- actions = [
- (re.compile('"((?:[^\\\\"]|\\\\.)*)"'), stringtoken),
- (re.compile('-?[0-9]+'), numbertoken),
- (re.compile('\\['), listtoken),
- (re.compile('\\s+'), whitespacetoken)
- ]
+ stringaction = makeparseaction(re.compile('"((?:[^\\\\"]|\\\\.)*)"'), stringtoken)
+ intaction = makeparseaction(re.compile('-?[0-9]+'), numbertoken)
+ listaction = makeparseaction(re.compile('\\['), listtoken)
+ dictaction = makeparseaction(re.compile('\\{'), dicttoken)
# Parse loop using the action table
- def parse(what):
- # Match processors from top to bottom
- for regex, processor in actions:
- # Run processor if the regular expression matches
- match = regex.match(what)
- if match:
- return processor(match, what[match.end():])
-
- raise ValueError("Couldn't parse %s" % repr(what))
+ whitespacetoken = lambda match, what: expraction(what)
+ expraction = unfailing("an expression", makechoiceaction([stringaction, intaction, listaction, dictaction, makeparseaction(re.compile('\\s+'), whitespacetoken)]))
# Use the constructed action table to parse the supplied string
- value, rest = parse(response)
+ value, rest = expraction(response)
if len(rest) != 0:
raise ValueError("Unexpected trailing characters %s" % repr(rest))
else:
View
192 pinyin/model.py
@@ -3,7 +3,7 @@
import htmlentitydefs
import re
-from sgmllib import SGMLParser
+from BeautifulSoup import BeautifulSoup, Tag
import sqlalchemy
import unicodedata
@@ -330,135 +330,109 @@ def tokenizetext(text, forcenumeric):
Turns an arbitrary string containing pinyin and HTML into a sequence of tokens. Does its best
to separate pinyin out from normal text, but no guarantees!
"""
+
def tokenize(html, forcenumeric=False):
- tokenizer = HTMLAwareTokenizer(forcenumeric)
- tokenizer.feed(html)
- tokenizer.close()
-
- return tokenizer.tokens
+ def extract_attr_maybe(attrs, attr, into, extractor):
+ if attr not in attrs:
+ return {}
-class HTMLAwareTokenizer(SGMLParser):
- def __init__(self, forcenumeric):
- self.forcenumeric = forcenumeric
- SGMLParser.__init__(self)
-
- def reset(self):
- self.tokens = []
- self.attributesstack = []
- SGMLParser.reset(self)
-
- def unknown_starttag(self, tag, attrs):
- strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
- self.tokens.append(Text("<%s%s>" % (tag, strattrs)))
+ res = extractor(attrs[attr])
+ if res is None:
+ return {}
- def unknown_endtag(self, tag):
- self.tokens.append(Text("</%s>" % tag))
+ (extracted, newattrval) = res
+ if newattrval is not None:
+ attrs[attr] = newattrval
+ else:
+ del attrs[attr]
- def start_span(self, attrs):
- def extract_attr_maybe(attr, into, extractor):
- if attr not in attrs:
- return {}
-
- res = extractor(attrs[attr])
- if res is None:
- return {}
-
- (extracted, newattrval) = res
- if newattrval is not None:
- attrs[attr] = newattrval
- else:
- del attrs[attr]
-
- return { into : extracted }
+ return { into : extracted }
- def take_dict_elem(dict, key):
- if key in dict:
- val = dict[key]
- del dict[key]
- return (val, dict)
+ def take_dict_elem(dict, key):
+ if key in dict:
+ val = dict[key]
+ del dict[key]
+ return (val, dict)
+ else:
+ return None
+
+ # Quick, dirty and wrong:
+ def parse_style(style):
+ intelligible = {}
+ unintelligible = []
+ for pair in style.split(";"):
+ split = pair.split(":")
+ if len(split) == 2:
+ k, v = split
+ intelligible[k.strip().lower()] = v
else:
- return None
-
- # Quick, dirty and wrong:
- def parse_style(style):
- intelligible = {}
- unintelligible = []
- for pair in style.split(";"):
- split = pair.split(":")
- if len(split) == 2:
- k, v = split
- intelligible[k.strip().lower()] = v
- else:
- unintelligible.append(pair)
-
- return (intelligible, unintelligible)
-
- return dict([(pair.split(":")[0].strip(), pair.split(":")[1].strip()) for pair in style.split(";")])
+ unintelligible.append(pair)
- def unparse_style(parsed_style):
- intelligible, unintelligible = parsed_style
- return "; ".join([k + " : " + v for k, v in intelligible] + unintelligible)
+ return (intelligible, unintelligible)
- # It's more convenient if we can see the attributes as a dictionary,
- # although we might e.g. drop duplicates
- attrs = dict([(k.lower(), v) for k, v in attrs])
-
- # For now, we only worry about the color attribute in the span tag's style
- def take_style_val(key):
- def go(style):
- intelligible, unintelligible = parse_style(style)
-
- taken = take_dict_elem(intelligible, key)
- if taken is not None:
- value, intelligible = taken
- else:
- value = None
-
- return (value, unparse_style((intelligible, unintelligible)))
-
- return go
-
- self.attributesstack.append(extract_attr_maybe("style", "color", take_style_val("color")))
-
- # We are still interested in writing out the remainder of the <span> tag, in
- # case it had other information in it (apart from the "style" attribute)
- self.unknown_starttag("span", attrs.items())
-
- def end_span(self):
- self.unknown_endtag("span")
- self.attributesstack.pop()
+ return dict([(pair.split(":")[0].strip(), pair.split(":")[1].strip()) for pair in style.split(";")])
- def handle_charref(self, ref):
- self.tokens.append(Text("&#%s;" % ref))
+ def unparse_style(parsed_style):
+ intelligible, unintelligible = parsed_style
+ return "; ".join([k + " : " + v for k, v in intelligible] + unintelligible)
- def handle_entityref(self, ref):
- self.tokens.append(Text("&%s" % ref))
- # standard HTML entities are closed with a semicolon; other entities are not
- if htmlentitydefs.entitydefs.has_key(ref):
- self.tokens.append(Text(";"))
+ # For now, we only worry about the color attribute in the span tag's style
+ def take_style_val(key):
+ def go(style):
+ intelligible, unintelligible = parse_style(style)
- def handle_comment(self, text):
- self.tokens.append(Text("<!--%s-->" % text))
-
- def handle_data(self, text):
- self.tokens.extend([self.contextify(token) for token in tokenizetext(text, self.forcenumeric)])
+ taken = take_dict_elem(intelligible, key)
+ if taken is not None:
+ value, intelligible = taken
+ else:
+ value = None
- def handle_pi(self, text):
- self.tokens.append(Text("<?%s>" % text))
+ return (value, unparse_style((intelligible, unintelligible)))
- def handle_decl(self, text):
- self.tokens.append(Text("<!%s>" % text))
-
- def contextify(self, what):
+ return go
+
+ def contextify(attributesstack, what):
# Get the most recent attributes to apply at this point in time
current_attrs = {}
- for attrs in self.attributesstack:
+ for attrs in attributesstack:
current_attrs.update(attrs)
for k, v in current_attrs.items():
what.htmlattrs[k] = v
return what
+
+ # Stateful recursive algorithm for consuming the parse tree: tokens accumulate in the 'tokens' list
+ tokens = []
+ def recurse(attributesstack, parent):
+ for child in parent.contents:
+ if not isinstance(child, Tag):
+ tokens.extend([contextify(attributesstack, token) for token in tokenizetext(unicode(child), forcenumeric)])
+ elif child.isSelfClosing:
+ tokens.append(Text("<%s />" % child.name))
+ else:
+ if child.name.lower() == "span":
+ # It's more convenient if we can see the attributes as a dictionary,
+ # although we might e.g. drop duplicates
+ attrsdict = dict([(k.lower(), v) for k, v in child.attrs])
+
+ # This is why we're even at this party: we want to grab the style stuff out
+ thisattributesstack = attributesstack + [extract_attr_maybe(attrsdict, "style", "color", take_style_val("color"))]
+
+ # We are still interested in writing out the remainder of the <span> tag, in
+ # case it had other information in it (apart from the "style" attribute)
+ thisattrs = attrsdict.items()
+ else:
+ thisattributesstack = attributesstack
+ thisattrs = child.attrs
+
+ tokens.append(Text("<%s%s>" % (child.name, "".join([' %s="%s"' % (key, value) for key, value in thisattrs]))))
+ recurse(thisattributesstack, child)
+ tokens.append(Text("</%s>" % child.name))
+
+ # This is it, chaps: let's munge that HTML!
+ recurse([], BeautifulSoup(html))
+ return tokens
"""
Represents a word boundary in the system, where the tokens inside represent a complete Chinese word.
View
189 pinyin/release.py
@@ -6,6 +6,7 @@
import sys
import time
+import pinyin
import pinyin.utils
@@ -13,53 +14,84 @@
# Code from ActiveState recipe (http://code.activestate.com/recipes/146306/)
#
-import httplib, mimetypes, mimetools, urllib2, cookielib
-
-cj = cookielib.CookieJar()
-opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
-urllib2.install_opener(opener)
-
-def post_multipart(host, selector, fields, files):
- """
- Post fields and files to an http host as multipart/form-data.
- fields is a sequence of (name, value) elements for regular form fields.
- files is a sequence of (name, filename, value) elements for data to be uploaded as files
- Return the server's response page.
- """
- content_type, body = encode_multipart_formdata(fields, files)
- headers = {'Content-Type': content_type,
- 'Content-Length': str(len(body))}
- r = urllib2.Request("http://%s%s" % (host, selector), body, headers)
- return urllib2.urlopen(r).read()
-
-def encode_multipart_formdata(fields, files):
- """
- fields is a sequence of (name, value) elements for regular form fields.
- files is a sequence of (name, filename, value) elements for data to be uploaded as files
- Return (content_type, body) ready for httplib.HTTP instance
- """
- BOUNDARY = mimetools.choose_boundary()
- CRLF = '\r\n'
- L = []
- for (key, value) in fields:
- L.append('--' + BOUNDARY)
- L.append('Content-Disposition: form-data; name="%s"' % key)
- L.append('')
- L.append(value)
- for (key, filename, value) in files:
- L.append('--' + BOUNDARY)
- L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
- L.append('Content-Type: %s' % get_content_type(filename))
- L.append('')
- L.append(value)
- L.append('--' + BOUNDARY + '--')
- L.append('')
- body = CRLF.join(L)
- content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
- return content_type, body
-
-def get_content_type(filename):
- return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+import urllib
+import urllib2
+import mimetools, mimetypes
+import os, stat
+
+class Callable:
+ def __init__(self, anycallable):
+ self.__call__ = anycallable
+
+# Controls how sequences are encoded. If true, elements may be given multiple values by
+# assigning a sequence.
+doseq = 1
+
+class MultipartPostHandler(urllib2.BaseHandler):
+ handler_order = urllib2.HTTPHandler.handler_order - 10 # needs to run first
+
+ def http_request(self, request):
+ data = request.get_data()
+ if data is not None and type(data) != str:
+ v_files = []
+ v_vars = []
+ try:
+ for(key, value) in data.items():
+ if type(value) == file:
+ v_files.append((key, value))
+ else:
+ v_vars.append((key, value))
+ except TypeError:
+ systype, value, traceback = sys.exc_info()
+ raise TypeError, "not a valid non-string sequence or mapping object", traceback
+
+ if len(v_files) == 0:
+ data = urllib.urlencode(v_vars, doseq)
+ else:
+ boundary, data = self.multipart_encode(v_vars, v_files)
+ contenttype = 'multipart/form-data; boundary=%s' % boundary
+ if(request.has_header('Content-Type')
+ and request.get_header('Content-Type').find('multipart/form-data') != 0):
+ print "Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data')
+ request.add_unredirected_header('Content-Type', contenttype)
+
+ request.add_data(data)
+ return request
+
+ def multipart_encode(vars, files, boundary = None, buffer = None):
+ if boundary is None:
+ boundary = mimetools.choose_boundary()
+ if buffer is None:
+ buffer = ''
+ for(key, value) in vars:
+ buffer += '--%s\r\n' % boundary
+ buffer += 'Content-Disposition: form-data; name="%s"' % key
+ buffer += '\r\n\r\n' + value + '\r\n'
+ for(key, fd) in files:
+ file_size = os.fstat(fd.fileno())[stat.ST_SIZE]
+ filename = os.path.basename(fd.name)
+ contenttype = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+ buffer += '--%s\r\n' % boundary
+ buffer += 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename)
+ buffer += 'Content-Type: %s\r\n' % contenttype
+ # buffer += 'Content-Length: %s\r\n' % file_size
+ fd.seek(0)
+ buffer += '\r\n' + fd.read() + '\r\n'
+ buffer += '--%s--\r\n\r\n' % boundary
+ return boundary, buffer
+ multipart_encode = Callable(multipart_encode)
+
+ https_request = http_request
+
+import cookielib
+cookies = cookielib.CookieJar()
+
+def post_multipart(url, fields, file_fields):
+ # This extra handler is useful for debugging with Charles:
+ # urllib2.ProxyHandler({ "http" : "127.0.0.1:8888" })
+ opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies), MultipartPostHandler)
+ params = dict(fields + [(name, open(filename, "rb")) for name, filename in file_fields])
+ return opener.open(url, params).read()
#
# End code from ActiveState recipe
@@ -74,20 +106,26 @@ def parse_releases(text):
m = re.match("Version ([^ ]+) \(([^\)]+)\)", raw_release)
yield m.group(1), m.group(2), raw_release
-def preflight_checks(repo_dir):
+def preflight_checks(release_info, repo_dir):
errors = []
def visit(_arg, dirname, names):
for name in names:
full_path = os.path.join(dirname, name)
# See http://code.google.com/p/anki/issues/detail?id=1342&colspec=ID%20Type%20Status%20Priority%20Stars%20Summary
- if os.path.isfile(full_path) and os.path.getsize(full_path) == 0L:
+ if os.path.isfile(full_path) and os.path.getsize(full_path) == 0L and "vendor" not in full_path:
errors.append(full_path + " is 0 bytes long - a bug found in Anki 0.9.9.8.5 means that such files are not extracted")
os.path.walk(repo_dir, visit, None)
+ if pinyin.__version__ != release_info["version"]:
+ errors.append("You haven't updated pinyin.__version_info__: saw %s when it should be %s" % (pinyin.__version__, release_info["version"]))
+
return errors
+non_hidden = lambda item: item[0] != '.'
+list_non_hidden_files = lambda rootdir: pinyin.utils.concat([pinyin.utils.let(pinyin.utils.inplacefilter(non_hidden, folders), lambda _: [os.path.join(root, file) for file in files if non_hidden(file)]) for root, folders, files in os.walk(rootdir, topdown=True)])
+
def build_release(credentials, release_info, temp_dir):
# 1) Clone the whole repository to a fresh location -
# this ensures we don't have any crap in the release
@@ -95,20 +133,29 @@ def build_release(credentials, release_info, temp_dir):
repo_dir = pinyin.utils.toolkitdir()
print "Cloning current repo state to", temp_repo_dir
subprocess.check_call(["git", "clone", repo_dir, temp_repo_dir])
+ subprocess.check_call(["git", "submodule", "init"], cwd=temp_repo_dir)
+ subprocess.check_call(["git", "submodule", "update"], cwd=temp_repo_dir)
# 1.5) Sanity check directory
- errors = preflight_checks(temp_repo_dir)
+ errors = preflight_checks(release_info, temp_repo_dir)
if len(errors) > 0:
print "\n".join(errors)
sys.exit(1)
# 2) Build a ZIP of that fresh checkout, excluding the .git directory
- # and using maximal compression (-9) since the file is pretty big
+ # and using maximal compression (-9) since the file is pretty big.
+ # It's important that we exclude hidden files because otherwise the .git
+ # repo lives in the ZIP file as well...
zip_file = os.path.join(temp_dir, "pinyin-toolkit.zip")
- repo_contents = [f for f in os.listdir(temp_repo_dir) if f[0] != '.']
+ repo_contents = [pinyin.utils.lstripexactly(os.path.join(temp_repo_dir, ""), file) for file in list_non_hidden_files(temp_repo_dir)]
subprocess.check_call(["zip", "-9", "-r", zip_file] + repo_contents, cwd=temp_repo_dir)
- # 3) Upload to Anki
+ # 3) Get confirmation
+ print "The zipfile has been prepared at:"
+ print zip_file
+ raw_input("Press enter to upload to Anki Online ...")
+
+ # 4) Upload to Anki
upload_to_anki_online(credentials, release_info, zip_file)
def upload_to_anki_online(credentials, release_info, zip_file):
@@ -121,7 +168,7 @@ def upload_to_anki_online(credentials, release_info, zip_file):
# <input type="submit" value="Sign Up" />
# </form>
print "Logging in to the Anki website as", credentials["username"]
- post_multipart("anki.ichi2.net", "/account/login",
+ post_multipart("http://anki.ichi2.net/account/login",
[("username", credentials["username"]), ("password", credentials["password"]), ("submitted", "1")],
[])
@@ -136,10 +183,9 @@ def upload_to_anki_online(credentials, release_info, zip_file):
# <input name="submit" type="submit" value="Update" />
# </form>
print "Uploading a new version of the plugin"
- zip_file_contents = file_contents(zip_file, "rb")
- post_multipart("anki.ichi2.net", "/file/upload",
+ post_multipart("http://anki.ichi2.net/file/upload",
[("type", "plugin"), ("title", release_info["title"]), ("tags", release_info["tags"]), ("description", release_info["description"]), ("id", release_info["id"]), ("submit", "Update")],
- [("file", os.path.basename(zip_file), zip_file_contents)])
+ [("file", zip_file)])
def home_path(*components):
return os.path.join(os.path.expanduser("~"), *components)
@@ -153,32 +199,33 @@ def file_contents(path, mode="r"):
if __name__ == "__main__":
config = eval(file_contents(home_path(".pinyin-toolkit-release")))
- version, date, changelog = list(parse_releases(file_contents(pinyin.utils.toolkitdir("Pinyin Toolkit.txt"))))[0]
- print changelog
- print "Press enter to upload version", version, "(" + date + ") ... ",
+ version, date, changelog = list(parse_releases(file_contents(pinyin.utils.toolkitdir("Pinyin Toolkit.txt"))))[0]
try:
- sys.stdin.read()
+ print changelog
+ raw_input("Press enter to prepare version %s (%s) ..." % (version, date))
except KeyboardInterrupt, e:
sys.exit(1)
- description = ["The Pinyin Toolkit adds many useful features to Anki to assist the study of Mandarin. The aim of " +
- "the project is to greatly enhance the user-experience for students studying the Chinese language.",
- "",
- "Homepage: http://batterseapower.github.com/pinyin-toolkit/",
- "Full feature list: http://wiki.github.com/batterseapower/pinyin-toolkit/features",
- "Installation instructions: http://wiki.github.com/batterseapower/pinyin-toolkit/installation",
- "",
- "Changes in the most recent version:",
- ""] + changelog
+ description = """The Pinyin Toolkit adds many useful features to Anki to assist the study of Mandarin. The aim of
+the project is to greatly enhance the user-experience for students studying the Chinese language.
+
+Homepage: http://batterseapower.github.com/pinyin-toolkit/
+Full feature list: http://wiki.github.com/batterseapower/pinyin-toolkit/features
+Installation instructions: http://wiki.github.com/batterseapower/pinyin-toolkit/installation
+
+Changes in the most recent version:
+""" + changelog
release_info = {
"id" : "14",
"title" : "Pinyin Toolkit (" + version + ") - Advanced Mandarin Chinese Support",
"tags" : "pinyin Mandarin Chinese English dictionary hanzi graph graphs",
- "description" : "\r\n".join(description)
+ "description" : description,
+ "version" : version
}
#upload_to_anki_online(config["credentials"], release_info, home_path("Junk", "test-plugin", "test-plugin.zip"))
pinyin.utils.withtempdir(lambda tempdir: build_release(config["credentials"], release_info, tempdir))
+ print "Everything seems to have worked!"
View
8 pinyin/tests/dictionary.py
@@ -115,16 +115,16 @@ def testErhuaSpacedInReadingForKnownWords(self):
self.assertEquals(flatten(englishdict.reading(u"两头儿")), "liang3tou2r")
def testSimpMeanings(self):
- self.assertEquals(self.flatmeanings(englishdict, u"", prefersimptrad="simp"), [u"book", u"letter", u"same as 书经 Book of History", u"MW: 本 - ben3, 册 - ce4, 部 - bu4, 丛 - cong2"])
+ self.assertEquals(self.flatmeanings(englishdict, u"", prefersimptrad="simp"), [u"book", u"letter", u"see also 书经 Book of History", u"MW: 本 - ben3, 册 - ce4, 部 - bu4"])
def testTradMeanings(self):
- self.assertEquals(self.flatmeanings(englishdict, u"", prefersimptrad="trad"), [u"book", u"letter", u"same as 書經 Book of History", u"MW: 本 - ben3, 冊 - ce4, 部 - bu4, 叢 - cong2"])
+ self.assertEquals(self.flatmeanings(englishdict, u"", prefersimptrad="trad"), [u"book", u"letter", u"see also 書經 Book of History", u"MW: 本 - ben3, 冊 - ce4, 部 - bu4"])
def testNonFlatMeanings(self):
dictmeanings, dictmeasurewords = englishdict.meanings(u"", prefersimptrad="simp")
- self.assertEquals(self.flattenall(dictmeanings), [u"book", u"letter", u"same as 书经 Book of History"])
+ self.assertEquals(self.flattenall(dictmeanings), [u"book", u"letter", u"see also 书经 Book of History"])
self.assertEquals([(self.flattenall(dictmwcharacters)[0], self.flattenall(dictmwpinyin)[0]) for dictmwcharacters, dictmwpinyin in dictmeasurewords],
- [(u"", u"ben3"), (u"", u"ce4"), (u"", u"bu4"), (u"", u"cong2")])
+ [(u"", u"ben3"), (u"", u"ce4"), (u"", u"bu4")])
# Test helper
def flatmeanings(self, dictionary, what, prefersimptrad="simp"):
View
18 pinyin/tests/dictionaryonline.py
@@ -28,8 +28,15 @@ def testParseList(self):
def testParseListOfLists(self):
self.assertEquals(parsegoogleresponse('[1, [2, [3, 4]], [5, 6]]'), [1, [2, [3, 4]], [5, 6]])
+ def testParseDict(self):
+ self.assertEquals(parsegoogleresponse('{"fruit" : "orange", 1 : 2, "buy" : 1337}'), {"fruit" : "orange", 1 : 2, "buy" : 1337})
+
+ def testParseDictOfDicts(self):
+ self.assertEquals(parsegoogleresponse('{"fruits" : {"orange" : 1, "banana" : 2}, "numbers" : {1337 : ["cool"], 13 : ["bad", "unlucky"]}}'),
+ {"fruits" : {"orange" : 1, "banana" : 2}, "numbers" : {1337 : ["cool"], 13 : ["bad", "unlucky"]}})
+
def testParseWhitespace(self):
- self.assertEquals(parsegoogleresponse('[ 1 ,"hello",[10, "barr rr"], "world" , 1337 ]'), [1, "hello", [10, "barr rr"], "world", 1337])
+ self.assertEquals(parsegoogleresponse('[ 1 ,"hello",[10, "barr rr"], "world" , 1337, { "a" : "dict"} ]'), [1, "hello", [10, "barr rr"], "world", 1337, { "a": "dict" }])
def testParseErrorIfTrailingStuff(self):
self.assertRaises(ValueError, lambda: parsegoogleresponse('1 1'))
@@ -37,11 +44,18 @@ def testParseErrorIfTrailingStuff(self):
self.assertRaises(ValueError, lambda: parsegoogleresponse('[1] 1'))
def testParseErrorIfUnknownCharacters(self):
- self.assertRaises(ValueError, lambda: parsegoogleresponse('{ "hello" }'))
+ self.assertRaises(ValueError, lambda: parsegoogleresponse('! "hello" !'))
def testParseErrorIfListNotClosed(self):
self.assertRaises(ValueError, lambda: parsegoogleresponse('[ "hello"'))
+ def testParseErrorIfDictMissingValueNotClosed(self):
+ self.assertRaises(ValueError, lambda: parsegoogleresponse('{ "hello" }'))
+ self.assertRaises(ValueError, lambda: parsegoogleresponse('{ "hello" : }'))
+
+ def testParseErrorIfDictNotClosed(self):
+ self.assertRaises(ValueError, lambda: parsegoogleresponse('{ "hello" : "world"'))
+
def testParseErrorIfEmpty(self):
self.assertRaises(ValueError, lambda: parsegoogleresponse(''))
View
6 pinyin/tests/updater.py
@@ -94,7 +94,7 @@ def testReformattingRespectsExistingColorization(self):
def assertUpdatesTo(self, *args, **kwargs):
assertUpdatesTo(partial(FieldUpdater, "reading"), *args, **kwargs)
-class TestFieldUpdaterFromExpression(object):
+class FieldUpdaterFromExpressionTest(object):
def testAutoBlankingGenerated(self):
self.assertUpdatesTo(u"", {}, {
"reading" : markgeneratedfield("blather"),
@@ -122,8 +122,8 @@ def testGenerateAllFieldsWhenEmptyOrGenerated(self):
expected = {
"expression" : u"",
"reading" : markgeneratedfield(u'<span style="color:#ff0000">shū</span>'),
- "meaning" : markgeneratedfield(u'㊀ book<br />㊁ letter<br />㊂ same as <span style="color:#ff0000">\u4e66</span><span style="color:#ff0000">\u7ecf</span> Book of History'),
- "mw" : markgeneratedfield(u'<span style="color:#00aa00">本</span> - <span style="color:#00aa00">běn</span>, <span style="color:#0000ff">册</span> - <span style="color:#0000ff">cè</span>, <span style="color:#0000ff">部</span> - <span style="color:#0000ff">bù</span>, <span style="color:#ffaa00">丛</span> - <span style="color:#ffaa00">cóng</span>'),
+ "meaning" : markgeneratedfield(u'㊀ book<br />㊁ letter<br />㊂ see also <span style="color:#ff0000">\u4e66</span><span style="color:#ff0000">\u7ecf</span> Book of History'),
+ "mw" : markgeneratedfield(u'<span style="color:#00aa00">本</span> - <span style="color:#00aa00">běn</span>, <span style="color:#0000ff">册</span> - <span style="color:#0000ff">cè</span>, <span style="color:#0000ff">部</span> - <span style="color:#0000ff">bù</span>'),
"audio" : markgeneratedfield(u"[sound:" + os.path.join("Test", "shu1.mp3") + "]"),
"mwaudio" : lambda mwaudio: assert_equal(sanitizequantitydigits(mwaudio), markgeneratedfield((u"[sound:" + os.path.join("Test", "X.mp3") + u"][sound:" + os.path.join("Test", "shu1.mp3") + "]") * 4)),
"color" : markgeneratedfield(u'<span style="color:#ff0000">书</span>'),
View
15 pinyin/utils.py
@@ -18,6 +18,10 @@ def debugmode():
# Uncomment to force debug mode off:
#return False
+ # A simple way that ordinary users can get extra logging:
+ if os.path.exists(toolkitdir("enable-pinyin-toolkit-log.txt")):
+ return True
+
# Username as reported on Windows by typing into cmd:
# echo %USERNAME%
#
@@ -356,6 +360,12 @@ def cumulative(sequence):
sofar = sofar + n
yield sofar
+def lstripexactly(what, fromwhat):
+ if fromwhat[0:len(what)] == what:
+ return fromwhat[len(what):]
+ else:
+ raise ValueError("Couldn't strip %r from %r" % (what, fromwhat))
+
def urlescape(what):
import urllib
return urllib.quote(what.encode('utf-8'))
@@ -374,6 +384,11 @@ def splitat(what, n):
def seq(x, y):
return y()
+def inplacefilter(pred, list):
+ for i in range(len(list), 0, -1):
+ if not pred(list[i - 1]):
+ del list[i - 1]
+
def first(f):
def go(xy):
x, y = xy
View
2  pinyin/vendor/perverseness/ConfigParser.py
@@ -0,0 +1,2 @@
+# Imported in a non-lazy manner by cjklib.
+# Luckily it never actually uses it unless we don't supply a configuration when initialising the database!
View
3  pinyin/vendor/perverseness/__init__.py
@@ -0,0 +1,3 @@
+# This vendor directory is reserved for fake imports of real
+# Python libraries that are excluded from the Anki distribution
+# for no good reason that I can see.
Please sign in to comment.
Something went wrong with that request. Please try again.