Skip to content
Permalink
Browse files

Fix sort TranslationLangage

  • Loading branch information...
JackPotte committed Aug 4, 2019
1 parent e8d4747 commit 3e1f0a1be7c0a6a977d803ba127d6ef8dd5da7c1
Showing with 138 additions and 30 deletions.
  1. 0 languages.py
  2. +21 −27 src/lib/WiktionaryPageFunctions.py
  3. +87 −3 src/lib/languages.py
  4. +30 −0 src/lib/languagesGenerator.py
No changes.
@@ -1029,45 +1029,39 @@ def sortTranslations(pageContent, summary):
if debugLevel > 2: print(pageContent.encode(config.console_encoding, 'replace'))
if debugLevel > 0: print ''
pageContent = finalPageContent + pageContent
if debugLevel > 0:
print ' fin du tri des traductions'
raw_input(pageContent.encode(config.console_encoding, 'replace'))
if debugLevel > 0: raw_input(' fin du tri des traductions')

return pageContent, summary


def getTranslationCurrentLangage(pageContent):
    """Return the language name of the first ``{{T|...}}`` template.

    The language code is the text between the first ``{{T|`` marker in
    ``pageContent`` and the first ``}`` that follows it. The raw code
    (which may still carry ``|`` parameters) is handed to
    ``getLangageNameByCode`` for resolution.

    Args:
        pageContent: Wikitext expected to contain a ``{{T|code}}`` template.

    Returns:
        Whatever ``getLangageNameByCode`` returns for the extracted code.
    """
    marker = u'{{T|'
    codeStart = pageContent.find(marker) + len(marker)
    # Look for the closing brace after the marker so that a stray '}'
    # earlier in the text cannot produce an empty or reversed slice.
    language = pageContent[codeStart:pageContent.find(u'}', codeStart)]

    return getLangageNameByCode(language)


def getTranslationNextLangage(finalPageContent):
    """Return the language name of the last ``{{T|...}}`` template.

    The code is taken from just after the last ``{{T|`` marker in
    ``finalPageContent`` up to the first ``}}`` that follows it, then
    resolved through ``getLangageNameByCode``.
    """
    marker = u'{{T|'
    codeStart = finalPageContent.rfind(marker) + len(marker)
    tail = finalPageContent[codeStart:]
    codeEnd = tail.find('}}')

    return getLangageNameByCode(tail[:codeEnd])


def getLangageNameByCode(languageCode):
    """Return the sortable language name for a translation language code.

    Anything after the first ``|`` in ``languageCode`` is discarded. The
    remaining code is looked up in the module-level ``languages`` mapping,
    the stored byte string is decoded as UTF-8 and the result is passed
    through ``defaultSort``.

    Args:
        languageCode: Language code, optionally followed by ``|`` and
            further template parameters.

    Returns:
        The value produced by ``defaultSort`` for the language name, or
        ``u''`` when the code is empty, is longer than three characters
        without a hyphen, or is missing from ``languages``.
    """
    # Bound up front so every path returns a value instead of raising
    # UnboundLocalError at the final return.
    languageName = u''
    languageCode = languageCode.split(u'|', 1)[0]
    if languageCode == u'':
        return languageName

    if len(languageCode) > 3 and languageCode.find(u'-') == -1:
        # NOTE(review): name-like codes currently resolve to u''; confirm
        # whether the raw code should be returned for them instead.
        if debugLevel > 0: print(u'No ISO code (ex: gallo)')
        return languageName

    try:
        languageName = defaultSort(languages[languageCode].decode('utf8'), 'UTF-8')
        if debugLevel > 1: print(u' Language name: ' + languageName)
    except KeyError:
        if debugLevel > 0: print(u'KeyError l 2556')
    except UnboundLocalError:
        if debugLevel > 0: print(u'UnboundLocalError l 2559')

    return languageName


def getLanguageCodeISO693_1FromISO693_3(code):
@@ -1,7 +1,6 @@
#!/usr/bin/env python
# coding: utf-8
# Copy of https://fr.wiktionary.org/wiki/Module:langues/data with regex
# "\n *\t*l\['([^']+)'\] = (?:l\[|\{ nom = )'([^']+)'[^\n]+" to "\n '$1': '$2',"
# Generated by languagesGenerator.py

languages = {
'aa': 'afar',
@@ -6057,7 +6056,7 @@
'zgr': 'magori',
'zh-classical': 'lzh',
'zh-min-nan': 'nan',
'zh-yue': 'yue',
'zh-yue': 'cantonais',
'zh': 'chinois',
'zhb': 'zhaba',
'zhd': 'dai zhuang',
@@ -6156,4 +6155,89 @@
'zyn': 'zhuang de Yongnan',
'zza': 'zazaki',
'zzj': 'zhuang de Zuojiang',

'aka': 'akan',
'ancien danois': 'vieux danois',
'ancien suédois': 'vieux suédois',
'arb': 'arabe',
'ava': 'avar',
'be-x-old': 'biélorusse (tarashkevitsa)',
'bel': 'biélorusse',
'ben': 'bengali',
'bih': 'langues biharies',
'ca-val': 'valencien',
'celtique cisalpin': 'lépontique',
'cha': 'chamorro',
'chu': 'vieux slave',
'chv': 'tchouvache',
'cym': 'gallois',
'dan': 'danois',
'dzo': 'dzongkha',
'erse': 'gaélique écossais',
'fas': 'persan',
'gaul': 'gaulois',
'gcf': 'créole guadeloupéen',
'gla': 'gaélique écossais',
'gle': 'gaélique irlandais',
'glg': 'galicien',
'guj': 'gujarati',
'hat': 'créole haïtien',
'hau': 'haoussa',
'hb': 'hébreu',
'hbs': 'serbo-croate',
'heb': 'hébreu',
'ibo': 'igbo',
'insubre': 'lépontique',
'ipk': 'inupiaq',
'kau': 'kanouri',
'kaz': 'kazakh',
'ko-Hani': 'coréen',
'ko-hanja': 'coréen',
'kur': 'kurde',
'lim': 'limbourgeois',
'lin': 'lingala',
'lit': 'lituanien',
'lusitanien': 'lusitain',
'mah': 'marshallais',
'mal': 'malayalam',
'manxois': 'mannois',
'mri': 'maori',
'nav': 'navajo',
'nde': 'ndébélé du Nord',
'nep': 'népalais',
'nno': 'norvégien (nynorsk)',
'nob': 'norvégien (bokmål)',
'npi': 'népalais',
'orm': 'oromo',
'per': 'persan',
'poitevin': 'poitevin-saintongeais',
'prv': 'occitan',
'roa-rup': 'aroumain',
'roh': 'romanche',
'ron': 'roumain',
'run': 'kirundi',
'rus': 'russe',
'saintongeais': 'poitevin-saintongeais',
'slk': 'slovaque',
'slo': 'slovaque',
'slv': 'slovène',
'smo': 'samoan',
'srd': 'sarde',
'srp': 'serbe',
'sud-picène': 'picène du Sud',
'tir': 'tigrigna',
'ton': 'tongien',
'tso': 'tsonga',
'ukr': 'ukrainien',
'ven': 'venda',
'vi-chunho': 'vietnamien',
'vi-chunom': 'vietnamien',
'vieux curonien': 'couronien',
'wel': 'gallois',
'xtg': 'gaulois',
'yid': 'yiddish',
'zahrar sproche': 'saurano',
'zh-classical': 'chinois classique',
'zh-min-nan': 'minnan',
'zh-yue': 'cantonais',
}
@@ -0,0 +1,30 @@
#!/usr/bin/env python
# coding: utf-8

import os, re, sys
import languages

def main(*args):
    """Append resolved language redirections to ``src/lib/languages.py``.

    Scans the file for redirection lines of the form
    ``l['alias'] = l['target']``, resolves each target through
    ``languages.languages`` and writes one ``'alias': 'name',`` entry per
    redirection at the end of that same file.

    Args:
        *args: Accepted for the command-line entry point; not used.

    Raises:
        KeyError: If a redirection targets a code that is missing from
            ``languages.languages``.
    """
    # TODO
    # 1) get https://fr.wiktionary.org/wiki/Module:langues/data
    # 2) Regex
    #    ur"\n *\t*l\['([^']+)'\] = \{ nom = '([^']+)'[^\n]+"
    #    ur"\n '$1': '$2',"
    # 4) sorting

    # 3) Treat commented redirections
    regex = r"\n *\t*l\['([^']+)'\] = l\['([^']+)'\]"
    # The with-block guarantees the handle is closed, even when a lookup
    # below raises.
    with open('src/lib/languages.py', 'r+b') as languagesFile:
        content = languagesFile.read()

        redirects = re.findall(regex, content)
        print(str(len(redirects)) + u' redirections found')

        redirectNames = ''.join(
            "\n '" + alias + "': '" + languages.languages[target] + "',"
            for alias, target in redirects
        )
        # read() left the position at end of file, so this write appends.
        languagesFile.write(redirectNames)


if __name__ == "__main__":
    main(sys.argv)

0 comments on commit 3e1f0a1

Please sign in to comment.
You can’t perform that action at this time.