Skip to content

Commit

Permalink
Problem in other languages
Browse files Browse the repository at this point in the history
  • Loading branch information
Patent2net committed Mar 30, 2023
1 parent 7744786 commit 77a6aa5
Showing 1 changed file with 9 additions and 7 deletions.
16 changes: 9 additions & 7 deletions Patent2Net/P2N-Indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,12 +334,12 @@ def iramCleaner (texte):
lang = detectlanguage.simple_detect(phrase)
except:
lang = ''
if len(lang) > 0:
indexLang = lang.upper() + '-' + ndf.lower()
doc['lang'] = lang.upper()
else:
indexLang = "UNK-" + ndf.lower()
doc['lang'] = "UNKNOWN"
if len(lang) > 0:
indexLang = lang.upper() + '-' + ndf.lower()
doc['lang'] = lang.upper()
else:
indexLang = "UNK-" + ndf.lower()
doc['lang'] = "UNKNOWN"
Files = [truc for truc in lstUnk if bre['label'] == truc.split('-')[1].replace('.txt', "")]
for fil in Files:
champ = ""
Expand All @@ -358,7 +358,9 @@ def iramCleaner (texte):
doc[champ] = donnes
else:
print("file ignored ", fil, champ)

else:
indexLang = "UNK-" + ndf.lower()
doc['lang'] = "UNKNOWN"
if indexLang.split('-')[0] in cpt.keys():
cpt[indexLang.split('-')[0]] += 1
else:
Expand Down

0 comments on commit 77a6aa5

Please sign in to comment.