Skip to content

Commit

Permalink
Merge pull request #29 from SekouDiaoNlp/dev
Browse files Browse the repository at this point in the history
implemented the method _convert_entries(row_fields) for type conversion of lex entries
  • Loading branch information
SekouDiaoNlp committed Apr 30, 2021
2 parents 52c6f36 + 526b881 commit 1082109
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 148 deletions.
183 changes: 42 additions & 141 deletions pylexique/errors/errors.json
Original file line number Diff line number Diff line change
@@ -1,146 +1,47 @@
{
"137702": [
"a",
"",
"PRE",
"",
"",
"12190,4",
"19209,05",
"12190,4",
"19209,05",
"",
"1",
"9",
"1",
"1",
"1",
"V",
"V",
"25",
"20",
"1",
"1",
"a",
"1",
"V",
"",
"a",
"",
"PRE",
"",
"",
"1",
"1",
"",
"1"
"a": [
{
"nbphons": "V"
},
{
"puphon": "a"
},
{
"nbsyll": "V"
}
],
"137794": [
"sa",
"",
"ADV",
"",
"",
"7,78",
"21,15",
"7,78",
"21,15",
"",
"1",
"6",
"1",
"2",
"2",
"CV",
"CV",
"3",
"30",
"2",
"2",
"sa",
"1",
"CV",
"",
"as",
"",
"ADV",
"",
"",
"1,8",
"1",
"",
"1"
"sa": [
{
"nbphons": "CV"
},
{
"puphon": "sa"
},
{
"nbsyll": "CV"
}
],
"137799": [
"e",
"",
"ADV",
"",
"",
"0,05",
"0",
"0,05",
"0",
"",
"1",
"9",
"1",
"1",
"1",
"V",
"V",
"25",
"20",
"1",
"1",
"e",
"1",
"V",
"",
"e",
"",
"ADV",
"",
"",
"1",
"1",
"",
"1"
"e": [
{
"nbphons": "V"
},
{
"puphon": "e"
},
{
"nbsyll": "V"
}
],
"142664": [
"o",
"",
"ONO",
"",
"",
"16,08",
"33,11",
"16,08",
"33,11",
"",
"1",
"16",
"1",
"1",
"1",
"V",
"V",
"25",
"20",
"1",
"1",
"o",
"1",
"V",
"",
"o",
"",
"ONO",
"",
"",
"1",
"1",
"",
"1"
]
"o": [
{
"nbphons": "V"
},
{
"puphon": "o"
},
{
"nbsyll": "V"
}
],
"default_factory": []
}
53 changes: 46 additions & 7 deletions pylexique/pylexique.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

"""Main module of pylexique."""

from collections import OrderedDict
from collections import OrderedDict, defaultdict
import pkg_resources

from dataclasses import asdict, dataclass, astuple
Expand Down Expand Up @@ -114,8 +114,7 @@ def _parse_lexique(self, lexique_path):
"""
with open(lexique_path, 'r', encoding='utf-8', errors='ignore') as csv_file:
content = csv_file.readlines()
lexique383_db = content[1:]
self._create_db(lexique383_db)
self._create_db(content)
return

def _create_db(self, lexicon):
Expand All @@ -127,9 +126,9 @@ def _create_db(self, lexicon):
Iterable containing the lexique383 entries.
:return:
"""
errors = {}
for i, row in enumerate(lexicon):
for i, row in enumerate(lexicon[1:]):
row_fields = row.strip().split('\t')
row_fields = self._convert_entries(row_fields)
if row_fields[0] in self.lexique and not isinstance(self.lexique[row_fields[0]], list):
self.lexique[row_fields[0]] = [self.lexique[row_fields[0]]]
self.lexique[row_fields[0]].append(LexItem(row_fields))
Expand All @@ -139,6 +138,35 @@ def _create_db(self, lexicon):
self.lexique[row_fields[0]] = LexItem(row_fields)
return

@staticmethod
def _convert_entries(row_fields):
    """
    Convert the raw string fields of one lexicon row to numeric types.

    Fields are paired positionally with LEXIQUE383_FIELD_NAMES; pairing stops
    at the shorter of the two sequences. Frequency and distance columns are
    converted to float (a decimal comma is accepted), count-like columns are
    converted to int, and every other column is passed through unchanged.
    Blank fields are never converted. A non-blank field that cannot be
    parsed is reported on stdout and kept as the original string.

    :param row_fields: the string fields of a single row, in column order.
    :return: converted_row_fields: a new list holding the converted values.
    """
    float_attrs = {'freqlemfilms2', 'freqlemlivres', 'freqfilms2', 'freqlivres', 'old20', 'pld20'}
    int_attrs = {'nbhomogr', 'nbhomoph', 'islem', 'nblettres', 'nbphons', 'voisorth', 'voisphon', 'puorth',
                 'puphon', 'nbsyll'}
    warning = "the value {} is of the wrong type for the attribute '{}'. Keeping value as string.\n"

    converted_row_fields = []
    for attr, value in zip(LEXIQUE383_FIELD_NAMES, row_fields):
        if attr in float_attrs:
            # The data uses a decimal comma ("7,78"); float() needs a dot.
            # Values without a comma ("0") are converted too, so the column
            # has a single consistent type.
            caster, text = float, value.replace(',', '.')
        elif attr in int_attrs:
            caster, text = int, value
        else:
            caster, text = None, value

        # Blank cells carry no number: skip them silently instead of
        # attempting a conversion that can only fail.
        if caster is not None and value.strip():
            try:
                value = caster(text)
            except ValueError:
                # Best effort: keep the raw string and tell the user.
                print(warning.format(value, attr))
        converted_row_fields.append(value)
    return converted_row_fields


@dataclass(init=False, repr=False, eq=True, order=False, unsafe_hash=False, frozen=False)
class LexItem:
Expand All @@ -158,7 +186,8 @@ def __init__(self, row_fields):
fields = row_fields
setattr(self, '_name_', fields[0])
for attr, value in zip(LEXIQUE383_FIELD_NAMES, fields):
setattr(self, attr, value)
if attr != 'attr':
setattr(self, attr, value)
return

def __repr__(self):
Expand All @@ -170,7 +199,17 @@ def to_dict(self):
:return: dict
"""
result = OrderedDict((attr, getattr(self, attr)) for attr in LEXIQUE383_FIELD_NAMES)
attributes = []
for attr in self.__slots__:
if not attr == "_name_":
try:
value = getattr(self, attr)
except AttributeError as e:
print(e)
pass
attributes.append((attr, value))
result = OrderedDict(attributes)
# result = OrderedDict((attr, getattr(self, attr)) for attr in self.__slots__ if attr != 'attr')
return result


Expand Down

0 comments on commit 1082109

Please sign in to comment.