Merge pull request #85 from SekouDiaoNlp/dev
Release of pylexique version 1.3.5
SekouDiaoNlp committed May 18, 2021
2 parents 5f2278c + 7b9da66 commit f3b0ebd
Showing 9 changed files with 33 additions and 23 deletions.
15 changes: 5 additions & 10 deletions .github/workflows/main.yml
@@ -42,11 +42,9 @@ jobs:
twine check dist/*
py.test -rP --cov=./pylexique/
codecov --token=370386ee-28d7-441f-b4eb-7f63f8c5c3e9
# mypy --strict --ignore-missing-imports mlconjug3/
# bandit -r mlconjug3/
# Put this commands first in the run section.
# python setup.py check --restructuredtext -s
# This command has been deprecated. Use `twine check` instead.
bandit -r pylexique/
# mypy --strict --ignore-missing-imports pylexique/


build:
name: Build source package on ${{ matrix.os }}
@@ -84,8 +82,5 @@ jobs:
twine check dist/*
py.test -rP --cov=./pylexique/
codecov --token=370386ee-28d7-441f-b4eb-7f63f8c5c3e9
# mypy --strict --ignore-missing-imports mlconjug3/
# bandit -r mlconjug3/
# Put this commands first in the run section.
# python setup.py check --restructuredtext -s
# This command has been deprecated. Use `twine check` instead.
bandit -r pylexique/
# mypy --strict --ignore-missing-imports pylexique/
6 changes: 6 additions & 0 deletions HISTORY.rst
@@ -2,6 +2,12 @@
History
=======

1.3.5 (2021-05-18)
------------------

* Uses str.lower() to normalize inputs.
* Updated the Documentation and the docstrings.

1.3.4 (2021-05-16)
------------------

2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -9,4 +9,4 @@ recursive-exclude * __pycache__
recursive-exclude * *.py[co]

recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
recursive-include pylexique *.txt *.xlsb
recursive-include pylexique *.txt
9 changes: 8 additions & 1 deletion README.rst
@@ -125,7 +125,7 @@ The meanings of the attributes of this object are as follow:
* nbhomogr: number of homographs
* nbhomoph: number of homophones
* islem: indicates if it is a lemma or not
* nbletters: the number of letters
* nblettres: the number of letters
* nbphons: number of phonemes
* cvcv: the orthographic structure
* p-cvcv: the phonological structure
@@ -139,6 +139,13 @@ The meanings of the attributes of this object are as follow:
* orthrenv: reverse orthographic form
* phonrenv: reversed phonological form
* orthosyll: syllable orthographic form
* cgramortho: the different grammatical category for a given orthographic representation
* deflem: the percentage of people who said they knew the lemma of the word
* defobs: the size of the sample from which 'deflem' is derived
* old20: orthographic Levenshtein Distance
* pld20: phonological Levenshtein Distance
* morphoder: inflectional morphology
* nbmorph: the number of morphemes directly computed from 'morphoder'


You can find all the relevant information in the `official documentation of Lexique383`_
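
As a quick illustration of how the attributes listed above are read in practice, here is a minimal sketch. It assumes the pre-shipped lexicon and the standard Lexique383 column names such as ortho and cgram; 'avion' is only an illustrative input, and words with several homographs may come back as a list of LexItem objects:

    from pylexique import Lexique383

    lexique = Lexique383()  # falls back to the bundled Lexique383 data when no path is given

    # Entries live in the .lexique mapping; a homograph may map to a list of LexItem objects.
    entry = lexique.lexique['avion']
    items = entry if isinstance(entry, list) else [entry]
    for item in items:
        # nblettres, old20 and pld20 are among the attributes documented above.
        print(item.ortho, item.cgram, item.nblettres, item.old20, item.pld20)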
2 changes: 1 addition & 1 deletion pylexique/__init__.py
@@ -4,7 +4,7 @@

__author__ = """SekouDiaoNlp"""
__email__ = 'diao.sekou.nlp@gmail.com'
__version__ = '1.3.4'
__version__ = '1.3.5'
__copyright__ = "Copyright (c) 2021, SekouDiaoNlp"
__credits__ = ("Lexique383",)
__license__ = "MIT"
16 changes: 8 additions & 8 deletions pylexique/pylexique.py
@@ -138,7 +138,7 @@ def __init__(self, lexique_path: Optional[str] = None, parser_type: str = 'std_c
try:
self._parse_lexique(self.lexique_path, parser_type)
except UnicodeDecodeError as e:
raise UnicodeDecodeError(f"There was a unicode error while parsing {type(lexique_path)}.") from e
raise UnicodeError(f"There was a unicode error while parsing {type(lexique_path)}.") from e
except FileNotFoundError as e:
if isinstance(lexique_path, str):
raise ValueError(f"Argument 'lexique_path' must be a valid path to Lexique383") from e
@@ -149,18 +149,18 @@ def __init__(self, lexique_path: Optional[str] = None, parser_type: str = 'std_c
# Tries to load the pre-shipped Lexique38X if no path file to the lexicon is provided.
self._parse_lexique(_RESOURCE_PATH_csv, parser_type)
except UnicodeDecodeError as e:
raise UnicodeDecodeError(f"There was a unicode error while parsing {type(_RESOURCE_PATH_csv)}.") from e
raise UnicodeError(f"There was a unicode error while parsing {type(_RESOURCE_PATH_csv)}.") from e
except FileNotFoundError as e:
if isinstance(_RESOURCE_PATH_csv, str):
raise ValueError(f"Argument 'lexique_path' must be a valid path to Lexique383") from e
if not isinstance(_RESOURCE_PATH_csv, str):
raise TypeError(f"Argument 'lexique_path'must be of type String, not {type(_RESOURCE_PATH_csv)}") from e
return

def __repr__(self):
def __repr__(self) -> str:
return '{0}.{1}'.format(__name__, self.__class__.__name__)

def __len__(self):
def __len__(self) -> int:
return len(self.lexique)

@staticmethod
@@ -177,7 +177,7 @@ def _parse_csv(lexique_path: str) -> Generator[list, Any, None]:
content = (row.strip().split('\t') for row in raw_content[1:])
return content

def _parse_lexique(self, lexique_path: str, parser_type: str) -> None:
def _parse_lexique(self, lexique_path: Optional[str], parser_type: str) -> None:
"""
| Parses the given lexique file and creates 2 hash tables to store the data.
@@ -298,14 +298,14 @@ def get_lex(self, words: Union[Tuple[str], str]) -> OrderedDict:
results = OrderedDict()
if isinstance(words, str):
try:
results[words] = self.lexique[words]
results[words] = self.lexique[words.lower()]
except AttributeError:
logger.warning('the word {} is not in Lexique383'.format(words))
elif isinstance(words, Sequence):
for word in words:
if isinstance(word, str):
try:
results[word] = self.lexique[word]
results[word] = self.lexique[word.lower()]
except AttributeError:
logger.warning('The word {} is not in Lexique383\n'.format(word))
continue
@@ -326,7 +326,7 @@ def get_all_forms(self, word: str) -> List[LexItem]:
List of LexItem objects sharing the same root lemma.
"""
try:
lex_entry = self.lexique[word]
lex_entry = self.lexique[word.lower()]
except ValueError as e:
logger.warning('The word {} is not in Lexique383\n'.format(word))
raise ValueError from e
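
The functional changes in this file are the lower-casing of lookup keys in get_lex() and get_all_forms(), plus the switch from UnicodeDecodeError to UnicodeError when re-raising parse failures, likely because UnicodeDecodeError cannot be built from a single message string (its constructor also expects the encoding, the object being decoded and the start/end offsets). Below is a minimal sketch of what the key normalization means for callers; 'MANGER', 'manger' and 'Mangeait' are illustrative inputs only:

    from pylexique import Lexique383

    lexique = Lexique383()

    # Since 1.3.5 the lookup key is normalized with str.lower(), so an
    # upper-cased query resolves to the same entry as the lower-case form.
    upper = lexique.get_lex('MANGER')
    lower = lexique.get_lex('manger')
    print(list(upper.values()) == list(lower.values()))  # expected: True

    # get_all_forms() normalizes its argument the same way.
    print(len(lexique.get_all_forms('Mangeait')))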
2 changes: 2 additions & 0 deletions requirements_dev.txt
@@ -10,5 +10,7 @@ twine
joblib
pytest
pytest-runner
bandit
mypy
Click>=7.1
dataclasses>=0.6; python_version < '3.7'
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.3.4
current_version = 1.3.5
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
@@ -65,6 +65,6 @@
test_suite='tests',
tests_require=test_requirements,
url='https://github.com/SekouDiaoNlp/pylexique',
version='1.3.4',
version='1.3.5',
zip_safe=False,
)
