Merge pull request #85 from SekouDiaoNlp/dev
Release of pylexique version 1.3.5
SekouDiaoNlp committed May 18, 2021
2 parents 5f2278c + 7b9da66 commit f3b0ebd
Showing 9 changed files with 33 additions and 23 deletions.
15 changes: 5 additions & 10 deletions .github/workflows/main.yml
@@ -42,11 +42,9 @@ jobs:
twine check dist/*
py.test -rP --cov=./pylexique/
codecov --token=370386ee-28d7-441f-b4eb-7f63f8c5c3e9
# mypy --strict --ignore-missing-imports mlconjug3/
# bandit -r mlconjug3/
# Put this commands first in the run section.
# python setup.py check --restructuredtext -s
# This command has been deprecated. Use `twine check` instead.
bandit -r pylexique/
# mypy --strict --ignore-missing-imports pylexique/


build:
name: Build source package on ${{ matrix.os }}
@@ -84,8 +82,5 @@ jobs:
twine check dist/*
py.test -rP --cov=./pylexique/
codecov --token=370386ee-28d7-441f-b4eb-7f63f8c5c3e9
# mypy --strict --ignore-missing-imports mlconjug3/
# bandit -r mlconjug3/
# Put this commands first in the run section.
# python setup.py check --restructuredtext -s
# This command has been deprecated. Use `twine check` instead.
bandit -r pylexique/
# mypy --strict --ignore-missing-imports pylexique/
6 changes: 6 additions & 0 deletions HISTORY.rst
@@ -2,6 +2,12 @@
History
=======

1.3.5 (2021-05-18)
------------------

* Uses str.lower() to normalize inputs.
* Updated the Documentation and the docstrings.

1.3.4 (2021-05-16)
------------------

2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -9,4 +9,4 @@ recursive-exclude * __pycache__
recursive-exclude * *.py[co]

recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
recursive-include pylexique *.txt *.xlsb
recursive-include pylexique *.txt
9 changes: 8 additions & 1 deletion README.rst
@@ -125,7 +125,7 @@ The meanings of the attributes of this object are as follow:
* nbhomogr: number of homographs
* nbhomoph: number of homophones
* islem: indicates if it is a lemma or not
* nbletters: the number of letters
* nblettres: the number of letters
* nbphons: number of phonemes
* cvcv: the orthographic structure
* p-cvcv: the phonological structure
@@ -139,6 +139,13 @@ The meanings of the attributes of this object are as follow:
* orthrenv: reverse orthographic form
* phonrenv: reversed phonological form
* orthosyll: syllable orthographic form
* cgramortho: the different grammatical category for a given orthographic representation
* deflem: the percentage of people who said they knew the lemma of the word
* defobs: the size of the sample from which 'deflem' is derived
* old20: orthographic Levenshtein Distance
* pld20: phonological Levenshtein Distance
* morphoder: inflectional morphology
* nbmorph: the number of morphemes directly computed from 'morphoder'


You can find all the relevant information in the `official documentation of Lexique383`_
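
As a quick illustration of how the attributes listed above are read in practice, here is a minimal sketch. It assumes the pre-shipped lexicon and the standard Lexique383 column names such as ortho and cgram; 'avion' is only an illustrative input, and words with several homographs may come back as a list of LexItem objects:

    from pylexique import Lexique383

    lexique = Lexique383()  # falls back to the bundled Lexique383 data when no path is given

    # Entries live in the .lexique mapping; a homograph may map to a list of LexItem objects.
    entry = lexique.lexique['avion']
    items = entry if isinstance(entry, list) else [entry]
    for item in items:
        # nblettres, old20 and pld20 are among the attributes documented above.
        print(item.ortho, item.cgram, item.nblettres, item.old20, item.pld20)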
2 changes: 1 addition & 1 deletion pylexique/__init__.py
@@ -4,7 +4,7 @@

__author__ = """SekouDiaoNlp"""
__email__ = 'diao.sekou.nlp@gmail.com'
__version__ = '1.3.4'
__version__ = '1.3.5'
__copyright__ = "Copyright (c) 2021, SekouDiaoNlp"
__credits__ = ("Lexique383",)
__license__ = "MIT"
16 changes: 8 additions & 8 deletions pylexique/pylexique.py
@@ -138,7 +138,7 @@ def __init__(self, lexique_path: Optional[str] = None, parser_type: str = 'std_c
try:
self._parse_lexique(self.lexique_path, parser_type)
except UnicodeDecodeError as e:
raise UnicodeDecodeError(f"There was a unicode error while parsing {type(lexique_path)}.") from e
raise UnicodeError(f"There was a unicode error while parsing {type(lexique_path)}.") from e
except FileNotFoundError as e:
if isinstance(lexique_path, str):
raise ValueError(f"Argument 'lexique_path' must be a valid path to Lexique383") from e
@@ -149,18 +149,18 @@ def __init__(self, lexique_path: Optional[str] = None, parser_type: str = 'std_c
# Tries to load the pre-shipped Lexique38X if no path file to the lexicon is provided.
self._parse_lexique(_RESOURCE_PATH_csv, parser_type)
except UnicodeDecodeError as e:
raise UnicodeDecodeError(f"There was a unicode error while parsing {type(_RESOURCE_PATH_csv)}.") from e
raise UnicodeError(f"There was a unicode error while parsing {type(_RESOURCE_PATH_csv)}.") from e
except FileNotFoundError as e:
if isinstance(_RESOURCE_PATH_csv, str):
raise ValueError(f"Argument 'lexique_path' must be a valid path to Lexique383") from e
if not isinstance(_RESOURCE_PATH_csv, str):
raise TypeError(f"Argument 'lexique_path'must be of type String, not {type(_RESOURCE_PATH_csv)}") from e
return

def __repr__(self):
def __repr__(self) -> str:
return '{0}.{1}'.format(__name__, self.__class__.__name__)

def __len__(self):
def __len__(self) -> int:
return len(self.lexique)

@staticmethod
@@ -177,7 +177,7 @@ def _parse_csv(lexique_path: str) -> Generator[list, Any, None]:
content = (row.strip().split('\t') for row in raw_content[1:])
return content

def _parse_lexique(self, lexique_path: str, parser_type: str) -> None:
def _parse_lexique(self, lexique_path: Optional[str], parser_type: str) -> None:
"""
| Parses the given lexique file and creates 2 hash tables to store the data.
@@ -298,14 +298,14 @@ def get_lex(self, words: Union[Tuple[str], str]) -> OrderedDict:
results = OrderedDict()
if isinstance(words, str):
try:
results[words] = self.lexique[words]
results[words] = self.lexique[words.lower()]
except AttributeError:
logger.warning('the word {} is not in Lexique383'.format(words))
elif isinstance(words, Sequence):
for word in words:
if isinstance(word, str):
try:
results[word] = self.lexique[word]
results[word] = self.lexique[word.lower()]
except AttributeError:
logger.warning('The word {} is not in Lexique383\n'.format(word))
continue
@@ -326,7 +326,7 @@ def get_all_forms(self, word: str) -> List[LexItem]:
List of LexItem objects sharing the same root lemma.
"""
try:
lex_entry = self.lexique[word]
lex_entry = self.lexique[word.lower()]
except ValueError as e:
logger.warning('The word {} is not in Lexique383\n'.format(word))
raise ValueError from e
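
The functional changes in this file are the lower-casing of lookup keys in get_lex() and get_all_forms(), plus the switch from UnicodeDecodeError to UnicodeError when re-raising parse failures, likely because UnicodeDecodeError cannot be built from a single message string (its constructor also expects the encoding, the object being decoded and the start/end offsets). Below is a minimal sketch of what the key normalization means for callers; 'MANGER', 'manger' and 'Mangeait' are illustrative inputs only:

    from pylexique import Lexique383

    lexique = Lexique383()

    # Since 1.3.5 the lookup key is normalized with str.lower(), so an
    # upper-cased query resolves to the same entry as the lower-case form.
    upper = lexique.get_lex('MANGER')
    lower = lexique.get_lex('manger')
    print(list(upper.values()) == list(lower.values()))  # expected: True

    # get_all_forms() normalizes its argument the same way.
    print(len(lexique.get_all_forms('Mangeait')))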
2 changes: 2 additions & 0 deletions requirements_dev.txt
@@ -10,5 +10,7 @@ twine
joblib
pytest
pytest-runner
bandit
mypy
Click>=7.1
dataclasses>=0.6; python_version < '3.7'
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.3.4
current_version = 1.3.5
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
@@ -65,6 +65,6 @@
test_suite='tests',
tests_require=test_requirements,
url='https://github.com/SekouDiaoNlp/pylexique',
version='1.3.4',
version='1.3.5',
zip_safe=False,
)
