From df911c8856d5375a1c57ea6a854542babcaef1d0 Mon Sep 17 00:00:00 2001 From: davebulaval Date: Sun, 24 Sep 2023 09:15:38 -0400 Subject: [PATCH 01/10] bump black version --- pyproject.toml | 2 +- styling_requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b82d01bb..44d1fe5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ target-version = ['py38', 'py39', 'py310', 'py311'] line-length = 120 skip-string-normalization = true -required-version = "23.3.0" +required-version = "23.9.1" extend-exclude = "/(slides)/" [tool.pylint.ini_options] diff --git a/styling_requirements.txt b/styling_requirements.txt index 05a7d8a6..8a4776f6 100644 --- a/styling_requirements.txt +++ b/styling_requirements.txt @@ -1,4 +1,4 @@ -black==23.3.0 +black==23.9.1 pylint==2.16.2 pylint-django[with_django]==2.5.3 pre-commit==3.3.3 \ No newline at end of file From d50ddd5f8b546123daab84f7ff18adb765f8b0ee Mon Sep 17 00:00:00 2001 From: David Beauchemin Date: Sun, 24 Sep 2023 09:59:33 -0400 Subject: [PATCH 02/10] Improve documentation (#211) * fix error in documentation and improve it * add api.rst documentation file --- README.md | 10 +-- deepparse/parser/address_parser.py | 2 +- deepparse/pre_processing/address_cleaner.py | 14 ++--- docs/source/api.rst | 68 +++++++++++++++++++++ docs/source/cli.rst | 3 +- docs/source/conf.py | 2 +- docs/source/index.rst | 24 +++++--- docs/source/pre_processor.rst | 10 +-- 8 files changed, 105 insertions(+), 28 deletions(-) create mode 100644 docs/source/api.rst diff --git a/README.md b/README.md index 093a3db9..eb379caf 100644 --- a/README.md +++ b/README.md @@ -224,14 +224,14 @@ address_parser = AddressParser( address_parser("350 rue des Lilas Ouest Québec Québec G1L 1B6") ``` -### Parse Address With Our Out-Of-The-Box FastAPI Parse Model +### Parse Address With Our Out-Of-The-Box API -You can use Out-Of-The-Box RESTAPI to parse addresses: +We also offer an out-of-the-box 
RESTAPI to parse addresses using FastAPI. #### Installation: First, ensure that you have Docker Engine and Docker Compose installed on your machine. -if not, you can install them using the following documentations in the following order: +If not, you can install them using the following documentations in the following order: 1. [Docker Engine](https://docs.docker.com/engine/install/) 2. [Docker Compose](https://docs.docker.com/compose/install/linux/#install-using-the-repository) @@ -246,14 +246,14 @@ docker compose up app #### Sentry: Also, you can monitor your application usage with [Sentry](https://sentry.io) by setting the environment variable `SENTRY_DSN` to your Sentry's project -DSN. There is an example of the .env file in the project's root named `.env_example`. You can copy it using the following command: +DSN. There is an example of the `.env` file in the project's root named `.env_example`. You can copy it using the following command: ```sh cp .env_example .env ``` #### Request Examples: -Once the application is up and running and port 8000 is exported on your localhost, you can send a request with one +Once the application is up and running and port `8000` is exported on your localhost, you can send a request with one of the following methods: ##### cURL POST request: diff --git a/deepparse/parser/address_parser.py b/deepparse/parser/address_parser.py index fe70e9c8..2dc48c3d 100644 --- a/deepparse/parser/address_parser.py +++ b/deepparse/parser/address_parser.py @@ -348,7 +348,7 @@ def __call__( replaced as ``'3 305'`` for the parsing. Where ``'3'`` is the unit, and ``'305'`` is the street number. We use a regular expression to replace alphanumerical characters separated by a hyphen at the start of the string. We do so since some cities use hyphens in their names. The default - is ``False``. If True, it adds the :func:`~deepparse.pre_processing.pre_processor.hyphen_cleaning` + is ``False``. 
If True, it adds the :func:`~deepparse.pre_processing.address_cleaner.hyphen_cleaning` pre-processor **at the end** of the pre-processor list to apply. pre_processors (Union[None, List[Callable]]): A list of functions (callable) to apply pre-processing on all the addresses to parse before parsing. See :ref:`pre_processor_label` for examples of diff --git a/deepparse/pre_processing/address_cleaner.py b/deepparse/pre_processing/address_cleaner.py index b4a180c8..b7ad962a 100644 --- a/deepparse/pre_processing/address_cleaner.py +++ b/deepparse/pre_processing/address_cleaner.py @@ -3,7 +3,7 @@ def double_whitespaces_cleaning(address: str) -> str: """ - Pre-processor to remove double whitespace by one whitespace. + Pre-processor to remove double whitespace (``" "``) by one whitespace (``" "``). The regular expression use to clean multiple whitespaces is the following ``" {2,}"``. Args: @@ -17,10 +17,10 @@ def double_whitespaces_cleaning(address: str) -> str: def trailing_whitespace_cleaning(address: str) -> str: """ - Pre-processor to remove trailing whitespace. + Pre-processor to remove trailing whitespace (``" "``). Args: - address: The address to apply trailing whitespace cleaning on. + address: The address to apply trailing whitespace (``" "``) cleaning on. Return: The trailing whitespace cleaned address. @@ -64,16 +64,16 @@ def hyphen_cleaning(address: str) -> str: """ Pre-processor to clean hyphen between the street number and unit in an address. Since some addresses use the hyphen to split the unit and street address, we replace the hyphen with whitespaces to allow a - proper splitting of the address. For example, the proper parsing of the address 3-305 street name is - Unit: 3, StreetNumber: 305, StreetName: street name. + proper splitting of the address. For example, the proper parsing of the address ``"3-305 street name"`` is + ``"Unit": "3", "StreetNumber": "305", "StreetName": "street name"``. See `issue 137 `_ for more details. 
The regular expression use to clean hyphen is the following ``"^([0-9]*[a-z]?)-([0-9]*[a-z]?) "``. The first group is the unit, and the second is the street number. Both include letters since they can include - letters in some countries. For example, unit 3a or address 305a. + letters in some countries. For example, ``unit 3a`` or address ``305a``. - Note: the hyphen is also used in some cities' names, such as Saint-Jean; thus, we use regex to detect + Note: the hyphen is also used in some cities' names, such as ``"Saint-Jean"``; thus, we use regex to detect the proper hyphen to replace. Args: diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 00000000..c39ff937 --- /dev/null +++ b/docs/source/api.rst @@ -0,0 +1,68 @@ +.. role:: hidden + :class: hidden-section + +Parse Address With Our Out-Of-The-Box API +========================================= + +We also offer an out-of-the-box RESTAPI to parse addresses using FastAPI. + +Installation +************ + +First, ensure that you have Docker Engine and Docker Compose installed on your machine. +If not, you can install them using the following documentations in the following order: + +1. `Docker Engine `_ +2. `Docker Compose `_ + +Once you have Docker Engine and Docker Compose installed, you can run the following command to start the FastAPI application: + +.. code-block:: sh + + docker compose up app + +Sentry +****** + +Also, you can monitor your application usage with `Sentry `_ by setting the environment variable ``SENTRY_DSN`` to your Sentry's project +DSN. There is an example of the ``.env`` file in the project's root named ``.env_example``. You can copy it using the following command: + +.. code-block:: sh + + cp .env_example .env + +Request Examples +---------------- + +Once the application is up and running and port ``8000`` is exported on your localhost, you can send a request with one +of the following methods: + +cURL POST request +~~~~~~~~~~~~~~~~~ + +.. 
code-block:: shell + + curl -X POST --location "http://127.0.0.1:8000/parse/bpemb-attention" --http1.1 \ + -H "Host: 127.0.0.1:8000" \ + -H "Content-Type: application/json" \ + -d "[ + {\"raw\": \"350 rue des Lilas Ouest Quebec city Quebec G1L 1B6\"}, + {\"raw\": \"2325 Rue de l'Université, Québec, QC G1V 0A6\"} + ]" + +Python POST request +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + import requests + + url = 'http://localhost:8000/parse/bpemb' + addresses = [ + {"raw": "350 rue des Lilas Ouest Quebec city Quebec G1L 1B6"}, + {"raw": "2325 Rue de l'Université, Québec, QC G1V 0A6"} + ] + + response = requests.post(url, json=addresses) + parsed_addresses = response.json() + print(parsed_addresses) \ No newline at end of file diff --git a/docs/source/cli.rst b/docs/source/cli.rst index c4ae4e3a..60f52be0 100644 --- a/docs/source/cli.rst +++ b/docs/source/cli.rst @@ -106,6 +106,7 @@ We do not handle the ``seq2seq_params`` fine-tuning argument for now. Test **** + This command allows a user to test the ``base_parsing_model`` (or the retrained one using the ``--path_to_retrained_model``) on the ``train_dataset_path`` dataset. For the testing, the CSV or Pickle dataset is loader in a specific dataloader (see @@ -136,4 +137,4 @@ Command to pre-download model weights and requirements. Here is the list of argu - ``model_type``: The parsing module to download. The possible choice are ``'fasttext'``, ``'fasttext-attention'``, ``'fasttext-light'``, ``'bpemb'`` and ``'bpemb-attention'``. - ``--saving_cache_dir``: To change the default saving cache directory (default to ``None``, e.g. default path). -.. autofunction:: deepparse.cli.download.main +.. autofunction:: deepparse.cli.download_model.main diff --git a/docs/source/conf.py b/docs/source/conf.py index 41bdbe4f..430d87e6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,7 +73,7 @@ # # This is also used if you do content translation via gettext catalogs. 
# Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/docs/source/index.rst b/docs/source/index.rst index 9a399aa5..2577b003 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -653,32 +653,39 @@ class name) when reloading it. address_parser.retrain(training_container, train_ratio=0.8, epochs=5, batch_size=8, name_of_the_retrain_parser="MyNewParser") -Parse Address With Our Out-Of-The-Box FastAPI Parse Model -********************************************************* -You can use Out-Of-The-Box RESTAPI to parse addresses: +Parse Address With Our Out-Of-The-Box API +***************************************** +We also offer an out-of-the-box RESTAPI to parse addresses using FastAPI. Installation ------------ First, ensure that you have Docker Engine and Docker Compose installed on your machine. -if not, you can install them using the following documentations in the following order: +If not, you can install them using the following documentations in the following order: 1. `Docker Engine `_ - 2. `Docker Compose `_ -Also, you can monitor your application usage with `Sentry `_ by setting the environment variable SENTRY_DSN to your Sentry's project DSN. There is an example of the .env file in the root of the project named .env_example. - Once you have Docker Engine and Docker Compose installed, you can run the following command to start the FastAPI application: .. code-block:: sh docker compose up app +Sentry +****** + +Also, you can monitor your application usage with `Sentry `_ by setting the environment variable ``SENTRY_DSN`` to your Sentry's project +DSN. There is an example of the ``.env`` file in the project's root named ``.env_example``. You can copy it using the following command: + +.. 
code-block:: sh + + cp .env_example .env + Request Examples ---------------- -Once the application is up and running and port 8000 is exported on your localhost, you can send a request with one +Once the application is up and running and port ``8000`` is exported on your localhost, you can send a request with one of the following methods: cURL POST request @@ -829,6 +836,7 @@ API Reference dataset_container comparer cli + api .. toctree:: :glob: diff --git a/docs/source/pre_processor.rst b/docs/source/pre_processor.rst index 49fc2641..ebb8fdcc 100644 --- a/docs/source/pre_processor.rst +++ b/docs/source/pre_processor.rst @@ -9,8 +9,8 @@ Pre-Processors Here are the available pre-processor in Deepparse. The first four are used as default settings when parsing addresses. -.. autofunction:: deepparse.pre_processing.pre_processor.coma_cleaning -.. autofunction:: deepparse.pre_processing.pre_processor.lower_cleaning -.. autofunction:: deepparse.pre_processing.pre_processor.trailing_whitespace_cleaning -.. autofunction:: deepparse.pre_processing.pre_processor.double_whitespaces_cleaning -.. autofunction:: deepparse.pre_processing.pre_processor.hyphen_cleaning \ No newline at end of file +.. autofunction:: deepparse.pre_processing.address_cleaner.coma_cleaning +.. autofunction:: deepparse.pre_processing.address_cleaner.lower_cleaning +.. autofunction:: deepparse.pre_processing.address_cleaner.trailing_whitespace_cleaning +.. autofunction:: deepparse.pre_processing.address_cleaner.double_whitespaces_cleaning +.. 
autofunction:: deepparse.pre_processing.address_cleaner.hyphen_cleaning \ No newline at end of file From 13eca91e0878e79d6950e63c60b2bf46eb6e89a0 Mon Sep 17 00:00:00 2001 From: davebulaval Date: Sun, 24 Sep 2023 10:02:00 -0400 Subject: [PATCH 03/10] update chabngelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb22812e..1cd0153c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -338,4 +338,6 @@ - Add a Dockerfile and a `docker-compose.yml` to build a Docker container for the API. - Bug-fix the default pre-processors that were not all apply but only the last one. -## dev \ No newline at end of file +## dev + +- Improve documentation \ No newline at end of file From 3e26a4241f7700fa4a054ebcc4c2aeb3029cd294 Mon Sep 17 00:00:00 2001 From: davebulaval Date: Fri, 6 Oct 2023 13:30:34 -0400 Subject: [PATCH 04/10] fix disk memory usage problem with some github actions --- .github/workflows/docker.yml | 9 +++++++++ .github/workflows/tests.yml | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 542da21c..d4830b8f 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -14,6 +14,15 @@ jobs: runs-on: ubuntu-latest steps: + # Appears that we get disk memory space problem, thus as recommended by this + # thread (https://github.com/actions/runner-images/issues/2840#issuecomment-790492173) + # we clean the runner before starting the tests to free some spaces. 
+ - name: Remove unnecessary files + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" - uses: actions/checkout@v3 - name: Build the Docker image run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 030faa11..f69aac2d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,6 +10,15 @@ jobs: python-version: [ "3.8", "3.9", "3.10", "3.11" ] steps: + # Appears that we get disk memory space problem, thus as recommended by this + # thread (https://github.com/actions/runner-images/issues/2840#issuecomment-790492173) + # we clean the runner before starting the tests to free some spaces. + - name: Remove unnecessary files + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 From 9d2426eae531f6cb35887979f67fd3c08ef90385 Mon Sep 17 00:00:00 2001 From: davebulaval Date: Fri, 6 Oct 2023 13:46:21 -0400 Subject: [PATCH 05/10] add disk space cleaning for disk space errors --- .github/workflows/tests.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f69aac2d..16a7dd6b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -38,6 +38,15 @@ jobs: python-version: [ "3.8", "3.9", "3.10", "3.11" ] steps: + # Appears that we get disk memory space problem, thus as recommended by this + # thread (https://github.com/actions/runner-images/issues/2840#issuecomment-790492173) + # we clean the runner before starting the tests to free some spaces. 
+ - name: Remove unnecessary files + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 From 2800d57ba081530cc760d0eb85ce2d09d726feac Mon Sep 17 00:00:00 2001 From: davebulaval Date: Fri, 6 Oct 2023 13:56:00 -0400 Subject: [PATCH 06/10] delete windows cleaning since use unix command --- .github/workflows/tests.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 16a7dd6b..f69aac2d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -38,15 +38,6 @@ jobs: python-version: [ "3.8", "3.9", "3.10", "3.11" ] steps: - # Appears that we get disk memory space problem, thus as recommended by this - # thread (https://github.com/actions/runner-images/issues/2840#issuecomment-790492173) - # we clean the runner before starting the tests to free some spaces. 
- - name: Remove unnecessary files - run: | - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf "/usr/local/share/boost" - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 From adcbe41a4293ca98ed5a836e22d80e8b08ce5c84 Mon Sep 17 00:00:00 2001 From: David Beauchemin Date: Fri, 6 Oct 2023 16:22:13 -0400 Subject: [PATCH 07/10] Add Training Guides (#212) and Improve Doc * fix error in documentation and improve it * add api.rst documentation file * first draft of training_guidelines * removed installation and getting started from index to specific files for easier redability * added training guide & fixed warnings * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * Update docs/source/training_guide.rst * fix header error * clean spacing * clean spacing README * added details about the data * changed countries names to english * Update docs/source/training_guide.rst Co-authored-by: David Beauchemin * Update docs/source/training_guide.rst Co-authored-by: David Beauchemin * formatting - removed blanck line * Update docs/source/training_guide.rst --------- Co-authored-by: Marouane Yassine Co-authored-by: Marouane Yassine <46830666+MAYAS3@users.noreply.github.com> --- README.md | 22 +- docs/source/_static/img/labeled_addresses.png | Bin 0 -> 54978 bytes docs/source/dataset_container.rst | 2 + docs/source/examples/fine_tuning.rst | 2 + docs/source/get_started/get_started.rst | 147 +++++++++++ 
docs/source/index.rst | 249 ++---------------- docs/source/install/installation.rst | 46 ++++ docs/source/install/quick-start.html | 148 +++++++++++ docs/source/parser.rst | 2 +- docs/source/training_guide.rst | 99 +++++++ 10 files changed, 479 insertions(+), 238 deletions(-) create mode 100644 docs/source/_static/img/labeled_addresses.png create mode 100644 docs/source/get_started/get_started.rst create mode 100644 docs/source/install/installation.rst create mode 100644 docs/source/install/quick-start.html create mode 100644 docs/source/training_guide.rst diff --git a/README.md b/README.md index eb379caf..0b78e6a7 100644 --- a/README.md +++ b/README.md @@ -244,19 +244,20 @@ Once you have Docker Engine and Docker Compose installed, you can run the follow docker compose up app ``` -#### Sentry: +#### Sentry + Also, you can monitor your application usage with [Sentry](https://sentry.io) by setting the environment variable `SENTRY_DSN` to your Sentry's project DSN. There is an example of the `.env` file in the project's root named `.env_example`. You can copy it using the following command: ```sh cp .env_example .env ``` -#### Request Examples: +#### Request Examples Once the application is up and running and port `8000` is exported on your localhost, you can send a request with one of the following methods: -##### cURL POST request: +##### cURL POST request ```sh curl -X POST --location "http://127.0.0.1:8000/parse/bpemb-attention" --http1.1 \ -H "Host: 127.0.0.1:8000" \ @@ -267,7 +268,7 @@ curl -X POST --location "http://127.0.0.1:8000/parse/bpemb-attention" --http1.1 ]" ``` -##### Python POST request: +##### Python POST request ```python import requests @@ -395,20 +396,27 @@ Starting at version 0.9.8, we will also release the weights with the GitHub rele Before installing deepparse, you must have the latest version of [PyTorch](https://pytorch.org/) in your environment. 
-- **Install the stable version of deepparse:** +- **Install the stable version of Deepparse:** ```sh pip install deepparse ``` -- **Install the stable version of deepparse with the app extra dependencies:** +- **Install the stable version of Deepparse with the app extra dependencies:** ```sh pip install deepparse[app] # for bash terminal pip install 'deepparse[app]' # for ZSH terminal ``` -- **Install the latest development version of deepparse:** +- **Install the stable version of Deepparse with all extra dependencies:** + +```sh +pip install deepparse[all] # for bash terminal +pip install 'deepparse[all]' # for ZSH terminal +``` + +- **Install the latest development version of Deepparse:** ```sh pip install -U git+https://github.com/GRAAL-Research/deepparse.git@dev diff --git a/docs/source/_static/img/labeled_addresses.png b/docs/source/_static/img/labeled_addresses.png new file mode 100644 index 0000000000000000000000000000000000000000..047ce6c9297c07c52ff102dd0666a88abc8e3740 GIT binary patch literal 54978 zcmeFZWmp_dw>F9mE+JTe!9wsL!3Tl|3!30g0t9!r;1+@emjQ<0!QFy8gkkXD&fuZg+#)TTnF?6ckAm#dlJgUZ@9ISYE)srGA}?zk$`eqq>5a zlfK7o*R!-I8qX!2ENuHLLkD{IlKL@`6$^yx%(6l>obx}D?|5rDfU^E(ziav*`f69YA zuva}Zsti};A=6*j{)x)>S-+LDV&%VUk(^`a2}}`7&vl=h;4{x++9m#IL*l1&U&%u^ zxcS;H{+qykHD|;Mzp~BvctEpAos4Q1YZLjG2xaXZ1mARSolO%?co(1wh(WEq8H-^V zPNIT6%XG{F5m&&g2<4wED>UV{P4Q z6%nAcLyXE?T-BSTj}+4jnzjyixMJ<{6DO0k{9kP+o^YPR6B+D|!0+xE6&hz1g;`X~ zUcSYBL?`O{LQjpo=B-L}=ft((-ZJyA3X8D;+l9&Uo!Dt9R?ZC^+ zV}kG`WYYYE7?$xS?2VrbDy!LbojO${^A&mw&H*vZ*7OF=!zv^+Xx`tEW=A}%Io!Mu z2|VBsCW?)ji>YFp$@$|F%!LjzGWsN{KA$5=evw*hh!@RP9XS(Lv&HH#)hlAm_;hPM zivhR0t-IK!VL-#LO=)2DylsGybeKVdI&FbKA5tpGqe~w+!tlXp zZg2Jonh60g{1tmg7?RSU1f9g+!QtiPu^Hoq^vOr-3t3ex^@bfwwrFD|7WOHTUj+PU z+Do}A3ic2Q9>Ph$e2|u=&8_gJ`ih#|%sA+#rWHqiUO;c+8}mH|Wa*m#XS$&}iV&w4 z#oC0}_{D_bNpdsh1kE?F4MKwxKUSwh^**KoK&j;>`_8Ik@x$6Qdn=eQ(tYXeQb}ke z0&>bOFLU*LQ0hZ7yUAGK>o$KX8KO?l6W{@UjR5_^d2;FURG;^~dMhu;(#OI5IG9HI 
zoqyui23UoD<*H=SUE+#CSHv0k^P2dU)@K0c)|jHgk?&KH;AG;}^pBXG3f2aKJhV@x z_<*`V+tbD8v?}}{1D}^l#veF&YGtr=0S&JM_$Zv}ZKeY8*3uFLbzWZq zLV?UHBe_ghycB8}oL2h9eACAse5ovK);Dxz8Ce# zKoAk#F%VfH6OE4WAaHK}5?B};VIrBaCqmJVLZj1T^%ut{kbiig{6nmeRkHY6L0o9KL?Ggq;1yO( zMEm>oY=w4UT%%gb^@W?;URBjIR8>u42ovdnK9@*=&xp@ z&5fTJvuihIPEB}}6hEHe6+b-%Izam5^s{jE<0ZlyA?Ta7`n`{f3fqt;zfFyYXeqMq zn7c21X&)25ZC#tu)RdIFv9ASB@W zoEkw^ZAW3`jk_q5h}Adx7{s&ivq1J^v{#Guk6HPgfj*XB*b*@toH#A6r)cy=MSSL^ zPh3#z@ie@;)g_!7>H2+=;XRzp&crIdMIwFk0JL;+SU?asLz(Kw+h~8eZ{(G3iHX%O z4O73q6rTz9;vRIS#ARo+Qmgy0uSB@Cu_^Y^P`|NvDrJ%cpb%iqG8x>?KG;0rKP`NR z798`9kq^7RvxAb3Bm3AVrPSmp54convhGqp!Yt2kvM=2qMlYPyqI&K4Rra&em6WKT zQBTyn;`WgrMi~9f6J(?O?dK&zOUYMCVrD zz`@Fe``qCp8;SaxJp(VDKh_2>HND4MG^i3m>IVSzN5AXwaiSm8pRYJ%+t7?ZXDnJ{ zi?6N5*g5RrV^z5VvBvHFnO$busD=OxM7g`_)PIdND@SLGbInt1>V^Rlt?~+ee7nK) zMT!li_#g5Wm?J#s?FcKoovW}gGHG8@N`nY)bSa9zZ;0-X4x$QKbGev)s^ zO3fb<>315~C>niRs*JEY2%^VQi9L)x!Pu-ZzjF&{?cCs@R7KH?XrbR9+n&lSsak$j zks>)ZUvpxj`(mmxaaDY9fd6sx866f?s>|aJ}Q8kj$67o?1MeY zH*OS_YKp_({LoMquNU- zH=(LD>Axkj!yqBE4knU14`uS32urC01#UMg{?dVAHJvJ7Ocdgq+GV>P2!?0@O{2>r7o__f280M0g2PuTR z-K*=jQRkM#0fNqpQ&vP=x|G{Cdtsx0xF7h%H=hmW8(nLbZAm1wkk*>Cj8WlrNkJR8 zhOUM_XsuWnnYL@bzK+|T$!wiUw-o57$nyT1pLSHlpWdjbGaVDMZ8+O&175Y5LSqz6W;KHAR^Xc26<1|sa8!Kv1u&n1#_1hxYpdTR@h|*o$nV4#K>|9D2+R48 z1B(N7poMS-F4N9PLs1o$d=j_RqA1 zTsp8x+RFtsHV^3hSeu91LOWXNL|?7c)792%uV=&Kn^ilE9Ssilr)HxKU&j_Zy4g;E zvgKGVF!G3y&XgiFpOE^0G%|<#N9DXSltn54=PMbj&cOG=g7#OAaOL-Z%OquxV<{t% za%2zmJk{&oel&rGv)6|(idr77GPg$C>j!P@E?GhpjU0zkLx1h1QzC{4URU3 z4>?Q#b+(0ZpopIEt%W~l*69*6uXB9YQgnj~S*u9WvAW<8p-G|Kk7eJ&hn=gDErER{ z6^^Ueqe$A!r@;+CHy{TbfGvpoky=tF(~a^NV%x_HS<6wko-qdVfgpQG{GuBq0ytuZ zsdltS%n)5H=^9`H?yJ9GegK>QGs9-r?YejoKH|Adu#dmVoBJU#lY}eC0TUJVdpPgrJnkyXCC?RWKt(3Nr+L~B$Gr=tnG&Y`3+)YIB=cQ*k)NCT3 zZ%g)pE7Ofwb?E`$AHMK#xb|E?OKL@Po#<3rmGz?B%&!+1_|A2E3|7R*BpZYTevQf( z%XsA4Xo0M(PXYfPm1{!NmoZd>OiW+xtPFog^|#Z&HAsK|l}lE(q6e2>BPg#)wWDu} z;Nol|g{AoFthzA3ATh|U7R{Uegz$C~av6qYN@&%n%?>|!M=Fkf6+ZV#>ILp~*GJP@ 
zv_q{R``xCmzSg$t_X;Ip`jQm}*{!-HoVQ01O)K%LiIz-?BP?&-iol`%q(~#bPaPt$#1L!Yh@WYS6g%ff^ zoR-}hE{IRP(kZPWM)DT{eqF~cT>Ipb&KmwZV=s!U;y+yct#;@l9Qnl(3*nlvaZ!^k z&y5KvLVj;`bpAI^w+w!)NCcOluemf4^N$~+&3iYIZ}xKHHujA2pxMx;c`$nfZkSRn z;;D6NsIx9a2EVJe>-kJ};1XTYc`)T92qXx}Z3BIAn_B@TVhbWydlz;m8q4~qpY^}# z3~_fG#Bhj;SP;SzfE1QY4{z;1Ka3p55rZ0qac{RY+x|he3f}^-24?p$uCUY}K~jVn zdq4O*F@9rpsWSW&5}(tr610M=IBzkWx>Zl>z}5%r^v96XQ+xl~;^3 zIjuBq?V|qp=c)#~6`s&R$|ioC-P2y!^BK5NIE&!VO-V*876LG9S(W)|}B{vC9Q ziih@FZWqrmXQ92q8Z}uhTB?ycprNo%3S#npoZR2#t;^q|vTwAuAM5pkt|~gSu_0;> z{T9!DqFcB2bV%81`PQrL+_X4}z9mGwI`q}WF^}jhC`k+8*9-b!n0p+~WdD`$VthLT zLQ@q5p(&4bdO7$h(Heg6O|$H1pdLfU6g3UMD5!m*`h`{ZF*-#srdw4=!tLe! zAYuO8R?&FB$!Gh4H@e|9_)}lodWRWA0ST&e#6a8SX_lAz^!B3~tKwXVu`E5hy4V*F zmj3nf9#%%HIn`)qOi(OPIaZ+f?Sv8xYHC}E7O*WsAT4&XPM}L=-u8)fhf&EHaF09C z{j3QXY@}qa;O9h?I}Aqg>>M~Tfo)c<>}iT+2-igyW`WXy0(}AD*PmFe&Pu}HS5r~_ zYG#CH#_JP@F&%yH zA3An63<769;!*b(v|ZBwYIfEmMJClf5Of*G{_6k3!?t;!KeXy|3PC zJ!7$@#)eEsGP~s$pjCv)uB&uGy2n_*j4n3W9%UMlUDyZuX5Ve2T0H@yzrM%T!8;V1QD!m|WElBwMzkMqT6fZ>J^D|hX`D0#SdNTiY;eagdUDb4&D45iH z?NCk&<3Rt=uP56m!slDRgmpv@E%6;^2vi*ZH2)#vE4I_8JC+7xu|}?vHn>yjwU|Gp z4R%SJB7gBahet`aGTtRuAih;9s@lQSpd(CVD|vrt(lR$s-k84NKp5LGJ&6@=9;F|x z`K7lWO_8(GTgq(otq9n*MEcw=kY3cYWs+G50BnlqpnBF@CdqrjX2j%%VehGNU4u9q z(GUvUey4gZyNsO;HWaYxD&NBPnYy4@n`>pW-zv{5$mmz zrciC0ShMO_3DN{Y`gG|X0T+~w{{mF?_W-%nni}mgB|3lGBE{VQpjHp)S;{bF`9CgY z6>U?u#L5!@5);??B7so-9fK+A|2Uh2Tt0R}^*~DmjRnT2?Zvh9bwew-c_xU%u^OIv zUh3B$$YY!t|CN^g80hUltV??#G@fB$Nd4GmUR&WfEb9Ty_?dD}ZiI^Y1cf|(xFtin zkKJ!V*qOmJv7x~adT7J?#8=-(OoF^P-xhC}0&_5q!!}Yvlkz0gMbgzfQ0Hyr9`JhF z_$52MuZd5qotWG{=3#pVC6jT7e$Wo_9XQ3}A-r(Z`zZIDD*vg5p&TqT?@jvW%pfiQ zf#?D&x|RxFQ28}aLw`1|tBitl7sI`haKg{rhb+I1G{^QTCvpv8U8UzUx0dF5DH2*& z7;4x-up?Ahu%X@riluP7FKmqR@#(^k_Xui?%1O&^Y#vC`TFKc?8PAv=vx+*mA4(doSCq--!eBvMHUbVkTHyd@QiGtg2nneFW9KT?CBWlU?V=j3! z4<*DTcEspJWM{>xjPST;1=Prs3w!`fTO<;p;mTu6&? 
z#!Qs9gE{n_uR%yj&%m*+O|@ zllI7xS4KSXam3I`((i){KJy5h>|3jHibT|0>Gf#h+0U==y!T==u3Xh>IubV$@(DpT z7GebrfVRq4Usb@hunnO$)L%N+#!n`m*19;fVM;L7r+~(1DFL4N51nGB4l-#eSmk3^bGU= z$tm7&i|q&<%l%sOQd~*YonSX)4c7~!LM3T(ac9QxhJnVHV+LUpI~JV7`efKmwyL|1 zB^}ZNIzqAR8h{uzO32u7G5ql$qFNLkPEmyWH+|Wd`Y=y4QLw^sLhu&Fg}I>5WL54) zNTFwB72^fL{0}8ia;V-yy0+Fhp>;@quQlZK1OTScX?V3Z4z#2Wk+s!G36@~-zwT`| zisIn3sB=|%K~A?vYX?xCXC(zhs^Ko03=Lht*XCu{_a+1$xJ z=SlV!Yu;j9NpFvlwL5uDN*xg(rE1c$>%EfUI&|tQIN;dj!ocS&CNmJeK^|`A^;JCZ zM-m@)VUUNz4EiB33oECDu=`RBSsm4jmd)!UgQ&AEIdIoVBNMhCeN;gU2Mbf z=h=HJi2=lK`I~2#!$H%Fc~_i(mf4QL>;fw)*}KAMWh7iX;hCKKs4HUHCqpdjyFx zNSu8JTmP-TLw|}cxv(K0c=kv|_Y~vw;RWGw*CQm#n6BhCfxi?T(=t|T2)Un1?+ylvEi z(j0y8aZH@ht3Q9CO$Q6>etTxax7hYpstS}sA@ilO(y=j!Kfjar^4ws(URZ<9_Ub|d z16{Aw1Ngm5#@Hf~c^1SJIy_zWkWShs{~fexdi#_JkSWj*Wg(EfXkp`ONyYHCB5-7* z>EbPJc$cVid)eXCZ3@*T5U(RYRVD-B`eOp8pt>$WRYvOYh^RG~ya3aTr>*QWoyJoJ zL{L$lQ<9DG@iVy4b-#;lb)fgQ4K$-eG&SHPQ5U(F{i~y7$x8Y%KuH%%b0(}x!Vib) ziH&C6<)Vyr%-RV>f$q?$I95ms-WXylIR-yCJX zIlIQ}!=py#602A)!ZkMdFSQOe#rBG_6YJMEe6brNQrBqG(5KW6FG_N#s9PNT6)ss+ z#{^?*b~D_$tQ~SPeP|A6A6oG8<1XgW+*bH!M0$!Kw?5qJ^Zc+j4RPxl#vRH>8bPcW z9BWq$w+JCAcZGdBGih%J=!^$+%VwYktmA(jd~`U*Bi2r5liGi)2Qwx7qn$drfD8|~ zCB7MN0P`)0h)fgl55-}Mz3?%8BFusav8p&O40FH_x`itMM(~C(t~V|%{Np+362Z)_ zSq2nIIwWg2@%l;F0&iH`s0hQ^s_^;yKzPw-%Z5cYbp@3TkLV5&JS9csSvO!{bTqw= z=oY>Iulq2b;XTX@Cu4whS%XbA_Tlt8F|Y#DJ@FBB5myR664im)d?bZvyNkSM>3PY6 z3Psti@F)fr5QWA<+&DGY#M#8(i#+>9px=bqH=zGIAbNvxp~N+eXAeYv<_sm&@!vee zozORP(@`)|(v`r(8?Pu#l3MBl8N@Z!(V;{K^AlL(QoBO4aV`^d%={x`akQW*PGb4s zZDD_+*e47{nmU+`C6h%jR!@MNq!;!@(CzAzq`lGeIvyb>==weKJE!qpj-1qj>%e}h z#Wa1TRi{p{I&DY$C3!f}thM1Y{d7&1N=($kt#mK=g0`sCKTpcS3S0!llip3U23kxC z>>DQ7NaB2j8840Hi%){SlJXxrP~|8cS;aDxV-iLTRZ+V|5SThEUfO}h`8AC&C0m0iNg-( z?sexI1^w__VPlkS%!kSzWF$(kHH-wS%l*WvK{?-3QS{o+@1A4L@q$Ttu)`JXw;bMg zOZppd9^5eJT7M3JHvWaxGlmtnM_aF$d|tZz(H-W*WLLHX* zTtSR|)%>riQ8z3iQnZ}H9mLcATtbN(Ps|5|6Z)ck$5Mhybk{=^5jo8YRIkc2LOi9> z^cPEoeGaH5)=!pttKCG|#Ir-3F#e#InB+%Jp5q)bPkx%Jf3c?OM0Yth2=9J-wqr|C 
zQmt_FW^1omr^DRmar%p%iI)Ib=^gRK#3DZ_?FLm}!GL;2qDG=adjAK3blzPbmfgAS z1WnLx7{%@l!?2hr*PSooGI{;NUM5&E{`bnnJ1XR?6VxA>4G-x40C^K%=hxqBX;|fO z_Oh=eKqEL=%8boGR2y#SqB?QS7((XjU_G}WV^tu-p-vAVEC647J8izTdUKbys(XX; ziy<*gQ5!GV&L4)s)euZA3BOp4xR8xHCUlgMo7^0XVw5DhdbCbYP}N?kb1fMHscKNp zDcj>TydqXXi>c?k5HOrwbRy;gqGv$z&%K(Wb{-&#EQw6}DI58XQp z(GcZQPALiKQPR-Dfe6w539(QK2IpH$h!}__&ep%;f*0Vuro+u8b|pS07?QR05jNS* zJ$j;rSjwEbDaGFH)g{JUp30o%Y z^tx!3N0(%?jBHqZYmSmK(eFx2T%}SdhAbx9REanpv$ALMgb|42f_)UIoT9j4*CHsG zYR=yA!Wt2)1Czg|PrKM5Y{}`$kJ^@MOB&goep_%Gz@czR^}MhQ$4q)`&1UnM%!DfV zJj$wKWI9$2R4Is|(He}@aCVN<_I9L{`Yh&S>|^*d8L!>vxBeX+IIN>5Z#aQZiK>iQ{g`tHAJb~c*gLL6V=R)p1bLz z@y|GfvC(s$)t8y^tf1j09n&Q-%DWn=zqO{~6WhrXub6$AF;xA2TP;pPp_|IEH0~FI zq=RrwNtQZ!CGJPWII{8 zl#6#7vL-u!g=clYf@j&42^{$}I9s!e00LeUrf>iD8{zsQE;hT;Lw}Km=kc-~#fo-n zq-%*j>nPB1-OGh=$Wz&{x4xs>f2#VqX3rL|5;bGNwVu(c^bPlbT>!S=cZE0b6!o|s zTx3k>ir4sMnEmihQhNX&PPZpnJN;uSOmW?@4}RfKvbDhMtV*)dd-J2pVclhF=92>5ak&e#?8d)Br z*Ll}HyVc_vi)~k8W%_z9o998HevMPfGiNzT^~`?wzf2Sw#Q$M6tD-h9+fyxbn)IaZmO9-AX5t9kZ_5RjwvxLnT8x z`@(+IT(=zHY-)Yh+I&j*R6Um;x>R&dWKqBNVmRUrF`ezf0k(_u*VxIu0aK-g$*Fzo zkUi~%dLA3Fjp0||fNvjsh5<}q{5_848DRwXhTrbB%-FoqDF}%wV0fMAke0$8J|31= z?p_&YVzw=Ex5go18!nHshGTV5>)Z$6a+tlT^iLY0{!_#$HQmUYkLmEerUE&TRAxC} za^Ee&WB2XA-cue#5IZI@rl-EQpn5`0IbU%WmSeZUnpvR-Pf-6_Ei?Z}de3Pf1{TFH zl!k5kZEZDC@e{gr1Vqd-d=J?KPVA&bj!a%ZwTG&YarYA89=^;jrM*X@A~EQ;VG?qF6KdwUqva8}?FXmJg&o+}e+ zs!%kmDmmoP{5n5J{l(V7+prbo5AU`X?XC+H3TS(FsZtrjFUCDDtFv6e6X27*`3Al} z!0imm)jzmR4y6;c%Exy=Aob}CG*vQX-c-s8v8tDKwJM3nK_9B*taKV-$q+yQ% zNP~TZ>IrjcDm1eMdVr4DG}E~Ta7hhA-e)wb zg5m#CiI7otSWvcmS=F+8VVf4&%om|qT+KSPCF=+L;?d4_i^bY~>jn3iI!6O&z@$R9 zi>O#2rdYbw)VJQ_!OrTaxAp=e@v&LyGtFAU?ZEu;H!zZ58r97=UML=6T=WZ=69VAq zx)1TziyZdoFK~)v4C*|0r@YcQ$D3860_f;g6opxxD>&uc1EyEJpJy4wHys5$z0=Rl zgKMm=)Q}=@cLP*K8-)jKCS9y|cO!;Cr}C-};Otz$K#o&+cHYm-w>tkg;oKc%d>9ev zuhaWGhIQ%do2!LDDsH|h&U6VXnVBJsrmU}6rlWvQ++BCQodZtX61l`ZLlfuhC_$W-59dx{^y+gR$K#Rj2Qqe)QjWw5WQl$C%8Rp|-wElENJQ{K}xC)7GEgT&Z!LWu%*bu%@ygV#5rRzh}s@%8|n 
z9z_UE=)xzu_{)gawPB17;wRkxUaM=}R@OCLkw&eBxL7eCEybG0^FAKZ2oKt!v%|Ff06lO+B+R zq(c4qzGi~Hp<*uC-$*psYWo*tkyqwEA++&3C;00!l62J8yaa_C7V9B>*hFCit2q!^ zS4Y^U2xfR-!|W4o#yxo;<0k?b8iMGCtghG?-$i6IbSE!A2%>0bD~!)SzJfpOiics4 zHb~;cQv4{<6@rYzHJS^=53MZ_K{4vJqQjUiK*?LC$FNA6PfD=v^q~}{4U64WNO<)P8&j7h2%nt9T)IiuIa?1Ki~eQe*Enne>*?nG0NqsxcZZJpdFli!_dX} zZT5zkq?y|%VH}4Ly9QJio98TW)n3DILp@oi5TJP*!nRVf7N}0rNItGU;1K1i z54;Y)T~{w656zS!DO4tDV7@eU`Ta;qSL^`tD(ZTH?f84-xlRCi5E3m1DkrY>l;|!I zsWh~-FWMeitWbp(7|Riy#h zWAPp7;wVmG?e{4pMOb2g?--7dcW1#s)!t75-OD^;;uYU| z%-ftl+Dzb`nnb$cVUa!i6^C?fwO!dnOrF+DUEjM3Q*ebYND>}u(-%pbeX+QCVLox> z3qy02@Ix03B8Ojnxf}l^W=YpelM|!Pt=QN*FeC|`d8PY0UBOS&uMA)qdz5)vz5dDm zA-M?{CLjqlRba{ZWuh8s4C!ZQ_~gPaz3_ZUHpT`DqTvdDVz${+x}WYnEpj&GMrnq! z8KhCPJ~yG@QvYn{(y$#$wz?~<;%_VxOT?t<8_cP?nRo-K_URE|0874_y%!NNe}(V5 z-tT=Bjo}iZR6EZcLHE)d{*gFau1p#-ANPvkQK@C3!qjQrI)DV^3vqn zMk@VZLk8)lmxFH>C^5UYAyMjdAqS{kKB3sT72}&({Qkgm@-jM>VehH#d)n8)qA*jR z$lgy{^UfeTT7`{GyH_ilBh77nRtxN;zfLdo3~#bRi-J6O>jEFpRl`adzAMEHjIIj8 z9-e60L+Enb(z81b*YLeqj_%JwU4RogWLfR#;hh>zeE8^k2v^Ni{cFCr8mTTf)H}=H zgLEXB2e$Q#QJ0FE*Icp5j|_ylw7f{jXz){+*ppm;f>_eF`C@W}7-pN%h3OD)(r+kW z)WMumo767|UHhY4+|H}}_f)v^s&54zjQ1Xgh&5Y%g12?T&ZQK~WE5?jnY1A;q%ea8 z8*8hV)4ji(&V}E`^JI$k;$+PR7f1NYd!Yr>$mNq*b-zFvlazpMUW)u7x|5lpT#VTO zNRiA}oV6c8!JVT?xvhOxOYJl;=yK8 zW-?~)=>}e7QSog^;T@eqsy&}wL}1a=P5#XAr z3#m(xnWj7w5F44NnY_yu3!h#PsRsIzxpdZCd_|n%txCr(AmCkW3Qy!%rN1bzQv3(=_!GD8;)rY9mUap z{AbzcTRFMdPIF*MgbW#wB!J6KPk3MBbn$m#wEh_6f++g453aS)1zQJaW37 z2*3D>F<0)yY9$?S{f3gj=ZnBx4o;V5EORNfEV7)B(C7paV}T3eCBs*Yiv3icw$4|bK*f26Ft;s5f>*Hn4% zPW*>Ow^Ufz9n2V{ukrkFhVPv90KmwwQ?5RidhkxYBM!YF;_W-?w6h3XE+(!BRc`YM z{qs)xCLuXdYny=(NLI!|E@n;gXi2bvk5PsRe}whpkCGS{RRJ{>6zh7vL6@w_TS&8!KZU;u<}l%p~sg6vwtTs@)9xuUgshYL^%q^QKLNZpZEc{gQ>hUoDh%{)uSp z|D0qPBwPFe&%lhC<$Wt2k_`)0H6&^Pw}`{g6AH^O3g~~QU%yp$;#@=-czB;~XG&XYMk{0L&1iXdWb%73RVXd+{*3%@ImlK%hNm$l9bG!I?`oxCb($2vG|`4)rM9i=y#6 z?aw_NzehvZe?M%^`#liUm<`uEJLudY9V1+K$KZOXf4~o$S^KPCMz$Gf#wQaPLmN(7 
zXTOu>#Vy&uXo!uXp^hYzrUau2uma?%VPg9avhJgl78zM>NXD5wMKdAAN)sBQBhOMG zdlWQ53hSV=*$W_3;OLG}(UgRy8yDJ#>6oI)#zx5Enz-$!Si#HOFFg;F3p(xibryC^ zw6$v&%xwicy@h`6HflfV>Th^P6`JgEAgK;nwO?>k;TAawj;C+WwUq@$Ey-L<7h$~W z5F-1V-}v)u%ToWVarl35MN&6sfYWCk{ZPF}eNYjZRU)qDWa+ql zjb-vI+c8++Df>2WblY|o%psva~c>6P%k zrTdqD79{`l1fx3R$0b(9kA8#-LeR|Tt`5msjL$)vHmA-q(S{?rqR9HwJ0F+zMlNiJ zr)f)7K9qWUFDxFVXVq>KV3SQUq2XR_O~)5r#%3eH zU5S+3O*v;*srk8B=GrVUcxbH!xc~8`bXY=ZYdI!OHi4N5NnllUsb{UtswE#vo$d30 zeon`HHq$L;T|+Z^nrUsFPU!j4x$mZzGc@(f@Kk7>%FvxZ$(c}!y9Mw4qAqieqYiQm z7RC>M4Qwsx{nLm?WQi6Cfh`+R%YR#Z2rk%dz#7P8nc&BkHS+*ccDvthI+ndnUW>D$ z));~t{0#Jj>i;xet3-~@2>(An|3`oS$?d^1Fe^D*=SDO92|?mV>goaZU|zFvLOZ~# zvAH3#y547i1!Sb|HgX?@SFXxp6E0Ep(xE2b(!NqzA!xxBN6%`-4sLt*uGQDEM;CV4 z^{DV}*Rc!rSy{&5A*7)V-5{|1f$g86zuWhiyZS3?$=NDk;^J+YR%y|9v-g%1reCa< zVb`ZYpXfRhv&Zf+S*U?mPxa?=teefrL0>Pz$&U&QaH((z#Bt(2Ky_)z4jDC-hg~Al zE~JIADDg5TT{P(3|8PyLuc;xfyAVVOFFG9>DMb&DA)@qQ9 zH+g~}AnzQHup;q+pcg+fSm8|3bqS->9GHK0VD!?ZWfQi$_>q3{G_FLKv|P5>_CmY( z{H$2UsN?wxk$J}A^pT1bG{a*qcxVnxN;k*=Bpi{XP=8d7AF&ZGxgtZ72svWV#xzF5 z3rdFTH**8C53kP$;jib&U?V%Z) zupTd$Y>r(yE<9w$IOh0UeLwiSpGe^n*I|ObWQFFXw#t2I{jyf{>;WxU9(BymBur9pNt!1>sP zsD85S+SePH!3KQVeD>b`rZNN-i9|1nU9REK0&e)by&kXs}ojV1qlTwmI=|LiA#qyTD=m;52mr>)G;Og7N3o9+xCUc=Z&J3 z)4fmSv$JhR^HmR>v9V?^(mk(R&ykePBu|r0TkgCd-j<8%4kuor+>W{x&0T(J{@m$E z^YvZalk`fmziuV6`p3hud3~sW~Q#XlvMlhkj)>H4P;i94iebRmZ!q7(RM%` zy)T+fsGx%ctYggy=QV0hh#jB_>td!*u@l+$?)zNJkE&tho!qO+X_%fNoULedF2A_n zq9Hb=OyIiI#(>exF=;P0CL7xzG!vJ@a!9?U@ea~0>?zDlh-1g8xoTW7%i@uki>Kgd zeJ^B{n$qsdOuSh&$bc(qnITMuwE0L=HyA}}>Z)jnWL90U3!^+0{2*~}zM@8PVjWV& z?{466W_KkFxRfz>kDZ9)15|{komp94|Mh&=fVrfuqH8m$c_UPepZ?Z0nMwY`+B9rl zTLyhf`=7z6V(oix7DIE${yp|XhAy^UfsW)@;WpY5x>zYLOJ{|ttsyE1=QxNTjGAE( z68vHCSVjbUwwJhQGiRDJ2x5~@fiG;IgPFaEm4BhH+ah5l89bKD9^|oB^A*>t)V2jo(W3Xfa4`9yx(D zQfX|k#Ge7^)|{Kz=JR??bK25?nf!`X-TTVW%ZB=2v_EC2R$OQ=$Vd#FNnzLJ0s*1N z%H&1lslO_P>&{7f1KS!&PCH6^*} z=10d3KN28^2$uT!a=ftp*pj0HZMu(hxxgT35$UX$hw3@R;JSOv>h6CEV~1^#z6(-e 
zwa}D$Z?h8iDj@xrKgWl~WgUt}uT%%*rVtRTx(wq1IC)$354mmm%5q(DD%$kcH29l+ zjEBd+AoB+r!v6g~b_MdwbF8*LMZEMS&#`6#$Tfa!voF^_mTagO-Fwvh$G6V@NZx|Z zq;Belv*$$iAAI-`2_nxW|AQ%!+r}j+k(&_8zB`0xe*P~&`>&*^B`rGgf?p}9M>}#` z!X;+v32Hxcx#c&0G=hi=qiwONgMBKM9XV{3K7UH(vA7wNPdbf^VOzrtW+OIwoE`mp1E%6Zy4vFv8-*Hd*9Qfi#h!jB(etT z5K{nZt@&^bjPnReV?<7{TrOreDGHBdMKonwo$d#qG(YpG+XQ98k?Q%bTJp7U1}(<2 z?rXRY-QI=oRNnqgG1j$*y$wimo_KB-RwX5;Q9N}1HVUrgox{Q#zgIX+)zJ-1awX0l5pC}3l`W; zWfFSst5di6OrBVw8Rc}o@?u9*`9fiT1*OSrZKkUU*ywcNEk_2xehIeZ;I>>kCkxQr&El;^kJ>V0~*soWnbcCH|K!DPH{^#!rHxk)^VhPA#Ha+)B-P4t{NV zPtSLC=5Oowy4rvK!D3Z=n!2@{uMvJ||ItteJ*Aj(rkWQk{=Qk7OXo{+D7WHJbi~(8 zLBxCvi0%2EXOh! zf<4NAnIW3qbd@aPS<_W#HsCK=u$` z(W=pJb74Icq7(hgOYpidR?NYR1Ciz;AowVCf?>9fNs}7#ASm$bvwRA3QFaf7*Y45t zl{k*^YdwCiqJqPu^Z63N%VV&FKNB6o4d=_r>oFiT+3ot`+f zYrtH{*VF4ZXfxjU_AvSfOS9zv!`@p(#Tj({o(b**cS&$}X|#a=Awc7&ZHO+DKsZn*;B9}MOWOGG6+Zybd*&N7IA3|6f(Bl zm+AE_z?5FUFuUfytk!O|P>?eApB~W5(ew++K7P4AwS2s~MyEP*QJdU^85i#TlYt$xl{Z6aq_cg=$VL5~I5wUeca$O%0EMY%t?D>zvheW#Hjtd^LF+?iZucB5{5hXl{&b(@>du{( zaUWCb*nlyU<8!^w*uT2{t1S2~n=Vox*IQcDHEXnN>OdQ^GC=LcK0d@i*w3M5P8_~F zCQv*yh0)&%SzKY*HtX+1`bz1U#=(lQ34xr{SUhPQxWi6dz>x-zNTg$jyk)nqe%ysj zt*u(bPAiWS5nq4K*cDF|kWx}u?)Vjn{4^Q$La1cEwecG3MZl4du`|1yTQT&|bREMw zUR28;;YgvlW*VFS`N{s7X8?Tg=}0HVqhar!9iDyu+0LJ@waaKOW{Cc$h%t;u=+GHpE9Do6?xYl+vx;7K>~mq?CgCtYNH~%#=vC+< z#PKv%4(hLcv{agLpH{&L-b!lE`OAlYecU}zRHW*2&|)9w&9fldncK*;Ft%Q0T#U{g z6a^~I-bOZ{IgZHG%a1Koz=?uDRP~yciH;aTqGiC0k%u0&FDRNJx9gHE?K{T%$JUyS z1+!9YC21BMLMIk6=P;0lZCZr^nX(ApzD<5@L6KbaPsem8p=X^sSFgRkna>h+u~JO~ z;8pFKxoYTw?)}+IXn##{sX8((2ic-0fDWmNOmOLAzX!l{hv>vSh1MMS?c%fwd8fHsonOY>l^8!veq3 z(qiI-1FZ3%oyRS@Or9p2=mxf9Er;C7UF=U2c3%9RJS?md^R4o?C0llWtq(Vq*m;nd zr$)68G3^(s{YrBIOq7hc`@37N1pgXA{A+`zEh0`aVNAOE`B|x)EV?zJs#hg7BUq7hY)`RCUwAxpETVW^D{~>%Z`A}Y@iyh0h~(Tcy9{^ZapY4V2dRyF?A5lP=pH-c z9o`-5cdvc)7Cp#Il#EwntZ`Il%+ph-!Q1Ep#1|YBcH90SxB|8Z2Hq@pSwl5faBy(` zBsKnig#V4mww)dQSgwixZ)ug~e&*f^zjQc?39Gf(f)s`A@O%H7PY#Q2Sc^(BRR6l; 
zt{Q&WO_hE1CFlM?o8?iCn-4flG&n#0naC7N*yps6gknNY)pybpu%_=h6C!K4kytYO zqTI=he)^%Ukh}iyiK*A$I|g29oo_0AhZD-+L?(>i(AqRlJCJi99vMhZWT0?81A9Ch zUHI+BG)(!^_Zk0u#pZoPK}I6o^l?ty1JV9`nL;R4NGW+)k2gR_L$J^nr(9vxS}%C! zNjV$R*Q7CUyoQp#Ef#&VMqAGG6&JldyNqorU$4lJ?wVw%ku)u*8o?N8_4rD8oQ-)6 zoslw1f1s9^G-DVHQ@=x&7<-`1QN9?N?pVaGbOOS;9oTg%b##hC$+*IJ}jn zgjTKxdzIb@T*FWtKxN%?DFo4xgZyCD^`}`;_9jfSgVNwG{iw1&1o$G#&S0T&rt30! z6NEQF>hu#Isb7#47QJi+X z>16)>e&qGqtBEPkp=z_RD>Aa_df;1H4`8_1bb2?oP@u>10A~xOM4*`Aravz^BI@&A zk*G@F_!pLS4eQc0u%NaPS9V5)r|_eq?v>TGv9}VL3aa-6?7}VqtC2B;C1{WBvRYA( zheg1D-$ZQLl^cE0AO1WuS|Q7Yc5+91T#qS21_cDg63`sPSJ!KLUTD@R=-exMAAU<89*Y*Lwz#%j88W8Z-WVgLmBi9D z@dARKW@5_xy@BbKsl-(hHLM+-e?t%e{2M-p`nkU@+dsuPJgk-6S8D~N>WAx7IdHe+ zK224kyZ2vn7-$h2|9$Ho8Ejn7>9g-epdl8B1n}`JL|3p8AJ2psW-xLZYuCPOUX>K( zGaMc7^ZpYg=wLn=hvptelDD1G?C$o-xXGHAF6DA9qg38QOLZxTThKg6b;cA=*WV6$ zxZDlj6TR5TTiXjk3-{UgpS!J}gyk;b(WT^KMHwyqy1+Wl9NAq8KQ54^b&f$ zpo_2E0{Z1*W=1|}R5RA-QzTjT3i+F>u(R3Bj~46E?~=c3b0ZG$MNEHYl! zke%2Hh0Y)U#d=X^K$KV?K~Z=0bxr?6lOk>J`wAKh37#p#p5Ka|%NJo? 
zg9J7EJ4%W6Pv*ln-lYzB11e=~PCf`zvs{P6J=!>uh8{bxa|e@X+tjA|TzPJMox=VT ztGBu2b*@%TDNOZeaEGg4wc2rW=02UKCQ)8jAS5y7X-&SZly;^6P$0+G!}5SE5;-wo z{bBqt^(40qs4Hmq6l*T*f)Rq9(RY7Ecy z=+<%8;N?b9%=yuVvuF;Dq`b5yR?7%SuS6?q)Z?mIZ(5w~-irGcP;W)R)x+S%rH=kq z3tQ{vmeb+F76Db8Pe9x4s>P?fBT8kAEl;xT!7B=XgD8~$=-BC_A?s@zid zI6jKq+Uqs+a_@g#%)phc6MF|UaK!ibPx#*BS?+ZjfygESlC3$S_D%lL9@VVfL4{9~ zDY{F$)YS+fRM_D6Ss#mCt`8BlwIIGbi?eS!)%kYZ13ai&gclSqEByEsh&cSL8of*w z@|St~tCibB(e3j*M{!>jMbFeR_r~h%0@YRmJtiB1T~5~(an$v$L@fy(@z++~fwr&l z8PnN}9&zCGJ-*0+=zAUmL_+Mum|>g&Ut>sQf7Ki*U%Qo+Q$8f#>rAbftj>8m!bL_&>bU5@FKUoS^n0i41h1O(!IKZ%1m>n`TJ%cH(K$TC?Q~DW ze2r;$%u)+-Fz`yex!-5V&Qtthu9+73e`i{0>JYt?Xxd8Yj!L-Js61 z|ESu1Vb#xb$3ssfkDTI&e6CCKEU(9GUIS9^xC5|2|LeeIg0_V_(^`y-e5?k+!Dx@Y z@XArqdF&=qYj>3h_!u^Y&^AjB@R7k7yVc(V!&I4UGFdVd`PLnjD}u1ytdmTSIcFl{ zWxC808TG0hOY5E73DXqoBN2O#uj?qfdpkHc-b_4lT+rS-VpKy7>Sn7Iy}^>G4tkVd zM4%X<0ZpHuF>AwaSOZ|gRAOAP=p8?bBs>8-IPk`UM6H+**>ZPLta*P5c=LSJPO6U> zim#}0T=q`=!;nHPO_$6E&4zIQi6!bWR)`2%+_JbP@+P4;;`bwK2X%awbx9$ADU3>y zXzt!=Q}ng+jiMbcn(>7mq-D7m*4H2ph!#E0Jp9VW8exylhaa4uv+S;iyzh9P3E6Z6 zo!Z(bhR!DJ4o?-6*URx){BtxVSB^!A7og!d4^}~ipFT((f2L<$j)K5PCsVc8mpS01 z26E#&d$)ziskshNn8%#&9Y;C!x+mUpuOm%}P6t1inr|sl+6|VIU1SG_`!LNcDNu)& zBa+W_|2X;DWe7PoP-bP0P77~A2-t`t=QmWzwO?nQFY2AmBi`}ROZDEC!cu}iy|d~S zctgFH%5u-Te8Kal59@E~eXmf-QQ9P9L=6}#_F>+;K2=Sq7TLqp&f)tkS^dm4Rg z&{M(jM|qDNsMn76b(5%6cSL3$n2fIUYX2uA5d9AuP>vW5!(>uYkyTR&Yu>i{ z?>hgBEcky*GW7qi?Y~)#|6_U-SJFRHl3^|gst54TuiwPLj)<;}){j^DO^@E%coBau zsEQpu#ah;JlYA0g#Lo7k&9i{@TEAUo^FkKc^_1R!=4h=eyo`*rB-QZ`PY-F@gc82r(eWzx&~Gkc{>RMo|Ead&qwVwAU&lbg-8DD9>C*ss zGU1Et#J;^CXB&n2iP$}hMZ*MCzl1n@-YcKnhF5*g%yTaiD$G*W`L6j*W3g4-lu*2d zu3HIj`Bj<6Q;boYM%Gl|PTtE_RvaKo*K>YBk%Tc;SJrr;m;~4J4GnIlSsAqr====x zkW_w*!RY9~kMxQ@(TOvutI`CvC=VpS4l?nO+Cd{nCFv}%=lBJ)1h(fdv{~M4LF$%n ze56+|>?l6|_GKA}u@@*xhm%UkR=2nOO%Jj5*ovtN8a?^S+*ncrBdHFgKt6@nUaSqD zLS~#w$eYr>7b)%SPpljxQA&jT$SoW1LP|nxSAb03HSLrh3Ys7Hm{^pz4F`FPnI4FE zBX$*s$-x6m!P7e?`_^E;or5uW;ugj?sztaI;}-A~MrM@0tG};!x%rOExE%sGJ2z)* 
zx{Rq6KBFtbp0O1Utk@9Z+gW$PF_@*I2WH;g7-V?G8T__Fl46}V|13hxxc_~9k9Tna z3WNL`^E8+d93i9%pj%0WVC>UA1>gnQ3!P7lFWQoC19npUiC;DFM0wIRptd9{=mCmo za{}O9do(US{a9_`2^EDg@A0f8FDxVI)N~`^eYd1#ugr)qDT{4YU?e1jP~v~$Jb4!v z5J6cj=cOo7D5bK;Xl57qUpaS-JH6>D5Q|> zRH4*UtLN5&|7imb?S4_wv${ITd}=_&9cImVbFK*xV2aPXW`F9U9M-S7^}ol=DGOW~tv zNS1@PY{2&vTnR_O?}ggjs`mN^l!LKeb$68SpJV^SWh3eA(o+r?N-tuQkJT(O#h$zH zqwj=TW-TGsDq4v(!Jqlnh@nmpXRiJz+hmWf0dt`t?zSP;PLg4OrFJgpeK8mD5&X)h z&IU0uB^7sPJA*2DuS3Env1%K=QM7e;+0zh@GF)i(ei#azVI23wRFYi7&e06)zz>>v z^H))z1!!!L62c$V;P0aXU(}ndt{gJq-}K#J(gW~-?zZm$!Qg(MATHoSkshBIgZv8s zuYVvO#HzfTMt_I9Nap0L5MZc-13=>D}2ghe)FH9AqoG6@D1dviCLn5vC-$``?qto8mNf?kB-udH`C*zy2 ze3|IVuf%{()Y2X5p{xgr!CJ}@_~2~p2`k~4!iQj5I7 z)4gPwyuPRr$q+4oDH+gN7qwa6rUl*PXl5i6-pk>hcn4Pmz8V#1I(Qj}fEs|$&*DTO z&T)c`B8+8bLa#eMO|eNqG>*r4sEjh>EsDNV3PM(roZ zV+&@4q7{wFa5LEPQ6ELPlbdtAh(KGlbGyK&+Qd&COG(n=rfqi}IS=JwN;brA9j=UH< zN$|1TpZ2BmSJRfDdc~r}VW4*aGfsenbiNU~uhR}Y^N}ld;`ZGKi>fw)$dLZ;st_^I zIBv8Ajf&7{d7>lO%+~3i`CKdhwLzza*+zA~@7QO(24PJhf->3VPCi$L%f);46 zo zfTFHUP(^lr;Ey%|$D{FIdp5D7hZtgS{EMN9AH~QDepH4ejf6Xd`tVK68am=j%VqcI zkNz%d`u@2=t4sj7D?b;-9bFgLn=C>Xj3A6b<%@M0Uy@lqfb_5q+Wv{@@G2^KNT1m9 zIucl!8(*OF%RPb#_{8-~-;s+B4}#8$lc;k>iP))9>XUzZpyW3-r2MMV_Q8jF=V1c5 zeJA;)-5CcgA>VOL-j^zCDGg9`qO#DPUx)6A)C>N&w&NE6x`X;XYT0vmGUhNd+ ze&GjNZ++OgHb5yF8RhC4wHQTI8eAK&1d&8Yf5+LBVjVr$$9*UFD zLJtJK#2rHVpI|Z*{e>wZR4G$VcA!8SuJkdsDi(`$1BJ8^6xC3uEnI_-yIcySg|DaN zcvwxAa~Ds61hrCkfFN+Z7>X6}9;zJ>_mKWTG-dU%g!_Arv zx`huSnBkHp!^9HQboB z!DNzo%U#PkpX05W$=)5cFaunUl#hJOWEJ)poONxq$(zjz8wo6BO}}VNZ)N&4B&b#I zmE|QL*TKL;6}6_Jo>En1g89$zOS!JXepW-=+QRzHezuq#7hePmo^x~V@Yty5+>QD?SWz5-RRdWPzs zCUmHeTf&!i%E4djG3nUC_k#rUF&+P( z8qkAK-?_L3RKlvb^PMte&5Dt(5{68}P^DTkXL~759;4a0)p04VAHIkPig3)0mb*w= zE57%(w&o{Aa8+}7QOLDjqVO-ULiL8d^R~-AS5Av)3Q)K)^V@CROr^W4q&c8Y3)0CkPNB)TKN$djCGG& z9{g%p!Vdo@Q*9B$(V-0Z9>pT}M5*xuLYFEu$EjQ~a~Z;AdHeo4ghq_QfYsGA0cJkX z2YpLTQ83e>WRU{=LX9_74_1M`V$&&B;s8pm!TT56?TAWvi`o;$e>JFP5~pz;7Ep*S zxaS>GDGsksnq0vr`*iA2h|SxLS~)dAlWoBB>M%M)O#k2?*T`_Fa&BhlUhbUDXw(MV 
z1qM`qRf9>fY+NfhM=IO7#F1HbTBqnvWsU3`;2z@)?q$Bjzy%0Z|D;cVk>UIc)p*P^ z+ue8MRJ%v3FMTeqEkRJxEYWG1zy8{%Zs^TPs&gVY!Y&H1#1XSAP%l3H+;#J}lg=KS z8SS4$r9dX|xAQ5)8Q96F-R5s3Sb1DqH_9e$LT?KusxcA%```kWm-IIDnKU_QMFkZ1dq@ubm41HBZU0q_k!>lI$XLa z)M3|yTCAqUd}kkuZv_h-YI!s|sm#Z2=x-qDdMSCcLG)9%Y=$igV(Zv89b9}R<|nVH z>)n7#X6QJ9;GKeg4>sjCQ^l%IjB7ccZx_0>A^k$o8#>}~Td9TeUa9J_%Kcv5{mvy7 z>hhv#xeCh`xh=#t4vfykUdVTy^@b(N@Z)*<#w$t4ALML}nwQOx>J^op!^ zk!q2Vaguh4Cz^1=kRUvI0{Gv!Cb*>UrTw8f;Xh&O?*C}rwl}4OvTS~xoS;=J(Ua|a zg%RxZ%6uPt{7s_L8R#F$|Cwy@dw5BD?{w^@{yoJX^csJ;o+8X1{zlyFb!44@iw@vC z4lA7dU%NnQvJYMA94O41k;B@=^I~Tl1~=+F*kJbU`anycg-0scH*vosr1rc)FP12U7dB*=M1*m+E@WaE>Q}8Fms+V(3xOd%o%>?nh`^XfU+VdjWYOx8NLK^NwcvL*<2 z#3x|{L9to#Lhs4U-kb{t)l_e(Aka`H%S_wm_h?ubyw3t3zSp)PjiLHl|8mHrMlp$k zq){*h36v{Uq7+u0!gI>=WC(+AKR84I4Wfk+^@PFrwv9M%G(&>$Da*qK{=)QI-vB?| zKR6KI6irl(Ld696<0S=N^NLvi=Bu2$yZ zO*%XQN)xd@6Ick??sm@pWRZ|`PCrVu^zf32`qX!7Y?eC|bNMBtMc-c>!j-nuT+9z^ z=KnD>E%6l6CIy)u&Kl{7LU>$K5za4ej2;_#6JHeJY#j6vBd(XGxEmU)=B)3~Ci5~&6^r1Xp)I$6# z$o`)Dq^N|G2Zl33`5A3JaQfgj*u`G75MNdqi>wjaTRQI}ep>#xE0g2kGkSdQ=0r#J zMUYNHOJTom9JUTQ1r#vTHGhMm+fSS0ooW1nGx-{`UCR51w+!YOCl8DJ#&{YOb^0B# z&dOTFflS(pO%&Gh(`-fsay_Spq04Z=$EjxgVF1^N6Ffn3`3d%v9G+b4tQcZhe*IrH zV1~y7LT!al5YE2?>vu90V2-({*sPy)M5FYWK)$u-!hDU-6n!!&rykHcWAIyKL*MAK zBG9JC_JaK24CF*As?azupRpb))kUSpy;yBghPwTj*a0K+voKIyMdyLho?dcBE#o$x zc!MvKV)17V(Cd0M$Em`ib#F48CAJ}>myj8x`5b5w;1JE?@K(l zU$*q`T(Npqfdt%wE+XibrUU;wP7bjc+Hh6i#NvS!+uC5^_Vg)v1VnA-3+o(1SfBL7 z4xHy~DRoQMl9Yi7s3Fhk&oS?+LDzgPkT0MlfPB>*fm=M+@Ma>$|(Iwn`f4(!InOoofO$tX%GXzze3oO%-a*ZpeLPJ!*7%%2uz4mhk_1 zt&n3ry)co2Co_p*R)c*kO+~Sd{PZc;RBSiynFti1@zILO$w5Pw7cqWr`>SIhH&o8| zrz)jP(J3VUynH4UG&^|o(+%dJm<@TQYkCs)JDCRM^m2|F*+R7@Kvz~u;PsA4J(wqM z;@7OR@qvbfvx{08aE?AYmloPASJxvH#RgWMI-L23ry_%WyKWteKp4ju_0w2V^#2^9 z6f->q=PwzdTWD=*=L$1^eYujM*TVOH9R@}Jx0rGYIY#!j*nf#ic|h5WoWZY0TL0Ws z|2(Gf44FC)|91c7f7x9wDEo8&i=TRu2)dFUqp778{7=vJ|5A`q)?bw~iOyO3KGh~( z?E>bj?CS8@oZ~RJ_tB2wCCtxr>VNGK^PgqtdM=6$*aZw+%krOA&mwgc&a!Sc+tYM> 
zm<%%VRMD#dT3J-`oJWv#EhNlOV-8Pdw#nW9>5bk9o?Ut-yvj9vCz=b^+d9|?F^Y?n za+IuZzMm{e$#C|?uKH#4%FBY zxPg+;aI8XC+ClX28ggS!Bb0JM-Tf5&fPcw9taeKuX(RmEt;Zt0NF>q&6)lh1pnnS$ z#f+lnLEWe&!dZj(cK7jQq(sRp%RcQhdpviKg(L!<3sOf$5?hJcWoZp(eY9qH z)M@rose7`<1k)?{Sgmvv);*Hd$BS2F(?z!aOVB*f0yAj#R;)X8iuayc^NK;)XzJ}=YZ#4<{fu{=jo!OWZ z3&)w*e>a)bq6`=tbc07tlv1vBS614ilBT@g^tBAt|wz##EX`x<=(Z!mS?D7NOP&S1usj!wU1D3QY|)tOUk}(m{H@X zS^-d$IPkD=&Nys4a3aa4A8+*|ziG$xpNU?)gNS6}b(z2_j&Eb@?^|ZP$bLFr3Z%6P z1lDaDZ-MrT&&j)q{a5_Ld=NqCyEV{DXcz$49#zk@`2_)T5l51}!lUuGxo$mQ{w(c0 z2|olwuQ~{MU~_Voxu==;eD{iBr95Gx7xV^m^uZP3Wy|kGbK0)g37R$K@>=#~E1!XG zM1;NPPuX zPR?RKVw}vA3Pu}g;;z_0Pk}ws!M+JX?3wReYYCG>6dc!v96?5YG*OxDLVdcBTsbAzx4ia0R6WlYh z2qIYxU)~M`4SnL6EHsteHgjHjTdpV~CNFA9fV6D>h#YPp`Zd;Y&2YV!a{Nbag!mOe zsW#E7TRyj-#>CK;pz9V+HsNyGiS4AL#q_;K zxo4*PFlTUmgmwXGX*ns%Ca0!9NEsN>ooJ6omrY!~3dkzh&n=sP91xh*mmKnCg0`L9 z(qo^=Qh=+GI>E8}=-a6Fh&h5Uf1Cg0k281d8pgz_oZ*}AksYu7w3lQ&6Iw^vF zWgE>%E+vq*4sx^Y14X@Z`dOaMtNHTS<`obHb??evmzBT_f~H4535Pz0W-cN%dKK=Slv2Y zbFX^8h7xk!V=N!tCSmI0i#FWy_c`wP4~(2S2Hkb*r+FC%e(UHB$!@^>n<9Qv*4SDV zuvA&1bmu(3n7q4@OIA`=A3UjR4zEMiJ@%OASHdxXppj+BiF9>3acxhs56HL*4?RdA zI>Vs>;e#up<=jCjl9j#FjYy1nYPw-cJ%SWrz~mv}sLPyKT_0=Sd-Xs^nMUP?4az+> zs%NAXeW8S@qF#f~ORkh>vcpa8?Oy<=(q_?Bd~F3Fo-O*TB*@ui?DYID?J^jdlA(<(TDYdBM`ph3C(VxIQN zpt+DsRX2+lfh;nPs}luWKFiT>gK4%#J=XCh2c-TuL8uUoygu@GMuzXFJK4e;(l!q1 z`l>^MC)gq8%*}kCKwuv;(e%aIg*5F$WPc&KWe63P%a(!tAD%Q{t>Xqq7rOb>P?bk? 
z6+OShPZosc%N}wB{$z!*=@Ejza->ds=Vy(TwhD~f5ocoL`0~r{EmB+xDw|7$AF;qX zvb!6}K=l;hl4?H}QU=9Hu4B}$>IFqTPmIf*alUj9yl$!ROyf(lA!=kuY%;~WbLDoj zu!z0$!)D|SxeIx-Pmgf!o(+q%t678nwvFI0zxy}yh_~*ZWsk9q zv2lGK)C^;a|GQ>ez!WVI7}+R&flBurGZ~fTuxb<&TSG#3cIJ)}Z97W>4HwDQ*{fP| zTJ{Bu9;0YQ*ir~YW%S$!%Sk9LbgyJuaVu}ye0BcIqBT((6lK0g$MF)6C93x_>=StQ zjsC^%JM%N+CLps8EJCDH(9<<5PcIa9%8H{uRsm|@78J1N{^pZ1?0EG(vj{D$GB$Rb zjLa4b@Y6r_+?2*>Y5o z6hK^}BlUnjC#IiXd!~WdeEP8>H93Q*&OrR;O{v~%;Olnr7>sik>;;{`e0ymN-iWQ) z6Z+V+lh+=IG$>x&0&&pSaq@Bo)2D0GqFCdDl`vnTt48(7x9fTOQb@BHq-vdGtX}-+ z6^p4Xw(XNE?CaOID9Uz<9ad>Z(lTps=ycS#+?t~kV=Y(w6sJ_oF%0>GY&x5cEsKsg zWVX@k2RTiuqQmVn{{bSxFlwK%izx)wd8DYw-Kj>D_oq>mS%@0Se-sr&r>CO|bq7qX zZE~s^iV_&Cb0adPS>&x_4nt z@kMm9O|68cFFh{w8h)8=xX3|6>aooiU2Z&;o@O-Rf?8I&TeJnt)rCsod#U*zws}Jd znPMu&f3yKP&AeqxF_SC&-QA^>ZR@I(n%j`NpV5vA-nLKLhhix}4d|@*1y`=Kqo< zmeobf?9KLZxQsVz+S`?*;(fWCvJ957Vc;zdrB{}!#+u;KGj6ulr;bR{+3k}D_1BS?t#Bd3q` z70%uQ-3yA7Sqrv)^U~L#ac;P`Pf<-9lK3s{>{Aan8xyXL~&=If2 z4-&2$saB&_ZFaV?XprXI5ZI^b!mvF4J6@`0h2A$ZTn#r^dTFBVONrwNht&A%1`I4dtAuf`D{ zr>E;k*)LWT^VWLMfb6;`P}PYiajk!^4aJ(3jOZ6Z z#;pC1!9U2l;&aK}Ai-0ZKVIr{g25s7qSrWL~Uhs@4Duq7 z|0)|XL8-rguU7d^dY0cdTY*-^w-oH$LV*=;6hoPQCq?UqJV z_&BkHwD}9(a)V0G?XK&*>_SmZsZsEDC_kibKF=mH$aQ|;y9>U00 zK*~-H6Ty~%55cI^y>A3^HO$U}8z~2N12Nf?BdH*22m!5@rJyITSNl!FMbZK)D|lL5 z_JxGt#(X)wd86f5ARfza@`zJVmNVu=*&QKK##i}7im#1N z|0m2?2_vKIu~TZ;1eyHz{12+yrQe9D8D7$8`dYj{E-=&oyQ%WOaI1fQEWKQ)`-&C2 z8HaBM4?~*{e_iYUR%dd61{V9~3LzJZtc+F~HU;6u+hhM*SoQyZ_&*M!{Qqo7coPzu z{#y&+|MAvN$uME8E5G?npp%UO&n-H_4mPo;%^rOe?pR#J^9Q_&q5B}{4&I@mhb#IA zB4u?vQYq3zx2F>IOvX2*dq#Xdg3Y1}8|E|JZqe>ZP2HkEK_$OF9s1r5ty@&wVoOx5 zc&90`WPu&V-mA}%{s78E{BvwktgWXex~mcB55&Sl)Bg_Km=sG;oGRviwyddVe#MK% zamLE!yiqZK&`@k0zq9?Y#0*y-YG$OOS9^+C)8m@yoF6#2<8=cIFuz0<+P$M6lu;`1 z#3#j+4G_3?BOeWRp?-tjT_%aoW{j(GbVz_pIeo3zZQV_I|K21fP^mEgux|LggLFUH z5!YIJ>(LKVvunr}DmUO)>rN;VaQ}MT_xxedG!?6;b#0=q5@&ml`h4}9BXlVZZUEME zS|krU6C8Zn^P~?t^lF2*4L&x8k6v5o%DXV3)wgtJwLvoIuCo{Il^-ZM^7+clsi9xh 
zhw(rD{T#T8A$KsNsbUdT-?5>x(-IwswI(w)ifN~ET44z$J_w8GXXDH>2a9X$q9;&2 zRFhO3Ld@f?{0qTRSO>w7#@I<%gY{l z+WG^*Z{#cYK|j>)1~KR|z3EzT>H~5%1(IMKC9H3u_jX&bDA6%@#0xoNRpAu2X87Mu ze7~SN$QH*~CSfQ23QhQRA}7aZ>a72g3d3fm$68bP(;b2xT_V zp8_k$76R|EZC6t3p&+>lfjwyy{wQeLHK&6M6oL}|nbaynkVS>O5avm`q+PKU@0*@lcJtU7E&zyOPZ{v|g? z;(w9t?SZMM6OY+7Hq0a+>W>sRgo~$0W)JI4P-_&&rR%BkoM*L;6T+sMVXI*`-fN

8kGd2m|*))sga$-nNu^mydO18==u){TM&c&hMQ@;;J`vW zFJ$HKSI>k9r6e>`r7lzv0KpP9yzQQ5`cz&Hu~%;@%;NhhPCk*8%;oRMMq+kHn6%F< zk?|%mYSq$VUKpy06U!^B^s2Dza%6EegUWQ{mZpEp-Hp}E^@V0&jR)%hL7@;%o)hCu z>*!c8Q|~FP8E>%1;)n9!)D4a37Z`^Sg|vr;64|L_uwMp#xaaHjQH_$CH~|_+&Z!z z%k1pvq6%=FN&a0!0Kw^%{y`LIfAiZ1K4}j)u(@G2FSD8kVs`dKpF(z-!pFJNBr>}H zOj78-(}4=vw26{2{~GD_jLoWTvTB;1A5QL6je<`M*$?9uyI=IkZw$NyQ6g zswCoy-ITq5KMTx20sowxTu1W+*3V&O8C&I_z$ZVLCKWp>7UV_VGE8zYALVZIl5i>P ze`CCCF4i!oD%jHu{caA!xb0;xg60qA1T^(*Zlj&EtGOE>I76CBdE_0-RF3!o@CIF; z_5{D)lSzOZYwfmq2N= zJD9)4yM}4%nj^ZJ!%mBAX>=R(1byH;Ydi)y(8DTBchib8eNIh}tz1(MF`s)vm)HK>{dlExE>L8N|vN)7j0&b-ME z9d+84YlUbuiXVr}y167LDgq zHHF3Omj=E$juE2o$G5MBWUD?y^$OKg!xs9HOTc@QR@9?j0)z-n3#T0I6A&XO(qktg zI}S7Q%cfAM+;|sXCDUNj1L0%W_yxlk9Ka@cW1AMS!c{1sGBT^2^R$Huh@pV38s)gb zLWs%Kos(J3R2KQ5oI)ydm}>KrAV%-YhSg-@{wU?X%KrEIzJ;&^(|x=?Pb=v5PJ;L4 zw?185K9o)gg#k0pZd#~Xg_AN-hpHgvVFh~C15ksz%GgPF3Ry6MmP@Er3)WLC5nDjW@I=Z8~xc@dr(VEK;o7l>3W}6CLv-Uqu>mSm7i3O;V*=orEHS7B)Hf z2_D6pQg6q=(IXtQK?BOW`u;+@DotuW|E4pIBIOV-G)4?`v-^+T`_IlbXHr~PBC&t{ zXG;Ia^8IJu2K@uX?8_kkUFzR=Y2{Fx7nHb2@*fNjT6XdfrBU?UDXhg7b}d=2psoRG z$^0v*o>_y-@Bga;&YQefeG}?TEV&>%*!ph+8Tj5Cv`yBCmLBZgSFbemH!ueZ`syX)PDOGcPQRMkm3$S8#K6EafjmW?oM%c zcXtBCr4)B5?h>px#o6hz|NA*-e}!|Mmy5MnBw1w6d(8Vc#w@k7?S!{h7xo=(L48~* z^D|H-ycv2ZuWY#KLn>TWl&75jqE2PxCJlg7E%aY-DmM?_37SX^|0uH7$Da7!z1e1H6i~NfeY?8 zn(^s^V7)9J@I7GWzemIA95S$>EuW+w^zgZJ;HPx(K3X~HHV$rPMWh)qsUF7_s@!zC zk;%FR;0t3`NTBUUGfe)Y^#W0;qz_j!5hB$FM_fXmNS|9GPc&oIWWUmNIT%@g1r@P) zQP!`Vy=S<{6`fnr6hKxm2iM+ni7{d=-Ojar!n&5;D96uaZYnS~S77RDcq#y!WcP z?_MfYuT_=`xhr+REvND$eu31Z;E)p5^qZNQI_+l0@*BAP-C}k5shuMX#TE@(VWIjK zmB858?1@1)bNfTEj&=<(TIdAod?oCd(&guT9a{e&zJJM&oK|iaod)d`$uFtajyFsz z^iS_Suzv;ld2|w-(J*w;+aNaepS&N$ZgvfYZJ)UMA{SP1!Eb{^(omzRwW*m91M5qZ z&&8JuKlKY|q+rfAJkPL8)29U?Np+agd;zSHbzBto%JZd~ezX9GVLIIQWI1zCMl(!i z>4p!lQJo~o;te3T#kfQsS-x$lc-&1kEEHelMmLTKR`_aYcBLj-a-It@jz${@&F2~B zWsl@ey^VX9@n@58Cp!n!4o&o>89Md4CtVD)0#l_xV@iCzJ$Nf!fRInST$OcR_&Sv9 
z+NA0YCFkuyLIb6Xr|lCdXQ`Y@pAnuptmHe&g@j}H-!KRI&WI`SR$WY~hCl;p2@_IV zHju3E(_M$|+mh31;ZK35oL|_{OWtxO_ufwq0Aq2 zn0CU#^I!G;L8tKL5TUrvJ%K|WxDWG0ikL6x~zWugJtaYbYV+%W>;y{^=@&u}#;am`2T}E-~xHW~&j9In%s$p?Pcs^*O_6SND61`9P zXE#AF!k8QD^_)J4$87sPf(YgTn6<+3Q;$s`m(-d~t&e=0iFEn9(uY%l`OZV?f~Wum zWtMQPu`(%BokVTF119e>erYlPX@nj?Vo`4@DIE~w(@*Yto^qjXoLbl}nAOaY@br1@ zLTx=kGW@iGYZ}ggTmfQCR|irESk7b&SE`S~0q!RW6G_|3=f(zy5ZI*!2gMIEZ*yLw zLaTc|^!737zNp6JFbN|Kyu6u4UN~Z=SyGy=@yv*$=FAf&r_80{{n_=hGzzpCM8}8h z*AFz(1av+B!7OWG(Ue{E;31mlNZ?t9dcmx7QpTW3-xgc6p&h_@U9S`Y#&}N~O&a7g zdCuv7;F7y-ur0(F=EwbrE<}neyY4w&t;zAKdUN{tgL?G`szc25o7K`%3 zPCX--+IO<~A|vFzfA%l;IW|O)p(qk&#UZuWh^C{Fh6IBs^#qLcBly0OeXes!5XeCp z1}0hRW2VR{b$R>KZ}NS}Bv=$lQ9b+k1@OpnDL2|tmU&-_4zsHevo>-CzfDu_YS zX9vgWm(c_aa1iEWPq9{b8vDq@DK`k{H&S(A;+kf?19igwD`6gXDZ?)Ep?E zQCqC+RW2#F=$43M1383ObWsxY;YE{h-5H|8XAH;a$PW*%0aiXD*;wc*c_`dvvZQUXkLYy}iu_1$p z#eA@g8_dFjXA5@u?lZ~bzb(Am@23&0O@?s406B`HR`s^*GdvY%S2_dVADf%-1;w@s za2Z>d_O_3Ye4YzE!>`8_D-9$%D(!xqQZ{LkKUq;Opw*P5l#vV;`%0?Q*LlIPhH%nn zAPh$0$L#5&truiM&g#u0Z>)fS6yrF{9(7Ed<3d%frQ0P!>6Fp zPv-Kb`ZxrEio&tQL^e56>rhBt`>P=}LFMH_Q%pjQ!WP0bLO|?`WFPda;3Ka*4>^Ig zL)Jyz?Snp!mZM{GdnCOOi@j<>NdKL`g8)AwxEYp~?IWhK@XPK2`CLRo%@0^b_Ns0j z-#^|C2y}L2)8ca`)1iTgL8cFd{BpI8r=EFovmvG14BVUTS!B1wnBuZn>!^oc~eXOvtt zJpG!Q1UCCsy(oC{3ZZv8Aw>S6?ubxQejJo6F;bk+oHSQZpWWhTXk8od7&`#U6vzC9 z*Z(`H#uZ^cZxfLSFLF{jrmzcc#jL!S^NQihI+K1 z_xn%rC;7uPz$u}jVEw)ZsP)`GsJtmT4C_EqKS?&VZ1Hy>XC&ZNidBApd=e4-j5^*= zu}<@Q*haU5?wRG%6l%GJ;&>1H*VvbU^o%wX8 z0RRiEqLeGHO;%=D3WQM~keAA68mdwaId>yg`xghSX{n&{IcI1cFNu2Q@k zYRdw6Uy)M1g~#fB+Djg=AZlTz&IwRSL8lZ*bp-WB(Ww*CB*B3{GQksxH>sttVXME3)|FrcFF^w$hT^}6z^#dB5Akg#8x(C*v!Aj`9;78?CeK2kHsx@6H5JyB7q1|=rVtUUva z@<@nN@+y0Qed!L%-F*ymOpKg`xl=jlsEcD+ow=Kk8m#W|j?NZ@;QKH&-}{b!n7LA4 zO){H7Hw_!xTTIrpB4x6*-YHT43G?_(T7DSjDGcybXN>l#6FPFKIWZC<5Ygx*5TBTFvT#TShZPxQFoxazFPUpYB5x`(NYO1*e|fwrLtq)S}Nug(I#u z^EUlcX@dK)%DMLiBd}%{k?J_bUC61-i+LpbVYbi3+{uf$Yqh815_wvptSEbeqJq0t z9{AEj|3uyYD5E$htR8R^BER|gbWgoO{hIsWeE(i;^*QgQKhJHx%&q7(}tld;lJP}#FjN^Bi^eot( 
zjYsw|oI^QtypUU-=)P1{??M9YgkLi4hcViij{f}nRH5~5?#=MymG=O7 z7G9b(tkMv&Km1`hBp{1AIcUU=Bt?3t538AuFZ;tuXL|6%9LKeMQaiU>q`WnO5S2MQ z_S#QD&-BL0K9|pTPfZS@5KRT0y4bXWPnvmWfMup4Jlo;?CC3ERI5JC3{w$mWtA=1_+O$yXi94&zykTN0{U9 zz?P?sycL-cGAutPfMccc-Y5KO>BoIgo*Xd3hbFXD`s=n^@ikvDFwghOpkF>UUu9Mh z@Uu-V(ur!=+u-##LdS)FxS`mEztPU)mU<~x;(0`A$|^}B=@d7qBCp-CV?Iu_K00cw zJ<|~A09V*xc2a7sF=VLWrLPzm`QY1MQ`WZC#`Gn_fx$)d6*&xhv3%EK7OS8cYi4@` zyA&hn$q|Mpznh(;%3I|(BTy)py7ouYXF3-@ENXe7S;4_ee-BOlvsQ9I;M`P>W%4EO ztWW=ntiR-@RUFlN43! zh_&3%_AAW2$VpyxinG+QYLZXZtdR-Kz616*n9_EgA`{vD1w1i}%*LGQOUbV68io5T znX-ocW+W3+&X1nof*#N+-Z+>#qCEA;7Zc)|S0meEwzyVOa_)T->3RmV+;vhFjh40M z|Kjxj7^~ne6#Nofa=9yVD63FmyM$Eb+SJNRJTn_+jhc0!OlaNhK~y;o#^F~V8sPVU?_@FR6J!A>KNLJ!VA zF7#;bjo0sVkbMlldJO;Q%Rk&d9~6Wr*NVwmc@4~Qd#|4(AZ5WroLmCFf}3f@b?6V>QSUB#1WcV56uVv%w^hRE4M~KNguE>q6+% zq8Ep=6=f|n0Gf)N9p5F2q$C7-LA>-10JN9ms>P-lwyF%D5u;_Il5h<|QBXf+JxrCA z_R#CG5Dt34EWB66u3{+31aHV+%rxpCuJp+;?H?^k>XzXOB!{!r{^-{{A_6wTYm;&d zCUNjBMMU@>iTAP)!?L+TiKsgxI7-f=Jnr_SXS_cHE>&!Lz7TR-QGHhK+TRHvR9bpu z#4XJY(eGHOIZ{AhO33i;zElWFY<81z0n+{(>(EheeK*P)`~qKKkpUTih~N;5Q_4Y( zrmb9RE4c&$xslVM;1Ege{eD95`kaz@l6s?7?rgbxHqVCpDqW+t8*Vd}eCZvzKppp|UXn#| z;h;rYd@wBsuKW9I#;?D8Gu6>nDPqb6dmD`axK*vQ5}ZOT-#)f{TB5We{(I1GNKP3h zTu;v*aZ=(eF;pnQ;-0_uhB8*W+WCa&ZB+a(&`=?`^(R7Jxc zHqZ>dAwidAc-giB;IuM(cHC6$NIwjtpz}Gli>{7eh0okG#1XnEr5OkxGmifJ-d^uM zdbWX+g4Xf4hg9c{jPvdog82Tg_7Z|FDZj7>TeS>aJGqKpjQxn}X8C63+c|sxt;`p) zx3v7ZwrK2B&!Y{_$my70O!%kL_nu%!n&F384*R(Vxy#M^F0Y-1gjd|fV&zz#8!CEZ zYh=j+^&&)HBTSh}#8+^TY<92bhmYBjW=^{2MqSidHCc)sX5afWvPR6|*AxWJi*`2$ zG=XdaN-n5RYDGSeM`hBUey>ODyw~LSv93bMk`XR^%qxfA9X9=cIu`Ns+I_zOAACxd zro=LzL+K9mW1YTaj>eJSkB3VH^L*dTCK6Q0xT8Im=_kadmE8HZBzRrDWPoD5cZ~T# zZl`d9FzcD{|@(7;mF zL+~!gFN-^Lj*r5}#1Q^ED*Wgwg(FqEsxP5UL*;{Bs&f+RVi>XJAxAS2soUC*W9mZ( z_{rlv2N%{>(uc{eqk#n5gq)^=?s-&g=~yEvHoj}av^->ADR>B+^5s908H$hH>kPr?W{ZKYxExJSkLTW4S?V% z?J!(WU3FuAb7ao-(nZ@j?=@EJvC1Y^l7=fOXo^n9-Png-KJu`;h;aULMsP-B&&kwE z9hN)%_NYapyOH!K8c|MoRm_7=f@H~>b7@zH&Lz%lY{yc?21Eo 
z$%J(eNk&FS!c|xAYhIp{FAg=GMJ0AMJZ3v-yaJykk^D)UCFhoPcAn9;ZIw|VY4M+r zv8OphiA~GBZb0rU7bZL9en!W&V#zU1w+~Q)RZ<|XMB&vkm7Z#@@yCEttv@A@@^}Ed zp=s$37f@v>3r(Y3bP*`Y7NN~1*NfEAktYzE^fzC>36;@4wv1 zOA>~*!YQJ%>JokUJ}b3jzTG5a>|~O-SV=9wiIe`eHkLrGO!*c?OlUNZ=9^W?*?q`{=G*|GW0BoTv%#5hTAAH-ASMdvf z7?l|(!+a&Y4i&XcI*aZ@!&4mfD*X8$V@Q9N|1W;9$0z= zxd~ppr25ekRi^X>KlY~Bn7Cg@8uw#pX4bu|NU`Jljacb2`n@;PFRn1LW>|JWYcV$b z4Q^(WiPSkC>9QwD2wn+ai88>La$r|_X#Sh0x*e~=oY1S^w_V2~*PHRZ`3sLkf-x=@ zSx0Lo-!WQ6u-Md4n^f=c()`wD%=c1b7fr=8s(a>@??z>-&2rxx|LW{U8j-gT-{=0Y z99!nM9dqtsBmDw&zAR22a2$~cYwZ;>RUo-&P94n-Wo8sI5B1BJ4>bw73YK>n@zmevudQb5OSmcg{U)5Jc>ektuYLfU9VNG?pAOnC zXc1}1!CPVF86~%nxe|?ge#YA8`vC3@-6;E<0~?y_eZ=i5q4RA!#OSkSi;!1(q9!W+i185L&AR8EqIdy zaHG>!jC(xfa4mL?vFBt+kr-15!*VpzR}%|oe(4|LOE$Lf;HMK!(aG_n;DF__1@2^# zjEyhOTinIp5WC;b-^lOuP{;Xxtb@9v+Gw_ijHHi~1xvq99H&mI6m`jsxSw%gFLqVE zWdW`5*ipxOD^?R1LdUXZbx8%uUyvKH=3t(OT{P4+hO)so*DUS*D#6KM_=-YCS69mk zdD*X=hEq7cSTVmT$NlBT3Hg`VBUh=sqO9Ss+r_U)(8)W^DIr zL*bOLUUqV>2(MO|F`438zjF>5}2&{==Vp489)<&HYQS+)k+1US&flKRh`!SyC7YXx5s~ zk?yEx=9YNpdioyLv1$#rpe@-GQ3F2O1}Quc_0RRS=D*z0Swn0O{Bb6GHfIk-+N&Q% zm)jCxVJX|>N$V#9UVs^oz6ujT7vLSsxN$v(|d~`(j67 z(IwLHsY@OhTNWC@KRryManv@gt0U*# zKrP#&_sZ(>7FGiR2!40AqPU$OlaJbc_W$)def9^Qr>V{c%vw6jj_348uYWz_hSpL2 zP?J>gg_hkP{#4qJEM#y@Bi3d0;)CV+uFv9XvD4M8E5omb--+BzS&Pp0rU}2`2scalcehtD{ zA#Z&Ho3cCikj2rUasLEH&yL-jzGcB^rK6f{1P77EoGTAtBTpfAaUF!5>ldy7$H$Tx z=E8P|=2J8P_j;r~yRQrQ;jF6EbS@0XqJwIN%&ZwQaYuuj%V|5y|Cu?3V~PP-bPK+f zTLW8d71F;xD<`*=c(oKuc002^YSsZdMVP+xJpp+lvh=s;OlmCWAeuSR!RjowKl=b# z{ti!Vf#w4@-?N4}`+w)W%#a-FwPx-TY!X7pLtX{JZCwa-t9T5BB>RM;czjOFC zpudqUU1;|In(h!Egj!=_30^|3CN-2l4+VD*i9L;{QVjD^(zObvYCDKkFNEqZVDfqm72x zV;p+1oo3{AkATa;h|DB8GaFF}0cjnKry2}3w^=78ax2c!0w$K=|F9xo@ioJP$UZVY z{|!NcMqYl=MV#uBcED=>ePATMzmV89*k7b0GaY6cmEZ=}^a>>lVcs6o>eR zfywO@rz3{o$PYzhhwDUTZ~g)*r%foa@+nj^ngklqF;grVe% zT3*0>cb2{|qN0#MbVa-MC7p~@<{IqSA(sarH7xIw-Lc2~r9V8S z@WUM(P~*6lHDuRFy$zci4ya3Hq=h^gD!%2ZiL+O{hH>n%iQszW9Go5SZPk!HSm<wzC~}!Qi}SZ&gdDE{$pjk-{Bn_Y!=NpyRo;j~xG8x_M*1yz 
zCo$f!u2|J5y_AWWx(S21c{)Vq3=cM+jVdx+e_uc_j%Cnd;ck=&L;ViZj>Xp^Puyt(^3DGe zRn%HW-X5?DVZorMSNL?ygPSlb#|e+eJr!t)6pCJbnJtG zjN<{6VoSpD&D zDKW6?W2ICqQaX4L#cwmq+^HDQj8%H_p{*Ou2EySGp_sv=l$u#OSnS1GLoTn=}TK|~?J~|ALkFhr}zmo?h z#%5SmvBV#9-trYbp`t5^vLGS;yRu0_DPzJ^0{M_!juYH7`cok9)zLPx2Nv(KLwT$t ztrq6_3puliPH%@%xyRcc`dDBj7<>O269yPg(!w>o1qaCsl(v96Pf*E9%~x;VoEH;C zX^@y#|BS>&OjxhBM@$R8AW;cFDNV)zUS1t2vN5cM!M36ylSn|*O3}}9hr{d_?2si= z7@j`72p!qOK@O^U!4AY`1FWzaLqISW{0M z8Xs+@+8SXXpz&-Ap!9S}^V62mj3CIFBjFg&KhWdw^7D050Du$twb5}Jt2=x+S)sI9 zyB1R%Gd?@P9%RV{%Z6xj(${i<$d8=Yr%YZZLee9_VM26C2X-#Coq%yl%nb+!(J%hFZLdT@I$pR9&-w^i8G}B|)1NSi$m4Z27=!zWZ2>SH|wo zVc}BQ<;Ka#0>kU)w7mI!-bv5HWc5zW2PvJ9QMQk7=$T8CZ!9-}_`u$nl>9vUI>|oS!>*t%GzrE@bs4lEUjcs&iDuxzM>WmgO z4G1Mph1$?DHjpL=qwwJ0O>jDIA+@BN5-^;($oA4c*~uvLYjz zK1Z&I=5PO8K4u9mKYI88Xc{>j01q@60Fg8T@PU?*8o(3@OdX+!Y{4LL4%ca4;heSE z3|#|5FZA;3IXam=0FF)T$jE}5U*vu~#WSS^L(-yi-|x(${E6O2C&Olg>*Yx%P0U|= zvPz(5_*nq_SkT>q(ml?e$irnuBJ@wae*8b#a0=*C@a_+Q@kp|hmkgZ)4)$>WH1@oA|%1Hmw;hG);n4| z=6Ds_b;U4_ZL8@^Q${#w;g!;-00Cjo2T=3XcWbo3+A42-#jy^7;6eF%3;^PFP&TtG z@<|ZU+wdN8MG^$*nDf{W+XJmO$XJ;Bqe6%pXHo-odS!4fvOg@)xyPg}>S&Q%<;N@H zNdWKl@chFX>PpyXJtGqehLX){FuLoLEYWbr+DA?qFzL-Zphau2TfE0qcf~-%Qd+MECWfnMA`g zSuqYc%{I8h;Lz_LM1oUHUuZbila4qt?wiuP(ds}N%WgK1&-ZjqeJFB-0cD1%a=Oju z*ExizLDkJi&}C0>Hvc8k8Vph3GVEjDdnKd{SXU^$=0i>^Wx01+o| z+^v*px{Wf;>${EI_K|l0F{$GCPQ$j=^O%wwtYaGz2ysLA80(!N{FHu{bDs4m#9o&iDu8d8OW@Z<0j zX!Eje7(J}i)5sxt2kdMh{VVTDpr~&9aXn{?R-g#pbjoEzD3#5piBl$Te3#Ra3@=P> z=tOZJ9SgPQxWaA*^S+Piw?TTap+j?7m9TunI)KRl;KW2$sAQwN4+Pa?m-8<}xKnsH zpSFnaG5&=Anf?fVpJ9i_+W+1IcM&cFuxvk|H1yUbK_DBHJx6S07T;8+a0egHp;T>~ zpjQ6&1)=`hNREJ)CTyUOocJ9tw{}yFv;*q!=eh1jG#jiZbIrgbxOy0upSgznYDSF1 zEFC+qs4_`YV3WXXvgPgzuJZA+0pknGark97WNcoc$w_hy4LB{u_}%VgvlWZkedc($ z)S#gPEEDKp8anFDL5Fo##bb!}ANW%kA+fp+R|P7FATkJ?E8QQ4KU+nOC~K4Mi8Y&b zNypdi&bl31F$Y)g%hc%z<=8q+{_6K5r>$9}i$K}GTf;&j3kg1R z{Q4qYvHval>S1lwKpKMoBLV-1a9~%4#)f8`A`=e$k75HM;W_wkz+8^ns-2HJtx~eY z;nP70;TK!fx%`?@+A&?7l)Cg(s-~|1*%do)=8?z}I%i361$*m|f+2NRwbFPR$BGJO 
zoiT!ydLR|o?~%HS|EfrR5g^XZL}QsDnN&axEdIr?7Q!D+!e!L?2w1zk()F2e94?a; zNMzKEPDy3>U&l+g96_|gsd9+?Z;q|=3k)$wc-xf10L5sTWfhh({Bjb6#iGh~fudDT zC=(^i{sZa8wgysEeng$};Lj6TGo{O!Y0?sg&+XK_9dO8Y6+^QEu;gf(9W`3isRGNB4HoBo(3<_){wJ;m}>(3u(#Yk!1>Ea zstvxSln=I{mVcMH(+}cc%&)ekhgzITQB(}^EQ(kDvC!C1@H{-Hi30hENv5NfFmTI&6qTJQxFjt`|z@8QF4|ia&qLNp05IRD0D#a{*+etjpvhEGo8*dcG1uB2}}q zdbzfO1}eg1!wgk)xTK+b{k}R#C`5oLH{8{UwjxtA2KD$(t0Yrj`+mm8kl5M zidt7L@#YsSz}QI|N?_3XJ3M&f!(x;BOW(iG8WSP$=Q42YQ*@C`VF2%X(U$FW@L+I0 z?a~$Fbhx<20OK^7RUc+aqK2BZ`0squ-7N@1h2p1$>G0qdECZyNt3ODBQ`pcuQ6MD3 zpJt0&d1x~ry!MCf8$wc^#|l>LN0bK((*^A+!*_0-#F&@SRNaagDjf`tukdu0#9bMh z8T=^=B!>M1uJbeV28z6({*Qlw#?UAZImR-ET3C*xb>Rz&PQ-9w*7}y(UM8vp+^X== zsu5|+u7e{U#Z5*kqtKEgVNd(OtbCo&l&%khdg*csVN+Xz(_=!=n>C?QRs6_l%3Ob; zu4uE{)kXDMG~=AlbQVtGGg+;_gL=S3VXY=6%lbtt+hz%sTWJjg8ETEK)mBsBy&lW~ z+3&7{q11OWbj8W~u)5UEQ<_0%Nd0fv(DoZt++o>-GO`Is@6TJ!TQv1+RJA929T{C# zR;gX19>x5^GHf}R;o@m!;Od5V`Q%Pw^A3!vYN#;EBq_=_um1nMOREl8FTXwAdrZ6V zn%+e>gx5X>Co9%4Z9fMuq~o`D^Fe3g)c%3YRt1eU^U84`0Bl_lY4DeKE)CzR-Xl;P zRxj3ZuQYZfQh}Wp2?n#l?-AT6W?u3SB^5#k|K0B9G=UW7x1x&e^4&s}9R~2;TV1c_ zO2a~hz!%HQdO20y(3s{eFi$vdNejisv=#(*22<`YoZK4(kT&Zgo3tkiVUUKt^`)>; z4yOjMf-d9w7Rhx8{{+l!<4!a7o(PxdB>p49Zn`u{_YIQ2Wuxcy2iK!5tR97lS zMhD2e^WENUcG*h(rASH4sul6%my!z0U3oTkyK6>k5B8+bLsRGTN zD%FvIiYu1p11b)PkauM8e}nWl*Ri^T#(OX`>SBv&)DC+!WL~YOh1@I={76>`DJuOU$frS*VpwzKTglmD<6_R&b9yP2l4(H7J-_=UM0cS+eC7H>p8Ey zX~XRW!7d;spL&jYRzOEa31h51tsKXyckf};F*0t$^4=SNnl24<&wfiK;xvb@_eS&! 
zJ4anJYwvUXnWw+xS{(TkOevOV&Xc{(p&1#s+Y6s3@nm-=etPVVJWG89aiZNj8>}J_ zNq>`}wH{0hvunP&snhb2*dGKUX5t)1VT{2VaXkarmXk(T;mJ4S%nGB`ly+-XHD1UM zg=B8~pxAQ}0P5nO!>l>g4!bBmpT=~tAZeo+72p7 zF?$ShG@R#8k=Rsz^O$$TKyR{UmC{}gHo{>$c%X++pKCsL3oUt?bL#mW{0QT(#AMZ& z{8I3Waz3rv!kN_B6XmysMez}k2a$`TGiO_$NC;+T?OMrg5Bo#)i0WDFO4b9rV=gCs zJwSLpLy6d%uzX^XZpf>i=M>Rt#cKUE=-u%fO+YN`t*cIOjVXeg$t4`r{>O94*NNiw z+zWl7E{QByO=kOPhJN%);gm^fOxv!7)+X{tgh4h*#%0ayuHP1Xsy<1-xENYlRbe2w z=&zwGAGQ)2#Somp9w4>hyUZ9f#=_33Yg^Oa^u2pWfGjO0ta`%VL8S0b@A(W3yoP%7 zNt(1!8u_15 zA+QvG9HjVwmb=2OWme0NUmu!4nR_CbVL2LN7JUb%%Y5(M$9J@kmFaa*@RmYtYjuLo zQg>;R@MMrmO2Nk(`MC@du^D$Ay!--XjmS%pbpg3Kk#YCs9QT-aFa|l4B4~H`V}+P1 z?MJC|r_9B5DdOwp5G$x>V+KBk%?~Sqqw0w5UoGIiT(~O}$8}`aS9U?bkBK#~U;hr8 zjQd{{xkr|#Q_IXi5<`Vk4UP+2{d{&-_3hWqmr$Y?sYBg4j7ozrx8D~r(QZVx^EK@m zt2rvJb>))wH%uEN7er!18Km0{@>KyWVqA}!978I9Ex8<`p_Io;|Bn!YLEv{c?9%I7T)2+R9V6v}^lJkWQV z)5N}v%@X#%ul-Md2bx*JL58HBpNQ9XGy4#XO__=nhGG52xKAkokr(|ZI&0`17S!gD z2Eti4wAooD1C0g_P-;8B+GnZft-B-%`p%XdAn|ml{wcI&l2-7FvZtf7IblMOi1vu6 z=7~0alaH5L!+E|On%?S#yEQXdJL-l)ceaEOrh)#s(n`*SeABMFVefAvA`DlYE zWGd+NLY~r|IW4m7^C%Am!p}E?gGt=zo@?61SadinEIbl@7@s`nWLb_#Tuw@-xPPv) z`5K>C9lO~{6vfI*!~*0IlY$-}efd(AQ5H#|A?LA#FQa`YkcMwfIOt@r=tGN{^_Z^S z?aPfK<`U8A{f}9VuLD4hkAgnLsRn&u??c@UZIkEwxO1faESVUzyUn+h#YV{V6=Q8W zYdZVB&(%FKL%-IF_)^68+(lWi^Yhyh?}R!xts8JB%P%*0Z7K zFD`nR>zk;%quZOq2|7)RvH5h?rjgg|*N2EtXYQi$dUfBD-R~VpreqV|V){(eYCzt1 zMAz<4)wav&FT)9vN?~2!bxJ?ib_I^Hp5w~pJTjk6?DTa%xh->~kOHmGF|WN^3P21c z&Ux z8=e8u$|jDT-l=+bTX?!`nHTx37e2Cl3#ELxaHpYn^5*4LZ*ES2FYuRMzt!)*T@@N?b^2;uGIjc{ zf1Aw{ED%^ijUrln9`;$}m-56!M&*lKp}Fk_bP=r0o3vsdv6d$uQ&)+(&y zFk;^fg_iCH$6$PEY&kpwn2kh)88PmI8dXMu2qMZRX zj)_Judh#(^tP`0k{(hf;oPXw=hb1IT65x-iVTQCO>E&s)e#HGyPsYi|y&c@@*cuA# z3J_-^(r~KG84WT`awFN!{h8K|rm3~;P}S~*Xe1c2!!PH>L(r_wWP$$^06$Ls%KcYN ziu}yx>FY(E)Ctl#SH->b-ezK3s@)d@fn?|}rg>!}m7eE65QIlYe#na9F3=AUoM6Wv z>rK%_B%1p>OW?%I?aSeM4A}E7=Q`)ntLlAGqUJyU=>Bn0<^@ z?7Ka&|H9opr6HXWAcm;(9o-{Mk1cYQoANj(D?qIhO3)>wa3fV{61Us5#59bkrdO@94X~o1hT5 
z=0N~g|GZ==I?&&FR99m8H~eu!ILEySUHX z?7tCk9C+h%Bz`o4x(y3qd(_m--xG=GfsyoshX!j+C0|nIj^ERX`?x>T7rznu(0O3o zv;BJ99lw5@dJ`*zELS^VvQh1WgftO+2q-4UU-}+lY zZ`=>L6fYlkzTNYD%`?7u6M07Jt)Lq|Fza_@34WD=SbZl8MVAaZtSvk6G~Hh)e%$@m z`>uwkKySbzwIC`AnG%Il1ZBysg3Ixn>h3O8EL9@;geH{a)mKWYK8$*)Hu&(};e z*0Y~Q5YgK{w)rsM#<$qR&O5)$aZr30Z9OLub?dNYH`ToyP{*8&aTk)n5K7%sGcC!)^Vzjilva`f>#CMkG-nRJa*;H+P$8_0BhAAiIX9Y4r8lS(@wbC+p8c z3HyHmS3)Qj#sazW9Xx`oM&+0GkDoGwxRhe7~LyGgQa=Q@5 z1OC?TP3aO!^$K8==j>LuPw^jRs0@9?;K#}qe?9v+ron(~v`{9rq-)6FAZRq=3ttbf z>%7jOshCDSA2<4b>ie)2&Wi)0@7Kue(Lm3i=HM}KNxp$1$!3ThX5%<>oX_{4iRTsZ`J>o4p<_XyEbP!^`y1P_932TPKgJxm6FyD+X z+-o`W_WAv!7}~;sV{DiQjOz)Ch0%is`8DW*p1;+tNKKvlqpr-ak-!^yvIS?)<189^ zg0IWl!&Y9IP7j- zK&O0}ov$YR;ev|`%WxO$;V~v(o$VT0EM^$P+H!js)hz~6xs+1lvLI7hHowk0B30{2 z_xJNkWgY<^NusNZ0}gXFyb^{q$63C~ts&`xG)E6mv`$1K>G!SpTXwfT?T@-QXrjRm zY!|=<$Wbo{x}V0&KE!)PL=F_P*I&vWH+I&F+;!i)zf(raLjxgP%15Xu8ry- zSzeRbaI`BeEc_Hbce$lq<&MhM9_tks2&Q$E%|9pomWwnb9rDYRM75n0k}g zjx9lheZh~xsM=kBg_S)_8Wz85Okn)^LfxreCqP*T9c`X>=lU0L%O0&^2PXIH#9Q0u zj@eUdz0(C%7aAgECjc#aK`GoL8SgES7LD7xQI7}XP{viwaXRmo7M)T{9`r)D?kI$O zB-fN3U|Mh}_x0K&SoWCe!~Y6+2#5DMnaJgMI&khG#gt;oLAkXEjJ>qLzGWi3@-thT}zLh+WN~IXjSa&U67Ftb2Eue#+ zTn*5~#dNT&@BEL)TX4Y9+9L(N18tF&a~yn6x3<~lj8r+9o^5+dRUvt-`}`>{zk~Ai z_<AV9_#0OPFHcrH6q0-~6)*2Spfymqo|)r9`9ix`Xv&B%lE80O z0(#Cwr)5FrGeg?snsKeBNot(q?7}2|ods(r77AT&v}J;yi+?nm;-D0j@g_{rvnsMk zRHB@lDAwPA%vENPB)iSQ z)O_s^(imKp9?x|Jsm6*JcKmz!GJRtuljUh3?K=Qp%3A$hb#+_U5?ft;xXVm$Rm+m% zIFg;OAO%ZGCWA_0?s=6=YD~x-k{%a&<28|po9H~hix-a~Wj^C z1e=48Os5HPoRxLL!MWfi<*uif8qP1LUURs1Epo|qmD#_));k?UJGgOme>_>c0nX7t_PD<$O$hq)5_uA^^S_eN;&elO# z5osiW3<)e*qN~X7P^_-v^xgtEf$-KWwt6N;8{z#d!X?&<7252V03}>=d7IsKT%olj zP7gM}!~(a54dF7vI-&WSrNXtfH}U&FcuiyD@9Mr%ACJonzdy9&tU~ojCTCEW{i)mE zv9#?3?k`%l{USWN15zB?aRS%1TyGWmt8xM2)F{-PGD?!{U45hLr^6W55Q)3 z`dHUb!hL7kUlG{c9UtbU5?Wi;9B=hS6$D=;|EaA5yZWl;_BBU6_NJx&8yPV_n`TK zI`?(QL~ZSn;E#fL&R7@Hj8cfkXDlYAW4CmYUINg8B9WMhl~2ls=5^+}ktWWA!kWY0aKozs0|`vef{;z^uyzBpCuBX5VruU>3FOM)4+ALeXo0EZCNKmcc#^+ 
zkFJ&0zq4&mK1plvVAV@kub;VA#?`Bh%tTXCqU)~L0k!)r;7h=hsot>6r%W3D`P!dM zibqgpQh+~1o+@uCb78n!moVdB_3xki#Px>wk zO9ul7KA59j-q_+owSYVd&!5+*Uu)LPSLh$ZuejLf>o}5u4#OtSaVCRr$r#eQdkwhO zLC^VeHe)?|yqA`Gymf&6hK#acj^@#p1qJ7M-ErZS7}B>x(nVyOmINxO&Q-wGfbhs| zObN(pCM!uv$>*sB$JA!z$lq~|)tNTXWK1>PDFBou8QO%Qy`-Wpi(V z()F!RoWgW|+jeKSgns?z!rNPjQicA? zJjptd(2094^4F%@Fiy{_ty)#pQf`kce!jI6Fgi}~)eWwKnAl&rFP_4+vnb?zvS zhf?b^|Mm80lO~<%R7^gSulgYNMt6zp8&r74R zT2r&4(a!U{4T};QN&*!y(k3cDa*+fg2}BaubR{6=Oy34;Xs1tH#N!{L{2A+6L-UXL zMH1ME5@@m!%Z`eUBoIj;l0YPZU%v$IqKOOELo;QG9D^k^con@Kiiuc85{M)aNg$jA zDjh{(AH!1Bi4}wA_+tih$Ik6Ad)~(0{;h=J5N*=lw#BX0000< KMNUMnLSTZ`U2s(Z literal 0 HcmV?d00001 diff --git a/docs/source/dataset_container.rst b/docs/source/dataset_container.rst index a79c038c..991bcd4a 100644 --- a/docs/source/dataset_container.rst +++ b/docs/source/dataset_container.rst @@ -1,3 +1,5 @@ +.. _dataset_container: + .. role:: hidden :class: hidden-section diff --git a/docs/source/examples/fine_tuning.rst b/docs/source/examples/fine_tuning.rst index 80139041..1bf76063 100644 --- a/docs/source/examples/fine_tuning.rst +++ b/docs/source/examples/fine_tuning.rst @@ -1,3 +1,5 @@ +.. _fine_tuning: + .. role:: hidden :class: hidden-section diff --git a/docs/source/get_started/get_started.rst b/docs/source/get_started/get_started.rst new file mode 100644 index 00000000..9afccc8e --- /dev/null +++ b/docs/source/get_started/get_started.rst @@ -0,0 +1,147 @@ +.. role:: hidden + :class: hidden-section + +Getting Started +=============== + +.. 
code-block:: python + + from deepparse.parser import AddressParser + from deepparse.dataset_container import CSVDatasetContainer + + address_parser = AddressParser(model_type="bpemb", device=0) + + # you can parse one address + parsed_address = address_parser("350 rue des Lilas Ouest Québec Québec G1L 1B6") + + # or multiple addresses + parsed_address = address_parser(["350 rue des Lilas Ouest Québec Québec G1L 1B6", + "350 rue des Lilas Ouest Québec Québec G1L 1B6"]) + + # or multinational addresses + # Canada, US, Germany, UK and South Korea + parsed_address = address_parser( + ["350 rue des Lilas Ouest Québec Québec G1L 1B6", "777 Brockton Avenue, Abington MA 2351", + "Ansgarstr. 4, Wallenhorst, 49134", "221 B Baker Street", "서울특별시 종로구 사직로3길 23"]) + + # you can also get the probability of the predicted tags + parsed_address = address_parser("350 rue des Lilas Ouest Québec Québec G1L 1B6", + with_prob=True) + + # Print the parsed address + print(parsed_address) + + # or using one of our dataset container + addresses_to_parse = CSVDatasetContainer("./a_path.csv", column_names=["address_column_name"], + is_training_container=False) + address_parser(addresses_to_parse) + +The default predictions tags are the following + + - ``"StreetNumber"``: for the street number, + - ``"StreetName"``: for the name of the street, + - ``"Unit"``: for the unit (such as apartment), + - ``"Municipality"``: for the municipality, + - ``"Province"``: for the province or local region, + - ``"PostalCode"``: for the postal code, + - ``"Orientation"``: for the street orientation (e.g. west, east), + - ``"GeneralDelivery"``: for other delivery information. + +Parse Addresses From the Command Line +************************************* + +You can also use our cli to parse addresses using: + +.. code-block:: sh + + parse + +Parse Addresses Using Your Own Retrained Model +********************************************** + +See `here `__ for a complete example. + +.. 
code-block:: python + + address_parser = AddressParser( + model_type="bpemb", device=0, path_to_retrained_model="path/to/retrained/bpemb/model.p") + + address_parser("350 rue des Lilas Ouest Québec Québec G1L 1B6") + +Retrain a Model +*************** +See `here `__ for a complete example +using Pickle and `here `__ +for a complete example using CSV. + +.. code-block:: python + + address_parser.retrain(training_container, train_ratio=0.8, epochs=5, batch_size=8) + +One can also freeze some layers to speed up the training using the ``layers_to_freeze`` parameter. + +.. code-block:: python + + address_parser.retrain(training_container, train_ratio=0.8, epochs=5, batch_size=8, layers_to_freeze="seq2seq") + + +Or you can also give a specific name to the retrained model. This name will be use as the model name (for print and +class name) when reloading it. + +.. code-block:: python + + address_parser.retrain(training_container, train_ratio=0.8, epochs=5, batch_size=8, name_of_the_retrain_parser="MyNewParser") + + + + +Retrain a Model With an Attention Mechanism +******************************************* +See `here `__ for a complete example. + +.. code-block:: python + + # We will retrain the fasttext version of our pretrained model. + address_parser = AddressParser(model_type="fasttext", device=0, attention_mechanism=True) + + address_parser.retrain(training_container, train_ratio=0.8, epochs=5, batch_size=8) + + +Retrain a Model With New Tags +***************************** +See `here `__ for a complete example. + +.. code-block:: python + + address_components = {"ATag":0, "AnotherTag": 1, "EOS": 2} + address_parser.retrain(training_container, train_ratio=0.8, epochs=1, batch_size=128, prediction_tags=address_components) + + +Retrain a Seq2Seq Model From Scratch +************************************ + +See `here `__ for +a complete example. + +.. 
code-block:: python + + seq2seq_params = {"encoder_hidden_size": 512, "decoder_hidden_size": 512} + address_parser.retrain(training_container, train_ratio=0.8, epochs=1, batch_size=128, seq2seq_params=seq2seq_params) + + +Download Our Models +******************* + +Here are the URLs to download our pretrained models directly + - `FastText `__, + - `FastTextAttention `__, + - `BPEmb `__, + - `BPEmbAttention `__, + - `FastText Light `__ (using `Magnitude Light `__),. + +Or you can use our cli to download our pretrained models directly using: + +.. code-block:: sh + + download_model + diff --git a/docs/source/index.rst b/docs/source/index.rst index 2577b003..56c93748 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -561,233 +561,6 @@ to achieve an interesting performance. Attention mechanisms improve performance - 99.04 - 99.52 -Getting Started -=============== - -.. code-block:: python - - from deepparse.parser import AddressParser - from deepparse.dataset_container import CSVDatasetContainer - - address_parser = AddressParser(model_type="bpemb", device=0) - - # you can parse one address - parsed_address = address_parser("350 rue des Lilas Ouest Québec Québec G1L 1B6") - - # or multiple addresses - parsed_address = address_parser(["350 rue des Lilas Ouest Québec Québec G1L 1B6", - "350 rue des Lilas Ouest Québec Québec G1L 1B6"]) - - # or multinational addresses - # Canada, US, Germany, UK and South Korea - parsed_address = address_parser( - ["350 rue des Lilas Ouest Québec Québec G1L 1B6", "777 Brockton Avenue, Abington MA 2351", - "Ansgarstr. 
4, Wallenhorst, 49134", "221 B Baker Street", "서울특별시 종로구 사직로3길 23"]) - - # you can also get the probability of the predicted tags - parsed_address = address_parser("350 rue des Lilas Ouest Québec Québec G1L 1B6", - with_prob=True) - - # Print the parsed address - print(parsed_address) - - # or using one of our dataset container - addresses_to_parse = CSVDatasetContainer("./a_path.csv", column_names=["address_column_name"], - is_training_container=False) - address_parser(addresses_to_parse) - -The default predictions tags are the following - - - ``"StreetNumber"``: for the street number, - - ``"StreetName"``: for the name of the street, - - ``"Unit"``: for the unit (such as apartment), - - ``"Municipality"``: for the municipality, - - ``"Province"``: for the province or local region, - - ``"PostalCode"``: for the postal code, - - ``"Orientation"``: for the street orientation (e.g. west, east), - - ``"GeneralDelivery"``: for other delivery information. - -Parse Addresses From the Command Line -************************************* - -You can also use our cli to parse addresses using: - -.. code-block:: sh - - parse - -Parse Addresses Using Your Own Retrained Model -********************************************** - -See `here `_ for a complete example. - -.. code-block:: python - - address_parser = AddressParser( - model_type="bpemb", device=0, path_to_retrained_model="path/to/retrained/bpemb/model.p") - - address_parser("350 rue des Lilas Ouest Québec Québec G1L 1B6") - -Retrain a Model -*************** -See `here `_ for a complete example -using Pickle and `here `_ -for a complete example using CSV. - -.. code-block:: python - - address_parser.retrain(training_container, train_ratio=0.8, epochs=5, batch_size=8) - -One can also freeze some layers to speed up the training using the ``layers_to_freeze`` parameter. - -.. 
code-block:: python - - address_parser.retrain(training_container, train_ratio=0.8, epochs=5, batch_size=8, layers_to_freeze="seq2seq") - - -Or you can also give a specific name to the retrained model. This name will be use as the model name (for print and -class name) when reloading it. - -.. code-block:: python - - address_parser.retrain(training_container, train_ratio=0.8, epochs=5, batch_size=8, name_of_the_retrain_parser="MyNewParser") - - -Parse Address With Our Out-Of-The-Box API -***************************************** -We also offer an out-of-the-box RESTAPI to parse addresses using FastAPI. - -Installation ------------- -First, ensure that you have Docker Engine and Docker Compose installed on your machine. -If not, you can install them using the following documentations in the following order: - - -1. `Docker Engine `_ -2. `Docker Compose `_ - -Once you have Docker Engine and Docker Compose installed, you can run the following command to start the FastAPI application: - -.. code-block:: sh - - docker compose up app - -Sentry -****** - -Also, you can monitor your application usage with `Sentry `_ by setting the environment variable ``SENTRY_DSN`` to your Sentry's project -DSN. There is an example of the ``.env`` file in the project's root named ``.env_example``. You can copy it using the following command: - -.. code-block:: sh - - cp .env_example .env - -Request Examples ----------------- - -Once the application is up and running and port ``8000`` is exported on your localhost, you can send a request with one -of the following methods: - -cURL POST request -~~~~~~~~~~~~~~~~~ - -.. code-block:: sh - - curl -X POST --location "http://127.0.0.1:8000/parse/bpemb-attention" --http1.1 \ - -H "Host: 127.0.0.1:8000" \ - -H "Content-Type: application/json" \ - -d "[ - {\"raw\": \"350 rue des Lilas Ouest Quebec city Quebec G1L 1B6\"}, - {\"raw\": \"2325 Rue de l'Université, Québec, QC G1V 0A6\"} - ]" - -Python POST request -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: python - - import requests - - url = 'http://localhost:8000/parse/bpemb' - addresses = [ - {"raw": "350 rue des Lilas Ouest Quebec city Quebec G1L 1B6"}, - {"raw": "2325 Rue de l'Université, Québec, QC G1V 0A6"} - ] - - response = requests.post(url, json=addresses) - parsed_addresses = response.json() - print(parsed_addresses) - - -Retrain a Model With an Attention Mechanism -******************************************* -See `here `_ for a complete example. - -.. code-block:: python - - # We will retrain the fasttext version of our pretrained model. - address_parser = AddressParser(model_type="fasttext", device=0, attention_mechanism=True) - - address_parser.retrain(training_container, train_ratio=0.8, epochs=5, batch_size=8) - - -Retrain a Model With New Tags -***************************** -See `here `_ for a complete example. - -.. code-block:: python - - address_components = {"ATag":0, "AnotherTag": 1, "EOS": 2} - address_parser.retrain(training_container, train_ratio=0.8, epochs=1, batch_size=128, prediction_tags=address_components) - - -Retrain a Seq2Seq Model From Scratch -************************************ - -See `here `_ for -a complete example. - -.. code-block:: python - - seq2seq_params = {"encoder_hidden_size": 512, "decoder_hidden_size": 512} - address_parser.retrain(training_container, train_ratio=0.8, epochs=1, batch_size=128, seq2seq_params=seq2seq_params) - - -Download Our Models -******************* - -Here are the URLs to download our pretrained models directly - - `FastText `_, - - `FastTextAttention `_, - - `BPEmb `_, - - `BPEmbAttention `_, - - `FastText Light `_ (using `Magnitude Light `_),. - -Or you can use our cli to download our pretrained models directly using: - -.. code-block:: sh - - download_model - - -Installation -============ - -Before installing deepparse, you must have the latest version of `PyTorch `_ in your environment. - -- **Install the stable version of deepparse:** - - .. 
code-block:: sh - - pip install deepparse - -- **Install the latest development version of deepparse:** - - .. code-block:: sh - - pip install -U git+https://github.com/GRAAL-Research/deepparse.git@dev - - Cite ==== @@ -820,12 +593,22 @@ look at our `contributing guidelines `_. +.. toctree:: + :maxdepth: 1 + :caption: Installation + + install/installation + + +.. toctree:: + :maxdepth: 1 + :caption: Get Started -API Reference -============= + get_started/get_started .. toctree:: :maxdepth: 1 @@ -855,6 +638,12 @@ API Reference examples/retrain_with_new_seq2seq_params examples/single_country_retrain +.. toctree:: + :maxdepth: 1 + :caption: Model training + + training_guide + Indices and Tables ================== diff --git a/docs/source/install/installation.rst b/docs/source/install/installation.rst new file mode 100644 index 00000000..beeab907 --- /dev/null +++ b/docs/source/install/installation.rst @@ -0,0 +1,46 @@ +.. role:: hidden + :class: hidden-section + +Installation +============ + +Deepparse is available for Python 3,8 to Python 3.11. + +.. note:: + We do not recommend installation as a root user on your system Python. + Please setup a virtual environment, *e.g.*, via `Anaconda or Miniconda `_, or create a `Docker image `_. + +Quick Start +----------- + +.. raw:: html + :file: quick-start.html + +Installation +------------ + +Before installing deepparse, you must have the latest version of `PyTorch `_ in your environment. + +- **Install the stable version of Deepparse:** + + .. code-block:: sh + + pip install deepparse + +- **Install the stable version of Deepparse with the app extra dependencies:** + + .. code-block:: sh + + pip install "deepparse[app]" + +- **Install the stable version of Deepparse with all extra dependencies:** + + .. code-block:: sh + + pip install "deepparse[all]" + +- **Install the latest development version of Deepparse:** + + .. 
code-block:: sh + + pip install -U git+https://github.com/GRAAL-Research/deepparse.git@dev \ No newline at end of file diff --git a/docs/source/install/quick-start.html b/docs/source/install/quick-start.html new file mode 100644 index 00000000..dc72ca1d --- /dev/null +++ b/docs/source/install/quick-start.html @@ -0,0 +1,148 @@ + + +

+
+
PyTorch
+
Your OS
+
Package
+
CUDA
+
Run:
+
+
+
+
+
+
+
+
+
+ + \ No newline at end of file diff --git a/docs/source/parser.rst b/docs/source/parser.rst index a42eb7a9..620fb811 100644 --- a/docs/source/parser.rst +++ b/docs/source/parser.rst @@ -13,7 +13,7 @@ This is the complete pretrained address parser model. This model allows using th tags of any address. We offer, for now, only two pretrained models, FastText and BPEmb. The first one relies on -`fastText `_ French pretrained embeddings to parse the address, and the second use +`fastText `__ French pretrained embeddings to parse the address, and the second use the `byte-pair multilingual subword `_ pretrained embeddings. In both cases, the architecture is similar, and performances are comparable; our results are available in this `article `_. diff --git a/docs/source/training_guide.rst b/docs/source/training_guide.rst new file mode 100644 index 00000000..4649bbd0 --- /dev/null +++ b/docs/source/training_guide.rst @@ -0,0 +1,99 @@ +.. role:: hidden + :class: hidden-section + +Training Guide +============== + +In addition to parsing addresses out-of-the-box, Deepparse allows you to retrain the pre-trained models to fit your data and use cases. +In the world of machine learning, this is what's refered to as ``fine-tuning``, which can make it easier to obtain well-performing +models more efficiently and with less data. + +Since fine-tuning models can be tricky, this section of the documentation provides some guidelines and insights that may +be useful to adapt our models successfully. See :ref:`fine_tuning` for a coding example of +how to retrain our models. + +.. note:: + We provide practical recommendations for fine-tuning, but you may have to try multiple retraining configurations to + achieve an optimal result. If you have difficulty adapting our models to your use case, + open an issue on the Deepparse `GitHub `__ page. + +A few use cases may lead you to want to retrain Deepparse's models. 
Whether you wish to obtain a better +performance on a single or multiple countries that our models weren't trained on, or your data and address schemes require a more complex +architecture, or the tag structure of your dataset, is different from ours; deepparse's retraining features accommodate all these use cases and more. + +In practice, our models were trained on 20 countries. They demonstrated very accurate results on all of them, so we advise you to use our models without retraining unless you wish to predict +different tags (e.g., StreetNumber ...). Also, suppose you want to retrain +our models to perform better on countries outside of the 20 used in the original training set. In that case, you can look +at `our dataset `__ which includes an additional 41 countries used only for testing. + +There are two main concerns to keep in mind when fine-tuning a model: the model's convergence (i.e, its ability actually to learn from the new data) +and the possibility of ``catastrophic forgetting`` (i.e., losing the model's previous knowledge after training on the new data). + +Learning Successfully +********************* + +Making a model converge is as much an art as a science since it often requires a lot of experimentation and parameter tuning. In the case +of fine-tuning, the models have already developed a base knowledge of the task that they were trained on, which gives them an edge. +This is especially true in the case of Deepparse since the task you are fine-tuning remains the same (i.e. parsing addresses). +However, there are a couple of points to consider to obtain favourable results: + +- **Make sure you have enough data**: deep learning models are notorious for being pretty data hungry, so unless you have enough data, the models + will have a hard time learning. Since Deepparse's models have already been trained on a few million addresses, the need for data is mitigated for fine-tuning. 
However, + it is recommended to use at least a few thousand examples per new country when retraining. + +- **Prepare your dataset**: once you are done pre-processing your dataset, you must convert it to a format which can be loaded into + a :class:`~deepparse.dataset_container.DatasetContainer`. See the :ref:`dataset_container` section for more details. + Also, make sure to keep a portion of your data apart to test the performance of your retrained models. + +- **Use a proper learning rate**: if you are unfamiliar with gradient descent and neural network optimization, you probably don't know what + a ``learning rate`` is. But have no fear; you do not need a Ph.D. to retrain deepparse's models. All you need to understand is that a learning rate + is a value that guides the training process. When it comes to fine-tuning, it is recommended to use a learning rate lower than the one used for the first + training, in this case, we recommend using a learning rate lower than ``0.1``. This parameter can be changed in the :meth:`~deepparse.parser.AddressParser.retrain` method. + +- **Train for long enough**: Deepparse's models are based on the LSTM neural network architecture, which may require a few more training epochs + than recent architectures for fine-tuning. The number of epochs would depend on the use case, but allowing the models to train long enough is important. Perhaps start somewhere between 5 and 10 epochs and increase the number of epochs if needed. + +- **Use a GPU**: this is not required for retraining, but it is highly recommended to use a GPU if your device has one to speed up the + training process. This can be specified in the :class:`~deepparse.parser.AddressParser` constructor. + +Do Not Forget! +************** + +As mentionned above, catastrophic forgetting can happen when fine-tuning machine learning models. 
This is because the models' internal parameters are +modified to accommodate the new task/data, which can impact their ability to be appropriate for the previous task/data. + +There are many fancy ways to mitigate catastrophic forgetting when fine-tuning models. Still, given the task and data that Deepparse handles, we recommend including some of the previous data when constructing your retraining dataset. The amount +of addresses to keep would vary depending on the number of new addresses, but somewhere between 1% and 10% would be a good start. + +Another approach that can help reduce the effect of forgetting is freezing part of the model. Check out +the :meth:`~deepparse.parser.AddressParser.retrain` method for more details on how to freeze different parts of our models during retraining. + +.. note:: + If you're only interested in the models' performance on the new data, you should not concern yourself with catastrophic forgetting. + + +About The Data +************** + +Deepparse's models learn in a supervised manner; this means that the data provided for retraining must be labelled (i.e. the tag of each element in the +address needs to be specified). This is also required when you want to retrain our models with your own custom tags. Each word in the address must +have a corresponding tag. If you are using custom tags, they must be defined in the :meth:`~deepparse.parser.AddressParser.retrain` method under +the ``prediction_tags`` argument. Here are some examples of properly labelled addresses: + +.. image:: /_static/img/labeled_addresses.png + +.. note:: + If the main objective of retraining is to introduce different tags, it might be a good idea to freeze the model layers. This will speed up the + retraining process and will probably yield good results, especially if you are training on the same countries as the original training set. 
+ +In case your data is mostly or exclusively unlabeled, you can retrain on the labelled portion and then use the obtained model to predict labels +for a few more randomly chosen unlabeled addresses, verify and correct the predictions and retrain with the newly labelled addresses added to the retraining dataset. +This will allow you to incrementally increase the size of your dataset with the help of the models. This is a simple case of *active learning*. + +Modifying the Architecture +************************** + +The :meth:`~deepparse.parser.AddressParser.retrain` method allows you to change the architecture of the models using the ``seq2seq_params`` +argument. This can be useful if you need a more complex model or a lighter model, for example. However, if you +change the models' architecture, you will end up with a completely new model that will be retrained from scratch. This +means that all the previous knowledge that the initial model had will disapear. From 2f396dc32fabf27d865463a13acbee10ea6c4b8b Mon Sep 17 00:00:00 2001 From: davebulaval Date: Mon, 16 Oct 2023 20:41:30 -0400 Subject: [PATCH 08/10] improve documentation --- deepparse/comparer/addresses_comparer.py | 20 ++++++------- .../comparer/formatted_compared_addresses.py | 28 +++++++++---------- .../formatted_compared_addresses_raw.py | 6 ++-- .../formatted_compared_addresses_tags.py | 6 ++-- deepparse/data_validation/data_validation.py | 15 ++++++---- .../dataset_container/dataset_container.py | 20 ++++++------- deepparse/dataset_container/tools.py | 10 +++---- deepparse/parser/address_parser.py | 26 ++++++++--------- .../parser/test_address_parser_retrain_api.py | 2 +- 9 files changed, 68 insertions(+), 65 deletions(-) diff --git a/deepparse/comparer/addresses_comparer.py b/deepparse/comparer/addresses_comparer.py index 70b7f704..c7dabf06 100644 --- a/deepparse/comparer/addresses_comparer.py +++ b/deepparse/comparer/addresses_comparer.py @@ -11,14 +11,14 @@ class AddressesComparer: """ Address 
comparer to compare addresses with each other and retrieves the differences between them. The addresses - are parsed using an address parser based on one of the seq2seq pretrained networks either with fastText or BPEmb. + are parsed using an address parser based on one of the seq2seq pretrained networks, either with fastText or BPEmb. - The address comparer can compare already parsed addresses. The address parser first recompose the raw - addresses then suggests its own tags, then it makes a comparison with the tags of the source parsing and the + The address comparer can compare already parsed addresses. The address parser first recomposes the raw + addresses then suggest its own tags; then it makes a comparison with the tags of the source parsing and the newly parsed address The address comparer is also able to compare raw addresses by first parsing the addresses using the - address parser and then brings out the differences among the parsed addresses. + address parser and then bring out the differences among the parsed addresses. Args: @@ -40,13 +40,13 @@ def compare_tags( ) -> Union[List[FormattedComparedAddressesTags], FormattedComparedAddressesTags]: """ Compare tags of a source parsing with the parsing from AddressParser. First, it reconstructs the - raw address from the parsing, then AddressParser generates tags and then compares the two parsings. + raw address from the parsing, AddressParser generates tags and compares the two parsings. Args: addresses_tags_to_compare (Union[List[tuple], List[List[tuple]]]): list of tuple that contains - the tags for the address components from the source. Can compare multiples parsings if passed as a + the tags for the address components from the source. Can compare multiple parsings if passed as a list of tuples. - with_prob (Union[None, bool]): A option flag to either or not include prob in the comparison report. + with_prob (Union[None, bool]): An option flag to either or not include prob in the comparison report. 
The probabilities are not compared but only included in the report. The default value is None, which means not taking into account. @@ -122,14 +122,14 @@ def compare_raw( with_prob: Union[None, bool] = None, ) -> List[FormattedComparedAddressesRaw]: """ - Compare a list of raw addresses together, it starts by parsing the addresses + Compare a list of raw addresses together. It starts by parsing the addresses with the setted parser and then return the differences between the addresses components retrieved with our model. Args: raw_addresses_to_compare (Union[Tuple[str], List[Tuple[str]]]): - List of string that represent raw addresses to compare. - with_prob (Union[None, bool]): A option flag to either or not include prob in the comparison report. + List of strings that represent raw addresses to compare. + with_prob (Union[None, bool]): An option flag to either or not include prob in the comparison report. The probabilities are not compared but only included in the report. The default value is None, which means not taking into account. diff --git a/deepparse/comparer/formatted_compared_addresses.py b/deepparse/comparer/formatted_compared_addresses.py index f29eaca2..f90f699e 100644 --- a/deepparse/comparer/formatted_compared_addresses.py +++ b/deepparse/comparer/formatted_compared_addresses.py @@ -20,7 +20,7 @@ class FormattedComparedAddresses(ABC): for the first one. second_address(FormattedParsedAddress): A formatted parsed address that contains the parsing information for the second one. - origin: (Tuple[str, str]): The origin of the parsing (ex : from source or from a deepparse pretrained model). + origin: (Tuple[str, str]): The origin of the parsing (ex : from source or a Deepparse pretrained model). 
Example: @@ -40,7 +40,7 @@ class FormattedComparedAddresses(ABC): @property def list_of_bool(self) -> List: """ - A list of boolean that contains all the address components names and indicates if it is the same for the + A list of boolean that contains all the address components' names and indicates if it is the same for the two addresses. Return: @@ -86,7 +86,7 @@ def comparison_report(self, nb_delimiters: Union[int, None] = None) -> None: def _comparison_report(self, nb_delimiters: Union[int, None]) -> str: """ - Builds a comparison_report with delimiters to make the beginning and the end of the comparison easier to spot. + Builds a comparison_report with delimiters to make the comparison's beginning and end easier to spot. """ # Get terminal size to adapt the output to the user @@ -102,15 +102,15 @@ def _comparison_report(self, nb_delimiters: Union[int, None]) -> str: @abstractmethod def _comparison_report_builder(self) -> str: """ - Builds the core of a comparison report for the different comparisons. Since the procedure to make a tags - comparison and the raw addresses comparison is different, the comparison report is not the same for the two. + Builds the core of a comparison report for the various comparisons. Since the procedure to make a tags + comparison and the raw addresses comparison are different, the comparison report is not the same for the two. It is then implemented in each specific class. """ @abstractmethod def _get_probs(self) -> Dict: """ - Get the tags from the parsing with their associated probabilities, the method needs to be implemented in each + To get the tags from the parsing with their associated probabilities, the method needs to be implemented in each class because they don't use the probabilities the same way. """ @@ -180,7 +180,7 @@ def _get_tags_diff_color( Args: name_one (str, optional) : Name associated with first color. The default value is the first address. - name_two (str, optional) : Name associated with second color. 
The default value is the second address. + name_two (str, optional) : Name associated with the second colour. The default value is the second address. verbose (bool, optional): If True, it will print a presentation of the colours and what they mean. The default value is True. @@ -221,14 +221,14 @@ def _get_tags_diff_color( def _bool_address_tags_are_the_same(self, parsed_addresses: Union[List[List[tuple]], List[tuple]]) -> List[tuple]: """ Compare addresses components and put the differences in a dictionary where the keys are the - names of the addresses components, and the values are the value of the addresses component. + names of the addresses components, and the values are the values of the addresses component. Args: parsed_addresses (Union[List[List[tuple]], List[tuple]]): Contains the tags and the - address components name for the parsed addresses. + address components' names for the parsed addresses. Return: - List[tuple]: List of tuples that contains all addresses components that differ from each other. + List[tuple]: List of tuples that contain all addresses components that differ from each other. """ unique_address_component_names = self._unique_addresses_component_names(parsed_addresses) @@ -258,16 +258,16 @@ def _bool_address_tags_are_the_same(self, parsed_addresses: Union[List[List[tupl @staticmethod def _unique_addresses_component_names(parsed_addresses: List[List[tuple]]) -> List: """ - Retrieves all the unique address components names from the comparison then returns it. + Retrieves all the unique address component names from the comparison, then returns it. Args: parsed_addresses (List[List[tuple]]): Contains the tags and the - address components name for the parsed addresses. + address components' names for the parsed addresses. Return: - Returns a list of all the unique address components names. + Returns a list of all the unique address component names. """ - # Here we don't use a set since order will change and report will also change. 
+ # We don't use a set here since the order and report will change. unique_address_component_names = [] for tuple_values in parsed_addresses: for address_component in tuple_values: diff --git a/deepparse/comparer/formatted_compared_addresses_raw.py b/deepparse/comparer/formatted_compared_addresses_raw.py index 614ee313..de94c05d 100644 --- a/deepparse/comparer/formatted_compared_addresses_raw.py +++ b/deepparse/comparer/formatted_compared_addresses_raw.py @@ -12,7 +12,7 @@ class FormattedComparedAddressesRaw(FormattedComparedAddresses): def _get_probs(self) -> Dict: """ - Get the tags from the parsing with their associated probabilities, the method needs to be implemented in each + To get the tags from the parsing with their associated probabilities, the method needs to be implemented in each class because they don't use the probabilities the same way. """ return { @@ -45,8 +45,8 @@ def _get_raw_diff_color(self, verbose=True) -> str: def _comparison_report_builder(self) -> str: """ - Builds the core of a comparison report for the different comparisons. Since the procedure to make a tags - comparison and the raw addresses comparison is different, the comparison report is not the same for the two. + Builds the core of a comparison report for the various comparisons. Since the procedure to make a tags + comparison and the raw addresses comparison are different, the comparison report is not the same for the two. It is then implemented in each specific class. 
""" str_formatted = "" diff --git a/deepparse/comparer/formatted_compared_addresses_tags.py b/deepparse/comparer/formatted_compared_addresses_tags.py index 104643af..775335d8 100644 --- a/deepparse/comparer/formatted_compared_addresses_tags.py +++ b/deepparse/comparer/formatted_compared_addresses_tags.py @@ -12,7 +12,7 @@ class FormattedComparedAddressesTags(FormattedComparedAddresses): def _get_probs(self) -> Dict: """ - Get the tags from the parsing with their associated probabilities, the method needs to be implemented in each + To get the tags from the parsing with their associated probabilities, the method needs to be implemented in each class because they don't use the probabilities the same way. """ return { @@ -37,8 +37,8 @@ def _get_probs_of_tags(self, verbose: bool = True) -> str: def _comparison_report_builder(self) -> str: """ - Builds the core of a comparison report for the different comparisons. Since the procedure to make a tags - comparison and the raw addresses comparison is different, the comparison report is not the same for the two. + Builds the core of a comparison report for the various comparisons. Since the procedure to make a tags + comparison and the raw addresses comparison are different, the comparison report is not the same for the two. It is then implemented in each specific class. """ diff --git a/deepparse/data_validation/data_validation.py b/deepparse/data_validation/data_validation.py index 3b98a8c3..2e95e85d 100644 --- a/deepparse/data_validation/data_validation.py +++ b/deepparse/data_validation/data_validation.py @@ -3,30 +3,33 @@ def validate_if_any_empty(string_elements: List) -> bool: """ - Return true if one of the string element is an empty one. + Return ``True`` if one of the string elements is empty. For example, the second element in the following list is + an empty address: ``["An address", "", "Another address"]``. Thus, it will return ``False``. Args: - string_elements (list): A list of string to validate. 
+        string_elements (list): A list of strings to validate.
     """
     return any(is_empty(string_element) for string_element in string_elements)
 
 
 def validate_if_any_whitespace_only(string_elements: List) -> bool:
     """
-    Return true if one of the string element is only whitespace.
+    Return ``True`` if one of the string elements is only whitespace. For example, the second element in the
+    following list is only whitespace: ``["An address", " ", "Another address"]``. Thus, it will return ``True``.
 
     Args:
-        string_elements (list): A list of string to validate.
+        string_elements (list): A list of strings to validate.
     """
     return any(is_whitespace_only(string_element) for string_element in string_elements)
 
 
 def validate_if_any_none(string_elements: List) -> bool:
     """
-    Return true if one of the string element is a None value.
+    Return ``True`` if one string element is a ``None`` value. For example, the second element in the following
+    list is a ``None`` value: ``["An address", None, "Another address"]``. Thus, it will return ``True``.
 
     Args:
-        string_elements (list): A list of string to validate.
+        string_elements (list): A list of strings to validate.
     """
     return any(is_none(string_element) for string_element in string_elements)
diff --git a/deepparse/dataset_container/dataset_container.py b/deepparse/dataset_container/dataset_container.py
index 2e691305..9ffd7588 100644
--- a/deepparse/dataset_container/dataset_container.py
+++ b/deepparse/dataset_container/dataset_container.py
@@ -54,14 +54,14 @@ def __getitem__(
         self, idx: Union[int, slice]
     ) -> Union[List[str], str, List[List[Tuple[str, List]]], Tuple[str, List]]:
         """
-        If the DatasetContainer is a predict one:
+        If the DatasetContainer is a "predict" one:
 
-        - it can be a list of string items (e.g. a list of address (str)), or
+        - it can be a list of string items (e.g. a list of addresses (str)), or
         - it can be a unique string item (e.g. one address).
If the DatasetContainer is a training one: - - it can be a list of tuple (str, list) items, namely a list of parsed example (e.g. an address with + - it can be a list of tuple (str, list) items, namely a list of parsed examples (e.g. an address with the tags), or - it can be a tuple (str, list) item. @@ -114,7 +114,7 @@ def _training_validation(self) -> None: if not self._data_tags_is_same_len_then_address(): print( - f"Some addresses (whitespace-split) and the tags associated with them are not the same len. " + f"Some addresses (whitespace-split) and the associated tags are not the same len. " f"If you are using a CSVDatasetContainer, consider using the tag_seperator_reformat_fn argument." f"Here is the report of those cases where len differ to help you out:\n" f"{self._data_tags_not_the_same_len_diff()}" @@ -190,8 +190,8 @@ def __init__(self, data_path: str, is_training_container: bool = True) -> None: if not is_training_container: if self._test_predict_container_is_list_of_tuple(): raise DataError( - "The data is a list of tuple by the dataset container is a predict container. " - "Predict container should contains only a list of address." + "The data is a list of tuples, but the dataset container is a predict container. " + "Predict container should contain only a list of addresses." ) self.validate_dataset() @@ -226,17 +226,17 @@ class CSVDatasetContainer(DatasetContainer): data_path (str): The path to the CSV dataset file. column_names (list): A column name list to extract the dataset element. - If the dataset container is a predict one, the list must be of exactly one element - (i.e. the address column). On the other hand, if the dataset container is a training one, the list must be + If the dataset container is a "predict" one, the list must be of exactly one element + (i.e. the address column). On the other hand, if the dataset container is a "training" one, the list must be of exactly two elements: addresses and tags. 
is_training_container (bool): Either or not, the dataset container is a training container. This will determine the dataset validation test we apply to the dataset. That is, a predict dataset doesn't include tags. The default value is true. separator (str): The CSV columns separator to use. By default, ``"\\t"``. tag_seperator_reformat_fn (Callable, optional): A function to parse a tags string and return a list of - address tags. For example, if the tag column is a former python list saved with pandas, the characters ``]`` + address tags. For example, if the tag column is a former Python list saved with pandas, the characters ``]`` , ``]`` and ``'`` will be included as the tags' element. Thus, a parsing function will take a string as is - parameter and output a python list. The default function process it as a former python list. + parameter and output a python list. The default function processes it as a former Python list. That is, it removes the ``[],`` characters and splits the sequence at each comma (``","``). csv_reader_kwargs (dict, optional): Keyword arguments to pass to pandas ``read_csv`` use internally. By default, the ``data_path`` is passed along with our default ``sep`` value ( ``"\\t"``) and the ``"utf-8"`` encoding diff --git a/deepparse/dataset_container/tools.py b/deepparse/dataset_container/tools.py index 2971b58f..133522aa 100644 --- a/deepparse/dataset_container/tools.py +++ b/deepparse/dataset_container/tools.py @@ -14,23 +14,23 @@ def former_python_list(tags: str) -> List: A list of the parsed tag set. """ # We remove the [ and ] of the list. - # Then, we split each element using a comma as separator. - # Finally, since some case the element are separated by a comma (e.g. element1,element2) + # Then, we split each element using a comma as a separator. + # Finally, in some cases, the element are separated by a comma (e.g. element1,element2) # or a comma and a whitespace (e.g. 
element1, element2), we strip the whitespace on all tags to
-    # remove the trailing whitespace when element are separated by a coma and a whitespace.
+    # remove the trailing whitespace when a comma and a whitespace separate elements.
     # To fix https://github.com/GRAAL-Research/deepparse/issues/124.
     return [tag.strip() for tag in tags.replace("[", "").replace("]", "").replace("'", "").split(",")]
 
 
 def validate_column_names(column_names: List[str]) -> bool:
     """
-    Function validate if element of a list of column name are valid.
+    Function to validate if the elements of a list of column names are valid.
 
     Args:
         column_names (List[str]): A list of column names.
 
     Return:
-        Either or not, the colum name are valid.
+        Either or not, the column names are valid.
     """
     improper_column_names = False
     if validate_if_any_empty(column_names) or validate_if_any_whitespace_only(column_names):
diff --git a/deepparse/parser/address_parser.py b/deepparse/parser/address_parser.py
index 2dc48c3d..ceba52e0 100644
--- a/deepparse/parser/address_parser.py
+++ b/deepparse/parser/address_parser.py
@@ -539,9 +539,9 @@ def retrain(
         disable_tensorboard (bool): To disable Poutyne automatic Tensorboard monitoring. By default, we disable them
             (true).
         prediction_tags (Union[dict, None]): A dictionary where the keys are the address components
-            (e.g. street name) and the values are the components indices (from 0 to N + 1) to use during retraining
-            of a model. The ``+ 1`` corresponds to the End Of Sequence (EOS) token that needs to be included in the
-            dictionary. We will use this dictionary's length for the prediction layer's output size.
+            (e.g. street name) and the values are the components indices (from 0 to N + 1) to use during the
+            retraining of a model. The ``+ 1`` corresponds to the End Of Sequence (EOS) token that needs to be
+            included in the dictionary. We will use this dictionary's length for the prediction layer's output size.
We also save the dictionary to be used later on when you load the model. The default value is ``None``, meaning we use our pretrained model prediction tags. seq2seq_params (Union[dict, None]): A dictionary of seq2seq parameters to modify the seq2seq architecture @@ -582,7 +582,7 @@ def retrain( - if layers_to_freeze is not ``None``, the following tag: ``FreezedLayer{portion}``. verbose (Union[None, bool]): To override the AddressParser verbosity for the test. When set to True or False, it will override (but it does not change the AddressParser verbosity) the test verbosity. - If set to the default value None, the AddressParser verbosity is used as the test verbosity. + If set to the default value ``None``, the AddressParser verbosity is used as the test verbosity. Return: @@ -745,7 +745,7 @@ def retrain( self.processor.tags_converter = self.tags_converter if not self.model.same_output_dim(self.tags_converter.dim): - # Since we have change the output layer dim, we need to handle the prediction layer dim + # Since we have changed the output layer dim, we need to handle the prediction layer dim new_dim = self.tags_converter.dim if seq2seq_params is None: self.model.handle_new_output_dim(new_dim) @@ -759,7 +759,7 @@ def retrain( seq2seq_params.update({"pre_trained_weights": False}) model_factory_dict.update({"seq2seq_kwargs": seq2seq_params}) - # We set verbose to false since model is reloaded + # We set verbose to false since the model is reloaded self._setup_model(verbose=False, path_to_retrained_model=None, **model_factory_dict) callbacks = [] if callbacks is None else callbacks @@ -791,7 +791,7 @@ def retrain( with_capturing_context = False if not valid_poutyne_version(min_major=1, min_minor=8): print( - "You are using a older version of Poutyne that does not support properly error management." + "You are using an older version of Poutyne that does not support proper error management." " Due to that, we cannot show retrain progress. 
To fix that, update Poutyne to " "the newest version." ) @@ -811,7 +811,7 @@ def retrain( list_of_file_path = os.listdir(path=".") if list_of_file_path: if pretrained_parser_in_directory(logging_path): - # Mean we might already have checkpoint in the training directory + # Mean we might already have a checkpoint in the training directory files_in_directory = get_files_in_directory(logging_path) retrained_address_parser_in_directory = get_address_parser_in_directory(files_in_directory)[ 0 @@ -853,7 +853,7 @@ def retrain( # Means we have changed the seq2seq params torch_save.update({"seq2seq_params": seq2seq_params}) if prediction_tags is not None: - # Means we have changed the predictions tags + # Means we have changed the prediction tags torch_save.update({"prediction_tags": prediction_tags}) torch_save.update( @@ -885,7 +885,7 @@ def retrain( except FileNotFoundError as error: if "s3" in file_path or "//" in file_path or ":" in file_path: raise FileNotFoundError( - "Are You trying to use a AWS S3 URI? If so path need to start with s3://." + "Are You trying to use an AWS S3 URI? If so path needs to start with s3://." ) from error return train_res @@ -906,7 +906,7 @@ def test( Args: test_dataset_container (~deepparse.dataset_container.DatasetContainer): The test dataset container of the data to use. - batch_size (int): The size of the batch (by default, ``32``). + batch_size (int): The batch size (by default, ``32``). num_workers (int): Number of workers to use for the data loader (by default, ``1`` worker). callbacks (Union[list, None]): List of callbacks to use during training. See Poutyne `callback `_ for more information. @@ -1008,7 +1008,7 @@ def save_model_weights(self, file_path: Union[str, Path]) -> None: Method to save, in a Pickle format, the address parser model weights (PyTorch state dictionary). file_path (Union[str, Path]): A complete file path with a pickle extension to save the model weights. - It can either be a string (e.g. 
'path/to/save.p') or a path like path (e.g. Path('path/to/save.p'). + It can either be a string (e.g. 'path/to/save.p') or a path-like path (e.g. Path('path/to/save.p'). Examples: @@ -1196,7 +1196,7 @@ def _retrain( verbose: Union[None, bool], ) -> List[Dict]: # pylint: disable=too-many-arguments - # If Poutyne 1.7 and before, we capture poutyne print since it print some exception. + # If Poutyne 1.7 and before, we capture poutyne print since it prints some exception. # Otherwise, we use a null context manager. with Capturing() if capturing_context else contextlib.nullcontext(): train_res = experiment.train( diff --git a/tests/parser/test_address_parser_retrain_api.py b/tests/parser/test_address_parser_retrain_api.py index 5253626e..f3b12b22 100644 --- a/tests/parser/test_address_parser_retrain_api.py +++ b/tests/parser/test_address_parser_retrain_api.py @@ -198,7 +198,7 @@ def test_givenAModel_whenRetrainWithPoutyneBefore18_thenPrintMessage( actual = self.test_out.getvalue() expected = ( - "You are using a older version of Poutyne that does not support properly error management." + "You are using an older version of Poutyne that does not support proper error management." " Due to that, we cannot show retrain progress. 
To fix that, update Poutyne to the newest version.\n" ) From 329f37257ab7f38b32ece93d5624459027144d58 Mon Sep 17 00:00:00 2001 From: davebulaval Date: Thu, 28 Dec 2023 10:23:36 -0400 Subject: [PATCH 09/10] bump black version --- pyproject.toml | 2 +- styling_requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 44d1fe5f..f923993e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ target-version = ['py38', 'py39', 'py310', 'py311'] line-length = 120 skip-string-normalization = true -required-version = "23.9.1" +required-version = "23.12.1" extend-exclude = "/(slides)/" [tool.pylint.ini_options] diff --git a/styling_requirements.txt b/styling_requirements.txt index 8a4776f6..e15f2037 100644 --- a/styling_requirements.txt +++ b/styling_requirements.txt @@ -1,4 +1,4 @@ -black==23.9.1 +black==23.12.1 pylint==2.16.2 pylint-django[with_django]==2.5.3 pre-commit==3.3.3 \ No newline at end of file From 325242877bb514c05e15052f1dd6cb951558666e Mon Sep 17 00:00:00 2001 From: davebulaval Date: Thu, 28 Dec 2023 11:49:53 -0400 Subject: [PATCH 10/10] improve documentation --- CHANGELOG.md | 16 ++-- deepparse/app/request_examples.http | 2 +- deepparse/cli/download_model.py | 5 +- deepparse/cli/download_models.py | 4 +- deepparse/cli/parse.py | 12 +-- deepparse/cli/parser_arguments_adder.py | 4 +- deepparse/cli/retrain.py | 12 +-- deepparse/cli/test.py | 2 +- deepparse/comparer/addresses_comparer.py | 26 +++---- .../comparer/formatted_compared_addresses.py | 31 ++++---- .../formatted_compared_addresses_raw.py | 4 +- .../formatted_compared_addresses_tags.py | 2 +- deepparse/converter/data_padder.py | 24 +++--- deepparse/converter/data_processor.py | 6 +- deepparse/converter/target_converter.py | 5 +- .../dataset_container/dataset_container.py | 73 ++++++++++--------- deepparse/dataset_container/tools.py | 4 +- deepparse/download_tools.py | 46 ++++++------ .../bpemb_embeddings_model.py | 4 +- 
.../embeddings_model_factory.py | 4 +- .../fasttext_embeddings_model.py | 4 +- .../magnitude_embeddings_model.py | 8 +- deepparse/errors/data_error.py | 2 +- deepparse/errors/model_error.py | 2 +- deepparse/errors/server_error.py | 2 +- deepparse/metrics/accuracy.py | 4 +- deepparse/metrics/nll_loss.py | 10 +-- deepparse/network/bpemb_seq2seq.py | 32 ++++---- deepparse/network/decoder.py | 4 +- deepparse/network/embedding_network.py | 19 ++--- deepparse/network/encoder.py | 6 +- deepparse/network/fasttext_seq2seq.py | 24 +++--- deepparse/network/model_factory.py | 18 +++-- deepparse/network/seq2seq.py | 50 +++++++------ deepparse/parser/address_parser.py | 31 ++++---- deepparse/parser/formatted_parsed_address.py | 10 +-- deepparse/validations.py | 14 ++-- deepparse/vectorizer/bpemb_vectorizer.py | 2 +- deepparse/vectorizer/fasttext_vectorizer.py | 4 +- deepparse/vectorizer/magnitude_vectorizer.py | 4 +- deepparse/weights_tools.py | 4 +- docs/source/api.rst | 12 +-- docs/source/cli.rst | 26 +++---- .../retrain_with_new_seq2seq_params.rst | 2 +- docs/source/parser.rst | 10 +-- docs/source/training_guide.rst | 16 ++-- examples/retrain_with_new_seq2seq_params.py | 2 +- models_evaluation/timer/timer.py | 2 +- tests/cli/test_retrain.py | 4 +- .../parser/test_address_parser_retrain_api.py | 4 +- tests/test_download_tools.py | 4 +- 51 files changed, 323 insertions(+), 299 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cd0153c..fc87338d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ - Added "contributing to" - Added fix for comma problem (#56) -- Added content in Address Parser doc for tags definition +- Added content in Address Parser documentation for tags definition - Fixed Pylint bug with PyTorch 1.6 - Fixed `pack_padded` cpu error with PyTorch new release @@ -75,7 +75,7 @@ ## 0.3.6 -- Added a method for a dict conversion of parsed addresses for simpler `Pandas` integration. 
+- Added a method for dictionary conversion of parsed addresses for simpler `Pandas` integration. - Added examples for parsing addresses and how to convert them into a DataFrame. - Fixed error with download module. @@ -83,7 +83,7 @@ - Added verbose flag to training and test based on the __init__ of address parser. - Added a feature to retrain our models with prediction tags dictionary different from the default one. -- Added in-doc code examples. +- Added in-documentation code examples. - Added code examples. - Small improvement of models implementation. @@ -134,7 +134,7 @@ ## 0.6.2 - Improved (slightly) code speed of data padding method as per PyTorch list or array to Tensor recommendation. -- Improved doc for RuntimeError due to retraining FastText and BPEmb model in the same directory. +- Improved documentation for RuntimeError due to retraining FastText and BPEmb model in the same directory. - Added error handling RuntimeError when retraining. ## 0.6.3 @@ -162,13 +162,13 @@ ## 0.6.6 - Fixed errors in code examples -- Improved doc of download_from_url +- Improved documentation of download_from_url - Improve error management of retrain and test ## 0.6.7 - Fixed errors in data validation -- Improved doc over data validation +- Improved documentation over data validation - Bugfix data slicing error with data containers - Add an example on how to use a retrained model @@ -176,7 +176,7 @@ - Improved CLI - Fixed bug in CLI export dataset -- Improved the doc of the CLI +- Improved the documentation of the CLI ## 0.7.1 @@ -208,7 +208,7 @@ user-given name - Hot-fix missing raise for DataError validation of address to parse when address is tuple - Bug-fix handling of string column name for CSVDatasetContainer that raised ValueError -- Improve parse CLI doc and fix error in doc stating JSON format is supported as input data +- Improve parse CLI documentation and fix error in documentation stating JSON format is supported as input data - Add batch_size to parse CLI - Add 
minimum version to Gensim 4.0.0. - Add a new CLI function, retrain, to retrain from the command line diff --git a/deepparse/app/request_examples.http b/deepparse/app/request_examples.http index 1bd8a2f8..403dc391 100644 --- a/deepparse/app/request_examples.http +++ b/deepparse/app/request_examples.http @@ -16,5 +16,5 @@ Content-Type: application/json [ {"raw": "16 rue Grande-Place, Victoriaville, QC, G6S 1E6"}, - {"raw": "123 rue Valancourt, Val-Alain, quebec, g9v1s3"} + {"raw": "123 rue valancourt, val-alain, quebec, g9v 1s3"} ] \ No newline at end of file diff --git a/deepparse/cli/download_model.py b/deepparse/cli/download_model.py index d748bc89..6ccaa5e4 100644 --- a/deepparse/cli/download_model.py +++ b/deepparse/cli/download_model.py @@ -1,13 +1,12 @@ import argparse import sys - from deepparse.download_tools import download_model, MODEL_MAPPING_CHOICES def main(args=None) -> None: """ - CLI function to manually download all the dependencies for a pretrained model. + CLI function to download all the dependencies for a pretrained model manually. Example of usage: @@ -41,7 +40,7 @@ def get_parser() -> argparse.ArgumentParser: "--saving_cache_dir", type=str, default=None, - help="To change the default saving cache directory (default to None e.g. default path).", + help="To change the default saving cache directory (default to None, e.g. default path).", ) return parser diff --git a/deepparse/cli/download_models.py b/deepparse/cli/download_models.py index 6ab6f359..658c8816 100644 --- a/deepparse/cli/download_models.py +++ b/deepparse/cli/download_models.py @@ -6,7 +6,7 @@ def main(args=None) -> None: """ - CLI function to manually download all the dependencies for all pretrained models. + CLI function to download all the dependencies for all pretrained models manually. 
Example of usage: @@ -34,7 +34,7 @@ def get_parser() -> argparse.ArgumentParser: "--saving_cache_dir", type=str, default=None, - help="To change the default saving cache directory (default to None e.g. default path).", + help="To change the default saving cache directory (default to None, e.g. default path).", ) return parser diff --git a/deepparse/cli/parse.py b/deepparse/cli/parse.py index 37e8c13d..a96746ec 100644 --- a/deepparse/cli/parse.py +++ b/deepparse/cli/parse.py @@ -32,7 +32,7 @@ def main(args=None) -> None: # pylint: disable=too-many-locals, too-many-branches """ - CLI function to rapidly parse an addresses dataset and output it in another file. + CLI function to easily parse an address dataset and output it in another file. Examples of usage: @@ -40,7 +40,7 @@ def main(args=None) -> None: parse fasttext ./dataset_path.csv parsed_address.pickle - Using a gpu device + Using a GPU device .. code-block:: sh @@ -119,7 +119,7 @@ def main(args=None) -> None: def get_parser() -> argparse.ArgumentParser: - """Return ArgumentParser for the cli.""" + """Return ArgumentParser for the CLI.""" parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) parser.add_argument( @@ -137,11 +137,11 @@ def get_parser() -> argparse.ArgumentParser: parser.add_argument( "export_filename", help=wrap( - "The filename to use to export the parsed addresses. We will infer the file format base on the " + "The filename to use to export the parsed addresses. We will infer the file format based on the " "file extension. That is, if the file is a pickle (.p or .pickle), we will export it into a pickle file. " - "The supported format are Pickle, CSV and JSON. " + "The supported formats are Pickle, CSV and JSON. " "The file will be exported in the same repositories as the dataset_path. " - "See the doc for more details on the format exporting." + "See the documentation for more details on the format exporting." 
), type=str, ) diff --git a/deepparse/cli/parser_arguments_adder.py b/deepparse/cli/parser_arguments_adder.py index 72eeaf2b..52c50755 100644 --- a/deepparse/cli/parser_arguments_adder.py +++ b/deepparse/cli/parser_arguments_adder.py @@ -25,7 +25,7 @@ def add_csv_column_name_arg(parser: ArgumentParser) -> None: parser.add_argument( "--csv_column_name", help=wrap( - "The column name to extract address in the CSV. Need to be specified if the provided dataset_path " + "The column name to extract the address in the CSV. It needs to be specified if the provided dataset_path " "leads to a CSV file." ), type=str, @@ -37,7 +37,7 @@ def add_csv_column_names_arg(parser: ArgumentParser) -> None: parser.add_argument( "--csv_column_names", help=wrap( - "The column names to extract address and tags in the CSV. Need to be specified if the provided " + "The column names to extract addresses and tags in the CSV. It needs to be specified if the provided " "dataset_path leads to a CSV file. Column names have to be separated by a whitespace. For" "example, --csv_column_names column1 column2. By default, None." ), diff --git a/deepparse/cli/retrain.py b/deepparse/cli/retrain.py index 5d070a6b..7ba8c7eb 100644 --- a/deepparse/cli/retrain.py +++ b/deepparse/cli/retrain.py @@ -64,7 +64,7 @@ def handle_prediction_tags(parsed_args): def main(args=None) -> None: # pylint: disable=too-many-locals, too-many-branches """ - CLI function to rapidly retrain an addresses parser and saves it. One can retrain a base pretrained model + CLI function to easily retrain an address parser and save it. One can retrain a base pretrained model using most of the arguments as the :meth:`~AddressParser.retrain` method. By default, all the parameters have the same default value as the :meth:`~AddressParser.retrain` method. The supported parameters are the following: @@ -86,7 +86,7 @@ def main(args=None) -> None: retrain fasttext ./train_dataset_path.csv - Using a gpu device + Using a GPU device .. 
code-block:: sh @@ -142,7 +142,7 @@ def main(args=None) -> None: def get_parser() -> argparse.ArgumentParser: - """Return ArgumentParser for the cli.""" + """Return ArgumentParser for the CLI.""" parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) @@ -198,8 +198,8 @@ def get_parser() -> argparse.ArgumentParser: "--logging_path", help=wrap( "The logging path for the checkpoints and the retrained model. " - "Note that training creates checkpoints, and we use Poutyne library that use the best epoch " - "model and reloads the state if any checkpoints are already there. " + "Note that training creates checkpoints, and we use the Poutyne library that uses the best epoch " + "model and reload the state if any checkpoints are already there. " "Thus, an error will be raised if you change the model type. For example, " "you retrain a FastText model and then retrain a BPEmb in the same logging path directory." "By default, the path is './checkpoints'." @@ -241,7 +241,7 @@ def get_parser() -> argparse.ArgumentParser: help=wrap( "Path to a JSON file of prediction tags to use to retrain. Tags are in a key-value style, where " "the key is the tag name, and the value is the index one." - "The last element has to be an EOS tag. Read the doc for more detail about EOS tag." + "The last element has to be an EOS tag. Read the documentation for more details about the EOS tag." 
), default=None, type=str, diff --git a/deepparse/cli/test.py b/deepparse/cli/test.py index 648e3dc7..853505c0 100644 --- a/deepparse/cli/test.py +++ b/deepparse/cli/test.py @@ -108,7 +108,7 @@ def main(args=None) -> None: def get_parser() -> argparse.ArgumentParser: - """Return ArgumentParser for the cli.""" + """Return ArgumentParser for the CLI.""" parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) diff --git a/deepparse/comparer/addresses_comparer.py b/deepparse/comparer/addresses_comparer.py index c7dabf06..8ed24400 100644 --- a/deepparse/comparer/addresses_comparer.py +++ b/deepparse/comparer/addresses_comparer.py @@ -10,8 +10,9 @@ @dataclass(frozen=True) class AddressesComparer: """ - Address comparer to compare addresses with each other and retrieves the differences between them. The addresses - are parsed using an address parser based on one of the seq2seq pretrained networks, either with fastText or BPEmb. + Address comparer is used to compare addresses with each other and retrieve the differences between them. The + addresses are parsed using an address parser based on one of the seq2seq pretrained networks, either with + FastText or BPEmb. The address comparer can compare already parsed addresses. The address parser first recomposes the raw addresses then suggest its own tags; then it makes a comparison with the tags of the source parsing and the @@ -43,12 +44,12 @@ def compare_tags( raw address from the parsing, AddressParser generates tags and compares the two parsings. Args: - addresses_tags_to_compare (Union[List[tuple], List[List[tuple]]]): list of tuple that contains + addresses_tags_to_compare (Union[List[tuple], List[List[tuple]]]): list of tuples that contain the tags for the address components from the source. Can compare multiple parsings if passed as a list of tuples. - with_prob (Union[None, bool]): An option flag to either or not include prob in the comparison report. 
- The probabilities are not compared but only included in the report. - The default value is None, which means not taking into account. + with_prob (Union[None, bool]): An option flag to either or not include probabilities in the comparison + report. The probabilities are not compared but only included in the report. The default value is + ``None``, which means not taking into account. Return: Either a :class:`~FormattedComparedAddressesTags` or a list of :class:`~FormattedComparedAddressTags` @@ -123,15 +124,14 @@ def compare_raw( ) -> List[FormattedComparedAddressesRaw]: """ Compare a list of raw addresses together. It starts by parsing the addresses - with the setted parser and then return the differences between the addresses components - retrieved with our model. + with the parser and then return the differences between the parsed address components of the two addresses. Args: raw_addresses_to_compare (Union[Tuple[str], List[Tuple[str]]]): List of strings that represent raw addresses to compare. - with_prob (Union[None, bool]): An option flag to either or not include prob in the comparison report. - The probabilities are not compared but only included in the report. - The default value is None, which means not taking into account. + with_prob (Union[None, bool]): An option flag to either or not include probabilities in the comparison + report. The probabilities are not compared but only included in the report. The default value is + ``None``, which means not taking into account. Return: Either a :class:`~FormattedComparedAddressesRaw` or a list of @@ -184,8 +184,8 @@ def compare_raw( @staticmethod def _format_comparisons_dict(comparison_tuples: List, origin_tuple: Tuple[str, str], with_prob: bool) -> List[Dict]: """ - Return formatted dict that contains two FormattedParsedAddress and the origin name tuple and output it in a - dict format. 
+ Return formatted dictionary that contains two FormattedParsedAddress and the origin name tuple and output it + in a dictionary format. """ list_of_formatted_comparisons_dict = [] diff --git a/deepparse/comparer/formatted_compared_addresses.py b/deepparse/comparer/formatted_compared_addresses.py index f90f699e..b96f15b8 100644 --- a/deepparse/comparer/formatted_compared_addresses.py +++ b/deepparse/comparer/formatted_compared_addresses.py @@ -110,16 +110,16 @@ def _comparison_report_builder(self) -> str: @abstractmethod def _get_probs(self) -> Dict: """ - To get the tags from the parsing with their associated probabilities, the method needs to be implemented in each - class because they don't use the probabilities the same way. + A method to get the tags from the parsing with their associated probabilities, it needs to be implemented in + each class because they don't use the probabilities the same way. """ @staticmethod def _get_color_diff(string_one: str, string_two: str, highlight: bool = False) -> str: """ - Compare two strings and determine the difference between the two. The differences are noted with colour code; - if the first string has more elements than the second one, it will be noted in one colour; on the contrary, - if the other string has something more, it will have a different colour notation. + Compare two strings and determine the difference between the two. The differences are highlighted with a + coloured scheme; if the first string has more elements than the second one, it will be noted in one colour; + on the contrary, if the other string has something more, it will have a different colour notation. Args: string_one (str): The first string to compare. @@ -129,7 +129,7 @@ def _get_color_diff(string_one: str, string_two: str, highlight: bool = False) - two strings are spaces. The default is False. 
Notes: - the method is colorblind-friendly, which means that the output will be + The method is colorblind-friendly, which means that the output will be in colours that minimize the risk that a user cannot see the difference as defined here https://davidmathlogic.com/colorblind/#%23D81B60-%231E88E5-%23FFC107-%23004D40. @@ -137,7 +137,7 @@ def _get_color_diff(string_one: str, string_two: str, highlight: bool = False) - If the first string has something more than the second one, it will be indicated in blue. If the second string has something more than the first one, it will be noted in yellow. - It uses SequenceMatcher to get the different codes to be later converted into colour codes. + It uses SequenceMatcher to convert the different codes into colour codes later. Return: str: The two strings joined, and the differences are noted in colour codes @@ -176,13 +176,16 @@ def _get_tags_diff_color( verbose: bool = True, ) -> str: """ - Print the output of the string with colour codes that represent the differences between the two strings. + Print the output of the string with colour codes representing the differences between the two strings. Args: - name_one (str, optional) : Name associated with first color. The default value is the first address. - name_two (str, optional) : Name associated with the second colour. The default value is the second address. - verbose (bool, optional): If True, it will print a presentation of the colours and what they mean. - The default value is True. + name_one (str, optional) : Name associated with first color. The default value is ``"first address"``, + namely the first address of the two. We recommend using a whitespace characters between the words. + name_two (str, optional) : Name associated with the second colour. The default value is + ``"second address"``, namely the second address of the two. We recommend using a whitespace + characters between the words. 
+ verbose (bool, optional): If True, it will print a presentation of the colours and their meaning. + The default value is ``True``. """ @@ -220,7 +223,7 @@ def _get_tags_diff_color( def _bool_address_tags_are_the_same(self, parsed_addresses: Union[List[List[tuple]], List[tuple]]) -> List[tuple]: """ - Compare addresses components and put the differences in a dictionary where the keys are the + Compare the components between two addresses and put the differences in a dictionary where the keys are the names of the addresses components, and the values are the values of the addresses component. Args: @@ -228,7 +231,7 @@ def _bool_address_tags_are_the_same(self, parsed_addresses: Union[List[List[tupl address components' names for the parsed addresses. Return: - List[tuple]: List of tuples that contain all addresses components that differ from each other. + List[tuple]: List of tuples containing the components that differ from the two addresses. """ unique_address_component_names = self._unique_addresses_component_names(parsed_addresses) diff --git a/deepparse/comparer/formatted_compared_addresses_raw.py b/deepparse/comparer/formatted_compared_addresses_raw.py index de94c05d..860f268b 100644 --- a/deepparse/comparer/formatted_compared_addresses_raw.py +++ b/deepparse/comparer/formatted_compared_addresses_raw.py @@ -12,8 +12,8 @@ class FormattedComparedAddressesRaw(FormattedComparedAddresses): def _get_probs(self) -> Dict: """ - To get the tags from the parsing with their associated probabilities, the method needs to be implemented in each - class because they don't use the probabilities the same way. + Method to get the tags from the parsing with their associated probabilities, a method needs to be + implemented in each class because they don't use the probabilities the same way. 
""" return { self.first_address.raw_address: self.first_address.address_parsed_components, diff --git a/deepparse/comparer/formatted_compared_addresses_tags.py b/deepparse/comparer/formatted_compared_addresses_tags.py index 775335d8..c071194d 100644 --- a/deepparse/comparer/formatted_compared_addresses_tags.py +++ b/deepparse/comparer/formatted_compared_addresses_tags.py @@ -12,7 +12,7 @@ class FormattedComparedAddressesTags(FormattedComparedAddresses): def _get_probs(self) -> Dict: """ - To get the tags from the parsing with their associated probabilities, the method needs to be implemented in each + To get the tags from the parsing with their associated probabilities, A method needs to be implemented in each class because they don't use the probabilities the same way. """ return { diff --git a/deepparse/converter/data_padder.py b/deepparse/converter/data_padder.py index e21c32cb..ef434381 100644 --- a/deepparse/converter/data_padder.py +++ b/deepparse/converter/data_padder.py @@ -22,13 +22,15 @@ def pad_word_embeddings_batch( Tuple[Tuple[torch.Tensor, List, torch.Tensor], torch.Tensor], ]: """ - Method to pad a batch of word embeddings sequences and their targets to the length of the longest one. + A method to apply padding to a batch of word embeddings sequences and their targets to the length of the + longest one. + Args: batch (list[Tuple[list, list]]): a list of tuples where the first element is a list of word embeddings (the sequence) and the second is a list of targets. teacher_forcing (bool): if True, the padded target vectors are returned twice, once with the sequences and their lengths, and once on their own. This enables - the use of teacher forcing during the training of sequence to sequence models. + the use of teacher forcing during the training of sequence-to-sequence models. 
Return: A tuple of two elements: - a tuple containing either a @@ -52,7 +54,7 @@ def pad_word_embeddings_batch( def pad_word_embeddings_sequences(self, sequences_batch: List) -> Tuple[torch.Tensor, List]: """ - Method to pad a batch of word embeddings sequences. + A method to apply batch padding to sequences of word embeddings. Args: sequences_batch (list): a tuple containing lists of word embeddings (the sequences) Return: @@ -81,15 +83,17 @@ def pad_subword_embeddings_batch( Tuple[Tuple[torch.Tensor, List, List, torch.Tensor], torch.Tensor], ]: """ - Method to pad a batch of subword embeddings sequences and their targets to the length of the longest one. + A method to apply padding to a batch of subword embeddings sequences and their targets to the length of the + longest one. + Args: batch (list[Tuple[Tuple[list, list], list]]): a list of tuples containing the two following elements: - - a tuple where the first element is a list of words represented as subword embeddings and the + - a tuple where the first element is a list of words represented as subword embeddings, and the second element is a list of the number of subword embeddings that each word is decomposed into. - a list of targets. teacher_forcing (bool): if True, the padded target vectors are returned twice, once with the sequences and their lengths, and once on their own. This enables - the use of teacher forcing during the training of sequence to sequence models. + the use of teacher forcing during the training of sequence-to-sequence models. Return: A tuple of two elements: - A tuple (``x``, ``y`` , ``z``). The element ``x`` is a :class:`~torch.Tensor` of @@ -122,9 +126,9 @@ def pad_subword_embeddings_sequences( self, sequences_batch: List[Tuple[List, List]] ) -> Tuple[torch.Tensor, List, List]: """ - Method to pad a batch of subword embeddings sequences. + A method to apply padding to a batch of subword embeddings sequences. 
Args: - sequences_batch (list[Tuple[list, list]]): a list of tuple containing tuples of two elements: + sequences_batch (list[Tuple[list, list]]): a list of tuples containing tuples of two elements: - a list of lists representing words as lists of subword embeddings. - a list of the number of subword embeddings that each word is decomposed into. Return: @@ -158,7 +162,7 @@ def pad_subword_embeddings_sequences( def pad_targets(self, target_batch: List) -> torch.Tensor: """ - Method to pad a batch of target indices to the longest one. + A method to apply padding to a batch of target indices to the longest one. Args: target_batch (list): a tuple containing lists of target indices. Return: @@ -170,7 +174,7 @@ def pad_targets(self, target_batch: List) -> torch.Tensor: def _extract_word_embeddings_sequences_and_target(self, batch: List[Tuple[List, List]]) -> Tuple[List, List]: """ - Method that takes a list of word embedding sequences and targets and zips the + A method that takes a list of word embedding sequences and targets and zips the sequences together and the targets together. """ sorted_batch = sorted(batch, key=lambda x: len(x[0]), reverse=True) diff --git a/deepparse/converter/data_processor.py b/deepparse/converter/data_processor.py index dd45e25a..8b708cdc 100644 --- a/deepparse/converter/data_processor.py +++ b/deepparse/converter/data_processor.py @@ -37,7 +37,7 @@ def process_for_inference( self, addresses: List[str] ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor, List, torch.Tensor]]: """ - Method to vectorize addresses for inference. + A method to vectorize the addresses for inference. Args: addresses (List[str]): a list of addresses Return: @@ -60,13 +60,13 @@ def process_for_training( ], ]: """ - Method to vectorize addresses and tags for training. + A method to vectorize the addresses and the tags for training. 
        Args:
            addresses_and_targets (List[Tuple[str, List[str]]]): a list of tuples where the first element is an
                address and the second is a list of tags.
            teacher_forcing (bool): if True, the padded target vectors are returned twice,
                once with the sequences and their lengths, and once on their own. This enables
-                the use of teacher forcing during the training of sequence to sequence models.
+                the use of teacher forcing during the training of sequence-to-sequence models.
        Return:
            A padded batch. Check out :meth:`~deepparse.converter.DataPadder.pad_word_embeddings_batch` and
            :meth:`~DataPadder.pad_subword_embeddings_batch` for more details.
diff --git a/deepparse/converter/target_converter.py b/deepparse/converter/target_converter.py
index c55e4017..34825c5c 100644
--- a/deepparse/converter/target_converter.py
+++ b/deepparse/converter/target_converter.py
@@ -3,7 +3,7 @@
 
 class TagsConverter:
     """
-    Class to define logic of tag to idx conversion and vice versa.
+    Class to define the logic of tag to idx conversion and vice versa.
 
     Args:
         tags_to_idx (Dict): A dictionary where the keys are the tags (e.g. StreetNumber) and the values are
@@ -16,7 +16,8 @@ def __init__(self, tags_to_idx: Dict) -> None:
 
     def __call__(self, key: Union[str, int]) -> int:
         """
-        If str convert from a tag to idx and if int convert from a idx to a tag using the convert table.
+        If it is a ``str``, it will convert from a "tag" to an IDX, and if ``int``, it will convert from an IDX to
+        a "tag" using the convert table.
""" if isinstance(key, str): return self.tags_to_idx[key] diff --git a/deepparse/dataset_container/dataset_container.py b/deepparse/dataset_container/dataset_container.py index 9ffd7588..0448ac5b 100644 --- a/deepparse/dataset_container/dataset_container.py +++ b/deepparse/dataset_container/dataset_container.py @@ -21,28 +21,28 @@ class DatasetContainer(Dataset, ABC): For a training container, it validates the following: - - all addresses are not None value, - - all addresses are not empty, - - all addresses are not whitespace string, - - all tags are not empty, if data is a list of tuple (``[('an address', ['a_tag', 'another_tag']), ...]``), and + - no address is a ``None`` value, + - no address is empty, + - no address is composed of only whitespace, + - no tags list is empty, if data is a list of tuple (``[('an address', ['a_tag', 'another_tag']), ...]``), and - if the addresses (whitespace-split) are the same length as their respective tags list. While for a predict container (unknown prediction tag), it validates the following: - - all addresses are not None, - - all addresses are not empty, and - - all addresses are not whitespace string. + - no address is a ``None`` value, + - no address is empty, and + - no address is composed of only whitespace. Args: is_training_container (bool): Either or not, the dataset container is a training container. This will determine the dataset validation test we apply to the dataset. That is, a predict dataset doesn't include tags. - The default value is true. + The default value is ``True``. """ @abstractmethod def __init__(self, is_training_container: bool = True) -> None: """ - Need to be defined by the child class. + The method to init the class. It needs to be defined by the child's class. """ self.data = None self.is_training_container = is_training_container @@ -59,7 +59,7 @@ def __getitem__( - it can be a list of string items (e.g. a list of addresses (str)), or - it can be a unique string item (e.g. one address). 
- If the DatasetContainer is a training one: + If the DatasetContainer is a "training" one: - it can be a list of tuple (str, list) items, namely a list of parsed examples (e.g. an address with the tags), or @@ -114,12 +114,14 @@ def _training_validation(self) -> None: if not self._data_tags_is_same_len_then_address(): print( - f"Some addresses (whitespace-split) and the associated tags are not the same len. " - f"If you are using a CSVDatasetContainer, consider using the tag_seperator_reformat_fn argument." + f"Some addresses (whitespace-split) and the associated tags are not the same length. " + f"If you use a CSVDatasetContainer, consider using the tag_seperator_reformat_fn argument." f"Here is the report of those cases where len differ to help you out:\n" f"{self._data_tags_not_the_same_len_diff()}" ) - raise DataError("Some addresses (whitespace-split) and the tags associated with them are not the same len.") + raise DataError( + "Some addresses (whitespace-split) and the tags associated with them are not the same length." + ) def _data_is_list_of_tuple(self) -> bool: """ @@ -157,28 +159,28 @@ class PickleDatasetContainer(DatasetContainer): The dataset needs to be a list of tuples where the first element of each tuple is the address (a string), and the second is a list of the expected tag to predict (e.g. ``[('an address', ['a_tag', 'another_tag']), ...]``). - The len of the tags needs to be the same as the len of the address when whitespace split. + The length of the tags needs to be the same as the length of the address when the whitespace-split is used. 
For a training container, the validation tests applied on the dataset are the following: - - all addresses are not None value, - - all addresses are not empty, - - all addresses are not whitespace string, - - all tags are not empty, if data is a list of tuple (``[('an address', ['a_tag', 'another_tag']), ...]``), and + - no address is a ``None`` value, + - no address is empty, + - no address is composed of only whitespace, + - no tags list is empty, if data is a list of tuple (``[('an address', ['a_tag', 'another_tag']), ...]``), and - if the addresses (whitespace-split) are the same length as their respective tags list. While for a predict container (unknown prediction tag), the validation tests applied on the dataset are the following: - - all addresses are not None value, - - all addresses are not empty, and - - all addresses are not whitespace string. + - no address is a ``None`` value, + - no address is empty, and + - no address is composed of only whitespace. Args: data_path (str): The path to the pickle dataset file. is_training_container (bool): Either or not, the dataset container is a training container. This will determine the dataset validation test we apply to the dataset. That is, a predict dataset doesn't include tags. - The default value is true. + The default value is ``True``. """ @@ -202,25 +204,26 @@ def _test_predict_container_is_list_of_tuple(self) -> bool: class CSVDatasetContainer(DatasetContainer): """ - CSV dataset container that imports a CSV of addresses. If the dataset is a predict one, it needs to have at least - one column with some addresses. If the dataset is a training one (with prediction tags), it needs to have at + CSV dataset container that imports a CSV of addresses. If the dataset is a predict one, it must have at least + one column with some addresses. If the dataset is a training one (with prediction tags), it must have at least two columns, one with some addresses and another with a list of tags for each address. 
After loading the CSV dataset, some tests will be applied depending on its type. For a training container, the validation tests applied on the dataset are the following: - - all addresses are not None value, - - all addresses are not empty, - - all addresses are not whitespace string, and + - no address is a ``None`` value, + - no address is empty, + - no address is composed of only whitespace, + - no tags list is empty, if data is a list of tuple (``[('an address', ['a_tag', 'another_tag']), ...]``), and - if the addresses (whitespace-split) are the same length as their respective tags list. While for a predict container (unknown prediction tag), the validation tests applied on the dataset are the following: - - all addresses are not None value, - - all addresses are not empty, and - - all addresses are not whitespace string. + - no address is a ``None`` value, + - no address is empty, and + - no address is composed of only whitespace. Args: @@ -231,7 +234,7 @@ class CSVDatasetContainer(DatasetContainer): of exactly two elements: addresses and tags. is_training_container (bool): Either or not, the dataset container is a training container. This will determine the dataset validation test we apply to the dataset. That is, a predict dataset doesn't include tags. - The default value is true. + The default value is ``True``. separator (str): The CSV columns separator to use. By default, ``"\\t"``. tag_seperator_reformat_fn (Callable, optional): A function to parse a tags string and return a list of address tags. For example, if the tag column is a former Python list saved with pandas, the characters ``]`` @@ -240,7 +243,7 @@ class CSVDatasetContainer(DatasetContainer): That is, it removes the ``[],`` characters and splits the sequence at each comma (``","``). csv_reader_kwargs (dict, optional): Keyword arguments to pass to pandas ``read_csv`` use internally. 
            By default, the ``data_path`` is passed along with our default ``sep`` value ( ``"\\t"``) and the ``"utf-8"`` encoding
-            format. However, this can be overridden by using this argument again.
+            format. However, this can be overridden by using this argument again.
    """
 
    def __init__(
@@ -256,13 +259,13 @@ def __init__(
        if is_training_container:
            if isinstance(column_names, str):
                raise ValueError(
-                    "When the dataset is a training container, the column names should be a list of column name."
+                    "When the dataset is a training container, the column names should be a list of column names."
                )
            if len(column_names) != 2:
                raise ValueError("When the dataset is a training container, two column names must be provided.")
        else:  # It means it is a predict container
            if isinstance(column_names, str):
-                # We transform the str into a list to assess is len
+                # We transform the str into a list to assess its length
                column_names = [column_names]
            if len(column_names) != 1:
                raise ValueError("When the dataset is a predict container, one column name must be provided.")
@@ -302,7 +305,7 @@ class ListDatasetContainer(DatasetContainer):
            identical as the :class:`~deepparse.dataset_container.PickleDatasetContainer`.
        is_training_container (bool): Either or not, the dataset container is a training container. This will determine
            the dataset validation test we apply to the dataset. That is, a predict dataset doesn't include tags.
-            The default value is true.
+            The default value is ``True``.
    """
 
    def __init__(self, data: List, is_training_container: bool = True) -> None:
diff --git a/deepparse/dataset_container/tools.py b/deepparse/dataset_container/tools.py
index 133522aa..5ef211c6 100644
--- a/deepparse/dataset_container/tools.py
+++ b/deepparse/dataset_container/tools.py
@@ -13,9 +13,9 @@ def former_python_list(tags: str) -> List:
    Return:
        A list of the parsed tag set.
    """
-    # We remove the [ and ] of the list.
+    # We remove the ``"["`` and ``"]"`` from the list.
    # Then, we split each element using a comma as a separator.
-    # Finally, in some cases, the element are separated by a comma (e.g. element1,element2)
+    # Finally, in some cases, the elements are separated by a comma (e.g. element1,element2)
    # or a comma and a whitespace (e.g. element1, element2), we strip the whitespace on all tags to
    # remove the trailing whitespace when a coma and a whitespace separate elements.
    # To fix https://github.com/GRAAL-Research/deepparse/issues/124.
diff --git a/deepparse/download_tools.py b/deepparse/download_tools.py
index dd6759d1..035c3a47 100644
--- a/deepparse/download_tools.py
+++ b/deepparse/download_tools.py
@@ -35,9 +35,9 @@
 
 def download_fasttext_magnitude_embeddings(cache_dir: str, verbose: bool = True, offline: bool = False) -> str:
    """
-    Function to download the magnitude pretrained fastText model.
+    Function to download the magnitude pretrained FastText model.
 
-    Return the full path to the fastText embeddings.
+    Return the full path to the FastText embeddings.
    """
    os.makedirs(cache_dir, exist_ok=True)
 
@@ -48,7 +48,7 @@ def download_fasttext_magnitude_embeddings(cache_dir: str, verbose: bool = True,
    if not os.path.isfile(file_name) and not offline:
        if verbose:
            print(
-                "The fastText pretrained word embeddings will be download in magnitude format (2.3 GO), "
+                "The FastText pretrained word embeddings will be downloaded in magnitude format (2.3 GO), "
                "this process will take several minutes."
            )
        extension = extension + ".gz"
@@ -67,7 +67,7 @@ def download_weights(model_filename: str, saving_dir: str, verbose: bool = True)
    Args:
        model_filename: The network type (i.e. ``fasttext`` or ``bpemb``).
        saving_dir: The path to the saving directory.
-        verbose (bool): Either or not to be verbose during the download of a model. The default value is True.
+        verbose (bool): Either or not to be verbose during the download of a model. The default value is ``True``.
""" if verbose: print(f"Downloading the pre-trained weights for the network {model_filename}.") @@ -83,7 +83,7 @@ def download_weights(model_filename: str, saving_dir: str, verbose: bool = True) def download_from_public_repository(file_name: str, saving_dir: str, file_extension: str) -> None: """ - Simple function to download the content of a file from Deepparse public repository. + Simple function to download the content of a file from the Deepparse public repository. The repository URL string is `'https://graal.ift.ulaval.ca/public/deepparse/{}.{}'`` where the first bracket is the file name and the second is the file extension. """ @@ -97,7 +97,7 @@ def download_from_public_repository(file_name: str, saving_dir: str, file_extens def download_models(saving_cache_path: Union[Path, None] = None) -> None: """ - Function to download all the pretrained models. It will download all the models checkpoint and version file. + Function to download all the pretrained models. It will download all the model's checkpoints and version files. Args: saving_cache_path: The path to the saving cache directory for the specified model. @@ -129,7 +129,7 @@ def download_model( elif "bpemb" in model_type: BPEmb( lang="multi", vs=100000, dim=300, cache_dir=saving_cache_path - ) # The class manage the download of the pretrained words embedding + ) # The class manages the download of the pretrained words embedding model_type_filename = MODEL_MAPPING_CHOICES[model_type] model_path = os.path.join(saving_cache_path, f"{model_type_filename}.ckpt") @@ -165,15 +165,15 @@ def latest_version(model: str, cache_path: str, verbose: bool) -> bool: except HTTPError as exception: # HTTP connection error handling if HTTP_CLIENT_ERROR_STATUS_CODE <= exception.response.status_code < NEXT_RANGE_STATUS_CODE: - # Case where Deepparse server is down. + # Case where the Deepparse server is down. if verbose: warnings.warn( - f"We where not able to verify the cached model in the cache directory {cache_path}. 
It seems like"
-                    f"Deepparse server is not available at the moment. We recommend to attempt to verify "
+                    f"We could not verify the cached model in the cache directory {cache_path}. It seems like "
+                    f"Deepparse server is not available at the moment. We recommend attempting to verify "
                    f"the model version another time using our download CLI function.",
                    category=RuntimeWarning,
                )
-            # The is_lastest_version is set to True even if we were not able to validate the version. We do so not to
+            # The is_latest_version is set to True even if we cannot validate the version. We do so not to
            # block the rest of the process.
            is_latest_version = True
        else:
@@ -182,15 +182,15 @@ def latest_version(model: str, cache_path: str, verbose: bool) -> bool:
            raise
    except MaxRetryError:
        # Case where the user does not have an Internet connection. For example, one can run it in a
-        # Docker container not connected to the Internet.
+        # Docker container that is not connected to the Internet.
        if verbose:
            warnings.warn(
-                f"We where not able to verify the cached model in the cache directory {cache_path}. It seems like"
-                f"you are not connected to the Internet. We recommend to verify if you have the latest using our "
+                f"We could not verify the cached model in the cache directory {cache_path}. It seems like "
+                f"you are not connected to the Internet. We recommend verifying if you have the latest using our "
                f"download CLI function.",
                category=RuntimeWarning,
            )
-        # The is_lastest_version is set to True even if we were not able to validate the version. We do so not to
+        # The is_latest_version is set to True even if we cannot validate the version. We do so not to
        # block the rest of the process.
is_latest_version = True finally: @@ -203,7 +203,7 @@ def latest_version(model: str, cache_path: str, verbose: bool) -> bool: # pylint: disable=pointless-string-statement FASTTEXT_COPYRIGHT_MIT_LICENSE = """ -The code below was copied from the fastText project, and has been modified for the purpose of this package. +The code below was copied from the FastText project, and has been modified for the purpose of this package. COPYRIGHT @@ -237,11 +237,11 @@ def latest_version(model: str, cache_path: str, verbose: bool) -> bool: def download_fasttext_embeddings(cache_dir: str, verbose: bool = True, offline: bool = False) -> str: """ - Simpler version of the download_model function from fastText to download pretrained common-crawl - vectors from fastText's website https://fasttext.cc/docs/en/crawl-vectors.html and save it in the + Simpler version of the download_model function from FastText to download pretrained common-crawl + vectors from FastText's website https://fasttext.cc/docs/en/crawl-vectors.html and save it in the saving directory (saving_dir). - Return the full path to the fastText embeddings. + Return the full path to the FastText embeddings. 
""" os.makedirs(cache_dir, exist_ok=True) @@ -258,21 +258,21 @@ def download_fasttext_embeddings(cache_dir: str, verbose: bool = True, offline: shutil.copyfileobj(f, f_out) os.remove(os.path.join(cache_dir, gz_file_name)) - return file_name_path # return the full path to the fastText embeddings + return file_name_path # return the full path to the FastText embeddings # Now use a saving path and don't return a bool def download_gz_model(gz_file_name: str, saving_path: str, verbose: bool = True) -> None: """ - Simpler version of the _download_gz_model function from fastText to download pretrained common-crawl - vectors from fastText's website https://fasttext.cc/docs/en/crawl-vectors.html and save it in the + Simpler version of the _download_gz_model function from FastText to download pretrained common-crawl + vectors from FastText's website https://fasttext.cc/docs/en/crawl-vectors.html and save it in the saving directory (saving_path). """ url = f"https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/{gz_file_name}" if verbose: print( - "The fastText pretrained word embeddings will be downloaded (6.8 GO), " + "The FastText pretrained word embeddings will be downloaded (6.8 GO), " "this process will take several minutes." ) _download_file(url, saving_path, verbose=verbose) diff --git a/deepparse/embeddings_models/bpemb_embeddings_model.py b/deepparse/embeddings_models/bpemb_embeddings_model.py index 414bb78d..16abdb21 100644 --- a/deepparse/embeddings_models/bpemb_embeddings_model.py +++ b/deepparse/embeddings_models/bpemb_embeddings_model.py @@ -19,7 +19,7 @@ class BPEmbEmbeddingsModel(EmbeddingsModel): Params: cache_dir (str): Path to the cache directory to the embeddings' bin vector and the model. - verbose (bool): Wether or not to make the loading of the embeddings verbose. + verbose (bool): Whether or not to make the loading of the embeddings verbose. 
""" def __init__(self, cache_dir: str, verbose: bool = True) -> None: @@ -53,7 +53,7 @@ def no_ssl_verification(): Reference: https://gist.github.com/ChenTanyi/0c47652bd916b61dc196968bca7dad1d. - Will be removed when https://github.com/bheinzerling/bpemb/issues/63 is resolved. + It will be removed when https://github.com/bheinzerling/bpemb/issues/63 is resolved. """ opened_adapters = set() old_merge_environment_settings = requests.Session.merge_environment_settings diff --git a/deepparse/embeddings_models/embeddings_model_factory.py b/deepparse/embeddings_models/embeddings_model_factory.py index 6360f752..c5aae0e2 100644 --- a/deepparse/embeddings_models/embeddings_model_factory.py +++ b/deepparse/embeddings_models/embeddings_model_factory.py @@ -11,12 +11,12 @@ def create(self, embedding_model_type: str, cache_dir: str, verbose: bool = True """ Embeddings model creation method. Args: - embeddings_model_type (str): the type of the embeddings model to create. Valid options: + embedding_model_type (str): the type of the embeddings model to create. Valid options: - bpemb - fasttext - fasttext_magnitude cache_dir (str): Path to the cache directory where the embeddings model exists or is to be downloaded. - verbose (bool): Wether or not to make the loading of the embeddings verbose. + verbose (bool): Whether or not to make the loading of the embeddings verbose. Return: An :class:`~EmbeddingsModel` """ diff --git a/deepparse/embeddings_models/fasttext_embeddings_model.py b/deepparse/embeddings_models/fasttext_embeddings_model.py index b4b34e08..abb1e5a4 100644 --- a/deepparse/embeddings_models/fasttext_embeddings_model.py +++ b/deepparse/embeddings_models/fasttext_embeddings_model.py @@ -18,7 +18,7 @@ class FastTextEmbeddingsModel(EmbeddingsModel): Note: Since Windows uses ``spawn`` instead of ``fork`` during multiprocess (for the data loading pre-processing - ``num_worker`` > 0) we use the Gensim model, which takes more RAM (~10 GO) than the Fasttext one (~8 GO). 
+ ``num_worker`` > 0), we use the Gensim model, which takes more RAM (~10 GO) than the Fasttext one (~8 GO). It also takes a longer time to load. See here the `issue `_. """ @@ -39,7 +39,7 @@ def __call__(self, word: str) -> ndarray: word (str): Word to get vector. Return: - The fastText embedding for a word. + The FastText embedding for a word. """ return self.model[word] diff --git a/deepparse/embeddings_models/magnitude_embeddings_model.py b/deepparse/embeddings_models/magnitude_embeddings_model.py index f2d85720..0cd40ac6 100644 --- a/deepparse/embeddings_models/magnitude_embeddings_model.py +++ b/deepparse/embeddings_models/magnitude_embeddings_model.py @@ -7,7 +7,7 @@ class MagnitudeEmbeddingsModel(EmbeddingsModel): """ FastText embeddings network from `Enriching Word Vectors with Subword Information `_ using the magnitude mapping - (`_), which reduce memory footprint. + (`_), which reduces the memory footprint. Args: embeddings_path (str): Path to the bin embeddings vector (.bin). @@ -20,13 +20,13 @@ def __init__(self, embeddings_path: str, verbose: bool = True) -> None: def __call__(self, words: str) -> ndarray: """ - Callable method to get word vector of a complete address. + Callable method to get the word vector of a complete address. Args: words (str): Address to get vector for words. Return: - The fastText embedding for a list of words. + The FastText embedding for a list of words. """ - # we leverage the multiple word query which are faster than single word query + # We leverage the multiple-word query which is faster than a single word query return self.model.query(words.split()) diff --git a/deepparse/errors/data_error.py b/deepparse/errors/data_error.py index 20f41a9c..e829c06b 100644 --- a/deepparse/errors/data_error.py +++ b/deepparse/errors/data_error.py @@ -1,6 +1,6 @@ class DataError(Exception): """ - User error when data is not construct as expected. + User error occurs when the data structure is not as expected. 
""" def __init__(self, value: str) -> None: diff --git a/deepparse/errors/model_error.py b/deepparse/errors/model_error.py index 8ee4196a..889b26c1 100644 --- a/deepparse/errors/model_error.py +++ b/deepparse/errors/model_error.py @@ -1,6 +1,6 @@ class FastTextModelError(Exception): """ - User error when user uses a FastText-like model on an OS that does not support properly multithreading. + User error occurs when a user uses a FastText-like model on an OS that does not correctly support multithreading. """ def __init__(self, value: str) -> None: diff --git a/deepparse/errors/server_error.py b/deepparse/errors/server_error.py index 9e98dc02..903e5c13 100644 --- a/deepparse/errors/server_error.py +++ b/deepparse/errors/server_error.py @@ -1,6 +1,6 @@ class ServerError(Exception): """ - User error when Deepparse server is not responding. + User error occurs when the Deepparse server is not responding. """ def __init__(self, value: str) -> None: diff --git a/deepparse/metrics/accuracy.py b/deepparse/metrics/accuracy.py index ed8dbeeb..f1ca46af 100644 --- a/deepparse/metrics/accuracy.py +++ b/deepparse/metrics/accuracy.py @@ -2,8 +2,8 @@ from poutyne.framework.metrics import acc -def accuracy(pred: torch.Tensor, ground_truth: torch.Tensor) -> float: +def accuracy(predictions: torch.Tensor, ground_truths: torch.Tensor) -> float: """ Accuracy per tag. """ - return acc(pred.transpose(0, 1).transpose(-1, 1), ground_truth) + return acc(predictions.transpose(0, 1).transpose(-1, 1), ground_truths) diff --git a/deepparse/metrics/nll_loss.py b/deepparse/metrics/nll_loss.py index 4ea1042f..92cf5530 100644 --- a/deepparse/metrics/nll_loss.py +++ b/deepparse/metrics/nll_loss.py @@ -4,13 +4,13 @@ criterion = NLLLoss() -def nll_loss(pred: torch.Tensor, ground_truth: torch.Tensor) -> float: +def nll_loss(predictions: torch.Tensor, ground_truths: torch.Tensor) -> float: """ - NLL loss compute per tag. + NLL loss to compute loss per tag. 
""" loss = 0 - ground_truth = ground_truth.transpose(0, 1) - for i in range(pred.size(0)): - loss += criterion(pred[i], ground_truth[i]) + ground_truths = ground_truths.transpose(0, 1) + for i in range(predictions.size(0)): + loss += criterion(predictions[i], ground_truths[i]) return loss diff --git a/deepparse/network/bpemb_seq2seq.py b/deepparse/network/bpemb_seq2seq.py index 0195eb31..d08db831 100644 --- a/deepparse/network/bpemb_seq2seq.py +++ b/deepparse/network/bpemb_seq2seq.py @@ -10,22 +10,25 @@ class BPEmbSeq2SeqModel(Seq2SeqModel): """ - BPEmb Seq2Seq network, the best of the two model we propose, but takes more ``GPU``/``CPU`` resources. + BPEmb Seq2Seq network is the best of the two proposed models but takes more ``GPU``/``CPU`` resources. Args: cache_dir (str): The path to the cached directory to use for downloading (and loading) the model weights. - device (~torch.device): The device tu use for the prediction. - input_size (int): The input size of the encoder (i.e. the embeddings size). It will also be used to initialize - the internal embeddings network input size, hidden size and output dim. The default value is 300. - encoder_hidden_size (int): The size of the hidden layer(s) of the encoder. The default value is 1024. - encoder_num_layers (int): The number of hidden layers of the encoder. The default value is 1. - decoder_hidden_size (int): The size of the hidden layer(s) of the decoder. The default value is 1024. - decoder_num_layers (int): The number of hidden layers of the decoder. The default value is 1. - output_size (int): The size of the prediction layers (i.e. the number of tag to predict). - attention_mechanism (bool): Either or not to use attention mechanism. The default value is False. - verbose (bool): Turn on/off the verbosity of the model. The default value is True. - path_to_retrained_model (Union[str, None]): The path to the retrained model to use for the seq2seq. + device (~torch.device): The device to use for the prediction. 
+ input_size (int): The input size of the encoder (i.e. the size of the embedding). It will also be used to
+ initialize the internal embeddings network input size, hidden size and output dim. The default value is
+ ``300``.
+ encoder_hidden_size (int): The size of the hidden layer(s) of the encoder. The default value is ``1024``.
+ encoder_num_layers (int): The number of hidden layers of the encoder. The default value is ``1``.
+ decoder_hidden_size (int): The size of the hidden layer(s) of the decoder. The default value is ``1024``.
+ decoder_num_layers (int): The number of hidden layers of the decoder. The default value is ``1``.
+ output_size (int): The size of the prediction layers (i.e. the number of tags to predict). The default value is
+ ``9``.
+ attention_mechanism (bool): Either or not to use the attention mechanism. The default value is ``False``.
+ verbose (bool): Turn on/off the verbosity of the model. The default value is ``True``.
+ path_to_retrained_model (Union[str, None]): The path to the retrained model to use for the seq2seq. The default
+ value is ``None``.
"""

def __init__(
@@ -92,9 +95,10 @@ def forward(
to_predict (~torch.Tensor): The elements to predict the tags.
decomposition_lengths (list) : The lengths of the decomposed words of the batch elements (since packed).
lengths (list) : The lengths of the batch elements (since packed).
- target (~torch.LongTensor) : The target of the batch element, use only when we retrain the model since we do
+ target (~torch.LongTensor) : The target of the batch element, used only when we retrain the model since
+ we do
`teacher forcing `_.
- Default value is None since we mostly don't have the target except for retrain.
+ The default value is ``None`` since we mostly don't have the target except for retraining.

Return:
A Tensor of the predicted sequence.
""" diff --git a/deepparse/network/decoder.py b/deepparse/network/decoder.py index f0c347ab..c434344b 100644 --- a/deepparse/network/decoder.py +++ b/deepparse/network/decoder.py @@ -11,7 +11,7 @@ class Decoder(nn.Module): """ - Decoder module that use a LSTM to decode a previously encoded sequence and a linear layer to map + Decoder module that uses a LSTM to decode a previously encoded sequence and a linear layer to map the decoded sequence tags. Args: @@ -33,7 +33,7 @@ def __init__( super().__init__() self.attention_mechanism = attention_mechanism if attention_mechanism: - # Since layer also have attention mechanism + # Since layer also has attention mechanism self.hidden_size = hidden_size input_size = input_size + hidden_size self._attention_mechanism_set_up() diff --git a/deepparse/network/embedding_network.py b/deepparse/network/embedding_network.py index 31721569..297909c1 100644 --- a/deepparse/network/embedding_network.py +++ b/deepparse/network/embedding_network.py @@ -1,7 +1,7 @@ # Bug with PyTorch source code makes torch.tensor as not callable for pylint. # pylint: disable=not-callable -# temporary fix for _forward_unimplemented for PyTorch 1.6 https://github.com/pytorch/pytorch/issues/42305 +# Temporary fix for _forward_unimplemented for PyTorch 1.6 https://github.com/pytorch/pytorch/issues/42305 # pylint: disable=W0223 from typing import Tuple, List @@ -13,14 +13,15 @@ class EmbeddingNetwork(nn.Module): """ - Embedding Network to represent the address components byte-pair embedding representation using a LSTM. + Embedding Network to represent the address components byte-pair embedding representation using an LSTM. Args: input_size (int): The input size of the LSTM. hidden_size (int): The hidden size of the LSTM. - num_layers (int): The number of layer of the LSTM. Default is one (1) layer. - maxpool (bool): Either or not to add a maximum pooling layer after the embedding composition. Default is false. 
- maxpool_kernel_size (int): The kernel size of the maximum pooling layer. Default is three (3). + num_layers (int): The number of layers of the LSTM. The default value is ``1``, namely one layer. + maxpool (bool): Either or not to add a maximum pooling layer after the embedding composition. The default + value is ``False``. + maxpool_kernel_size (int): The kernel size of the maximum pooling layer. The default value is ``3``. """ def __init__( @@ -73,7 +74,7 @@ def forward(self, to_predict: torch.Tensor, decomposition_lengths: Tuple[List]) for i in range(to_predict.size(0)): lengths = [] - # reorder decomposition, could use a transpose but take a LOT (like a LOT) of memory + # Reorder decomposition, could use a transpose but take a LOT (like a LOT) of memory for decomposition_length in decomposition_lengths: lengths.append(decomposition_length[i]) @@ -86,15 +87,15 @@ def forward(self, to_predict: torch.Tensor, decomposition_lengths: Tuple[List]) packed_output, _ = self.model(packed_sequence) - # pad packed the output to be applied later on in the projection layer + # Pad packed the output to be applied later on in the projection layer. padded_output, padded_output_lengths = pad_packed_sequence(packed_output, batch_first=True) - # filling the embedding by idx + # Filling the embedding by IDX. word_context = torch.zeros(padded_output.size(0), padded_output.size(2), device=device) for j in range(batch_size): word_context[j] = padded_output[j, padded_output_lengths[j] - 1, :] - # projection layer from dim 600 to 300 + # Projection layer from dim 600 to 300. 
projection_output = self.projection_layer(word_context) if self.maxpooling_layer is not None: diff --git a/deepparse/network/encoder.py b/deepparse/network/encoder.py index 5fafb917..e275d875 100644 --- a/deepparse/network/encoder.py +++ b/deepparse/network/encoder.py @@ -1,4 +1,4 @@ -# temporary fix for _forward_unimplemented for torch 1.6 https://github.com/pytorch/pytorch/issues/42305 +# Temporary fix for _forward_unimplemented for torch 1.6 https://github.com/pytorch/pytorch/issues/42305 # pylint: disable=W0223 from typing import Tuple, List @@ -12,12 +12,12 @@ class Encoder(nn.Module): """ - Encoder module that use a LSTM to encode a sequence. + Encoder module that uses an LSTM to encode a sequence. Args: input_size (int): The input size of the encoder. hidden_size (int): The hidden size of the encoder. - num_layers (int): The number of layer to the encoder. + num_layers (int): The number of layers to the encoder. """ def __init__(self, input_size: int, hidden_size: int, num_layers: int) -> None: diff --git a/deepparse/network/fasttext_seq2seq.py b/deepparse/network/fasttext_seq2seq.py index dd08059a..34ae72e9 100644 --- a/deepparse/network/fasttext_seq2seq.py +++ b/deepparse/network/fasttext_seq2seq.py @@ -9,22 +9,24 @@ class FastTextSeq2SeqModel(Seq2SeqModel): """ - FastText Seq2Seq network, the lightest of the two model we propose (in ``GPU``/``CPU`` consumption) for a little + FastText Seq2Seq network, the lightest of the two models we propose (in ``GPU``/``CPU`` consumption) for a little less accuracy. Args: cache_dir (str): The path to the cached directory to use for downloading (and loading) the model weights. device (~torch.device): The device tu use for the prediction. - input_size (int): The input size of the encoder (i.e. the embeddings size). The default value is 300. - encoder_hidden_size (int): The size of the hidden layer(s) of the encoder. The default value is 1024. - encoder_num_layers (int): The number of hidden layers of the encoder. 
The default value is 1. - decoder_hidden_size (int): The size of the hidden layer(s) of the decoder. The default value is 1024. - decoder_num_layers (int): The number of hidden layers of the decoder. The default value is 1. - output_size (int): The size of the prediction layers (i.e. the number of tag to predict). - attention_mechanism (bool): Either or not to use attention mechanism. The default value is False. - verbose (bool): Turn on/off the verbosity of the model. The default value is True. - path_to_retrained_model (Union[str, None]): The path to the retrained model to use for the seq2seq. + input_size (int): The input size of the encoder (i.e. the size of the embedding). The default value is ``300``. + encoder_hidden_size (int): The size of the encoder's hidden layer(s). The default value is ``1024``. + encoder_num_layers (int): The number of hidden layers of the encoder. The default value is ``1``. + decoder_hidden_size (int): The size of the decoder's hidden layer(s). The default value is ``1024``. + decoder_num_layers (int): The number of hidden layers of the decoder. The default value is ``1``. + output_size (int): The size of the prediction layers (i.e. the number of tags to predict). The default value + is ``9``. + attention_mechanism (bool): Either or not to use the attention mechanism. The default value is ``False``. + verbose (bool): Turn on/off the verbosity of the model. The default value is ``True``. + path_to_retrained_model (Union[str, None]): The path to the retrained model to use for the seq2seq. The default + value is ``None``. """ def __init__( @@ -88,7 +90,7 @@ def forward( lengths (list) : The lengths of the batch elements (since packed). target (~torch.LongTensor) : The target of the batch element, use only when we retrain the model since we do `teacher forcing `_. - Default value is None since we mostly don't have the target except for retrain. + The default value is ``None`` since we mostly don't have the target except for retrain. 
Return: A Tensor of the predicted sequence. diff --git a/deepparse/network/model_factory.py b/deepparse/network/model_factory.py index 4893a9c7..a107ab07 100644 --- a/deepparse/network/model_factory.py +++ b/deepparse/network/model_factory.py @@ -8,7 +8,7 @@ class ModelFactory: """ - A factory for the creation of neural network models that predict the tags from addresses + A factory for creating neural network models that predict the tags from addresses. """ def create( @@ -32,12 +32,14 @@ def create( - bpemb cache_dir (str): The path to the cached directory to use for downloading (and loading) the model weights. - device (~torch.device): The device tu use for the prediction. - output_size (int): The size of the prediction layers (i.e. the number of tag to predict). - attention_mechanism (bool): Either or not to use attention mechanism. The default value is False. - path_to_retrained_model (Union[str, None]): The path to the retrained model to use for the seq2seq. - offline (bool): Wether or not the model is an offline or an online. - verbose (bool): Turn on/off the verbosity of the model. The default value is True. + device (~torch.device): The device to use for the prediction. + output_size (int): The size of the prediction layers (i.e. the number of tags to predict). The default + value is ``9``. + attention_mechanism (bool): Either or not to use the attention mechanism. The default value is ``False``. + path_to_retrained_model (Union[str, None]): The path to the retrained model to use for the seq2seq. The + default value is ``None``. + offline (bool): Whether or not the model is an offline or an online. The default value is ``False``. + verbose (bool): Turn on/off the verbosity of the model. The default value is ``True``. Return: A :class:`~Seq2SeqModel`. @@ -69,7 +71,7 @@ def create( else: raise NotImplementedError( f""" - There is no {model_type} network implemented. model_type should be either fasttext or bpemb + There is no {model_type} network implemented. 
model_type should be either "fasttext" or "bpemb". """ ) diff --git a/deepparse/network/seq2seq.py b/deepparse/network/seq2seq.py index faf5b808..d222b462 100644 --- a/deepparse/network/seq2seq.py +++ b/deepparse/network/seq2seq.py @@ -21,14 +21,15 @@ class Seq2SeqModel(ABC, nn.Module): Args: device (~torch.device): The device tu use for the prediction. - input_size (int): The input size of the encoder (i.e. the embeddings size). The default value is 300. - encoder_hidden_size (int): The size of the hidden layer(s) of the encoder. The default value is 1024. - encoder_num_layers (int): The number of hidden layers of the encoder. The default value is 1. - decoder_hidden_size (int): The size of the hidden layer(s) of the decoder. The default value is 1024. - decoder_num_layers (int): The number of hidden layers of the decoder. The default value is 1. - output_size (int): The size of the prediction layers (i.e. the number of tag to predict). - attention_mechanism (bool): Either or not to use attention mechanism. The default value is False. - verbose (bool): Turn on/off the verbosity of the model. The default value is True. + input_size (int): The input size of the encoder (i.e. the size of the embedding). The default value is ``300``. + encoder_hidden_size (int): The size of the encoder's hidden layer(s). The default value is ``1024``. + encoder_num_layers (int): The number of hidden layers of the encoder. The default value is ``1``. + decoder_hidden_size (int): The size of the decoder's hidden layer(s). The default value is ``1024``. + decoder_num_layers (int): The number of hidden layers of the decoder. The default value is ``1``. + output_size (int): The size of the prediction layers (i.e. the number of tags to predict). The default value is + ``9``. + attention_mechanism (bool): Either or not to use the attention mechanism. The default value is ``False``. + verbose (bool): Turn on/off the verbosity of the model. The default value is ``True``. 
"""

def __init__(
@@ -80,14 +81,14 @@ def same_output_dim(self, size: int) -> bool:

def handle_new_output_dim(self, new_dim: int) -> None:
"""
- Update the new output dimension
+ Update the new output dimension.
"""
self.decoder.linear_layer_set_up(output_size=new_dim)
self.output_size = new_dim

def _load_pre_trained_weights(self, model_type: str, cache_dir: str, offline: bool) -> None:
"""
- Method to download and resolved the loading (into the network) of the pretrained weights.
+ Method to download and resolve the loading (into the network) of the pre-trained weights.

Args:
model_type (str): The network pretrained weights to load.
@@ -126,7 +127,7 @@ def _load_weights(self, path_to_model_torch_archive: str) -> None:
path_to_model_to_upload=path_to_model_torch_archive, device=self.device
)

- # All the time, our torch archive include meta-data along with the model weights
+ # All the time, our torch archive includes meta-data along with the model weights.
all_layers_params = all_layers_params.get("address_tagger_model")

self.load_state_dict(all_layers_params)
@@ -154,11 +155,11 @@ def _encoder_step(self, to_predict: torch.Tensor, lengths: List, batch_size: int
Args:
to_predict (~torch.Tensor): The elements to predict the tags.
lengths (list): The lengths of the batch elements (since packed).
- batch_size (int): The number of element in the batch.
+ batch_size (int): The number of elements in the batch.

Return:
A tuple (``x``, ``y``, ``z``) where ``x`` is the decoder input (a zeros tensor), ``y`` is the decoder
- hidden states and ``z`` is the encoder outputs for the attention weighs if needed.
+ hidden states, and ``z`` is the encoder output for the attention weights if needed.
"""
encoder_outputs, decoder_hidden = self.encoder(to_predict, lengths)
@@ -181,41 +182,42 @@ def _decoder_step(

Args:
decoder_input (~torch.Tensor): The decoder input (so the encode output).
- decoder_hidden (~torch.Tensor): The encoder hidden state (so the encode hidden state). 
+ decoder_hidden (~torch.Tensor): The encoder's hidden state (so the encode hidden state). encoder_outputs (~torch.Tensor): The encoder outputs for the attention mechanism weighs if needed. - target (~torch.LongTensor) : The target of the batch element, use only when we retrain the model since we do + target (~torch.LongTensor) : The target of the batch element, used only when we retrain the model since + we do `teacher forcing `_. lengths (list): The lengths of the batch elements (since packed). - batch_size (int): Number of element in the batch. + batch_size (int): Number of elements in the batch. Return: A Tensor of the predicted sequence. """ longest_sequence_length = max(lengths) - # The empty prediction sequence - # +1 for the EOS + # The empty prediction sequence. + # +1 for the EOS. prediction_sequence = torch.zeros(longest_sequence_length + 1, batch_size, self.output_size, device=self.device) - # We decode the first token + # We decode the first token. decoder_output, decoder_hidden, attention_weights = self.decoder( decoder_input, decoder_hidden, encoder_outputs, lengths ) if attention_weights is not None: - # We fill the attention + # We fill the attention. attention_output = torch.ones(longest_sequence_length + 1, batch_size, 1, longest_sequence_length) attention_output[0] = attention_weights - # We fill the first token prediction + # We fill the first token prediction. prediction_sequence[0] = decoder_output - # The decoder next step input (the predicted idx of the previous token) + # The decoder's next step input (the predicted idx of the previous token). _, decoder_input = decoder_output.topk(1) - # we loop the same steps for the rest of the sequence + # We loop the same steps for the rest of the sequence. if target is not None and random.random() < 0.5: - # force the real target value instead of the predicted one to help learning + # Force the real target value instead of the predicted one to help learning. 
 target = target.transpose(0, 1)
for idx in range(longest_sequence_length):
decoder_input = target[idx].view(1, batch_size, 1)
diff --git a/deepparse/parser/address_parser.py b/deepparse/parser/address_parser.py
index ceba52e0..febb139c 100644
--- a/deepparse/parser/address_parser.py
+++ b/deepparse/parser/address_parser.py
@@ -95,8 +95,8 @@ class AddressParser:
``None``. To further improve performance, consider using the models (fasttext or BPEmb) with their
counterparts using an attention mechanism with the ``attention_mechanism`` flag.
attention_mechanism (bool): Whether to use the model with an attention mechanism. The model will use an
- attention mechanism that takes an extra 100 MB on GPU usage (see the doc for more statistics).
- The default value is False.
+ attention mechanism that takes an extra 100 MB on GPU usage (see the documentation for more statistics).
+ The default value is ``False``.
device (Union[int, str, torch.torch.device]): The device to use can be either:

- a ``GPU`` index in int format (e.g. ``0``),
@@ -104,28 +104,31 @@ class AddressParser:
- a :class:`~torch.torch.device` object,
- ``"cpu"`` for a ``CPU`` use.

- The default value is GPU with the index ``0`` if it exists. Otherwise, the value is ``CPU``.
- rounding (int): The rounding to use when asking the probability of the tags. The default value is four digits.
- verbose (bool): Turn on/off the verbosity of the model weights download and loading. The default value is True.
+ The default value is ``0``, which is a GPU device with the index ``0`` if it exists. Otherwise,
+ the value is ``CPU``.
+ rounding (int): The rounding to use when asking the probability of the tags. The default value is ``4``,
+ namely four digits.
+ verbose (bool): Turn on/off the verbosity of the model weights download and loading. The default value is
+ ``True``.
path_to_retrained_model (Union[S3Path, str, None]): The path to the retrained model to use for prediction. 
We will infer the ``model_type`` of the retrained model. The default value is ``None``, meaning we use our pretrained model. If the retrained model uses an attention mechanism, ``attention_mechanism`` needs to be set to True. The path_to_retrain_model can also be a S3-like (Azure, AWS, Google) bucket URI string path (e.g. ``"s3://path/to/aws/s3/bucket.ckpt"``). Or it can be a ``S3Path`` S3-like URI using `cloudpathlib` to handle S3-like bucket. See `cloudpathlib ` - for detail on supported S3 buckets provider and URI condition. The default value is None. + for detail on supported S3 buckets provider and URI condition. The default value is ``None``. cache_dir (Union[str, None]): The path to the cached directory to use for downloading (and loading) the embeddings model and the model pretrained weights. offline (bool): Whether or not the model is an offline one, meaning you have already downloaded the pre-trained weights and embeddings weights in either the default Deepparse cache directory (``"~./cache/deepparse"``) or the ``cache_dir`` directory. When offline, we will not verify if the model is the latest. You can use our - ``download_models`` CLI function to download all the requirements for a model. The default value is False - (not an offline parsing model). + ``download_models`` CLI function to download all the requirements for a model. The default value is + ``False`` (not an offline parsing model). Note: For both networks, we will download the pretrained weights and embeddings in the ``.cache`` directory - for the root user. The pretrained weights take at most 44 MB. The fastText embeddings take 6.8 GO, - the fastText-light embeddings take 3.3 GO and bpemb take 116 MB (in ``".cache/bpemb"``). + for the root user. The pretrained weights take at most 44 MB. The FastText embeddings take 6.8 GO, + the FastText-light (``"fasttext-light"``) embeddings take 3.3 GO and bpemb take 116 MB (in ``".cache/bpemb"``). 
Also, one can download all the dependencies of our pretrained model using our CLI (e.g. download_model fasttext) before sending it to a node without access to Internet. @@ -914,7 +917,7 @@ def test( seed (int): Seed to use (by default, ``42``). verbose (Union[None, bool]): To override the AddressParser verbosity for the test. When set to True or False, it will override (but it does not change the AddressParser verbosity) the test verbosity. - If set to the default value None, the AddressParser verbosity is used as the test verbosity. + If set to the default value ``None``, the AddressParser verbosity is used as the test verbosity. Return: A dictionary with the stats (see `Experiment class @@ -964,7 +967,7 @@ def test( if "fasttext-light" in self.model_type: raise FastTextModelError( "It's not possible to test a fasttext-light due to pymagnitude problem. " - "See the Retrain method doc for more details." + "See the Retrain method documentation for more details." ) if not isinstance(test_dataset_container, DatasetContainer): @@ -1215,7 +1218,7 @@ def _freeze_model_params(self, layers_to_freeze: Union[str]) -> None: if layers_to_freeze not in ("encoder", "decoder", "prediction_layer", "seq2seq"): raise ValueError( f"{layers_to_freeze} freezing setting is not supported. Value can be 'encoder', 'decoder', " - f"'prediction_layer' and 'seq2seq'. See doc for more details." + f"'prediction_layer' and 'seq2seq'. See documentation for more details." ) layer_exclude = None if layers_to_freeze == "decoder": @@ -1271,7 +1274,7 @@ def _retrain_argumentation_validations( if "fasttext-light" in self.model_type: raise FastTextModelError( "It's not possible to retrain a fasttext-light due to pymagnitude problem. " - "See the Retrain method doc for more details." + "See the Retrain method documentation for more details." 
) if not isinstance(train_dataset_container, DatasetContainer): diff --git a/deepparse/parser/formatted_parsed_address.py b/deepparse/parser/formatted_parsed_address.py index 9a013741..28265882 100644 --- a/deepparse/parser/formatted_parsed_address.py +++ b/deepparse/parser/formatted_parsed_address.py @@ -86,7 +86,7 @@ def __repr__(self) -> str: def __eq__(self, other) -> bool: """ - Equal if all address components elements are equals. If attributes are not the same, will return False. + Equal if all address components elements are equals. If attributes are not the same, it will return False. """ for field in self.__dict__: address_component = getattr(self, field) @@ -114,7 +114,7 @@ def format_address( Args: fields (Union[list, None]): Optional argument to define the fields to order the address components of - the address. If None, we will use the inferred order base on the address tags appearance. For example, + the address. If None, we will use the inferred order based on the address tags' appearance. For example, if the parsed address is ``(305, StreetNumber), (rue, StreetName), (des, StreetName), (Lilas, StreetName)``, the inferred order will be ``StreetNumber, StreetName``. 
 capitalize_fields (Union[list, None]): Optional argument to define the capitalized fields for the formatted
@@ -138,7 +138,7 @@ def format_address(
# > 350, rue des lilas, ouest, quebec city, quebec, g1l 1b6

parse_address.formatted_address(fields_separator=", ", capitalize_fields=["StreetName", "Orientation"])
- # > 350, Rue des lilas, Ouest, quebec city, quebec, g1l 1b6
+ # > 350, Rue des lilas, Ouest, quebec city, quebec, g1l 1b6

parse_address.formatted_address(fields_separator=", ", upper_case_fields=["PostalCode""])
# > 350 rue des lilas ouest quebec city quebec G1L 1B6
@@ -214,7 +214,7 @@ def to_list_of_tuples(self, fields: Union[List, None] = None) -> List[tuple]:
def to_pandas(self) -> Dict:
"""
Method to convert a parsed address into a dictionary for pandas where the first key is the raw address and
- the followings keys are the address components, and the values are the value of those components.
+ the following keys are the address components, and the values are the values of those components.

For example, the parsed address `` 305 rue des Lilas`` will be converted into the following
dictionary: ``{'Address': '305 rue des Lilas', 'StreetNumber':'305', 'StreetName': 'rue des Lilas'}``.
@@ -228,7 +228,7 @@ def to_pickle(self) -> Tuple[str, List]:
"""
Method to convert a parsed address into a list of tuple for pickle where the first tuple element is the
- raw address and the followings tuples are the address components, and the values are the value of
+ raw address and the following tuples are the address components, and the values are the values of
those components.

For example, the parsed address `` 305 rue des Lilas`` will be converted into the following list of
tuples: ``'305 rue des Lilas', ('305', 'StreetNumber'), ('rue des Lilas', 'StreetName')]``. 
diff --git a/deepparse/validations.py b/deepparse/validations.py index e2007944..a6c6d555 100644 --- a/deepparse/validations.py +++ b/deepparse/validations.py @@ -12,7 +12,7 @@ def extract_package_version(package) -> str: """ - Handle the retrieval of the major and minor version part of a Python package. + Handle the retrieval of a Python package's major and minor version parts. """ full_version = package.version.__version__ components_parts = full_version.split(".") @@ -24,8 +24,8 @@ def extract_package_version(package) -> str: def valid_poutyne_version(min_major: int = 1, min_minor: int = 2) -> bool: """ - Validate Poutyne version is greater than min_major.min_minor for using a str checkpoint. Some version before - does not support all the features we need. By default, min_major.min_minor equal version 1.2 which is the + Validate that the Poutyne version is greater than min_major.min_minor for using a str checkpoint. Some versions + do not support all the features we need. By default, min_major.min_minor equals version 1.2, which is the lowest version we can use. """ version_components = extract_package_version(package=poutyne).split(".") @@ -45,13 +45,13 @@ def validate_data_to_parse(addresses_to_parse: List) -> None: """ Validation tests on the addresses to parse to respect the following two criteria: - addresses are not tuple, - - no addresses are None value, - - no addresses are empty strings, and - - no addresses are whitespace-only strings. + - no address is a ``None`` value, + - no address is empty, and + - no address is composed of only whitespace. """ if isinstance(addresses_to_parse[0], tuple): raise DataError( - "Addresses to parsed are tuples. They need to be a list of string. Are you using training data?" + "Addresses to parsed are tuples. They need to be a list of strings. Are you using training data?" 
) if validate_if_any_none(addresses_to_parse): raise DataError("Some addresses are None value.") diff --git a/deepparse/vectorizer/bpemb_vectorizer.py b/deepparse/vectorizer/bpemb_vectorizer.py index a20d4c12..69ca91dd 100644 --- a/deepparse/vectorizer/bpemb_vectorizer.py +++ b/deepparse/vectorizer/bpemb_vectorizer.py @@ -50,7 +50,7 @@ def _vectorize_sequence(self, address: str) -> Tuple[List, List]: address (str): Address to vectorize using BPEmb. Return: - A tuple of list of word vector and the word decomposition lengths. + A tuple of the list of word vectors and the word decomposition lengths. """ input_sequence = [] diff --git a/deepparse/vectorizer/fasttext_vectorizer.py b/deepparse/vectorizer/fasttext_vectorizer.py index f6506a36..8fcc1990 100644 --- a/deepparse/vectorizer/fasttext_vectorizer.py +++ b/deepparse/vectorizer/fasttext_vectorizer.py @@ -6,7 +6,7 @@ class FastTextVectorizer(Vectorizer): """ - FastText vectorizer to convert an address into fastText embeddings. + FastText vectorizer to convert an address into FastText embeddings. """ def __call__(self, addresses: List[str]) -> List: @@ -28,7 +28,7 @@ def _vectorize_sequence(self, address: str) -> List: Method to vectorize the address. Args: - address (str): Address to vectorize using fastText. + address (str): Address to vectorize using FastText. Return: A list of word vector. diff --git a/deepparse/vectorizer/magnitude_vectorizer.py b/deepparse/vectorizer/magnitude_vectorizer.py index ded630be..0ffff493 100644 --- a/deepparse/vectorizer/magnitude_vectorizer.py +++ b/deepparse/vectorizer/magnitude_vectorizer.py @@ -8,7 +8,7 @@ class MagnitudeVectorizer(Vectorizer): """ - FastText Magnitude vectorizer to convert an address into fastText embeddings using magnitude mapping. + FastText Magnitude vectorizer to convert an address into FastText embeddings using magnitude mapping. 
""" def __call__(self, addresses: List[str]) -> List: @@ -30,7 +30,7 @@ def _vectorize_sequence(self, address: str) -> ndarray: Method to vectorize the address. Args: - address (str): Address to vectorize using fastText. + address (str): Address to vectorize using FastText. Return: A list of word vector. diff --git a/deepparse/weights_tools.py b/deepparse/weights_tools.py index dd5831bd..b8e8e238 100644 --- a/deepparse/weights_tools.py +++ b/deepparse/weights_tools.py @@ -8,7 +8,7 @@ def weights_init(m: nn.Module) -> None: """ - Function to initialize the weights of a model layers. + Function to initialize the weights of model layers. Usage: network = Model() @@ -50,7 +50,7 @@ def handle_weights_upload( except FileNotFoundError as error: if "s3" in path_to_model_to_upload or "//" in path_to_model_to_upload or ":" in path_to_model_to_upload: raise FileNotFoundError( - "Are You trying to use a AWS S3 URI? If so path need to start with s3://." + "Are You trying to use an AWS S3 URI? If so, the path needs to start with s3://." ) from error raise FileNotFoundError(f"The file {path_to_model_to_upload} was not found.") from error return checkpoint_weights diff --git a/docs/source/api.rst b/docs/source/api.rst index c39ff937..52b343f1 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -4,13 +4,13 @@ Parse Address With Our Out-Of-The-Box API ========================================= -We also offer an out-of-the-box RESTAPI to parse addresses using FastAPI. +We also offer an out-of-the-box REST API to parse addresses using FastAPI. Installation ************ -First, ensure that you have Docker Engine and Docker Compose installed on your machine. -If not, you can install them using the following documentations in the following order: +First, ensure you have Docker Engine and Docker Compose installed on your machine. +If not, you can install them using the following documentation in the following order: 1. `Docker Engine `_ 2. 
`Docker Compose `_ @@ -24,7 +24,7 @@ Once you have Docker Engine and Docker Compose installed, you can run the follow Sentry ****** -Also, you can monitor your application usage with `Sentry `_ by setting the environment variable ``SENTRY_DSN`` to your Sentry's project +Also, you can monitor your application usage with `Sentry `_ by setting the environment variable ``SENTRY_DSN`` to your Sentry project DSN. There is an example of the ``.env`` file in the project's root named ``.env_example``. You can copy it using the following command: .. code-block:: sh @@ -34,7 +34,7 @@ DSN. There is an example of the ``.env`` file in the project's root named ``.env Request Examples ---------------- -Once the application is up and running and port ``8000`` is exported on your localhost, you can send a request with one +Once the application is up and running and port ``8000`` is exported on your ``localhost``, you can send a request with one of the following methods: cURL POST request @@ -65,4 +65,4 @@ Python POST request response = requests.post(url, json=addresses) parsed_addresses = response.json() - print(parsed_addresses) \ No newline at end of file + print(parsed_addresses) diff --git a/docs/source/cli.rst b/docs/source/cli.rst index 60f52be0..09d50a09 100644 --- a/docs/source/cli.rst +++ b/docs/source/cli.rst @@ -13,17 +13,17 @@ The parsing of the addresses to parse ``dataset_path`` is done using the selecte The exported parsed addresses are to be exported in the same directory as the addresses to parse but given the ``export_file_name`` using the encoding format of the address dataset file. For example, if the dataset is in a CSV format, the output file format will be a CSV. Moreover, by default, -we log some information (``--log``) such as the parser model name, the parsed dataset path +we log some information (``--log``), such as the parser model name, the parsed dataset path and the number of parsed addresses. 
Here is the list of the arguments, their descriptions and default values. One can use the command ``parse --help`` to output the same description in your command line. - ``parsing_model``: The parsing module to use. - ``dataset_path``: The path to the dataset file in a pickle (``.p``, ``.pickle`` or ``.pckl``) or CSV format. - - ``export_file_name``: The filename to use to export the parsed addresses. We will infer the file format base on the file extension. That is, if the file is a pickle (``.p`` or ``.pickle``), we will export it into a pickle file. The supported formats are Pickle, CSV and JSON. The file will be exported in the same repositories as the dataset_path. See the doc for more details on the format exporting. + - ``export_file_name``: The filename to use to export the parsed addresses. We will infer the file format base on the file extension. That is, if the file is a pickle (``.p`` or ``.pickle``), we will export it into a pickle file. The supported formats are Pickle, CSV and JSON. The file will be exported in the same repositories as the dataset_path. See the documentation for more details on the format exporting. - ``--device``: The device to use. It can be 'cpu' or a GPU device index such as ``'0'`` or ``'1'``. By default, ``'0'``. - ``--batch_size``: The batch size to use to process the dataset. By default, ``32``. - ``--path_to_retrained_model``: A path to a retrained model to use for parsing. By default, ``None``. - - ``--csv_column_name``: The column name to extract address in the CSV. Need to be specified if the provided ``dataset_path`` leads to a CSV file. By default, ``None``. + - ``--csv_column_name``: The column name to extract address in the CSV. It needs to be specified if the provided ``dataset_path`` leads to a CSV file. By default, ``None``. - ``--csv_column_separator``: The column separator for the dataset container will only be used if the dataset is a CSV one. By default, ``'\t'``. 
- ``--log``: Either or not to log the parsing process into a ``.log`` file exported at the same place as the parsed data using the same name as the export file. The bool value can be (not case sensitive) ``'true/false'``, ``'t/f'``, ``'yes/no'``, ``'y/n'`` or ``'0/1'``. By default, ``True``. - ``--cache_dir``: To change the default cache directory (default to ``None``, e.g. default path). @@ -40,7 +40,7 @@ We support three types of export formats: CSV, Pickle and JSON. The first export uses the following pattern column pattern: ``"Address", "First address components class", "Second class", ...``. -Which means the address ``305 rue des Lilas 0 app 2`` will output the table bellow +Which means the address ``305 rue des Lilas 0 app 2`` will output the table below using our default tags: .. list-table:: @@ -65,17 +65,17 @@ using our default tags: - None - None -The second export uses a similar approach but using tuples and list. Using the same example will return the following +The second export uses a similar approach but uses tuples and lists. Using the same example will return the following tuple ``("305 rue des Lilas 0 app 2", [("305", "StreetNumber"), ("rue des lilas", "StreetName"), ...])``. The third export uses a similar approach to the CSV format but uses dictionary-like formatting. Using the -same example will return the following dict ``{"Address": "305 rue des Lilas 0 app 2", "StreetNumber": "305", ...}``. +same example will return the following dictionary ``{"Address": "305 rue des Lilas 0 app 2", "StreetNumber": "305", ...}``. Retrain ******* This command allows a user to retrain the ``base_parsing_model`` on the ``train_dataset_path`` dataset. -For the training, the CSV or Pickle dataset is loader in a specific dataloader (see +For the training, the CSV or Pickle dataset is loaded in a specific dataloader (see :class:`~deepparse.dataset_container.DatasetContainer` for more details). We use Poutyne's automatic logging functionalities during training. 
Thus, it creates an epoch checkpoint and outputs the epoch metrics in a TSV file. Moreover, we save the best epoch model under the retrain model name (either the default one or a given name using @@ -94,11 +94,11 @@ One can use the command ``parse --help`` to output the same description in your - ``--disable_tensorboard``: To disable Poutyne automatic Tensorboard monitoring. By default, we disable them (``True``). - ``--layers_to_freeze``: Name of the portion of the seq2seq to freeze layers, thus reducing the number of parameters to learn. Default to ``None``. - ``--name_of_the_retrain_parser``: Name to give to the retrained parser that will be used when reloaded as the printed name, and to the saving file name. By default, ``None``, thus, the default name. See the complete parser retrain method for more details. - - ``--device``: The device to use. It can be ``'cpu'`` or a GPU device index such as ``'0'`` or ``'1'``. By default ``'0'``. - - ``--csv_column_names``: The column names to extract address in the CSV. Need to be specified if the provided dataset_path leads to a CSV file. Column names have to be separated by whitespace. For example, ``--csv_column_names column1 column2``. + - ``--device``: The device to use. It can be ``'cpu'`` or a GPU device index such as ``'0'`` or ``'1'``. By default, ``'0'``. + - ``--csv_column_names``: The column names to extract the address in the CSV. It must be specified if the provided dataset_path leads to a CSV file. Column names have to be separated by whitespace. For example, ``--csv_column_names column1 column2``. - ``--csv_column_separator``: The column separator for the dataset container will only be used if the dataset is a CSV one. By default, ``'\t'``. - ``--cache_dir``: To change the default cache directory (default to ``None``, e.g. default path). - - ``prediction_tags``: To change the prediction tags. The ``prediction_tags`` is a path leading to a JSON file of the new tags in a key-value style. 
For example, the path can be ``"a_path/file.json"`` and the content can be ``{"new_tag": 0, "other_tag": 1, "EOS": 2}``. + - ``prediction_tags``: To change the prediction tags. The ``prediction_tags`` path leads to a JSON file of the new tags in a key-value style. For example, the path can be ``"a_path/file.json"`` and the content can be ``{"new_tag": 0, "other_tag": 1, "EOS": 2}``. .. autofunction:: deepparse.cli.retrain.main @@ -109,9 +109,9 @@ Test This command allows a user to test the ``base_parsing_model`` (or the retrained one using the ``--path_to_retrained_model``) on the ``train_dataset_path`` dataset. -For the testing, the CSV or Pickle dataset is loader in a specific dataloader (see +For the testing, the CSV or Pickle dataset is loaded in a specific dataloader (see :class:`~deepparse.dataset_container.DatasetContainer` for more details). Moreover, by default, -we log some information (``--log``) such as the tested address parser model name and the parsed dataset path. Plus, +we log some information (``--log``), such as the tested address parser model name and the parsed dataset path. Plus, we also log the testing results in a TSV file. The two files are exported at the same path as the testing dataset. Here is the list of the arguments, their descriptions and default values. One can use the command ``parse --help`` to output the same description in your command line. @@ -123,7 +123,7 @@ One can use the command ``parse --help`` to output the same description in your - ``--batch_size``: The batch size to use to process the dataset. By default, ``32``. - ``--num_workers``: The number of workers to use for the data loader (default is ``1`` worker). - ``--seed``: The seed to use to make the sampling deterministic (default ``42``). - - ``--csv_column_name``: The column name to extract address in the CSV. Need to be specified if the provided ``dataset_path`` leads to a CSV file. By default, ``None``. 
+ - ``--csv_column_name``: The column name to extract the address in the CSV. It must be specified if the provided ``dataset_path`` leads to a CSV file. By default, ``None``. - ``--csv_column_separator``: The column separator for the dataset container will only be used if the dataset is a CSV one. By default, ``'\t'``. - ``--log``: Either or not to log the parsing process into a ``.log`` file exported at the same place as the parsed data using the same name as the export file. The bool value can be (not case sensitive) ``'true/false'``, ``'t/f'``, ``'yes/no'``, ``'y/n'`` or ``'0/1'``. By default, ``True``. - ``--cache_dir``: To change the default cache directory (default to ``None``, e.g. default path). diff --git a/docs/source/examples/retrain_with_new_seq2seq_params.rst b/docs/source/examples/retrain_with_new_seq2seq_params.rst index 334291b5..d7e4bc8f 100644 --- a/docs/source/examples/retrain_with_new_seq2seq_params.rst +++ b/docs/source/examples/retrain_with_new_seq2seq_params.rst @@ -56,7 +56,7 @@ Let's start with the default learning rate of ``0.01`` and use a learning rate s logging_path = "./checkpoints" # The new seq2seq params settings using smaller hidden size - # See the doc for the list of tunable seq2seq parameters + # See the documentation for the list of tunable seq2seq parameters seq2seq_params = { "encoder_hidden_size": 512, "decoder_hidden_size": 512 diff --git a/docs/source/parser.rst b/docs/source/parser.rst index 620fb811..f6855880 100644 --- a/docs/source/parser.rst +++ b/docs/source/parser.rst @@ -12,10 +12,10 @@ Pre-trained Complete Model This is the complete pretrained address parser model. This model allows using the pretrained weights to predict the tags of any address. -We offer, for now, only two pretrained models, FastText and BPEmb. The first one relies on +For now, we offer only two pretrained models, FastText and BPEmb. 
The first one relies on `fastText `__ French pretrained embeddings to parse the address, and the second use the `byte-pair multilingual subword `_ pretrained embeddings. In both cases, -the architecture is similar, and performances are comparable; our results are available in this +the architecture and performances are similar; our results are available in this `article `_. Memory Usage and Time Performance @@ -27,7 +27,7 @@ we report the RAM usage, and in the first table, we also report the GPU memory u Also, for both tables, we report the mean-time of execution that was obtained by processing ~183,000 addresses using different batch sizes (2^0, ..., 2^9) (i.e. :math:`\frac{\text{Total time to process all addresses}}{~183,000} =` time per address). -In addition, we proposed a lighter version (fasttext-light) of our fastText model using +In addition, we proposed a lighter version (``"fasttext-light"``) of our fastText model using `Magnitude embeddings mapping `_. For this lighter model, on average, results are a little bit lower for the trained country (around ~2%) but are similar for the zero-shot country (see our `article `_ for more details). @@ -108,10 +108,10 @@ are a little bit lower for the trained country (around ~2%) but are similar for .. [2] Note that on Windows, we use the Gensim FastText models that use ~10 GO with similar performance. -Thus, the more address is, the faster each address can be processed. You can also improve performance by using more +Thus, the more addresses there are, the faster each address can be processed. You can also improve performance by using more workers for the data loader created with your data within the call. But note that this performance improvement is not linear. Furthermore, as of version ``0.9.6``, we now use Torch 2.0 and many other tricks to improve -processing performance. Here a few: if the parser uses a GPU, it will pin the memory in the Dataloader and reduce some +processing performance. 
Here are a few: if the parser uses a GPU, it will pin the memory in the Dataloader and reduce some operations (e.g. useless ``.to(device)``). AddressParser diff --git a/docs/source/training_guide.rst b/docs/source/training_guide.rst index 4649bbd0..6aa8cdc1 100644 --- a/docs/source/training_guide.rst +++ b/docs/source/training_guide.rst @@ -5,7 +5,7 @@ Training Guide ============== In addition to parsing addresses out-of-the-box, Deepparse allows you to retrain the pre-trained models to fit your data and use cases. -In the world of machine learning, this is what's refered to as ``fine-tuning``, which can make it easier to obtain well-performing +In the world of machine learning, this is what's referred to as ``fine-tuning``, which can make it easier to obtain well-performing models more efficiently and with less data. Since fine-tuning models can be tricky, this section of the documentation provides some guidelines and insights that may @@ -19,14 +19,14 @@ how to retrain our models. A few use cases may lead you to want to retrain Deepparse's models. Whether you wish to obtain a better performance on a single or multiple countries that our models weren't trained on, or your data and address schemes require a more complex -architecture, or the tag structure of your dataset, is different from ours; deepparse's retraining features accommodate all these use cases and more. +architecture, or your dataset's tag structure, differs from ours; Deepparse's retraining features accommodate all these use cases and more. In practice, our models were trained on 20 countries. They demonstrated very accurate results on all of them, so we advise you to use our models without retraining unless you wish to predict -different tags (e.g., StreetNumber ...). Also, suppose you want to retrain +different tags (e.g., StreetNumber, ...). Also, suppose you want to retrain our models to perform better on countries outside of the 20 used in the original training set. 
In that case, you can look at `our dataset `__ which includes an additional 41 countries used only for testing. -There are two main concerns to keep in mind when fine-tuning a model: the model's convergence (i.e, its ability actually to learn from the new data) +There are two main concerns to keep in mind when fine-tuning a model: the model's convergence (i.e., its ability actually to learn from the new data) and the possibility of ``catastrophic forgetting`` (i.e., losing the model's previous knowledge after training on the new data). Learning Successfully @@ -37,7 +37,7 @@ of fine-tuning, the models have already developed a base knowledge of the task t This is especially true in the case of Deepparse since the task you are fine-tuning remains the same (i.e. parsing addresses). However, there are a couple of points to consider to obtain favourable results: -- **Make sure you have enough data**: deep learning models are notorious for being pretty data hungry, so unless you have enough data, the models +- **Make sure you have enough data**: deep learning models are notorious for being pretty data-hungry, so unless you have enough data, the models will have a hard time learning. Since Deepparse's models have already been trained on a few million addresses, the need for data is mitigated for fine-tuning. However, it is recommended to use at least a few thousand examples per new country when retraining. @@ -59,7 +59,7 @@ However, there are a couple of points to consider to obtain favourable results: Do Not Forget! ************** -As mentionned above, catastrophic forgetting can happen when fine-tuning machine learning models. This is because the models' internal parameters are +As mentioned above, catastrophic forgetting can happen when fine-tuning machine learning models. This is because the models' internal parameters are modified to accommodate the new task/data, which can impact their ability to be appropriate for the previous task/data. 
There are many fancy ways to mitigate catastrophic forgetting when fine-tuning models. Still, given the task and data that Deepparse handles, we recommend including some of the previous data when constructing your retraining dataset. The amount @@ -95,5 +95,5 @@ Modifying the Architecture The :meth:`~deepparse.parser.AddressParser.retrain` method allows you to change the architecture of the models using the ``seq2seq_params`` argument. This can be useful if you need a more complex model or a lighter model, for example. However, if you -change the models' architecture, you will end up with a completely new model that will be retrained from scratch. This -means that all the previous knowledge that the initial model had will disapear. +change the models' architecture, a completely new model will be retrained from scratch. This +means that all the previous knowledge that the initial model had will disappear. diff --git a/examples/retrain_with_new_seq2seq_params.py b/examples/retrain_with_new_seq2seq_params.py index a402240d..99a42920 100644 --- a/examples/retrain_with_new_seq2seq_params.py +++ b/examples/retrain_with_new_seq2seq_params.py @@ -34,7 +34,7 @@ logging_path = "./checkpoints" # The new seq2seq params settings using smaller hidden size -# See the doc for the list of tunable seq2seq parameters +# See the documentation for the list of tunable seq2seq parameters seq2seq_params = {"encoder_hidden_size": 512, "decoder_hidden_size": 512} address_parser.retrain( diff --git a/models_evaluation/timer/timer.py b/models_evaluation/timer/timer.py index 285f1777..07fc2c6e 100644 --- a/models_evaluation/timer/timer.py +++ b/models_evaluation/timer/timer.py @@ -30,7 +30,7 @@ class Timer: The class can be used as a context manager to time the code inside the 'with' statement, as a decorator of a function or a method to time it at each call, or as an iterator to have the total running time of a - for loop as well as the mean time taken per iteration. 
See the doc of the init method for usage examples. + for loop as well as the mean time taken per iteration. See the documentation of the init method for usage examples. """ def __init__( diff --git a/tests/cli/test_retrain.py b/tests/cli/test_retrain.py index fad25bf0..e3e32570 100644 --- a/tests/cli/test_retrain.py +++ b/tests/cli/test_retrain.py @@ -201,8 +201,8 @@ def test_integration_csv(self): def test_ifIsCSVFile_noColumnName_raiseValueError(self): with self.assertRaises(ValueError): - # We set up the params with the default value of csv_column_names of the test case method set_up_params, - # which is None, thus no column names. + # We set up the params with the default value of ``"csv_column_names"`` of the test case method + # set_up_params, which is None, thus no column names. parser_params = self.set_up_params(train_dataset_path=self.a_train_csv_dataset_path) retrain.main(parser_params) diff --git a/tests/parser/test_address_parser_retrain_api.py b/tests/parser/test_address_parser_retrain_api.py index f3b12b22..2b5e4f38 100644 --- a/tests/parser/test_address_parser_retrain_api.py +++ b/tests/parser/test_address_parser_retrain_api.py @@ -1433,8 +1433,8 @@ def test_givenRetrainSettings_whenFormattedNameParserName_thenReturnProperNaming ) # We set possible params type with a value - prediction_tags_settings = [{"A dict": 1.0}, None] # Can be a dict or a None - seq2seq_params_settings = [{"A dict": 1.0}, None] # Can be a dict or a None + prediction_tags_settings = [{"A dict": 1.0}, None] # Can be a dictionary or a None + seq2seq_params_settings = [{"A dict": 1.0}, None] # Can be a dictionary or a None layers_to_freeze_settings = [None, "encoder", "decoder", "prediction_layer", "seq2seq"] # From the doc # We loop all possible settings diff --git a/tests/test_download_tools.py b/tests/test_download_tools.py index 6973a76b..202c7c89 100644 --- a/tests/test_download_tools.py +++ b/tests/test_download_tools.py @@ -228,7 +228,7 @@ def 
test_givenAFasttextLightEmbeddingsNotLocal_whenDownloadFasttextEmbeddingsVer download_fasttext_magnitude_embeddings(self.a_directory_path, verbose=True) expected = ( - "The fastText pretrained word embeddings will be download in magnitude format (2.3 GO), " + "The FastText pretrained word embeddings will be download in magnitude format (2.3 GO), " "this process will take several minutes." ) @@ -311,7 +311,7 @@ def test_givenADownloadFasttext_whenPrintProgressSetToVerbose_thenPrint( actual = self.test_out.getvalue().strip() expected = ( - "The fastText pretrained word embeddings will be downloaded (6.8 GO), " + "The FastText pretrained word embeddings will be downloaded (6.8 GO), " "this process will take several minutes." ) self.assertIn(expected, actual)