diff --git a/connector_typesense/README.rst b/connector_typesense/README.rst
new file mode 100644
index 00000000..0e6dda87
--- /dev/null
+++ b/connector_typesense/README.rst
@@ -0,0 +1,252 @@
+===================
+connector_typesense
+===================
+
+.. 
+   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+   !! This file is generated by oca-gen-addon-readme !!
+   !! changes will be overwritten.                   !!
+   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+   !! source digest: sha256:f09633c3af59b153f0eba3f876a9835676c430a6f39dd0dfa3545a924c80bc11
+   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+.. |badge1| image:: https://img.shields.io/badge/maturity-Beta-yellow.png
+    :target: https://odoo-community.org/page/development-status
+    :alt: Beta
+.. |badge2| image:: https://img.shields.io/badge/licence-AGPL--3-blue.png
+    :target: http://www.gnu.org/licenses/agpl-3.0-standalone.html
+    :alt: License: AGPL-3
+.. |badge3| image:: https://img.shields.io/badge/github-OCA%2Fsearch--engine-lightgray.png?logo=github
+    :target: https://github.com/OCA/search-engine/tree/16.0/connector_typesense
+    :alt: OCA/search-engine
+.. |badge4| image:: https://img.shields.io/badge/weblate-Translate%20me-F47D42.png
+    :target: https://translation.odoo-community.org/projects/search-engine-16-0/search-engine-16-0-connector_typesense
+    :alt: Translate me on Weblate
+.. |badge5| image:: https://img.shields.io/badge/runboat-Try%20me-875A7B.png
+    :target: https://runboat.odoo-community.org/builds?repo=OCA/search-engine&target_branch=16.0
+    :alt: Try me on Runboat
+
+|badge1| |badge2| |badge3| |badge4| |badge5|
+
+This addon provides the basis to implement addons that export information to
+`Typesense <https://typesense.org>`_ indexes. It's designed to be easily
+extensible and modular.
+
+**Table of contents**
+
+.. contents::
+   :local:
+
+Installation
+============
+
+This addon uses the native ``json`` package provided by Python. When
+the JSON for a record is recomputed, the new value is compared to the original
+one to see if an export to the search engine index is needed. This is
+done by comparing the md5 of the two JSON strings. On a large number of
+records this process can be slow when the JSON is large and complex. To speed
+up this process you can install the ``orjson`` package.
+
+.. code-block:: bash
+
+    pip install orjson
+
+This addon requires a running Typesense search engine. See the Typesense
+documentation for a `quick Docker based setup
+<https://typesense.org/docs/guide/install-typesense.html#option-2-local-machine-self-hosting>`_.
+
+Usage
+=====
+
+Overview
+~~~~~~~~
+
+A search engine is a system designed to store information in a way that makes
+it easy to find through search and analytics queries. The main difference
+between a search engine and a database is that a search engine is optimized
+for search and analytics queries, while a database is optimized for
+transactional and relational queries.
+
+This addon is designed around 4 main concepts:
+
+* **The search engine backend** is used to define in Odoo the kind
+  of search engine that will be used to index the data. Its main responsibility
+  is to provide an instance of `odoo.addons.search_engine.tools.adapter.SearchEngineAdapter`
+  that will be used to communicate with the search engine.
+
+* **The search engine index** is used to define in Odoo the index where
+  the data will be indexed. An index is always linked to a search engine backend.
+  The index provides methods to manage the lifecycle of the data put into
+  the index for the records of a given model. To do so, it uses:
+
+  * **The SearchEngineAdapter** provided by the backend to communicate with the
+    search engine.
+  * **A ModelSerializer** that is used to transform an Odoo record into
+    a dictionary that can be indexed into the search engine.
+  * **A JsonValidator** that is used to validate the data that is to be
+    indexed into the search engine.
+
+  The ModelSerializer and JsonValidator are defined on the index itself.
+  The current addon provides a default implementation only for the JsonValidator.
+  In the `search-engine <https://github.com/OCA/search-engine>`_ GitHub repository
+  you can find an implementation of the ModelSerializer based on the jsonifier
+  addon, `connector_search_engine_jsonifier`.
+
+* **The search engine indexable record** is a mixin that is used to define
+  the records that can be indexed into a search engine index. The mixin
+  provides methods:
+
+  * To add a record to an index.
+  * To remove a record from an index.
+  * To mark the record in an index (*the search engine binding*) as to be
+    recomputed. This method should be called when modifications are made on
+    the record that could impact the data indexed into the search
+    engine. It instructs the index that the record must be recomputed and
+    re-indexed.
+
+  It also ensures that when the record is unlinked, it is removed from the indexes
+  it was indexed into.
+
+* **The search engine binding** is a model that represents the link between
+  an index and an indexable Odoo record. It gives you access to the data
+  that are indexed into the search engine for the record. It's also used to
+  manage the lifecycle of the data in the search engine. When a binding is
+  created, it's marked as to be computed. Once the data are computed, the
+  binding is marked as to be indexed. Once the data are indexed, the binding
+  is marked as indexed. If the linked record is unlinked, the binding is
+  marked as to be removed. Once the data are removed from the search engine,
+  the binding is deleted.
+
+Indexing lifecycle
+~~~~~~~~~~~~~~~~~~
+
+The indexing lifecycle is based on the following steps:
+
+* When a record is added to an index, a binding is created and marked as to be
+  computed.
+* A cron job scheduled every 5 minutes looks for bindings that are to be
+  computed and, for each of them, schedules a job to recompute the JSON data.
+* When the JSON data is computed, the binding is marked as to be exported if the
+  JSON is valid and differs from the one computed last time.
+* A cron job scheduled every 5 minutes ensures the syncing with the search
+  engine. It will:
+
+  * look for bindings that are to be exported and, for each of them, schedule
+    a job to export the JSON data into the search engine. Once exported, the
+    binding is marked as 'done'.
+  * look for bindings that are to be removed and, for each of them, schedule
+    a job to remove the data from the search engine. Once removed, the binding
+    is deleted.
+
+To keep the data from your model instance in sync with the data indexed in the
+search engine, you should call the method `_se_mark_to_update` on the model
+instance when you make modifications that could impact the indexed data (a
+minimal sketch follows at the end of this section).
+
+* When the method `_se_mark_to_update` is called, the binding is marked as to be
+  computed.
+* From there, the same process as described above will be used to recompute the
+  data and reindex them into the search engine.
+
+When a model instance is unlinked, the binding is marked as to be removed. From
+there it will be processed by the job syncing the data with the search engine.
+
+.. note::
+
+    In previous versions of this addon, there was no method to mark a record as
+    to be recomputed. As a consequence, all the records were recomputed every day
+    to ensure that the data in the search engine were up to date. This was a
+    performance issue and consumed a lot of resources. If, despite this, you want
+    to recompute all the records every day, you can activate the cron job
+    `Search engine: recompute all index` and deactivate the one named
+    `Search engine: Generate job for recompute binding to recompute per index`.
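+
+The following sketch shows how a model might plug into this lifecycle. It is
+illustrative only: the model name `my.product` and the `name` trigger field are
+made up, while the `se.indexable.record` mixin and `_se_mark_to_update` come
+from `connector_search_engine` (check the mixin for the exact signature):
+
+.. code-block:: python
+
+    from odoo import fields, models
+
+
+    class MyProduct(models.Model):
+        _name = "my.product"
+        _inherit = ["se.indexable.record"]
+
+        name = fields.Char()
+
+        def write(self, vals):
+            res = super().write(vals)
+            if "name" in vals:
+                # "name" is part of the exported JSON: mark the bindings of
+                # these records so the next cron run recomputes and re-exports
+                # their data
+                self._se_mark_to_update()
+            return res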
+
+Known issues / Roadmap
+======================
+
+* Implement a generic trigger for bindings
+  based on an ir.exports record linked to the index
+  (the aim is to mark the binding as to be updated
+  when a field configured in the exporter is modified)
+
+Changelog
+=========
+
+16.0.0.1.1 (2023-10-13)
+~~~~~~~~~~~~~~~~~~~~~~~
+
+**Bugfixes**
+
+- Fixes a cache issue with the *se_binding_ids* field on the *se.indexable.record*
+  model. When a binding is created, updated or deleted, the cache for the
+  *se_binding_ids* field of the referenced records is now invalidated. That way,
+  the next time the field is accessed after such an operation, the value is
+  recomputed to reflect the change. (`#163 <https://github.com/OCA/search-engine/issues/163>`_)
+
+
+16.0.0.1.0 (2023-10-13)
+~~~~~~~~~~~~~~~~~~~~~~~
+
+**Features**
+
+- A new action **Update state** is now available on *Search Engine Record* objects.
+  This action allows you to update the state of the selected records from the tree view.
+
+  Add a smart button to quickly access the bound records from the
+  *Search Engine Backend* and *Search Engine Record* views. (`#162 <https://github.com/OCA/search-engine/issues/162>`__)
+
+
+**Bugfixes**
+
+- Fix the Search Engine Binding form view. The fields data and error are now
+  properly displayed and fit the width of the form.
+
+  Makes Odoo's admin user a member of the *Search Engine Connector Manager* group. (`#162 <https://github.com/OCA/search-engine/issues/162>`__)
+
+
+12.0.x.y.z (YYYY-MM-DD)
+~~~~~~~~~~~~~~~~~~~~~~~
+
+TODO
+
+Bug Tracker
+===========
+
+Bugs are tracked on `GitHub Issues <https://github.com/OCA/search-engine/issues>`_.
+In case of trouble, please check there if your issue has already been reported.
+If you spotted it first, help us to smash it by providing a detailed and welcomed
+`feedback <https://github.com/OCA/search-engine/issues/new>`_.
+
+Do not contact contributors directly about support or help with technical issues.
+
+Credits
+=======
+
+Authors
+~~~~~~~
+
+* Derico
+
+Contributors
+~~~~~~~~~~~~
+
+* Sébastien BEAU
+* Laurent Mignon
+* Simone Orsi
+* Raphaël Reverdy
+* Maik Derstappen
+
+Maintainers
+~~~~~~~~~~~
+
+This module is maintained by the OCA.
+
+.. image:: https://odoo-community.org/logo.png
+   :alt: Odoo Community Association
+   :target: https://odoo-community.org
+
+OCA, or the Odoo Community Association, is a nonprofit organization whose
+mission is to support the collaborative development of Odoo features and
+promote its widespread use.
+
+This module is part of the `OCA/search-engine <https://github.com/OCA/search-engine/tree/16.0/connector_typesense>`_ project on GitHub.
+
+You are welcome to contribute. To learn how please visit https://odoo-community.org/page/Contribute.
diff --git a/connector_typesense/__init__.py b/connector_typesense/__init__.py
new file mode 100644
index 00000000..738a2eec
--- /dev/null
+++ b/connector_typesense/__init__.py
@@ -0,0 +1,2 @@
+from . import models
+from . import tools
diff --git a/connector_typesense/__manifest__.py b/connector_typesense/__manifest__.py
new file mode 100644
index 00000000..778ec3b0
--- /dev/null
+++ b/connector_typesense/__manifest__.py
@@ -0,0 +1,20 @@
+# Copyright 2024 Derico
+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl).
+
+{
+    "name": "connector_typesense",
+    "category": "Connector",
+    "summary": "Connector For Typesense Search Engine",
+    "version": "16.0.0.0.2",
+    "license": "AGPL-3",
+    "author": "Derico, Odoo Community Association (OCA)",
+    "website": "https://github.com/OCA/search-engine",
+    "depends": ["connector_search_engine"],
+    "data": [
+        "views/se_backend.xml",
+    ],
+    # "demo": ["demo/backend_demo.xml"],
+    # TODO: Get latest improvements from the elasticsearch connector
+    "external_dependencies": {"python": ["typesense", "requests"]},
+    "installable": True,
+}
diff --git a/connector_typesense/models/__init__.py b/connector_typesense/models/__init__.py
new file mode 100644
index 00000000..b4cddc58
--- /dev/null
+++ b/connector_typesense/models/__init__.py
@@ -0,0 +1 @@
+from . import se_backend, se_index
diff --git a/connector_typesense/models/se_backend.py b/connector_typesense/models/se_backend.py
new file mode 100644
index 00000000..c6ec0e6f
--- /dev/null
+++ b/connector_typesense/models/se_backend.py
@@ -0,0 +1,48 @@
+# Copyright 2024 Derico
+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl).
+
+from odoo import fields, models
+
+from ..tools.adapter import TypesenseAdapter
+
+
+class SeBackend(models.Model):
+    _inherit = "se.backend"
+
+    backend_type = fields.Selection(
+        selection_add=[("typesense", "Typesense")],
+        ondelete={"typesense": "cascade"},
+        string="Type",
+        required=True,
+    )
+    ts_server_host = fields.Char(
+        string="Typesense host",
+        groups="connector_search_engine.group_connector_search_engine_manager",
+    )
+    ts_server_port = fields.Char(
+        string="Typesense port",
+        groups="connector_search_engine.group_connector_search_engine_manager",
+    )
+    ts_server_protocol = fields.Char(
+        string="Typesense protocol",
+        groups="connector_search_engine.group_connector_search_engine_manager",
+    )
+    ts_server_timeout = fields.Integer(
+        string="Typesense server timeout",
+        groups="connector_search_engine.group_connector_search_engine_manager",
+    )
+    api_key_id = fields.Char(
+        help="Typesense Api Key ID",
+        string="Api Key ID",
+        groups="connector_search_engine.group_connector_search_engine_manager",
+    )
+    api_key = fields.Char(
+        help="Typesense Api Key",
+        groups="connector_search_engine.group_connector_search_engine_manager",
+    )
+
+    def _get_adapter_class(self):
+        if self.backend_type == "typesense":
+            return TypesenseAdapter
+        return super()._get_adapter_class()
diff --git a/connector_typesense/models/se_index.py b/connector_typesense/models/se_index.py
new file mode 100644
index 00000000..f5a398a1
--- /dev/null
+++ b/connector_typesense/models/se_index.py
@@ -0,0 +1,10 @@
+# Copyright 2024 Derico
+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl).
+
+from odoo import models
+
+
+class SeIndex(models.Model):
+    _inherit = "se.index"
diff --git a/connector_typesense/readme/CHANGES.rst b/connector_typesense/readme/CHANGES.rst
new file mode 100644
index 00000000..ec3fd05c
--- /dev/null
+++ b/connector_typesense/readme/CHANGES.rst
@@ -0,0 +1,37 @@
+Changelog
+---------
+
+Future (?)
+~~~~~~~~~~
+
+
+14.0.2.0.0
+~~~~~~~~~~
+
+**Breaking change**
+
+For historical reasons (the first implementation targeted Algolia):
+
+- the id of the binding was added in the index
+- and for Elasticsearch/Algolia the objectID (= the id of the bound record) was also added
+
+This led to an incomprehensible situation where frontend developers had an "id"
+and an "objectID" with different values and no idea which one should be used for
+filtering.
+
+The magic injection of the "id" has been removed (as we already define an export
+line in Shopinvader), and explicit is better than implicit.
+
+Note: in Shopinvader we push in the key "id" the id of the bound record (we do
+not care about the id of the binding).
+
+The Elasticsearch connector no longer uses the "objectID" key.
+The Algolia connector still uses the "objectID" (required), but its value is the
+same as the id.
+
+See Shopinvader issue `#1000 <https://github.com/shopinvader/odoo-shopinvader/issues/1000>`_
+
+
+12.0.2.0.0
+~~~~~~~~~~
+
+- the index field name is now a computed field based on the backend name, the
+  exported model and the lang
+- remove the dependency on keychain
+- improve the UI of the search engine backend (domain on model and exporter...)
+- improve test coverage
+- use black for automatic code style
diff --git a/connector_typesense/readme/CONTRIBUTORS.rst b/connector_typesense/readme/CONTRIBUTORS.rst
new file mode 100644
index 00000000..0af58a41
--- /dev/null
+++ b/connector_typesense/readme/CONTRIBUTORS.rst
@@ -0,0 +1,5 @@
+* Sébastien BEAU
+* Laurent Mignon
+* Simone Orsi
+* Raphaël Reverdy
+* Maik Derstappen
diff --git a/connector_typesense/readme/DESCRIPTION.rst b/connector_typesense/readme/DESCRIPTION.rst
new file mode 100644
index 00000000..985e0128
--- /dev/null
+++ b/connector_typesense/readme/DESCRIPTION.rst
@@ -0,0 +1,4 @@
+This addon provides the basis to implement addons that export information to
+Typesense_ indexes.
+
+.. _Typesense: https://typesense.org
diff --git a/connector_typesense/readme/HISTORY.rst b/connector_typesense/readme/HISTORY.rst
new file mode 100644
index 00000000..60234453
--- /dev/null
+++ b/connector_typesense/readme/HISTORY.rst
@@ -0,0 +1,36 @@
+16.0.0.1.1 (2023-10-13)
+~~~~~~~~~~~~~~~~~~~~~~~
+
+**Bugfixes**
+
+- Fixes a cache issue with the *se_binding_ids* field on the *se.indexable.record*
+  model. When a binding is created, updated or deleted, the cache for the
+  *se_binding_ids* field of the referenced records is now invalidated. That way,
+  the next time the field is accessed after such an operation, the value is
+  recomputed to reflect the change. (`#163 <https://github.com/OCA/search-engine/issues/163>`_)
+
+
+16.0.0.1.0 (2023-10-13)
+~~~~~~~~~~~~~~~~~~~~~~~
+
+**Features**
+
+- A new action **Update state** is now available on *Search Engine Record* objects.
+  This action allows you to update the state of the selected records from the tree view.
+
+  Add a smart button to quickly access the bound records from the
+  *Search Engine Backend* and *Search Engine Record* views. (`#162 <https://github.com/OCA/search-engine/issues/162>`__)
+
+
+**Bugfixes**
+
+- Fix the Search Engine Binding form view. The fields data and error are now
+  properly displayed and fit the width of the form.
+
+  Makes Odoo's admin user a member of the *Search Engine Connector Manager* group. (`#162 <https://github.com/OCA/search-engine/issues/162>`__)
+
+
+12.0.x.y.z (YYYY-MM-DD)
+~~~~~~~~~~~~~~~~~~~~~~~
+
+TODO
diff --git a/connector_typesense/readme/INSTALL.rst b/connector_typesense/readme/INSTALL.rst
new file mode 100644
index 00000000..543187da
--- /dev/null
+++ b/connector_typesense/readme/INSTALL.rst
@@ -0,0 +1,13 @@
+This addon uses the native ``json`` package provided by Python. When
+the JSON for a record is recomputed, the new value is compared to the original
+one to see if an export to the search engine index is needed. This is
+done by comparing the md5 of the two JSON strings. On a large number of
+records this process can be slow when the JSON is large and complex. To speed
+up this process you can install the ``orjson`` package.
+
+.. code-block:: bash
+
+    pip install orjson
+
+This addon requires a running Typesense search engine. See the Typesense
+documentation for a `quick Docker based setup
+<https://typesense.org/docs/guide/install-typesense.html#option-2-local-machine-self-hosting>`_.
diff --git a/connector_typesense/readme/ROADMAP.rst b/connector_typesense/readme/ROADMAP.rst
new file mode 100644
index 00000000..94648904
--- /dev/null
+++ b/connector_typesense/readme/ROADMAP.rst
@@ -0,0 +1,4 @@
+* Implement a generic trigger for bindings
+  based on an ir.exports record linked to the index
+  (the aim is to mark the binding as to be updated
+  when a field configured in the exporter is modified)
diff --git a/connector_typesense/readme/TYPESENSE.rst b/connector_typesense/readme/TYPESENSE.rst
new file mode 100644
index 00000000..a9f4237d
--- /dev/null
+++ b/connector_typesense/readme/TYPESENSE.rst
@@ -0,0 +1,57 @@
+typesense
+~~~~~~~~~
+
+Requirements: to pretty-print JSON, install ``jq`` on your system.
+
+In a terminal, export the API key and host:
+
+.. code-block:: bash
+
+    export TYPESENSE_API_KEY='xyz'
+    export TYPESENSE_HOST='http://localhost:8108'
+
+List all collections (indexes):
+
+.. code-block:: bash
+
+    curl -H "X-TYPESENSE-API-KEY: ${TYPESENSE_API_KEY}" \
+         "http://localhost:8108/collections" | jq
+
+Retrieve the collection details of the products collection (index):
+
+.. code-block:: bash
+
+    curl -H "X-TYPESENSE-API-KEY: ${TYPESENSE_API_KEY}" \
+         -X GET "http://localhost:8108/collections/typesense_backend_1_product_variant_en_us" | jq
+
+List aliases:
+
+.. code-block:: bash
+
+    curl -H "X-TYPESENSE-API-KEY: ${TYPESENSE_API_KEY}" \
+         "http://localhost:8108/aliases" | jq
+
+Get alias info:
+
+.. code-block:: bash
+
+    curl -H "X-TYPESENSE-API-KEY: ${TYPESENSE_API_KEY}" \
+         "http://localhost:8108/aliases/typesense_backend_1_product_variant_en_us" | jq
+
+Search for all products:
+
+.. code-block:: bash
+
+    curl -H "X-TYPESENSE-API-KEY: ${TYPESENSE_API_KEY}" \
+         "http://localhost:8108/collections/typesense_backend_1_product_variant_en_us/documents/search?q=*" | jq
+
+Typesense GUI
+~~~~~~~~~~~~~
+
+A nice UI is also available: https://github.com/bfritscher/typesense-dashboard/releases
diff --git a/connector_typesense/readme/USAGE.rst b/connector_typesense/readme/USAGE.rst
new file mode 100644
index 00000000..cbb5f407
--- /dev/null
+++ b/connector_typesense/readme/USAGE.rst
@@ -0,0 +1,102 @@
+Overview
+~~~~~~~~
+
+A search engine is a system designed to store information in a way that makes
+it easy to find through search and analytics queries. The main difference
+between a search engine and a database is that a search engine is optimized
+for search and analytics queries, while a database is optimized for
+transactional and relational queries.
+
+This addon is designed around 4 main concepts:
+
+* **The search engine backend** is used to define in Odoo the kind
+  of search engine that will be used to index the data. Its main responsibility
+  is to provide an instance of `odoo.addons.search_engine.tools.adapter.SearchEngineAdapter`
+  that will be used to communicate with the search engine.
+
+* **The search engine index** is used to define in Odoo the index where
+  the data will be indexed. An index is always linked to a search engine backend.
+  The index provides methods to manage the lifecycle of the data put into
+  the index for the records of a given model. To do so, it uses:
+
+  * **The SearchEngineAdapter** provided by the backend to communicate with the
+    search engine.
+  * **A ModelSerializer** that is used to transform an Odoo record into
+    a dictionary that can be indexed into the search engine.
+  * **A JsonValidator** that is used to validate the data that is to be
+    indexed into the search engine.
+
+  The ModelSerializer and JsonValidator are defined on the index itself.
+  The current addon provides a default implementation only for the JsonValidator.
+  In the `search-engine <https://github.com/OCA/search-engine>`_ GitHub repository
+  you can find an implementation of the ModelSerializer based on the jsonifier
+  addon, `connector_search_engine_jsonifier`.
+
+* **The search engine indexable record** is a mixin that is used to define
+  the records that can be indexed into a search engine index. The mixin
+  provides methods:
+
+  * To add a record to an index.
+  * To remove a record from an index.
+  * To mark the record in an index (*the search engine binding*) as to be
+    recomputed. This method should be called when modifications are made on
+    the record that could impact the data indexed into the search
+    engine. It instructs the index that the record must be recomputed and
+    re-indexed.
+
+  It also ensures that when the record is unlinked, it is removed from the indexes
+  it was indexed into.
+
+* **The search engine binding** is a model that represents the link between
+  an index and an indexable Odoo record. It gives you access to the data
+  that are indexed into the search engine for the record. It's also used to
+  manage the lifecycle of the data in the search engine. When a binding is
+  created, it's marked as to be computed. Once the data are computed, the
+  binding is marked as to be indexed. Once the data are indexed, the binding
+  is marked as indexed. If the linked record is unlinked, the binding is
+  marked as to be removed. Once the data are removed from the search engine,
+  the binding is deleted.
+
+Indexing lifecycle
+~~~~~~~~~~~~~~~~~~
+
+The indexing lifecycle is based on the following steps:
+
+* When a record is added to an index, a binding is created and marked as to be
+  computed.
+* A cron job scheduled every 5 minutes looks for bindings that are to be
+  computed and, for each of them, schedules a job to recompute the JSON data.
+* When the JSON data is computed, the binding is marked as to be exported if the
+  JSON is valid and differs from the one computed last time.
+* A cron job scheduled every 5 minutes ensures the syncing with the search
+  engine. It will:
+
+  * look for bindings that are to be exported and, for each of them, schedule
+    a job to export the JSON data into the search engine. Once exported, the
+    binding is marked as 'done'.
+  * look for bindings that are to be removed and, for each of them, schedule
+    a job to remove the data from the search engine. Once removed, the binding
+    is deleted.
+
+To keep the data from your model instance in sync with the data indexed in the
+search engine, you should call the method `_se_mark_to_update` on the model
+instance when you make modifications that could impact the indexed data.
+
+* When the method `_se_mark_to_update` is called, the binding is marked as to be
+  computed.
+* From there, the same process as described above will be used to recompute the
+  data and reindex them into the search engine.
+
+When a model instance is unlinked, the binding is marked as to be removed. From
+there it will be processed by the job syncing the data with the search engine.
+
+.. note::
+
+    In previous versions of this addon, there was no method to mark a record as
+    to be recomputed. As a consequence, all the records were recomputed every day
+    to ensure that the data in the search engine were up to date. This was a
+    performance issue and consumed a lot of resources. If, despite this, you want
+    to recompute all the records every day, you can activate the cron job
+    `Search engine: recompute all index` and deactivate the one named
+    `Search engine: Generate job for recompute binding to recompute per index`.
diff --git a/connector_typesense/readme/newsfragments/.gitignore b/connector_typesense/readme/newsfragments/.gitignore
new file mode 100644
index 00000000..e69de29b
diff --git a/connector_typesense/static/description/index.html b/connector_typesense/static/description/index.html
new file mode 100644
index 00000000..2185e470
--- /dev/null
+++ b/connector_typesense/static/description/index.html
@@ -0,0 +1,603 @@
+
+
+
+
+
+connector_typesense
+
+
+
+

connector_typesense

+ + +

Beta License: AGPL-3 OCA/search-engine Translate me on Weblate Try me on Runboat

+

This addon provides the basis to implement addons that export information to +Typesense indexes. It’s designed to be easily extensible and modular.

+

Table of contents

+ +
+

Installation

+

This addon uses the native json package provided by Python. When +a JSON for a record is recomputed, the new value is compared to the original +one to see if an export to the search engine index is needed. This is +done by comparing the md5 of the two JSON strings. On a large number of records +this process can be slow when the JSON is large and complex. To speed +up this process you can install the orjson package.

+
+pip install orjson
+
+

This addon requires a running Typesense search engine. +See the Typesense documentation for a quick Docker based setup: +https://typesense.org/docs/guide/install-typesense.html#option-2-local-machine-self-hosting

+
+
+

Usage

+
+

Overview

+

A search engine is a system designed to store information in a way that makes +it easy to find through search and analytics queries. The main difference +between a search engine and a database is that a search engine is optimized +for search and analytics queries, while a database is optimized for +transactional and relational queries.

+

This addon is designed around 4 main concepts:

+
    +
  • The search engine backend is used to define in Odoo the kind +of search engine that will be used to index the data. Its main responsibility +is to provide an instance of odoo.addons.search_engine.tools.adapter.SearchEngineAdapter +that will be used to communicate with the search engine.

    +
  • +
  • The search engine index is used to define in Odoo the index where +the data will be indexed. An index is always linked to a search engine backend. +The index provides methods to manage the lifecycle of the data put into +the index for the records of a given model. To do so, it uses:

    +
      +
    • The SearchEngineAdapter provided by the backend to communicate with the +search engine.
    • +
    • A ModelSerializer that is used to transform an Odoo record into +a dictionary that can be indexed into the search engine.
    • +
    • A JsonValidator that is used to validate the data that is to be +indexed into the search engine.
    • +
    +

    The ModelSerializer and JsonValidator are defined on the index itself. +The current addon provides a default implementation only for the JsonValidator. +In the search-engine GitHub repository you can find an implementation of the ModelSerializer based +on the jsonifier addon connector_search_engine_jsonifier.

    +
  • +
  • The search engine indexable record is a mixin that is used to define +the records that can be indexed into a search engine index. The mixin +provides methods:

    +
      +
    • To add a record to an index.
    • +
    • To remove a record from an index.
    • +
    • To mark the record in an index (the search engine binding) as to be +recomputed. This method should be called when modifications are made on +the record that could impact the data indexed into the search +engine. It instructs the index that the record must be recomputed and +re-indexed.
    • +
    +

    It also ensures that when the record is unlinked, it is removed from the indexes +it was indexed into.

    +
  • +
  • The search engine binding is a model that represents the link between +an index and an indexable Odoo record. It gives you access to the data +that are indexed into the search engine for the record. It’s also used to +manage the lifecycle of the data in the search engine. When a binding is +created, it’s marked as to be computed. Once the data are computed, the +binding is marked as to be indexed. Once the data are indexed, the binding +is marked as indexed. If the linked record is unlinked, the binding is +marked as to be removed. Once the data are removed from the search engine, +the binding is deleted.

    +
  • +
+
+
+

Indexing lifecycle

+

The indexing lifecycle is based on the following steps:

+
    +
  • When a record is added to an index, a binding is created and marked as to be +computed.
  • +
  • A cron job scheduled every 5 minutes will look for bindings that are to be +computed and for each of them will schedule a job to recompute the JSON data.
  • +
  • When the json data is computed, the binding is marked as to be exported if the +json is valid and is different from the one that has been computed last time.
  • +
  • A cron job scheduled every 5 minutes will ensure the syncing with the search +engine. It will:
      +
    • look for bindings that are to be exported and for each of them will schedule +a job to export the json data into the search engine. Once exported, the +binding is marked as ‘done’.
    • +
    • look for bindings that are to be removed and for each of them will schedule +a job to remove the data from the search engine. Once removed, the binding +is deleted.
    • +
    +
  • +
+

To keep the data from your model instance in sync with the data indexed +in the search engine, you should call the method _se_mark_to_update on the +model instance when you make modifications that could impact the indexed data.

+
    +
  • When the method _se_mark_to_update is called, the binding is marked as to be +computed.
  • +
  • From there, the same process as described above will be used to recompute the +data and reindex them into the search engine.
  • +
+

When a model instance is unlinked, the binding is marked as to be removed. From +there it will be processed by the job syncing the data with the search engine.

+
+

Note

+

In previous versions of this addon, there was no method to mark a record as +to be recomputed. As a consequence, all the records were recomputed every day +to ensure that the data in the search engine were up to date. This was a +performance issue and consumed a lot of resources. If, despite this, you want +to recompute all the records every day, you can activate the cron job +Search engine: recompute all index and deactivate the one named +Search engine: Generate job for recompute binding to recompute per index.

+
+
+
+
+

Known issues / Roadmap

+
    +
  • Implement generic trigger for binding +based on ir.export linked to the index +(the aim is to set the binding to be updated +if we modify a field configured in the exporter)
  • +
+
+
+

Changelog

+
+

16.0.0.1.1 (2023-10-13)

+

Bugfixes

+
    +
  • Fixes a cache issue with the se_binding_ids field on the se.indexable.record +model. When a binding is created, updated or deleted, the cache for the +se_binding_ids field of the referenced records is now invalidated. That way, +the next time the field is accessed after such an operation, the value is +recomputed to reflect the change. (#163)
  • +
+
+
+

16.0.0.1.0 (2023-10-13)

+

Features

+
    +
  • A new action Update state is now available on Search Engine Record objects. +This action allows you to update the state of the selected records from the tree view.

    +

    Add a smart button to quickly access the bound records from the +Search Engine Backend and Search Engine Record views. (#162)

    +
  • +
+

Bugfixes

+
    +
  • Fix the Search Engine Binding form view. The fields data and error are now +properly displayed and fit the width of the form.

    +

    Makes Odoo’s admin user a member of the Search Engine Connector Manager group. (#162)

    +
  • +
+
+ +
+
+

Bug Tracker

+

Bugs are tracked on GitHub Issues. +In case of trouble, please check there if your issue has already been reported. +If you spotted it first, help us to smash it by providing a detailed and welcomed +feedback.

+

Do not contact contributors directly about support or help with technical issues.

+
+
+

Credits

+
+

Authors

+
    +
  • Derico
  • +
+
+
+

Contributors

+ +
+
+

Maintainers

+

This module is maintained by the OCA.

+Odoo Community Association +

OCA, or the Odoo Community Association, is a nonprofit organization whose +mission is to support the collaborative development of Odoo features and +promote its widespread use.

+

This module is part of the OCA/search-engine project on GitHub.

+

You are welcome to contribute. To learn how please visit https://odoo-community.org/page/Contribute.

+
+
+
+
+
+
diff --git a/connector_typesense/tools/__init__.py b/connector_typesense/tools/__init__.py
new file mode 100644
index 00000000..f502287f
--- /dev/null
+++ b/connector_typesense/tools/__init__.py
@@ -0,0 +1 @@
+from . import adapter
diff --git a/connector_typesense/tools/adapter.py b/connector_typesense/tools/adapter.py
new file mode 100644
index 00000000..0f0019ae
--- /dev/null
+++ b/connector_typesense/tools/adapter.py
@@ -0,0 +1,257 @@
+# Copyright 2024 Derico
+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl).
+
+import json
+import logging
+from typing import Any, Iterator
+
+from odoo import _
+from odoo.exceptions import UserError
+
+from odoo.addons.connector_search_engine.tools.adapter import SearchEngineAdapter
+
+_logger = logging.getLogger(__name__)
+
+
+try:
+    import typesense
+except ImportError:
+    _logger.debug("Cannot import typesense")
+
+
+class TypesenseAdapter(SearchEngineAdapter):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.__ts_client = None
+
+    @property
+    def _index_name(self):
+        return self.index_record.name.lower()
+
+    @property
+    def _ts_client(self):
+        if not self.__ts_client:
+            self.__ts_client = self._get_ts_client()
+        return self.__ts_client
+
+    @property
+    def _index_config(self):
+        return self.index_record.config_id.body
+
+    def _get_ts_client(self):
+        backend = self.backend_record
+        return typesense.Client(
+            {
+                "nodes": [
+                    {
+                        "host": backend.ts_server_host,
+                        "port": backend.ts_server_port,
+                        "protocol": backend.ts_server_protocol,
+                    }
+                ],
+                "api_key": backend.api_key,
+                "connection_timeout_seconds": int(backend.ts_server_timeout) or 300,
+            }
+        )
+
+    def test_connection(self):
+        ts = self._ts_client
+        try:
+            ts.collections.retrieve()
+        except typesense.exceptions.ObjectNotFound as exc:
+            raise UserError(
+                _("Not Found - The requested resource is not found.")
+            ) from exc
+        except typesense.exceptions.RequestUnauthorized as exc:
+            raise UserError(_("Unauthorized - Your API key is wrong.")) from exc
+        except typesense.exceptions.TypesenseClientError as exc:
+            raise UserError(_("Unable to connect:") + "\n\n" + repr(exc)) from exc
+
+    def index(self, records) -> None:
+        """Bulk-import the given records into the index (one JSONL line each)."""
+        ts = self._ts_client
+        records_for_bulk = ""
+        for record in records:
+            # Typesense document ids must be strings
+            if "id" in record:
+                record["id"] = str(record["id"])
+            records_for_bulk += f"{json.dumps(record)}\n"
+
+        _logger.info(f"Bulk import records into '{self._index_name}'...")
+        res = ts.collections[self._index_name].documents.import_(
+            records_for_bulk, {"action": "emplace"}
+        )
+        res = res.split("\n")
+        # check that the number of import results matches the number of records
+        if not len(res) == len(records):
+            raise SystemError(
+                _(
+                    "Unable to index all records. (indexed: %(indexed)s, "
+                    "total: %(total)s)\n%(result)s",
+                    indexed=len(res),
+                    total=len(records),
+                    result=res,
+                )
+            )
+
+    def delete(self, binding_ids) -> None:
+        """Delete the documents matching the given binding ids from the index."""
+        ts = self._ts_client
+        ids = ",".join(str(binding_id) for binding_id in binding_ids)
+        _logger.info(
+            f"Delete binding_ids: {ids} from collection "
+            f"'{self._index_name}'."
+        )
+        # delete by query, using Typesense's filter_by syntax
+        ts.collections[self._index_name].documents.delete(
+            {"filter_by": f"id: [{ids}]"}
+        )
+
+    def clear(self) -> None:
+        """Drop the current aliased collection and recreate it empty."""
+        ts = self._ts_client
+        index_name = self._get_current_aliased_index_name() or self._index_name
+        _logger.info(f"Clear current_aliased_index_name '{index_name}'.")
+        ts.collections[index_name].delete()
+        self.settings()
+
+    def each(self) -> Iterator[dict[str, Any]]:
+        """Iterate over the documents of the index.
+
+        Note: a search returns only the first page of results by default.
+        """
+        ts = self._ts_client
+        res = ts.collections[self._index_name].documents.search(
+            {
+                "q": "*",
+            }
+        )
+        if not res:
+            # e.g. empty index
+            return
+        for hit in res["hits"]:
+            yield hit["document"]
+
+    def settings(self) -> None:
+        ts = self._ts_client
+        try:
+            ts.collections[self._index_name].retrieve()
+        except typesense.exceptions.ObjectNotFound:
+            client = self._ts_client
+            # To allow rolling updates, we work with index aliases
+            aliased_index_name = self._get_next_aliased_index_name()
+            # index_name / collection_name is part of the schema defined in
+            # self._index_config
+            index_config = self._index_config
+            index_config.update(
+                {
+                    "name": aliased_index_name,
+                }
+            )
+            _logger.info(f"Create aliased_index_name '{aliased_index_name}'...")
+            client.collections.create(index_config)
+            _logger.info(
+                f"Set collection alias '{self._index_name}' >> aliased_index_name "
+                f"'{aliased_index_name}'."
+            )
+            client.aliases.upsert(
+                self._index_name, {"collection_name": aliased_index_name}
+            )
+
+    def _get_current_aliased_index_name(self) -> str | None:
+        """Get the current aliased index name, if any."""
+        try:
+            alias = self._ts_client.aliases[self._index_name].retrieve()
+        except typesense.exceptions.ObjectNotFound:
+            return None
+        return alias.get("collection_name")
+
+    def _get_next_aliased_index_name(
+        self, aliased_index_name: str | None = None
+    ) -> str:
+        """Get the next aliased index name
+
+        The next aliased index name is the current aliased index name with its
+        trailing version number incremented by 1
+        (e.g. "my_index-1" -> "my_index-2").
+
+        :param aliased_index_name: the current aliased index name
+        :return: the next aliased index name
+        """
+        next_version = 1
+        if aliased_index_name:
+            next_version = int(aliased_index_name.split("-")[-1]) + 1
+        return f"{self._index_name}-{next_version}"
+
+    def reindex(self) -> None:
+        """Reindex records according to the current config.
+
+        This method is useful to allow a rolling update of the index
+        configuration. The process is based on the following steps:
+
+        1. export data from the current aliased index
+        2. create a new index (collection) with the current config
+        3. import data into the new aliased index (collection)
+        4. update the index alias to point to the new aliased index (collection)
+        5. drop the old index.
+        """
+        client = self._ts_client
+        current_aliased_index_name = self._get_current_aliased_index_name()
+        data = client.collections[current_aliased_index_name].documents.export()
+        next_aliased_index_name = self._get_next_aliased_index_name(
+            current_aliased_index_name
+        )
+        try:
+            client.collections[next_aliased_index_name].retrieve()
+        except typesense.exceptions.ObjectNotFound:
+            # To allow rolling updates, we work with index aliases
+            # index_name / collection_name is part of the schema defined
+            # in self._index_config
+            _logger.info(
+                f"Create new_aliased_index_name '{next_aliased_index_name}'..."
+            )
+            index_config = self._index_config
+            index_config.update(
+                {
+                    "name": next_aliased_index_name,
+                }
+            )
+            client.collections.create(index_config)
+            _logger.info(
+                f"Import existing data into new_aliased_index_name "
+                f"'{next_aliased_index_name}'..."
+            )
+            client.collections[next_aliased_index_name].documents.import_(
+                data.encode("utf-8"), {"action": "create"}
+            )
+
+            try:
+                client.collections[next_aliased_index_name].retrieve()
+            except typesense.exceptions.ObjectNotFound as e:
+                _logger.warning(
+                    f"New aliased_index_name not found, skip updating alias and "
+                    f"not removing old index (collection)!\n\n{e}"
+                )
+            else:
+                _logger.info(
+                    f"Set collection alias '{self._index_name}' >> "
+                    f"new_aliased_index_name '{next_aliased_index_name}'."
+                )
+                client.aliases.upsert(
+                    self._index_name, {"collection_name": next_aliased_index_name}
+                )
+                _logger.info(
+                    f"Remove old aliased index (collection) "
+                    f"'{current_aliased_index_name}'."
+                )
+                client.collections[current_aliased_index_name].delete()
+
+        else:
+            _logger.warning(
+                f"next_aliased_index_name '{next_aliased_index_name}' "
+                f"already exists, skip!"
+            )
diff --git a/connector_typesense/views/se_backend.xml b/connector_typesense/views/se_backend.xml
new file mode 100644
index 00000000..426ec139
--- /dev/null
+++ b/connector_typesense/views/se_backend.xml
@@ -0,0 +1,36 @@
+
+
+
+        se.backend
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/requirements.txt b/requirements.txt
index 4ac43d30..88e80a07 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@ elasticsearch>=7.0.0,<=7.13.4
 pydantic
 requests
 typing-extensions
+typesense
 unidecode
diff --git a/setup/_metapackage/setup.py b/setup/_metapackage/setup.py
index 987fe1aa..0ced9abf 100644
--- a/setup/_metapackage/setup.py
+++ b/setup/_metapackage/setup.py
@@ -1,6 +1,6 @@
 import setuptools
 
-with open('VERSION.txt', 'r') as f:
+with open("VERSION.txt", "r") as f:
     version = f.read().strip()
 
 setuptools.setup(
@@ -8,15 +8,15 @@
     description="Meta package for oca-search-engine Odoo addons",
     version=version,
     install_requires=[
-        'odoo-addon-connector_elasticsearch>=16.0dev,<16.1dev',
-        'odoo-addon-connector_search_engine>=16.0dev,<16.1dev',
-        'odoo-addon-connector_search_engine_serializer_ir_export>=16.0dev,<16.1dev',
-        'odoo-addon-search_engine_image_thumbnail>=16.0dev,<16.1dev',
-        'odoo-addon-search_engine_serializer_pydantic>=16.0dev,<16.1dev',
+        "odoo-addon-connector_elasticsearch>=16.0dev,<16.1dev",
+        "odoo-addon-connector_search_engine>=16.0dev,<16.1dev",
+        "odoo-addon-connector_search_engine_serializer_ir_export>=16.0dev,<16.1dev",
+        "odoo-addon-search_engine_image_thumbnail>=16.0dev,<16.1dev",
+        "odoo-addon-search_engine_serializer_pydantic>=16.0dev,<16.1dev",
     ],
     classifiers=[
-        'Programming Language :: Python',
-        'Framework :: Odoo',
-        'Framework :: Odoo :: 16.0',
-    ]
+        "Programming Language :: Python",
+        "Framework :: Odoo",
+        "Framework :: Odoo :: 16.0",
+    ],
 )
diff --git a/setup/connector_elasticsearch/setup.py b/setup/connector_elasticsearch/setup.py
index 28c57bb6..00a90304 100644
--- a/setup/connector_elasticsearch/setup.py
+++ b/setup/connector_elasticsearch/setup.py
@@ -1,6 +1,6 @@
 import setuptools
 
 setuptools.setup(
-    setup_requires=['setuptools-odoo'],
+    setup_requires=["setuptools-odoo"],
     odoo_addon=True,
 )
diff --git a/setup/connector_search_engine/setup.py b/setup/connector_search_engine/setup.py
index 28c57bb6..00a90304 100644
--- a/setup/connector_search_engine/setup.py
+++ b/setup/connector_search_engine/setup.py
@@ -1,6 +1,6 @@
 import setuptools
 
 setuptools.setup(
-    setup_requires=['setuptools-odoo'],
+    setup_requires=["setuptools-odoo"],
     odoo_addon=True,
 )
diff --git a/setup/connector_search_engine_serializer_ir_export/setup.py b/setup/connector_search_engine_serializer_ir_export/setup.py
index 28c57bb6..00a90304 100644
--- a/setup/connector_search_engine_serializer_ir_export/setup.py
+++ b/setup/connector_search_engine_serializer_ir_export/setup.py
@@ -1,6 +1,6 @@
 import setuptools
 
 setuptools.setup(
-    setup_requires=['setuptools-odoo'],
+    setup_requires=["setuptools-odoo"],
     odoo_addon=True,
 )
diff --git a/setup/connector_typesense/odoo/addons/connector_typesense b/setup/connector_typesense/odoo/addons/connector_typesense
new file mode 120000
index 00000000..1ffbdd91
--- /dev/null
+++ b/setup/connector_typesense/odoo/addons/connector_typesense
@@ -0,0 +1 @@
+../../../../connector_typesense
\ No newline at end of file
diff --git a/setup/connector_typesense/setup.py b/setup/connector_typesense/setup.py
new file mode 100644
index 00000000..00a90304
--- /dev/null
+++ b/setup/connector_typesense/setup.py
@@ -0,0 +1,6 @@
+import setuptools
+
+setuptools.setup(
+    setup_requires=["setuptools-odoo"],
+    odoo_addon=True,
+)
diff --git a/setup/search_engine_image_thumbnail/setup.py b/setup/search_engine_image_thumbnail/setup.py
index 28c57bb6..00a90304 100644
--- a/setup/search_engine_image_thumbnail/setup.py
+++ b/setup/search_engine_image_thumbnail/setup.py
@@ -1,6 +1,6 @@
 import setuptools
 
 setuptools.setup(
-    setup_requires=['setuptools-odoo'],
+    setup_requires=["setuptools-odoo"],
     odoo_addon=True,
 )
diff --git a/setup/search_engine_serializer_pydantic/setup.py b/setup/search_engine_serializer_pydantic/setup.py
index 28c57bb6..00a90304 100644
--- a/setup/search_engine_serializer_pydantic/setup.py
+++ b/setup/search_engine_serializer_pydantic/setup.py
@@ -1,6 +1,6 @@
 import setuptools
 
 setuptools.setup(
-    setup_requires=['setuptools-odoo'],
+    setup_requires=["setuptools-odoo"],
     odoo_addon=True,
 )