From 9c76c826a934c98cb519bd57cb734fcd6ab0c64c Mon Sep 17 00:00:00 2001 From: Sebastien Date: Thu, 2 Dec 2021 17:45:29 +0100 Subject: [PATCH] Refacto datasources with a factory (#288) * simply add tripadvisor trigger logic to suggest * add tests for tripadvisor * first version refacto datasources with factory pattern * unravel circular depedencies : multiple utils module file were shared between multiples modules -> I migrate then to the utils module in a "half-dirty" way * improve factory and clean `get_name` function that was in utils but only used once * use classic factory instead to make test success * divide `get_places_bbox_impl` into sub-functions * create two functions to differentiate france and worldwide behavior for the datasource selection * fix review Co-authored-by: sdirollo --- idunn/api/__init__.py | 1 + idunn/api/categories.py | 3 +- idunn/api/closest.py | 2 +- idunn/api/constants.py | 1 + idunn/api/directions.py | 4 +- idunn/api/instant_answer.py | 6 +- idunn/api/places.py | 5 +- idunn/api/places_list.py | 149 ++++--- idunn/api/urls.py | 15 +- idunn/api/utils.py | 172 -------- idunn/datasources/__init__.py | 13 + idunn/datasources/osm.py | 34 ++ idunn/datasources/pages_jaunes.py | 21 +- idunn/datasources/tripadvisor.py | 25 +- idunn/geocoder/models/params.py | 13 +- idunn/geocoder/nlu_client.py | 2 +- idunn/places/__init__.py | 2 - idunn/places/base.py | 4 +- idunn/places/poi.py | 51 ++- idunn/utils/categories.yml | 3 +- idunn/utils/category.py | 44 ++ idunn/{places/utils.py => utils/place.py} | 12 +- idunn/utils/verbosity.py | 92 +++++ tests/conftest.py | 25 ++ tests/fixtures/pj/__init__.py | 6 +- tests/fixtures/tripadvisor_hotel_suecka.json | 403 +++++++++++++++++++ tests/test_categories.py | 41 +- tests/utils.py | 2 +- 28 files changed, 840 insertions(+), 311 deletions(-) delete mode 100644 idunn/api/utils.py create mode 100644 idunn/datasources/osm.py create mode 100644 idunn/utils/category.py rename idunn/{places/utils.py => utils/place.py} (88%) create mode 100644 idunn/utils/verbosity.py create mode 100644 tests/fixtures/tripadvisor_hotel_suecka.json diff --git a/idunn/api/__init__.py b/idunn/api/__init__.py index e69de29bb..788bc8963 100644 --- a/idunn/api/__init__.py +++ b/idunn/api/__init__.py @@ -0,0 +1 @@ +from .places_list import PlacesQueryParam diff --git a/idunn/api/categories.py b/idunn/api/categories.py index 25eba403f..8a43461bf 100644 --- a/idunn/api/categories.py +++ b/idunn/api/categories.py @@ -1,7 +1,8 @@ -from .utils import Category from pydantic import BaseModel, Field from typing import List +from idunn.utils.category import Category + class CategoryDescription(BaseModel): name: str = Field(..., description="Unique label of the category.") diff --git a/idunn/api/closest.py b/idunn/api/closest.py index 3b9c44c25..f402f5553 100644 --- a/idunn/api/closest.py +++ b/idunn/api/closest.py @@ -7,8 +7,8 @@ from idunn.utils.es_wrapper import get_elasticsearch from idunn.utils import prometheus from idunn.places import Street, Address, Place -from idunn.api.utils import Verbosity from idunn.datasources.mimirsbrunn import fetch_closest, get_es_place_type +from idunn.utils.verbosity import Verbosity logger = logging.getLogger(__name__) diff --git a/idunn/api/constants.py b/idunn/api/constants.py index 6645f3450..e63a9c16e 100644 --- a/idunn/api/constants.py +++ b/idunn/api/constants.py @@ -4,6 +4,7 @@ class PoiSource(str, Enum): OSM = "osm" PAGESJAUNES = "pages_jaunes" + TRIPADVISOR = "tripadvisor" ALL_POI_SOURCES = [s.value for s in PoiSource] diff --git a/idunn/api/directions.py b/idunn/api/directions.py index eef272fa8..d59d72442 100644 --- a/idunn/api/directions.py +++ b/idunn/api/directions.py @@ -2,11 +2,11 @@ from pydantic import confloat from idunn import settings -from idunn.places import Latlon, place_from_id +from idunn.places import Latlon from idunn.places.exceptions import IdunnPlaceError from idunn.utils.rate_limiter import IdunnRateLimiter from ..directions.client import directions_client - +from ..utils.place import place_from_id rate_limiter = IdunnRateLimiter( resource="idunn.api.directions", diff --git a/idunn/api/instant_answer.py b/idunn/api/instant_answer.py index 0f302b314..f28a5ec2a 100644 --- a/idunn/api/instant_answer.py +++ b/idunn/api/instant_answer.py @@ -13,14 +13,15 @@ from idunn.geocoder.bragi_client import bragi_client from idunn.geocoder.models import QueryParams from idunn.geocoder.models.geocodejson import IntentionType -from idunn.places import place_from_id, Place +from idunn.places import Place from idunn.api.places_list import get_places_bbox_impl, PlacesQueryParam from idunn.utils import maps_urls from idunn.utils.regions import get_region_lonlat from idunn.utils.result_filter import ResultFilter from idunn.instant_answer import normalize from .constants import PoiSource -from .utils import Verbosity +from ..utils.place import place_from_id +from ..utils.verbosity import Verbosity logger = logging.getLogger(__name__) result_filter = ResultFilter() @@ -163,7 +164,6 @@ async def get_instant_answer_intention(intention, query: str, lang: str): verbosity=Verbosity.default_list(), ), sort_by_distance=intention_around_point, - poi_es_index_name="poi", ) places = places_bbox_response.places diff --git a/idunn/api/places.py b/idunn/api/places.py index 4ab9c7e0f..52f2412a3 100644 --- a/idunn/api/places.py +++ b/idunn/api/places.py @@ -9,14 +9,15 @@ from idunn import settings -from idunn.api.utils import Verbosity from idunn.utils.es_wrapper import get_elasticsearch from idunn.utils.covid19_dataset import covid19_osm_task -from idunn.places import Place, Latlon, place_from_id +from idunn.places import Place, Latlon from idunn.places.base import BasePlace from idunn.places.exceptions import PlaceNotFound from idunn.places.exceptions import RedirectToPlaceId, InvalidPlaceId from .closest import get_closest_place +from ..utils.place import place_from_id +from ..utils.verbosity import Verbosity logger = logging.getLogger(__name__) diff --git a/idunn/api/places_list.py b/idunn/api/places_list.py index 818b3beb6..b463df6e3 100644 --- a/idunn/api/places_list.py +++ b/idunn/api/places_list.py @@ -10,13 +10,13 @@ from shapely.geometry import MultiPoint, box from idunn import settings -from idunn.places import POI, BragiPOI -from idunn.api.utils import Verbosity -from idunn.datasources.mimirsbrunn import fetch_es_pois, MimirPoiFilter -from idunn.datasources.pages_jaunes import pj_source -from idunn.geocoder.bragi_client import bragi_client +from idunn.datasources.pages_jaunes import pj_source, PagesJaunes from .constants import PoiSource, ALL_POI_SOURCES -from .utils import Category +from ..datasources import Datasource +from ..datasources.osm import Osm +from ..datasources.tripadvisor import Tripadvisor +from ..utils.category import Category +from ..utils.verbosity import Verbosity logger = logging.getLogger(__name__) @@ -25,6 +25,8 @@ EXTENDED_BBOX_MAX_SIZE = float( settings["LIST_PLACES_EXTENDED_BBOX_MAX_SIZE"] ) # max bbox width and height after second extended query +TRIPADVISOR_CATEGORIES_COVERED_WORLDWIDE = ["hotel", "leisure", "attraction", "restaurant"] +TRIPADVISOR_CATEGORIES_COVERED_IN_FRANCE = ["hotel", "leisure", "attraction"] class CommonQueryParam(BaseModel): @@ -152,60 +154,20 @@ async def get_places_bbox( ) -> PlacesBboxResponse: """Get all places in a bounding box.""" params = PlacesQueryParam(**locals()) - return await get_places_bbox_impl(params, "poi") - - -# TODO: Just created to test tripadvisor api. To remove after tests -async def get_tripadvisor_places_bbox( - bbox: str = Query( - ..., - title="Bounding box", - description="Format: left_lon,bottom_lat,right_lon,top_lat", - example="-4.56,48.35,-4.42,48.46", - ), - category: List[Category] = Query([]), - raw_filter: Optional[List[str]] = Query(None), - source: Optional[str] = Query(None), - q: Optional[str] = Query(None, title="Query", description="Full text query"), - size: Optional[int] = Query(None), - lang: Optional[str] = Query(None), - verbosity: Verbosity = Verbosity.default_list(), - extend_bbox: bool = Query(False), -) -> PlacesBboxResponse: - """Get all places in a bounding box.""" - params = PlacesQueryParam(**locals()) - return await get_places_bbox_impl(params, "poi_tripadvisor") + return await get_places_bbox_impl(params) async def get_places_bbox_impl( params: PlacesQueryParam, - poi_es_index_name: str, sort_by_distance: Optional[Point] = None, ) -> PlacesBboxResponse: - source = params.source - if source is None: - if ( - params.q or (params.category and all(c.pj_what() for c in params.category)) - ) and pj_source.bbox_is_covered(params.bbox): - params.source = PoiSource.PAGESJAUNES - else: - params.source = PoiSource.OSM - - places_list = await _fetch_places_list(params, poi_es_index_name) - bbox_extended = False + if params.source is None: + select_datasource(params) + places_list = await _fetch_places_list(params) + bbox_extended = False if params.extend_bbox and len(places_list) == 0: - original_bbox = params.bbox - original_bbox_width = original_bbox[2] - original_bbox[0] - original_bbox_height = original_bbox[3] - original_bbox[1] - original_bbox_size = max(original_bbox_height, original_bbox_width) - if original_bbox_size < EXTENDED_BBOX_MAX_SIZE: - # Compute extended bbox and fetch results a second time - scale_factor = EXTENDED_BBOX_MAX_SIZE / original_bbox_size - new_box = scale(box(*original_bbox), xfact=scale_factor, yfact=scale_factor) - params.bbox = new_box.bounds - bbox_extended = True - places_list = await _fetch_places_list(params, poi_es_index_name) + bbox_extended, places_list = await _fetch_extended_bbox(bbox_extended, params, places_list) if len(places_list) == 0: results_bbox = None @@ -228,33 +190,60 @@ async def get_places_bbox_impl( ) -async def _fetch_places_list(params: PlacesQueryParam, poi_es_index_name: str): - if params.source == PoiSource.PAGESJAUNES: - return await run_in_threadpool( - pj_source.get_places_bbox, - params.category, - params.bbox, - size=params.size, - query=params.q, - ) - if params.q: - # Default source (OSM) with query - bragi_response = await bragi_client.pois_query_in_bbox( - query=params.q, bbox=params.bbox, lang=params.lang, limit=params.size - ) - return [BragiPOI(f) for f in bragi_response.get("features", [])] +def select_datasource(params): + if pj_source.bbox_is_covered(params.bbox): + select_datasource_for_france(params) + else: + select_datasource_outside_france(params) - # Default source (OSM) with category or class/subclass filters - if params.raw_filter: - filters = [MimirPoiFilter.from_url_raw_filter(f) for f in params.raw_filter] + +def select_datasource_for_france(params): + if any(cat in params.category for cat in TRIPADVISOR_CATEGORIES_COVERED_IN_FRANCE): + params.source = PoiSource.TRIPADVISOR + elif is_brand_detected_or_pj_category_cover(params): + params.source = PoiSource.PAGESJAUNES else: - filters = [f for c in params.category for f in c.raw_filters()] - - bbox_places = await run_in_threadpool( - fetch_es_pois, - poi_es_index_name, - filters=filters, - bbox=params.bbox, - max_size=params.size, - ) - return [POI(p["_source"]) for p in bbox_places] + params.source = PoiSource.OSM + + +def select_datasource_outside_france(params): + if any(cat in params.category for cat in TRIPADVISOR_CATEGORIES_COVERED_WORLDWIDE): + params.source = PoiSource.TRIPADVISOR + else: + params.source = PoiSource.OSM + + +def is_brand_detected_or_pj_category_cover(params): + return params.q or (params.category and all(c.pj_what() for c in params.category)) + + +async def _fetch_extended_bbox(bbox_extended, params, places_list): + original_bbox = params.bbox + original_bbox_width = original_bbox[2] - original_bbox[0] + original_bbox_height = original_bbox[3] - original_bbox[1] + original_bbox_size = max(original_bbox_height, original_bbox_width) + if original_bbox_size < EXTENDED_BBOX_MAX_SIZE: + # Compute extended bbox and fetch results a second time + scale_factor = EXTENDED_BBOX_MAX_SIZE / original_bbox_size + new_box = scale(box(*original_bbox), xfact=scale_factor, yfact=scale_factor) + params.bbox = new_box.bounds + bbox_extended = True + places_list = await _fetch_places_list(params) + return bbox_extended, places_list + + +async def _fetch_places_list(params: PlacesQueryParam): + datasource = DatasourceFactory().get_datasource(params.source) + return await datasource.get_places_bbox(params) + + +class DatasourceFactory: + def get_datasource(self, source_type: PoiSource) -> Datasource: + """Get the matching datasource to fetch POIs""" + if source_type == PoiSource.TRIPADVISOR: + return Tripadvisor() + if source_type == PoiSource.PAGESJAUNES: + return PagesJaunes() + if source_type == PoiSource.OSM: + return Osm() + raise ValueError("%s is not a valid source type", source_type) diff --git a/idunn/api/urls.py b/idunn/api/urls.py index caec21bc4..6e4f6d3d2 100644 --- a/idunn/api/urls.py +++ b/idunn/api/urls.py @@ -4,7 +4,7 @@ from .pois import get_poi from .places import get_place, get_place_latlon from .status import get_status -from .places_list import get_places_bbox, get_tripadvisor_places_bbox, PlacesBboxResponse +from .places_list import get_places_bbox, PlacesBboxResponse from .categories import AllCategoriesResponse, get_all_categories from .closest import closest_address from ..directions.models import DirectionsResponse @@ -55,19 +55,6 @@ def get_api_urls(settings): response_model=PlacesBboxResponse, responses={400: {"description": "Client Error in query params"}}, ), - APIRoute( - "/tripadvisor_places", - get_tripadvisor_places_bbox, - dependencies=[ - rate_limiter_dependency( - resource="idunn.get_tripadvisor_places_bbox", - max_requests=int(settings["LIST_PLACES_RL_MAX_REQUESTS"]), - expire=int(settings["LIST_PLACES_RL_EXPIRE"]), - ) - ], - response_model=PlacesBboxResponse, - responses={400: {"description": "Client Error in query params"}}, - ), APIRoute("/places/latlon:{lat}:{lon}", get_place_latlon, response_model=Place), APIRoute("/places/{id}", get_place, response_model=Place), # Categories diff --git a/idunn/api/utils.py b/idunn/api/utils.py deleted file mode 100644 index ff12281d9..000000000 --- a/idunn/api/utils.py +++ /dev/null @@ -1,172 +0,0 @@ -from enum import Enum -import os -import logging -from idunn.blocks import ( - Weather, - ContactBlock, - DescriptionEvent, - GradesBlock, - ImagesBlock, - InformationBlock, - OpeningDayEvent, - OpeningHourBlock, - Covid19Block, - PhoneBlock, - RecyclingBlock, - WebSiteBlock, - TransactionalBlock, - SocialBlock, - DescriptionBlock, - DeliveryBlock, - StarsBlock, -) -from idunn.utils.settings import _load_yaml_file -from idunn.datasources.mimirsbrunn import MimirPoiFilter - -logger = logging.getLogger(__name__) - - -class Type(str, Enum): - # pylint: disable=invalid-name - # City = "city" # this field is available in Bragi but deprecated - House = "house" - Poi = "poi" - StopArea = "public_transport:stop_area" - Street = "street" - Zone = "zone" - - -def get_categories(): - categories_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "../utils/categories.yml" - ) - return _load_yaml_file(categories_path)["categories"] - - -ALL_CATEGORIES = get_categories() - - -class CategoryEnum(str): - """ - Methods defining the behavior of the enum `Category` defined bellow. - """ - - def match_brand(self): - return ALL_CATEGORIES[self].get("match_brand", False) - - def pj_what(self): - return ALL_CATEGORIES[self].get("pj_what") - - def raw_filters(self) -> [MimirPoiFilter]: - raw_filters = ALL_CATEGORIES[self].get("raw_filters") - filters = [] - for f in raw_filters: - f = f.copy() - poi_class = f.pop("class", None) - poi_subclass = f.pop("subclass", None) - filters.append(MimirPoiFilter(poi_class, poi_subclass, extra=f)) - return filters - - def regex(self): - return ALL_CATEGORIES[self].get("regex") - - -# Load the list of categories as an enum for validation purpose -Category = Enum("Category", {cat: cat for cat in ALL_CATEGORIES}, type=CategoryEnum) - - -class Verbosity(str, Enum): - """ - Control the verbosity of the output. - """ - - LONG = "long" - SHORT = "short" - LIST = "list" - - @classmethod - def default(cls): - return cls.LONG - - @classmethod - def default_list(cls): - return cls.LIST - - -BLOCKS_BY_VERBOSITY = { - Verbosity.LONG: [ - Weather, - OpeningDayEvent, - DescriptionEvent, - OpeningHourBlock, - Covid19Block, - PhoneBlock, - InformationBlock, - WebSiteBlock, - ContactBlock, - ImagesBlock, - GradesBlock, - RecyclingBlock, - TransactionalBlock, - SocialBlock, - DescriptionBlock, - DeliveryBlock, - StarsBlock, - ], - Verbosity.LIST: [ - OpeningDayEvent, - DescriptionEvent, - OpeningHourBlock, - Covid19Block, - PhoneBlock, - WebSiteBlock, - ImagesBlock, - GradesBlock, - RecyclingBlock, - TransactionalBlock, - SocialBlock, - DeliveryBlock, - StarsBlock, - ], - Verbosity.SHORT: [OpeningHourBlock, Covid19Block], -} - - -def build_blocks(es_poi, lang, verbosity): - """Returns the list of blocks we want - depending on the verbosity. - """ - blocks = [] - for c in BLOCKS_BY_VERBOSITY[verbosity]: - if not c.is_enabled(): - continue - block = c.from_es(es_poi, lang) - if block is not None: - blocks.append(block) - return blocks - - -def get_name(properties, lang): - """ - Return the Place name from the properties field of the elastic response. Here 'name' - corresponds to the POI name in the language of the user request (i.e. 'name:{lang}' field). - - If lang is None or if name:lang is not in the properties then name receives the local name - value. - - >>> get_name({}, 'fr') is None - True - - >>> get_name({'name':'spontini', 'name:en':'spontinien', 'name:fr':'spontinifr'}, None) - 'spontini' - - >>> get_name({'name':'spontini', 'name:en':'spontinien', 'name:fr':'spontinifr'}, 'cz') - 'spontini' - - >>> get_name({'name':'spontini', 'name:en':'spontinien', 'name:fr':'spontinifr'}, 'fr') - 'spontinifr' - """ - name = properties.get(f"name:{lang}") - if name is None: - name = properties.get("name") - return name diff --git a/idunn/datasources/__init__.py b/idunn/datasources/__init__.py index e69de29bb..b1d236450 100644 --- a/idunn/datasources/__init__.py +++ b/idunn/datasources/__init__.py @@ -0,0 +1,13 @@ +import logging +from abc import ABC, abstractmethod + +logger = logging.getLogger(__name__) + + +class Datasource(ABC): + def __init__(self): + pass + + @abstractmethod + async def get_places_bbox(self, params) -> list: + """Get places within a given Bbox""" diff --git a/idunn/datasources/osm.py b/idunn/datasources/osm.py new file mode 100644 index 000000000..82e3c317c --- /dev/null +++ b/idunn/datasources/osm.py @@ -0,0 +1,34 @@ +import logging + +from starlette.concurrency import run_in_threadpool + +from idunn.datasources import Datasource +from idunn.datasources.mimirsbrunn import fetch_es_pois, MimirPoiFilter +from idunn.geocoder.bragi_client import bragi_client +from idunn.places import BragiPOI, POI + +logger = logging.getLogger(__name__) + + +class Osm(Datasource): + async def get_places_bbox(self, params) -> list: + """Get places within a given Bbox""" + if params.q: + # Default source (OSM) with query + bragi_response = await bragi_client.pois_query_in_bbox( + query=params.q, bbox=params.bbox, lang=params.lang, limit=params.size + ) + return [BragiPOI(f) for f in bragi_response.get("features", [])] + + if params.raw_filter: + filters = [MimirPoiFilter.from_url_raw_filter(f) for f in params.raw_filter] + else: + filters = [f for c in params.category for f in c.raw_filters()] + bbox_places = await run_in_threadpool( + fetch_es_pois, + "poi", + filters=filters, + bbox=params.bbox, + max_size=params.size, + ) + return [POI(p["_source"]) for p in bbox_places] diff --git a/idunn/datasources/pages_jaunes.py b/idunn/datasources/pages_jaunes.py index e77541f07..6d9e8d08e 100644 --- a/idunn/datasources/pages_jaunes.py +++ b/idunn/datasources/pages_jaunes.py @@ -4,13 +4,15 @@ import requests from requests import HTTPError as RequestsHTTPError +from starlette.concurrency import run_in_threadpool from idunn import settings -from idunn.api.utils import CategoryEnum +from idunn.datasources import Datasource from idunn.places.exceptions import PlaceNotFound from idunn.places.models import pj_info, pj_find from idunn.places.pj_poi import PjApiPOI from idunn.utils.auth_session import AuthSession +from idunn.utils.category import CategoryEnum from idunn.utils.geometry import bbox_inside_polygon, france_polygon logger = logging.getLogger(__name__) @@ -28,7 +30,7 @@ def get_authorization_params(self): } -class ApiPjSource: +class PagesJaunes(Datasource): PLACE_ID_NAMESPACE = "pj" PJ_RESULT_MAX_SIZE = 30 PJ_INFO_API_URL = "https://api.pagesjaunes.fr/v1/pros" @@ -47,7 +49,7 @@ def __init__(self): @staticmethod def format_where(bbox): """ - >>> ApiPjSource.format_where([2e-5,-0.5,2,0.5]) + >>> PagesJaunes.format_where([2e-5,-0.5,2,0.5]) 'gZ0.000020,-0.500000,2.000000,0.500000' """ @@ -103,7 +105,16 @@ def search_places(self, query: str, place_in_query: bool, size=10) -> List[PjApi self.PJ_FIND_API_URL, query_params, size, ignore_status=(400,) ) - def get_places_bbox( + async def get_places_bbox(self, params) -> List[PjApiPOI]: + return await run_in_threadpool( + self.fetch_places_bbox, + params.category, + params.bbox, + size=params.size, + query=params.q, + ) + + def fetch_places_bbox( self, categories: List[CategoryEnum], bbox, size=10, query="" ) -> List[PjApiPOI]: query_params = { @@ -149,4 +160,4 @@ def get_place(self, poi_id) -> PjApiPOI: raise -pj_source = ApiPjSource() +pj_source = PagesJaunes() diff --git a/idunn/datasources/tripadvisor.py b/idunn/datasources/tripadvisor.py index e5fbe40e4..4e77adaad 100644 --- a/idunn/datasources/tripadvisor.py +++ b/idunn/datasources/tripadvisor.py @@ -4,21 +4,42 @@ import httpx import pydantic from fastapi import HTTPException +from starlette.concurrency import run_in_threadpool from idunn import settings +from idunn.datasources import Datasource +from idunn.datasources.mimirsbrunn import MimirPoiFilter, fetch_es_pois +from idunn.places import POI logger = logging.getLogger(__name__) TA_API_BASE_URL = "https://api.tripadvisor.com/api/partner/3.0/synmeta-pricing" -class TripAdvisorAPI: +class Tripadvisor(Datasource): TA_API_TIMEOUT = float(settings.get("TA_API_TIMEOUT")) def __init__(self): super().__init__() self.client = httpx.AsyncClient(verify=settings["VERIFY_HTTPS"]) + async def get_places_bbox(self, params) -> list: + """Get places within a given Bbox""" + # Default source (OSM) with category or class/subclass filters + if params.raw_filter: + filters = [MimirPoiFilter.from_url_raw_filter(f) for f in params.raw_filter] + else: + filters = [f for c in params.category for f in c.raw_filters()] + bbox_places = await run_in_threadpool( + fetch_es_pois, + "poi_tripadvisor", + filters=filters, + bbox=params.bbox, + max_size=params.size, + ) + + return [POI(p["_source"]) for p in bbox_places] + async def get_hotel_pricing_by_hotel_id(self, params=None): try: response = await self.client.get( @@ -62,4 +83,4 @@ def cleanup_empty_params(d): return d # For convenience -tripadvisor_api = TripAdvisorAPI() +tripadvisor_api = Tripadvisor() diff --git a/idunn/geocoder/models/params.py b/idunn/geocoder/models/params.py index 243ccd940..837f12942 100644 --- a/idunn/geocoder/models/params.py +++ b/idunn/geocoder/models/params.py @@ -3,6 +3,8 @@ https://github.com/CanalTP/mimirsbrunn/blob/master/libs/bragi/src/routes/autocomplete.rs#L59 """ import json +from enum import Enum + from geojson_pydantic.features import Feature from typing import List, Optional from fastapi import Query @@ -10,7 +12,6 @@ from pydantic.dataclasses import dataclass from idunn import settings -from idunn.api.utils import Type from idunn.utils.math import with_precision from .cosmogony import ZoneType @@ -19,6 +20,16 @@ FOCUS_DECAY = float(settings["FOCUS_DECAY"]) +class Type(str, Enum): + # pylint: disable=invalid-name + # City = "city" # this field is available in Bragi but deprecated + House = "house" + Poi = "poi" + StopArea = "public_transport:stop_area" + Street = "street" + Zone = "zone" + + @dataclass class QueryParams: q: str = Query(..., title="Query string") diff --git a/idunn/geocoder/nlu_client.py b/idunn/geocoder/nlu_client.py index d03981200..750a0ee65 100644 --- a/idunn/geocoder/nlu_client.py +++ b/idunn/geocoder/nlu_client.py @@ -6,7 +6,6 @@ from unidecode import unidecode from idunn.api.places_list import MAX_HEIGHT, MAX_WIDTH -from idunn.api.utils import Category from idunn.geocoder.models.params import QueryParams as GeocoderParams from idunn import settings from idunn.utils.circuit_breaker import IdunnCircuitBreaker @@ -14,6 +13,7 @@ from .models.geocodejson import Intention, IntentionType from .bragi_client import bragi_client +from ..utils.category import Category logger = logging.getLogger(__name__) result_filter = ResultFilter() diff --git a/idunn/places/__init__.py b/idunn/places/__init__.py index 26714b0c7..2c105fd1d 100644 --- a/idunn/places/__init__.py +++ b/idunn/places/__init__.py @@ -6,5 +6,3 @@ from .pj_poi import PjApiPOI from .latlon import Latlon from .event import Event - -from .utils import place_from_id diff --git a/idunn/places/base.py b/idunn/places/base.py index 37e60e08c..7b42ca194 100644 --- a/idunn/places/base.py +++ b/idunn/places/base.py @@ -3,12 +3,10 @@ from geopy import Point from pytz import timezone, UTC from typing import Optional, Union - -from idunn.api.utils import Verbosity, build_blocks from idunn.datasources.wiki_es import wiki_es from idunn.utils import maps_urls, tz from .place import Place, PlaceMeta - +from ..utils.verbosity import build_blocks, Verbosity logger = logging.getLogger(__name__) diff --git a/idunn/places/poi.py b/idunn/places/poi.py index d24689074..31fd2985f 100644 --- a/idunn/places/poi.py +++ b/idunn/places/poi.py @@ -3,7 +3,6 @@ from .base import BasePlace from idunn import settings -from idunn.api.utils import get_name from idunn.api.constants import PoiSource OSM_CONTRIBUTION_HASHTAGS = settings["OSM_CONTRIBUTION_HASHTAGS"] @@ -22,7 +21,29 @@ def get_local_name(self): return self.properties.get("name", "") def get_name(self, lang): - return get_name(self.properties, lang) + """ + Return the Place name from the properties field of the elastic response. Here 'name' + corresponds to the POI name in the language of the user request (i.e. 'name:{lang}' field). + + If lang is None or if name:lang is not in the properties then name receives the local name + value. + + >>> get_name({}, 'fr') is None + True + + >>> get_name({'name':'spontini', 'name:en':'spontinien', 'name:fr':'spontinifr'}, None) + 'spontini' + + >>> get_name({'name':'spontini', 'name:en':'spontinien', 'name:fr':'spontinifr'}, 'cz') + 'spontini' + + >>> get_name({'name':'spontini', 'name:en':'spontinien', 'name:fr':'spontinifr'}, 'fr') + 'spontinifr' + """ + name = self.properties.get(f"name:{lang}") + if name is None: + name = self.properties.get("name") + return name def get_class_name(self): return self.properties.get("poi_class") @@ -82,3 +103,29 @@ def get_postcodes(self): def get_country_codes(self): return [c.upper() for c in self.get_raw_address().get("country_codes") or []] + + +def get_name(properties, lang): + """ + Return the Place name from the properties field of the elastic response. Here 'name' + corresponds to the POI name in the language of the user request (i.e. 'name:{lang}' field). + + If lang is None or if name:lang is not in the properties then name receives the local name + value. + + >>> get_name({}, 'fr') is None + True + + >>> get_name({'name':'spontini', 'name:en':'spontinien', 'name:fr':'spontinifr'}, None) + 'spontini' + + >>> get_name({'name':'spontini', 'name:en':'spontinien', 'name:fr':'spontinifr'}, 'cz') + 'spontini' + + >>> get_name({'name':'spontini', 'name:en':'spontinien', 'name:fr':'spontinifr'}, 'fr') + 'spontinifr' + """ + name = properties.get(f"name:{lang}") + if name is None: + name = properties.get("name") + return name diff --git a/idunn/utils/categories.yml b/idunn/utils/categories.yml index df0380d24..5062fb82e 100644 --- a/idunn/utils/categories.yml +++ b/idunn/utils/categories.yml @@ -390,11 +390,12 @@ categories: - class: "beer" pj_what: "bar" +# we don't want hotel from PagesJaunes because of Tripadvisor hotel: regex: "hotel" raw_filters: - class: "lodging" - pj_what: "hôtels" + - class: "hotel" historic: raw_filters: diff --git a/idunn/utils/category.py b/idunn/utils/category.py new file mode 100644 index 000000000..f8b25945e --- /dev/null +++ b/idunn/utils/category.py @@ -0,0 +1,44 @@ +import os +from enum import Enum + +from idunn.datasources.mimirsbrunn import MimirPoiFilter +from idunn.utils.settings import _load_yaml_file + + +def get_categories(): + categories_path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "../utils/categories.yml" + ) + return _load_yaml_file(categories_path)["categories"] + + +ALL_CATEGORIES = get_categories() + + +class CategoryEnum(str): + """ + Methods defining the behavior of the enum `Category` defined bellow. + """ + + def match_brand(self): + return ALL_CATEGORIES[self].get("match_brand", False) + + def pj_what(self): + return ALL_CATEGORIES[self].get("pj_what") + + def raw_filters(self) -> [MimirPoiFilter]: + raw_filters = ALL_CATEGORIES[self].get("raw_filters") + filters = [] + for f in raw_filters: + f = f.copy() + poi_class = f.pop("class", None) + poi_subclass = f.pop("subclass", None) + filters.append(MimirPoiFilter(poi_class, poi_subclass, extra=f)) + return filters + + def regex(self): + return ALL_CATEGORIES[self].get("regex") + + +# Load the list of categories as an enum for validation purpose +Category = Enum("Category", {cat: cat for cat in ALL_CATEGORIES}, type=CategoryEnum) diff --git a/idunn/places/utils.py b/idunn/utils/place.py similarity index 88% rename from idunn/places/utils.py rename to idunn/utils/place.py index f0bdf1fae..e2575a31c 100644 --- a/idunn/places/utils.py +++ b/idunn/utils/place.py @@ -1,15 +1,9 @@ -from idunn.datasources.pages_jaunes import pj_source from idunn.datasources.mimirsbrunn import fetch_es_place, get_es_place_type from idunn.utils.es_wrapper import get_elasticsearch from idunn.utils import prometheus - - -from .admin import Admin -from .street import Street -from .address import Address -from .poi import POI -from .latlon import Latlon -from .exceptions import InvalidPlaceId, RedirectToPlaceId, PlaceNotFound +from ..datasources.pages_jaunes import pj_source +from ..places import Latlon, Admin, Address, POI, Street +from ..places.exceptions import InvalidPlaceId, PlaceNotFound, RedirectToPlaceId def place_from_id(id: str, type=None, follow_redirect=False): diff --git a/idunn/utils/verbosity.py b/idunn/utils/verbosity.py new file mode 100644 index 000000000..a790e580c --- /dev/null +++ b/idunn/utils/verbosity.py @@ -0,0 +1,92 @@ +from enum import Enum + +from idunn.blocks import ( + Weather, + ContactBlock, + DescriptionEvent, + GradesBlock, + ImagesBlock, + InformationBlock, + OpeningDayEvent, + OpeningHourBlock, + Covid19Block, + PhoneBlock, + RecyclingBlock, + WebSiteBlock, + TransactionalBlock, + SocialBlock, + DescriptionBlock, + DeliveryBlock, + StarsBlock, +) + + +class Verbosity(str, Enum): + """ + Control the verbosity of the output. + """ + + LONG = "long" + SHORT = "short" + LIST = "list" + + @classmethod + def default(cls): + return cls.LONG + + @classmethod + def default_list(cls): + return cls.LIST + + +BLOCKS_BY_VERBOSITY = { + Verbosity.LONG: [ + Weather, + OpeningDayEvent, + DescriptionEvent, + OpeningHourBlock, + Covid19Block, + PhoneBlock, + InformationBlock, + WebSiteBlock, + ContactBlock, + ImagesBlock, + GradesBlock, + RecyclingBlock, + TransactionalBlock, + SocialBlock, + DescriptionBlock, + DeliveryBlock, + StarsBlock, + ], + Verbosity.LIST: [ + OpeningDayEvent, + DescriptionEvent, + OpeningHourBlock, + Covid19Block, + PhoneBlock, + WebSiteBlock, + ImagesBlock, + GradesBlock, + RecyclingBlock, + TransactionalBlock, + SocialBlock, + DeliveryBlock, + StarsBlock, + ], + Verbosity.SHORT: [OpeningHourBlock, Covid19Block], +} + + +def build_blocks(es_poi, lang, verbosity): + """Returns the list of blocks we want + depending on the verbosity. + """ + blocks = [] + for c in BLOCKS_BY_VERBOSITY[verbosity]: + if not c.is_enabled(): + continue + block = c.from_es(es_poi, lang) + if block is not None: + blocks.append(block) + return blocks diff --git a/tests/conftest.py b/tests/conftest.py index c8f3f2797..10ea9ccfe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -90,6 +90,28 @@ def init_indices(mimir_client, wiki_client): ) mimir_client.indices.put_alias(name="munin", index="munin_poi") + mimir_client.indices.create( + index="munin_poi_tripadvisor", + mappings={ + "properties": { + "coord": {"type": "geo_point"}, + "weight": {"type": "float"}, + "poi_type.name": {"type": "text", "index_options": "docs", "analyzer": "word"}, + } + }, + settings={ + "analysis": { + "analyzer": { + "word": { + "filter": ["lowercase", "asciifolding"], + "type": "custom", + "tokenizer": "standard", + } + } + }, + }, + ) + mimir_client.indices.create( index="munin_addr", mappings={ @@ -142,6 +164,7 @@ def redis(docker_services): "street": "munin_street", "addr": "munin_addr", "poi": "munin_poi", + "poi_tripadvisor": "munin_poi_tripadvisor", } @@ -181,6 +204,8 @@ def load_all(mimir_client, init_indices): load_place("address_43_rue_de_paris.json", mimir_client, doc_type="addr") load_place("admin_dunkerque.json", mimir_client, doc_type="admin") load_place("admin_paris.json", mimir_client, doc_type="admin") + load_place("cinema_multiplexe.json", mimir_client, doc_type="poi_tripadvisor") + load_place("tripadvisor_hotel_suecka.json", mimir_client, doc_type="poi_tripadvisor") @pytest.fixture diff --git a/tests/fixtures/pj/__init__.py b/tests/fixtures/pj/__init__.py index f3b22d4f3..abe5aa3cf 100644 --- a/tests/fixtures/pj/__init__.py +++ b/tests/fixtures/pj/__init__.py @@ -4,15 +4,15 @@ import pytest -from idunn.datasources.pages_jaunes import ApiPjSource -from idunn.places import utils as places_utils +from idunn.datasources.pages_jaunes import PagesJaunes +from idunn.utils import place as places_utils from tests.utils import init_pj_source, override_settings def mock_pj_api(type_api: str, filename: str): api_result = json.load(open(os.path.join(os.path.dirname(__file__), filename))) updated_settings = {} - source_type = ApiPjSource + source_type = PagesJaunes with override_settings(updated_settings), init_pj_source(source_type): if type_api == "api": diff --git a/tests/fixtures/tripadvisor_hotel_suecka.json b/tests/fixtures/tripadvisor_hotel_suecka.json new file mode 100644 index 000000000..1802c45ae --- /dev/null +++ b/tests/fixtures/tripadvisor_hotel_suecka.json @@ -0,0 +1,403 @@ +{ + "id": "osm:way:63178753", + "label": "Bergrestaurant Suecka", + "name": "Bergrestaurant Suecka", + "coord": { + "lon": 2.3265827716099623, + "lat": 48.859917803575875 + }, + "administrative_regions": [ + { + "id": "admin:osm:relation:2188567", + "insee": "7510725", + "level": 10, + "label": "Quartier Saint-Thomas-d'Aquin (75007), Paris 7e Arrondissement, Paris, Île-de-France, France", + "name": "Quartier Saint-Thomas-d'Aquin", + "zip_codes": [ + "75007" + ], + "weight": 0.0, + "coord": { + "lon": 2.3255879694021946, + "lat": 48.85526260583396 + }, + "bbox": [ + 2.319015, + 48.8484202, + 2.3332665, + 48.8614751 + ], + "zone_type": "suburb", + "parent_id": null, + "codes": [] + }, + { + "id": "admin:osm:relation:9521", + "insee": "75107", + "level": 9, + "label": "Paris 7e Arrondissement (75007), Paris, Île-de-France, France", + "name": "Paris 7e Arrondissement", + "zip_codes": [ + "75007" + ], + "weight": 0.0004035305800469838, + "coord": { + "lon": 2.3201953, + "lat": 48.8570281 + }, + "bbox": [ + 2.2898239, + 48.8459336, + 2.3332665, + 48.8637782 + ], + "zone_type": "city_district", + "parent_id": null, + "codes": [] + }, + { + "id": "admin:osm:relation:7444", + "insee": "75056", + "level": 8, + "label": "Paris (75000-75116), Île-de-France, France", + "name": "Paris", + "zip_codes": [ + "75000", + "75001", + "75002", + "75003", + "75004", + "75005", + "75006", + "75007", + "75008", + "75009", + "75010", + "75011", + "75012", + "75013", + "75014", + "75015", + "75016", + "75017", + "75018", + "75019", + "75020", + "75116" + ], + "weight": 0.015618298409952111, + "coord": { + "lon": 2.3514992, + "lat": 48.8566101 + }, + "bbox": [ + 2.224122, + 48.8155755, + 2.4697602, + 48.902156 + ], + "zone_type": "city", + "parent_id": null, + "codes": [] + }, + { + "id": "admin:osm:relation:71525", + "insee": "75", + "level": 6, + "label": "Paris, Île-de-France, France", + "name": "Paris", + "zip_codes": [], + "weight": 0.015618298409952111, + "coord": { + "lon": 2.3514992, + "lat": 48.8566101 + }, + "bbox": [ + 2.224122, + 48.8155755, + 2.4697602, + 48.902156 + ], + "zone_type": "state_district", + "parent_id": null, + "codes": [] + }, + { + "id": "admin:osm:relation:8649", + "insee": "11", + "level": 4, + "label": "Île-de-France, France", + "name": "Île-de-France", + "zip_codes": [], + "weight": 0.08250229135889316, + "coord": { + "lon": 2.3514992, + "lat": 48.8566101 + }, + "bbox": [ + 1.4462445, + 48.1201456, + 3.5592208, + 49.241431 + ], + "zone_type": "state", + "parent_id": null, + "codes": [] + }, + { + "id": "admin:osm:relation:2202162", + "insee": "", + "level": 2, + "label": "France", + "name": "France", + "zip_codes": [], + "weight": 0.015618298409952111, + "coord": { + "lon": 2.3514992, + "lat": 48.8566101 + }, + "bbox": [ + -5.4534286, + 41.2632185, + 9.8678344, + 51.268318 + ], + "zone_type": "country", + "parent_id": null, + "codes": [], + "country_codes": ["FR"] + } + ], + "weight": 1.0, + "zip_codes": [ + "75007" + ], + "poi_type": { + "id": "hotel", + "name": "class_hotel subclass_hotel" + }, + "properties": [ + { + "key": "addr:street", + "value": "Rue de la Légion d'Honneur" + }, + { + "key": "tourism", + "value": "hotel" + }, + { + "key": "importance", + "value": "international" + }, + { + "key": "name", + "value": "Bergrestaurant Suecka" + }, + { + "key": "name:en", + "value": "Bergrestaurant Suecka" + }, + { + "key": "name:fr", + "value": "Bergrestaurant Suecka" + }, + { + "key": "poi_subclass", + "value": "hotel" + }, + { + "key": "poi_class", + "value": "hotel" + } + ], + "address": { + "type": "addr", + "id": "addr_poi:osm:way:63178753", + "name": "1 Rue de la Légion d'Honneur", + "house_number": "1", + "street": { + "id": "street_poi:osm:way:63178753", + "name": "Rue de la Légion d'Honneur", + "administrative_regions": [ + { + "id": "admin:osm:relation:2188567", + "insee": "7510725", + "level": 10, + "label": "Quartier Saint-Thomas-d'Aquin (75007), Paris 7e Arrondissement, Paris, Île-de-France, France", + "name": "Quartier Saint-Thomas-d'Aquin", + "zip_codes": [ + "75007" + ], + "weight": 0.0, + "coord": { + "lon": 2.3255879694021946, + "lat": 48.85526260583396 + }, + "bbox": [ + 2.319015, + 48.8484202, + 2.3332665, + 48.8614751 + ], + "zone_type": "suburb", + "parent_id": null, + "codes": [] + }, + { + "id": "admin:osm:relation:9521", + "insee": "75107", + "level": 9, + "label": "Paris 7e Arrondissement (75007), Paris, Île-de-France, France", + "name": "Paris 7e Arrondissement", + "zip_codes": [ + "75007" + ], + "weight": 0.0004035305800469838, + "coord": { + "lon": 2.3201953, + "lat": 48.8570281 + }, + "bbox": [ + 2.2898239, + 48.8459336, + 2.3332665, + 48.8637782 + ], + "zone_type": "city_district", + "parent_id": null, + "codes": [] + }, + { + "id": "admin:osm:relation:7444", + "insee": "75056", + "level": 8, + "label": "Paris (75000-75116), Île-de-France, France", + "name": "Paris", + "zip_codes": [ + "75000", + "75001", + "75002", + "75003", + "75004", + "75005", + "75006", + "75007", + "75008", + "75009", + "75010", + "75011", + "75012", + "75013", + "75014", + "75015", + "75016", + "75017", + "75018", + "75019", + "75020", + "75116" + ], + "weight": 0.015618298409952111, + "coord": { + "lon": 2.3514992, + "lat": 48.8566101 + }, + "bbox": [ + 2.224122, + 48.8155755, + 2.4697602, + 48.902156 + ], + "zone_type": "city", + "parent_id": null, + "codes": [] + }, + { + "id": "admin:osm:relation:71525", + "insee": "75", + "level": 6, + "label": "Paris, Île-de-France, France", + "name": "Paris", + "zip_codes": [], + "weight": 0.015618298409952111, + "coord": { + "lon": 2.3514992, + "lat": 48.8566101 + }, + "bbox": [ + 2.224122, + 48.8155755, + 2.4697602, + 48.902156 + ], + "zone_type": "state_district", + "parent_id": null, + "codes": [] + }, + { + "id": "admin:osm:relation:8649", + "insee": "11", + "level": 4, + "label": "Île-de-France, France", + "name": "Île-de-France", + "zip_codes": [], + "weight": 0.08250229135889316, + "coord": { + "lon": 2.3514992, + "lat": 48.8566101 + }, + "bbox": [ + 1.4462445, + 48.1201456, + 3.5592208, + 49.241431 + ], + "zone_type": "state", + "parent_id": null, + "codes": [] + }, + { + "id": "admin:osm:relation:2202162", + "insee": "", + "level": 2, + "label": "France", + "name": "France", + "zip_codes": [], + "weight": 0.015618298409952111, + "coord": { + "lon": 2.3514992, + "lat": 48.8566101 + }, + "bbox": [ + -5.4534286, + 41.2632185, + 9.8678344, + 51.268318 + ], + "zone_type": "country", + "parent_id": null, + "codes": [], + "country_codes": ["FR"] + } + ], + "label": "Rue de la Légion d'Honneur (Paris)", + "weight": 0.015618298409952111, + "coord": { + "lon": 2.3265827716099623, + "lat": 48.859917803575875 + }, + "zip_codes": [ + "75007" + ] + }, + "label": "1 Rue de la Légion d'Honneur (Paris)", + "coord": { + "lon": 2.3265827716099623, + "lat": 48.859917803575875 + }, + "weight": 0.015618298409952111, + "zip_codes": [ + "75007" + ] + } +} diff --git a/tests/test_categories.py b/tests/test_categories.py index 35d21b01e..bc8b4e4c8 100644 --- a/tests/test_categories.py +++ b/tests/test_categories.py @@ -1,4 +1,3 @@ -import pytest from unittest.mock import ANY from app import app from fastapi.testclient import TestClient @@ -7,7 +6,6 @@ from .fixtures.pj import mock_pj_api_with_musee_picasso_short from .test_full import OH_BLOCK - BBOX_PARIS = "2.252876,48.819862,2.395707,48.891132" BBOX_BREST = "-4.807542,48.090743,-4.097541,48.800743" INVALID_BBOX_PARIS_LEFT_PERM_RIGHT = "2.395707,48.819862,2.252876,48.891132" @@ -15,7 +13,7 @@ @freeze_time("2018-06-14 8:30:00", tz_offset=2) -def test_bbox(): +def test_bbox_should_trigger_osm_sources_when_raw_filter_specified(): """ Test the bbox query. @@ -277,6 +275,37 @@ def test_bbox(): } +@freeze_time("2021-11-29 8:30:00", tz_offset=2) +def test_bbox_should_trigger_tripadvisor_sources_anywhere_on_hotel_category(): + client = TestClient(app) + + response = client.get(url=f"http://localhost/v1/places?bbox={BBOX_PARIS}&category=hotel") + + assert response.status_code == 200 + + resp = response.json() + + assert resp == { + "bbox": [2.326583, 48.859918, 2.326583, 48.859918], + "bbox_extended": False, + "places": [ + { + "address": ANY, + "blocks": [], + "class_name": "hotel", + "geometry": ANY, + "id": "osm:way:63178753", + "local_name": "Bergrestaurant Suecka", + "meta": ANY, + "name": "Bergrestaurant Suecka", + "subclass_name": "hotel", + "type": "poi", + } + ], + "source": "tripadvisor", + } + + @freeze_time("2018-06-14 8:30:00", tz_offset=2) def test_size_list(): """ @@ -582,9 +611,9 @@ def test_category_or_raw_filter(): assert resp == {"detail": "One of 'category', 'raw_filter' or 'q' parameter is required"} -def test_valid_category(): +def test_valid_category_that_trigger_tripadvisor_over_osm(): """ - Test a valid category filter which should fetch only one cinema in a bbox around Brest city. + Test a valid category filter which should fetch only one cinema in a bbox around Brest city with tripadvisor """ client = TestClient(app) @@ -595,7 +624,7 @@ def test_valid_category(): resp = response.json() assert resp == { - "source": "osm", + "source": "tripadvisor", "places": [ { "type": "poi", diff --git a/tests/utils.py b/tests/utils.py index 91f770595..e1da4d281 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -8,7 +8,7 @@ from idunn.api import places_list, instant_answer from idunn.datasources.recycling import recycling_client from idunn.datasources.wiki_es import WikiEs -from idunn.places import utils as places_utils +from idunn.utils import place as places_utils @contextmanager