Skip to content

Commit

Permalink
Refacto datasources with a factory (#288)
Browse files Browse the repository at this point in the history
* simply add tripadvisor trigger logic to suggest

* add tests for tripadvisor

* first version refacto datasources with factory pattern

* unravel circular depedencies : multiple utils module file were shared between multiples modules -> I migrate then to the utils module in a "half-dirty" way

* improve factory and clean `get_name` function that was in utils but only used once

* use classic factory instead to make test success

* divide `get_places_bbox_impl` into sub-functions

* create two functions to differentiate france and worldwide behavior for the datasource selection

* fix review

Co-authored-by: sdirollo <sebastien.dirollo@gmail.com>
  • Loading branch information
sdrll and sdirollo committed Dec 2, 2021
1 parent 199c907 commit 9c76c82
Show file tree
Hide file tree
Showing 28 changed files with 840 additions and 311 deletions.
1 change: 1 addition & 0 deletions idunn/api/__init__.py
@@ -0,0 +1 @@
from .places_list import PlacesQueryParam
3 changes: 2 additions & 1 deletion idunn/api/categories.py
@@ -1,7 +1,8 @@
from .utils import Category
from pydantic import BaseModel, Field
from typing import List

from idunn.utils.category import Category


class CategoryDescription(BaseModel):
name: str = Field(..., description="Unique label of the category.")
Expand Down
2 changes: 1 addition & 1 deletion idunn/api/closest.py
Expand Up @@ -7,8 +7,8 @@
from idunn.utils.es_wrapper import get_elasticsearch
from idunn.utils import prometheus
from idunn.places import Street, Address, Place
from idunn.api.utils import Verbosity
from idunn.datasources.mimirsbrunn import fetch_closest, get_es_place_type
from idunn.utils.verbosity import Verbosity

logger = logging.getLogger(__name__)

Expand Down
1 change: 1 addition & 0 deletions idunn/api/constants.py
Expand Up @@ -4,6 +4,7 @@
class PoiSource(str, Enum):
OSM = "osm"
PAGESJAUNES = "pages_jaunes"
TRIPADVISOR = "tripadvisor"


ALL_POI_SOURCES = [s.value for s in PoiSource]
Expand Down
4 changes: 2 additions & 2 deletions idunn/api/directions.py
Expand Up @@ -2,11 +2,11 @@
from pydantic import confloat

from idunn import settings
from idunn.places import Latlon, place_from_id
from idunn.places import Latlon
from idunn.places.exceptions import IdunnPlaceError
from idunn.utils.rate_limiter import IdunnRateLimiter
from ..directions.client import directions_client

from ..utils.place import place_from_id

rate_limiter = IdunnRateLimiter(
resource="idunn.api.directions",
Expand Down
6 changes: 3 additions & 3 deletions idunn/api/instant_answer.py
Expand Up @@ -13,14 +13,15 @@
from idunn.geocoder.bragi_client import bragi_client
from idunn.geocoder.models import QueryParams
from idunn.geocoder.models.geocodejson import IntentionType
from idunn.places import place_from_id, Place
from idunn.places import Place
from idunn.api.places_list import get_places_bbox_impl, PlacesQueryParam
from idunn.utils import maps_urls
from idunn.utils.regions import get_region_lonlat
from idunn.utils.result_filter import ResultFilter
from idunn.instant_answer import normalize
from .constants import PoiSource
from .utils import Verbosity
from ..utils.place import place_from_id
from ..utils.verbosity import Verbosity

logger = logging.getLogger(__name__)
result_filter = ResultFilter()
Expand Down Expand Up @@ -163,7 +164,6 @@ async def get_instant_answer_intention(intention, query: str, lang: str):
verbosity=Verbosity.default_list(),
),
sort_by_distance=intention_around_point,
poi_es_index_name="poi",
)

places = places_bbox_response.places
Expand Down
5 changes: 3 additions & 2 deletions idunn/api/places.py
Expand Up @@ -9,14 +9,15 @@


from idunn import settings
from idunn.api.utils import Verbosity
from idunn.utils.es_wrapper import get_elasticsearch
from idunn.utils.covid19_dataset import covid19_osm_task
from idunn.places import Place, Latlon, place_from_id
from idunn.places import Place, Latlon
from idunn.places.base import BasePlace
from idunn.places.exceptions import PlaceNotFound
from idunn.places.exceptions import RedirectToPlaceId, InvalidPlaceId
from .closest import get_closest_place
from ..utils.place import place_from_id
from ..utils.verbosity import Verbosity

logger = logging.getLogger(__name__)

Expand Down
149 changes: 69 additions & 80 deletions idunn/api/places_list.py
Expand Up @@ -10,13 +10,13 @@
from shapely.geometry import MultiPoint, box

from idunn import settings
from idunn.places import POI, BragiPOI
from idunn.api.utils import Verbosity
from idunn.datasources.mimirsbrunn import fetch_es_pois, MimirPoiFilter
from idunn.datasources.pages_jaunes import pj_source
from idunn.geocoder.bragi_client import bragi_client
from idunn.datasources.pages_jaunes import pj_source, PagesJaunes
from .constants import PoiSource, ALL_POI_SOURCES
from .utils import Category
from ..datasources import Datasource
from ..datasources.osm import Osm
from ..datasources.tripadvisor import Tripadvisor
from ..utils.category import Category
from ..utils.verbosity import Verbosity

logger = logging.getLogger(__name__)

Expand All @@ -25,6 +25,8 @@
EXTENDED_BBOX_MAX_SIZE = float(
settings["LIST_PLACES_EXTENDED_BBOX_MAX_SIZE"]
) # max bbox width and height after second extended query
TRIPADVISOR_CATEGORIES_COVERED_WORLDWIDE = ["hotel", "leisure", "attraction", "restaurant"]
TRIPADVISOR_CATEGORIES_COVERED_IN_FRANCE = ["hotel", "leisure", "attraction"]


class CommonQueryParam(BaseModel):
Expand Down Expand Up @@ -152,60 +154,20 @@ async def get_places_bbox(
) -> PlacesBboxResponse:
"""Get all places in a bounding box."""
params = PlacesQueryParam(**locals())
return await get_places_bbox_impl(params, "poi")


# TODO: Just created to test tripadvisor api. To remove after tests
async def get_tripadvisor_places_bbox(
bbox: str = Query(
...,
title="Bounding box",
description="Format: left_lon,bottom_lat,right_lon,top_lat",
example="-4.56,48.35,-4.42,48.46",
),
category: List[Category] = Query([]),
raw_filter: Optional[List[str]] = Query(None),
source: Optional[str] = Query(None),
q: Optional[str] = Query(None, title="Query", description="Full text query"),
size: Optional[int] = Query(None),
lang: Optional[str] = Query(None),
verbosity: Verbosity = Verbosity.default_list(),
extend_bbox: bool = Query(False),
) -> PlacesBboxResponse:
"""Get all places in a bounding box."""
params = PlacesQueryParam(**locals())
return await get_places_bbox_impl(params, "poi_tripadvisor")
return await get_places_bbox_impl(params)


async def get_places_bbox_impl(
params: PlacesQueryParam,
poi_es_index_name: str,
sort_by_distance: Optional[Point] = None,
) -> PlacesBboxResponse:
source = params.source
if source is None:
if (
params.q or (params.category and all(c.pj_what() for c in params.category))
) and pj_source.bbox_is_covered(params.bbox):
params.source = PoiSource.PAGESJAUNES
else:
params.source = PoiSource.OSM

places_list = await _fetch_places_list(params, poi_es_index_name)
bbox_extended = False
if params.source is None:
select_datasource(params)

places_list = await _fetch_places_list(params)
bbox_extended = False
if params.extend_bbox and len(places_list) == 0:
original_bbox = params.bbox
original_bbox_width = original_bbox[2] - original_bbox[0]
original_bbox_height = original_bbox[3] - original_bbox[1]
original_bbox_size = max(original_bbox_height, original_bbox_width)
if original_bbox_size < EXTENDED_BBOX_MAX_SIZE:
# Compute extended bbox and fetch results a second time
scale_factor = EXTENDED_BBOX_MAX_SIZE / original_bbox_size
new_box = scale(box(*original_bbox), xfact=scale_factor, yfact=scale_factor)
params.bbox = new_box.bounds
bbox_extended = True
places_list = await _fetch_places_list(params, poi_es_index_name)
bbox_extended, places_list = await _fetch_extended_bbox(bbox_extended, params, places_list)

if len(places_list) == 0:
results_bbox = None
Expand All @@ -228,33 +190,60 @@ async def get_places_bbox_impl(
)


async def _fetch_places_list(params: PlacesQueryParam, poi_es_index_name: str):
if params.source == PoiSource.PAGESJAUNES:
return await run_in_threadpool(
pj_source.get_places_bbox,
params.category,
params.bbox,
size=params.size,
query=params.q,
)
if params.q:
# Default source (OSM) with query
bragi_response = await bragi_client.pois_query_in_bbox(
query=params.q, bbox=params.bbox, lang=params.lang, limit=params.size
)
return [BragiPOI(f) for f in bragi_response.get("features", [])]
def select_datasource(params):
if pj_source.bbox_is_covered(params.bbox):
select_datasource_for_france(params)
else:
select_datasource_outside_france(params)

# Default source (OSM) with category or class/subclass filters
if params.raw_filter:
filters = [MimirPoiFilter.from_url_raw_filter(f) for f in params.raw_filter]

def select_datasource_for_france(params):
if any(cat in params.category for cat in TRIPADVISOR_CATEGORIES_COVERED_IN_FRANCE):
params.source = PoiSource.TRIPADVISOR
elif is_brand_detected_or_pj_category_cover(params):
params.source = PoiSource.PAGESJAUNES
else:
filters = [f for c in params.category for f in c.raw_filters()]

bbox_places = await run_in_threadpool(
fetch_es_pois,
poi_es_index_name,
filters=filters,
bbox=params.bbox,
max_size=params.size,
)
return [POI(p["_source"]) for p in bbox_places]
params.source = PoiSource.OSM


def select_datasource_outside_france(params):
if any(cat in params.category for cat in TRIPADVISOR_CATEGORIES_COVERED_WORLDWIDE):
params.source = PoiSource.TRIPADVISOR
else:
params.source = PoiSource.OSM


def is_brand_detected_or_pj_category_cover(params):
return params.q or (params.category and all(c.pj_what() for c in params.category))


async def _fetch_extended_bbox(bbox_extended, params, places_list):
original_bbox = params.bbox
original_bbox_width = original_bbox[2] - original_bbox[0]
original_bbox_height = original_bbox[3] - original_bbox[1]
original_bbox_size = max(original_bbox_height, original_bbox_width)
if original_bbox_size < EXTENDED_BBOX_MAX_SIZE:
# Compute extended bbox and fetch results a second time
scale_factor = EXTENDED_BBOX_MAX_SIZE / original_bbox_size
new_box = scale(box(*original_bbox), xfact=scale_factor, yfact=scale_factor)
params.bbox = new_box.bounds
bbox_extended = True
places_list = await _fetch_places_list(params)
return bbox_extended, places_list


async def _fetch_places_list(params: PlacesQueryParam):
datasource = DatasourceFactory().get_datasource(params.source)
return await datasource.get_places_bbox(params)


class DatasourceFactory:
def get_datasource(self, source_type: PoiSource) -> Datasource:
"""Get the matching datasource to fetch POIs"""
if source_type == PoiSource.TRIPADVISOR:
return Tripadvisor()
if source_type == PoiSource.PAGESJAUNES:
return PagesJaunes()
if source_type == PoiSource.OSM:
return Osm()
raise ValueError("%s is not a valid source type", source_type)
15 changes: 1 addition & 14 deletions idunn/api/urls.py
Expand Up @@ -4,7 +4,7 @@
from .pois import get_poi
from .places import get_place, get_place_latlon
from .status import get_status
from .places_list import get_places_bbox, get_tripadvisor_places_bbox, PlacesBboxResponse
from .places_list import get_places_bbox, PlacesBboxResponse
from .categories import AllCategoriesResponse, get_all_categories
from .closest import closest_address
from ..directions.models import DirectionsResponse
Expand Down Expand Up @@ -55,19 +55,6 @@ def get_api_urls(settings):
response_model=PlacesBboxResponse,
responses={400: {"description": "Client Error in query params"}},
),
APIRoute(
"/tripadvisor_places",
get_tripadvisor_places_bbox,
dependencies=[
rate_limiter_dependency(
resource="idunn.get_tripadvisor_places_bbox",
max_requests=int(settings["LIST_PLACES_RL_MAX_REQUESTS"]),
expire=int(settings["LIST_PLACES_RL_EXPIRE"]),
)
],
response_model=PlacesBboxResponse,
responses={400: {"description": "Client Error in query params"}},
),
APIRoute("/places/latlon:{lat}:{lon}", get_place_latlon, response_model=Place),
APIRoute("/places/{id}", get_place, response_model=Place),
# Categories
Expand Down

0 comments on commit 9c76c82

Please sign in to comment.