diff --git a/pynsee/geodata/__init__.py b/pynsee/geodata/__init__.py index e7327835..1d21e582 100644 --- a/pynsee/geodata/__init__.py +++ b/pynsee/geodata/__init__.py @@ -4,4 +4,5 @@ from .get_geodata_list import get_geodata_list from .GeoFrDataFrame import GeoFrDataFrame + __all__ = ["get_geodata", "get_geodata_list", "GeoFrDataFrame"] diff --git a/pynsee/geodata/get_geodata.py b/pynsee/geodata/get_geodata.py index 8dbf4fdd..caaf552c 100644 --- a/pynsee/geodata/get_geodata.py +++ b/pynsee/geodata/get_geodata.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import warnings -from pynsee.geodata.GeoFrDataFrame import GeoFrDataFrame +from pynsee.geodata import GeoFrDataFrame from pynsee.geodata._get_geodata import _get_geodata diff --git a/pynsee/sirene/SireneDataFrame.py b/pynsee/sirene/SireneDataFrame.py index f180099c..884f8e9b 100644 --- a/pynsee/sirene/SireneDataFrame.py +++ b/pynsee/sirene/SireneDataFrame.py @@ -1,5 +1,44 @@ +import logging +import re +import requests +import warnings + +from functools import lru_cache +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry + +import numpy as np import pandas as pd +from tqdm import trange +from shapely.geometry import Point +from shapely.errors import ShapelyDeprecationWarning + +from pynsee.geodata import GeoFrDataFrame +from pynsee.sirene._get_location_openstreetmap import ( + _get_location_openstreetmap, +) + + +logger = logging.getLogger(__name__) + + +@lru_cache(maxsize=None) +def _warning_get_location(): + logger.warning( + "For at least one point, exact location has not been found, city " + "location has been given instead" + ) + + +@lru_cache(maxsize=None) +def _warning_OSM(): + logger.info( + "This function returns data made available by OpenStreetMap and its " + "contributors.\n" + "Please comply with Openstreetmap's Copyright and ODbL Licence" + ) + class SireneDataFrame(pd.DataFrame): """Class for handling dataframes built from INSEE SIRENE API's data""" @@ -11,4 +50,195 @@ def __init__(self, *args, **kwargs): def _constructor(self): return SireneDataFrame - from pynsee.sirene.get_location import get_location + def get_location(self, update=False): + """ + Get latitude and longitude from OpenStreetMap, add geometry column and + turn ``SireneDataframe`` into ``GeoFrDataFrame``. + + Args: + update (bool, optional): data is saved locally, set update=True to + trigger an update. Defaults to False. + + Notes: + If it fails to find the exact location, by default it returns the + location of the city. Whether the exact location has been found or + not is encoded in the `exact_location` column of the new + ``GeoFrDataFrame``. + + Examples: + >>> from pynsee.metadata import get_activity_list + >>> from pynsee.sirene import search_sirene + >>> # + >>> # Get activity list + >>> naf5 = get_activity_list('NAF5') + >>> # + >>> # Get alive legal entities belonging to the automotive industry + >>> df = search_sirene(variable = ["activitePrincipaleEtablissement"], + >>> pattern = ['29.10Z'], kind = 'siret') + >>> # + >>> # Keep businesses with more than 100 employees + >>> df = df.loc[df['effectifsMinEtablissement'] > 100] + >>> df = df.reset_index(drop=True) + >>> # + >>> # Get location + >>> df = df.get_location() + """ + _warning_OSM() + + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", category=ShapelyDeprecationWarning) + + df = self.reset_index(drop=True) + + def clean(string): + if pd.isna(string): + cleaned = "" + else: + cleaned = string + return cleaned + + list_col = [ + "siret", + "numeroVoieEtablissement", + "typeVoieEtablissementLibelle", + "libelleVoieEtablissement", + "codePostalEtablissement", + "libelleCommuneEtablissement", + ] + + if set(list_col).issubset(df.columns): + list_location = [] + timeSleep = 1 + session = requests.Session() + retry = Retry(connect=3, backoff_factor=timeSleep) + adapter = HTTPAdapter(max_retries=retry) + session.mount("http://", adapter) + session.mount("https://", adapter) + + for i in trange(len(df.index), desc="Getting location"): + siret = clean(df.loc[i, "siret"]) + nb = clean(df.loc[i, "numeroVoieEtablissement"]) + street_type = clean( + df.loc[i, "typeVoieEtablissementLibelle"]) + street_name = clean(df.loc[i, "libelleVoieEtablissement"]) + + postal_code = clean(df.loc[i, "codePostalEtablissement"]) + city = clean(df.loc[i, "libelleCommuneEtablissement"]) + city = re.sub("[0-9]|EME", "", city) + + city = re.sub(" D ", " D'", re.sub(" L ", " L'", city)) + street_name = re.sub( + " D ", " D'", re.sub(" L ", " L'", street_name) + ) + street_type = re.sub( + " D ", " D'", re.sub(" L ", " L'", street_type) + ) + + list_var = [] + + variables = [ + nb, street_type, street_name, postal_code, city + ] + + for var in variables: + if var != "": + list_var += [re.sub(" ", "+", var)] + + query = "+".join(list_var) + + if query != "": + query += "+FRANCE" + + list_var_backup = [] + + for var in [postal_code, city]: + if var != "": + list_var_backup += [re.sub(" ", "+", var)] + + query_backup = "+".join(list_var_backup) + + if query_backup != "": + query_backup += "+FRANCE" + + exact_location = True + + try: + ( + lat, + lon, + category, + typeLoc, + importance, + ) = _get_location_openstreetmap( + query=query, session=session, update=update + ) + except Exception: + exact_location = False + + try: + ( + lat, + lon, + category, + typeLoc, + importance, + ) = _get_location_openstreetmap( + query=query_backup, session=session, + update=update + ) + importance = None + except Exception: + lat, lon, category, typeLoc, importance = ( + None, + None, + None, + None, + None, + ) + else: + _warning_get_location() + + df_location = pd.DataFrame({ + "siret": siret, + "latitude": lat, + "longitude": lon, + "category": category, + "crsCoord": "EPSG:4326", + "type": typeLoc, + "importance": importance, + "exact_location": exact_location, + }, + index=[0], + ) + + list_location.append(df_location) + + df_location = pd.concat(list_location) + df_location = df_location.reset_index(drop=True) + + sirene_df = pd.merge( + self, df_location, on="siret", how="left") + + sirene_df["latitude"] = pd.to_numeric(sirene_df["latitude"]) + sirene_df["longitude"] = pd.to_numeric(sirene_df["longitude"]) + list_points = [] + + for i in range(len(sirene_df.index)): + if (sirene_df.loc[i, "latitude"] is None) or np.isnan( + sirene_df.loc[i, "latitude"] + ): + list_points += [None] + else: + list_points += [ + Point( + sirene_df.loc[i, "longitude"], + sirene_df.loc[i, "latitude"], + ) + ] + + sirene_df["geometry"] = list_points + + return GeoFrDataFrame(sirene_df) + + return df diff --git a/pynsee/sirene/_get_location_openstreetmap.py b/pynsee/sirene/_get_location_openstreetmap.py index e91efa3e..8017e6e3 100644 --- a/pynsee/sirene/_get_location_openstreetmap.py +++ b/pynsee/sirene/_get_location_openstreetmap.py @@ -1,14 +1,20 @@ +import json import os -from pathlib import Path import requests + +from pathlib import Path from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry + import pandas as pd +from pynsee.utils._create_insee_folder import _create_insee_folder +from pynsee.utils._hash import _hash from pynsee.utils._make_dataframe_from_dict import _make_dataframe_from_dict +from pynsee.utils._warning_cached_data import _warning_cached_data -def _get_location_openstreetmap(query, session=None): +def _get_location_openstreetmap(query, session=None, update=False): if session is None: session = requests.Session() @@ -17,26 +23,38 @@ def _get_location_openstreetmap(query, session=None): session.mount("http://", adapter) session.mount("https://", adapter) - api_link = "https://nominatim.openstreetmap.org/search.php?q={}&format=jsonv2&limit=1".format( - query - ) - # api_link = 'https://nominatim.openstreetmap.org/search?q=ZONE+INDUSTRIELLE+54980+BATILLY+FRANCE&format=json&limit=1' + api_link = "https://nominatim.openstreetmap.org/search.php?" \ + f"q={query}&format=jsonv2&limit=1" + + insee_folder = _create_insee_folder() + filename = os.path.join(insee_folder, f"{_hash(api_link)}.json") try: home = str(Path.home()) user_agent = os.path.basename(home) - except: + except Exception: user_agent = "" headers = {"User-Agent": "python_package_pynsee_" + user_agent.replace("/", "")} try: proxies = {"http": os.environ["http_proxy"], "https": os.environ["https_proxy"]} - except: + except Exception: proxies = {"http": "", "https": ""} - results = session.get(api_link, proxies=proxies, headers=headers) - data = results.json() + data = None + + if update or not os.path.isfile(filename): + results = session.get(api_link, proxies=proxies, headers=headers) + data = results.json() + + with open(filename, "w") as f: + json.dump(data, f) + else: + with open(filename, "r") as f: + data = json.load(f) + + _warning_cached_data(filename) list_dataframe = [] diff --git a/pynsee/sirene/get_location.py b/pynsee/sirene/get_location.py deleted file mode 100644 index 2dbecd12..00000000 --- a/pynsee/sirene/get_location.py +++ /dev/null @@ -1,213 +0,0 @@ -import re -import pandas as pd -from tqdm import trange -import numpy as np -from functools import lru_cache -import requests -from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry -from shapely.geometry import Point -import warnings -from shapely.errors import ShapelyDeprecationWarning - -from pynsee.geodata.GeoFrDataFrame import GeoFrDataFrame -from pynsee.sirene._get_location_openstreetmap import ( - _get_location_openstreetmap, -) - -import logging - -logger = logging.getLogger(__name__) - - -@lru_cache(maxsize=None) -def _warning_get_location(): - logger.warning( - "For at least one point, exact location has not been found, city " - "location has been given instead" - ) - - -@lru_cache(maxsize=None) -def _warning_OSM(): - logger.info( - "This function returns data made available by OpenStreetMap and its " - "contributors.\n" - "Please comply with Openstreetmap's Copyright and ODbL Licence" - ) - - -def get_location(self): - """Get latitude and longitude from OpenStreetMap, add geometry column and turn SireneDataframe into GeoFrDataFrame - - Notes: - If it fails to find the exact location, by default it returns the location of the city. - - Examples: - >>> from pynsee.metadata import get_activity_list - >>> from pynsee.sirene import search_sirene - >>> # - >>> # Get activity list - >>> naf5 = get_activity_list('NAF5') - >>> # - >>> # Get alive legal entities belonging to the automotive industry - >>> df = search_sirene(variable = ["activitePrincipaleEtablissement"], - >>> pattern = ['29.10Z'], kind = 'siret') - >>> # - >>> # Keep businesses with more than 100 employees - >>> df = df.loc[df['effectifsMinEtablissement'] > 100] - >>> df = df.reset_index(drop=True) - >>> # - >>> # Get location - >>> df = df.get_location() - """ - - _warning_OSM() - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) - - df = self.reset_index(drop=True) - - def clean(string): - if pd.isna(string): - cleaned = "" - else: - cleaned = string - return cleaned - - list_col = [ - "siret", - "numeroVoieEtablissement", - "typeVoieEtablissementLibelle", - "libelleVoieEtablissement", - "codePostalEtablissement", - "libelleCommuneEtablissement", - ] - - if set(list_col).issubset(df.columns): - list_location = [] - timeSleep = 1 - session = requests.Session() - retry = Retry(connect=3, backoff_factor=timeSleep) - adapter = HTTPAdapter(max_retries=retry) - session.mount("http://", adapter) - session.mount("https://", adapter) - - for i in trange(len(df.index), desc="Getting location"): - siret = clean(df.loc[i, "siret"]) - nb = clean(df.loc[i, "numeroVoieEtablissement"]) - street_type = clean(df.loc[i, "typeVoieEtablissementLibelle"]) - street_name = clean(df.loc[i, "libelleVoieEtablissement"]) - - postal_code = clean(df.loc[i, "codePostalEtablissement"]) - city = clean(df.loc[i, "libelleCommuneEtablissement"]) - city = re.sub("[0-9]|EME", "", city) - - city = re.sub(" D ", " D'", re.sub(" L ", " L'", city)) - street_name = re.sub( - " D ", " D'", re.sub(" L ", " L'", street_name) - ) - street_type = re.sub( - " D ", " D'", re.sub(" L ", " L'", street_type) - ) - - list_var = [] - for var in [nb, street_type, street_name, postal_code, city]: - if var != "": - list_var += [re.sub(" ", "+", var)] - - query = "+".join(list_var) - if query != "": - query += "+FRANCE" - - list_var_backup = [] - for var in [postal_code, city]: - if var != "": - list_var_backup += [re.sub(" ", "+", var)] - - query_backup = "+".join(list_var_backup) - if query_backup != "": - query_backup += "+FRANCE" - - try: - ( - lat, - lon, - category, - typeLoc, - importance, - ) = _get_location_openstreetmap( - query=query, session=session - ) - exact_location = True - except: - try: - ( - lat, - lon, - category, - typeLoc, - importance, - ) = _get_location_openstreetmap( - query=query_backup, session=session - ) - importance = None - exact_location = False - except: - lat, lon, category, typeLoc, importance = ( - None, - None, - None, - None, - None, - ) - exact_location = False - else: - _warning_get_location() - - df_location = pd.DataFrame( - { - "siret": siret, - "latitude": lat, - "longitude": lon, - "category": category, - "crsCoord": "EPSG:4326", - "type": typeLoc, - "importance": importance, - 'exact_location': exact_location, - }, - index=[0], - ) - - list_location.append(df_location) - - df_location = pd.concat(list_location) - df_location = df_location.reset_index(drop=True) - - sirene_df = pd.merge(self, df_location, on="siret", how="left") - - sirene_df["latitude"] = pd.to_numeric(sirene_df["latitude"]) - sirene_df["longitude"] = pd.to_numeric(sirene_df["longitude"]) - list_points = [] - - for i in range(len(sirene_df.index)): - if (sirene_df.loc[i, "latitude"] is None) or np.isnan( - sirene_df.loc[i, "latitude"] - ): - list_points += [None] - else: - list_points += [ - Point( - sirene_df.loc[i, "longitude"], - sirene_df.loc[i, "latitude"], - ) - ] - - sirene_df["geometry"] = list_points - - GeoDF = GeoFrDataFrame(sirene_df) - - return GeoDF - else: - return df diff --git a/tests/geodata/test_pynsee_geodata.py b/tests/geodata/test_pynsee_geodata.py index 89e58397..86a55b6c 100644 --- a/tests/geodata/test_pynsee_geodata.py +++ b/tests/geodata/test_pynsee_geodata.py @@ -7,13 +7,11 @@ import requests import unittest -from shapely.geometry import Polygon, MultiPolygon, MultiLineString, MultiPoint, Point +from shapely.geometry import Polygon, MultiPolygon, MultiPoint, Point -from pynsee.geodata.get_geodata_list import get_geodata_list -from pynsee.geodata.get_geodata import get_geodata +from pynsee.geodata import GeoFrDataFrame, get_geodata, get_geodata_list from pynsee.geodata._get_geodata import _get_geodata from pynsee.geodata._get_bbox_list import _get_bbox_list -from pynsee.geodata.GeoFrDataFrame import GeoFrDataFrame from pynsee.geodata._get_data_with_bbox import _get_data_with_bbox, _set_global_var from pynsee.geodata._get_geodata_with_backup import _get_geodata_with_backup @@ -21,48 +19,49 @@ # coverage run -m unittest tests/geodata/test_pynsee_geodata.py # coverage report --omit=*/utils/*,*/macrodata/*,*/localdata/*,*/download/*,*/sirene/*,*/metadata/* -m + class TestFunction(TestCase): version_3_7 = (sys.version_info[0] == 3) & (sys.version_info[1] == 7) if version_3_7 is False: - + def test_get_geodata_with_backup(self): df = _get_geodata_with_backup("ADMINEXPRESS-COG.LATEST:departement") - self.assertTrue(isinstance(df, pd.DataFrame)) - + self.assertTrue(isinstance(df, pd.DataFrame)) + def test_get_geodata_short(self): - + global session session = requests.Session() list_bbox = (-2, 43.0, 6.0, 44.5) for crs in ["EPSG:4326"]: link= f"https://wxs.ign.fr/administratif/geoportail/wfs?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature&TYPENAME=ADMINEXPRESS-COG-CARTO.LATEST:commune&srsName={crs}&OUTPUTFORMAT=application/json&COUNT=1000" data = _get_data_with_bbox(link, list_bbox, crsPolygon=crs) - self.assertTrue(isinstance(data, pd.DataFrame)) - + self.assertTrue(isinstance(data, pd.DataFrame)) + square = [Point(0, 0), - Point(0, 0), - Point(0, 0), - Point(0, 0)] - + Point(0, 0), + Point(0, 0), + Point(0, 0)] + poly_bbox = Polygon([[p.x, p.y] for p in square]) df = _get_geodata(id = 'ADMINEXPRESS-COG-CARTO.LATEST:commune', polygon = poly_bbox, update=True) - self.assertTrue(isinstance(df, pd.DataFrame)) - + self.assertTrue(isinstance(df, pd.DataFrame)) + _set_global_var(args=[link, list_bbox, session, "EPSG:4326"]) - + df = get_geodata_list(update=True) self.assertTrue(isinstance(df, pd.DataFrame)) - - chflieu = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:chflieu_commune', update=True) + + chflieu = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:chflieu_commune', update=True) self.assertTrue(isinstance(chflieu, GeoFrDataFrame)) geo = chflieu.get_geom() self.assertTrue(isinstance(geo, MultiPoint)) geo_chflieut = chflieu.translate().zoom().get_geom() - self.assertTrue(isinstance(geo_chflieut, MultiPoint)) + self.assertTrue(isinstance(geo_chflieut, MultiPoint)) - com = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True) + com = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True) self.assertTrue(isinstance(com, GeoFrDataFrame)) geo = com.get_geom() self.assertTrue(isinstance(geo, MultiPolygon)) @@ -74,9 +73,9 @@ def test_get_geodata_short(self): geo29 = dep29.get_geom() self.assertTrue(isinstance(geo29, MultiPolygon)) - com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True, polygon=geo29, crsPolygon="EPSG:4326") + com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True, polygon=geo29, crsPolygon="EPSG:4326") self.assertTrue(isinstance(com29, pd.DataFrame)) - + # query with polygon and crs 3857 dep29 = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:departement', update=True, crs="EPSG:3857") dep29 = dep29[dep29["insee_dep"] == "29"] @@ -84,34 +83,34 @@ def test_get_geodata_short(self): geo29 = dep29.get_geom() self.assertTrue(isinstance(geo29, MultiPolygon)) - com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True, polygon=geo29, crsPolygon="EPSG:3857") - self.assertTrue(isinstance(com29, pd.DataFrame)) + com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True, polygon=geo29, crsPolygon="EPSG:3857") + self.assertTrue(isinstance(com29, pd.DataFrame)) ovdep = com.translate().zoom() self.assertTrue(isinstance(ovdep, GeoFrDataFrame)) geo_ovdep = ovdep.get_geom() self.assertTrue(isinstance(geo_ovdep, MultiPolygon)) - + #test _add_insee_dep_from_geodata - epci = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:epci', update=True) - self.assertTrue(isinstance(epci, GeoFrDataFrame)) + epci = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:epci', update=True) + self.assertTrue(isinstance(epci, GeoFrDataFrame)) epcit = epci.translate().zoom() self.assertTrue(isinstance(epcit, GeoFrDataFrame)) geo_epcit = epcit.get_geom() self.assertTrue(isinstance(geo_epcit, MultiPolygon)) - + # test _add_insee_dep_region - reg = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:region', update=True) - self.assertTrue(isinstance(reg, GeoFrDataFrame)) + reg = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:region', update=True) + self.assertTrue(isinstance(reg, GeoFrDataFrame)) regt = reg.translate().zoom() self.assertTrue(isinstance(regt, GeoFrDataFrame)) geo_regt = regt.get_geom() - self.assertTrue(isinstance(geo_regt, MultiPolygon)) + self.assertTrue(isinstance(geo_regt, MultiPolygon)) dep = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:departement', crs="EPSG:4326") dep13 = dep[dep["insee_dep"] == "13"] - geo13 = dep13.get_geom() - + geo13 = dep13.get_geom() + bbox = _get_bbox_list(polygon=geo13, update=True, crsPolygon="EPSG:4326") self.assertTrue(isinstance(bbox, list)) bbox = _get_bbox_list(polygon=geo13) @@ -119,14 +118,14 @@ def test_get_geodata_short(self): dep = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:departement', crs="EPSG:3857") dep13 = dep[dep["insee_dep"] == "13"] - geo13 = dep13.get_geom() - + geo13 = dep13.get_geom() + bbox = _get_bbox_list(polygon=geo13, update=True, crsPolygon="EPSG:3857") self.assertTrue(isinstance(bbox, list)) - - data = get_geodata(id='test', update=True) + + data = get_geodata(id='test', update=True) self.assertTrue(isinstance(data, pd.DataFrame)) - + if __name__ == '__main__': unittest.main() - #python test_pynsee_geodata.py \ No newline at end of file + #python test_pynsee_geodata.py diff --git a/tests/sirene/test_pynsee_sirene.py b/tests/sirene/test_pynsee_sirene.py index 65cfb779..14e1b8bc 100644 --- a/tests/sirene/test_pynsee_sirene.py +++ b/tests/sirene/test_pynsee_sirene.py @@ -2,18 +2,16 @@ from unittest import TestCase from pandas import pandas as pd -import numpy as np import sys -from shapely.geometry import Point, Polygon, MultiPolygon, LineString, MultiLineString, MultiPoint +from shapely.geometry import ( + Point, Polygon, MultiPolygon, LineString, MultiLineString, MultiPoint) -from pynsee.sirene.get_sirene_data import get_sirene_data -from pynsee.sirene.search_sirene import search_sirene +from pynsee.geodata import GeoFrDataFrame +from pynsee.sirene import ( + SireneDataFrame, get_dimension_list, get_sirene_data, + get_sirene_relatives, search_sirene) from pynsee.sirene._request_sirene import _request_sirene -from pynsee.sirene.get_dimension_list import get_dimension_list -from pynsee.sirene.SireneDataFrame import SireneDataFrame -from pynsee.geodata.GeoFrDataFrame import GeoFrDataFrame -from pynsee.sirene.get_sirene_relatives import get_sirene_relatives class TestFunction(TestCase): @@ -21,28 +19,28 @@ class TestFunction(TestCase): version_3_7 = (sys.version_info[0] == 3) & (sys.version_info[1] == 7) if version_3_7: - + def test_get_sirene_relatives(self): test = True df = get_sirene_relatives('00555008200027') test = test & isinstance(df, SireneDataFrame) - + df = get_sirene_relatives(['39860733300059', '00555008200027']) test = test & isinstance(df, SireneDataFrame) - + df = get_sirene_relatives(['39860733300059', '1']) test = test & isinstance(df, SireneDataFrame) - + self.assertTrue(test) - + def test_error_get_relatives1(self): with self.assertRaises(ValueError): get_sirene_relatives(1) - + def test_error_get_relatives2(self): with self.assertRaises(ValueError): - get_sirene_relatives('0') - + get_sirene_relatives('0') + def test_error_get_relatives(self): with self.assertRaises(ValueError): get_sirene_relatives('0') @@ -62,8 +60,8 @@ def test_get_dimension_list(self): test = test & isinstance(df, pd.DataFrame) self.assertTrue(test) - - + + def test_error_get_dimension_list(self): with self.assertRaises(ValueError): @@ -71,11 +69,11 @@ def test_error_get_dimension_list(self): def test_get_location(self): df = search_sirene(variable=["activitePrincipaleEtablissement"], - pattern=['29.10Z'], kind='siret') + pattern=['29.10Z'], kind='siret') test = True test = test & isinstance(df, SireneDataFrame) - + df = search_sirene(variable="activitePrincipaleEtablissement", pattern='29.10Z', kind='siret') df = df.loc[df['effectifsMinEtablissement'] > 100] @@ -84,10 +82,10 @@ def test_get_location(self): sirdf = df.get_location() test = test & isinstance(sirdf, GeoFrDataFrame) - geo = sirdf.get_geom() - test = test & (type(geo) in [Point, Polygon, MultiPolygon, + geo = sirdf.get_geom() + test = test & (type(geo) in [Point, Polygon, MultiPolygon, LineString, MultiLineString, MultiPoint]) - + self.assertTrue(test) def test_get_sirene_data(self): @@ -96,7 +94,7 @@ def test_get_sirene_data(self): test = isinstance(df1, pd.DataFrame) & isinstance( df2, pd.DataFrame) self.assertTrue(test) - + def test_error_get_sirene_data(self): with self.assertRaises(ValueError): get_sirene_data('1') @@ -147,7 +145,7 @@ def test_search_sirene(self): df = search_sirene(variable='libelleCommuneEtablissement', pattern="montrouge", kind="siret") test = test & isinstance(df, pd.DataFrame) - + df = search_sirene(variable=["denominationUniteLegale", 'categorieEntreprise'], pattern=["Pernod Ricard", 'GE'], phonetic_search=[True, False],