def read(path):
    """Load the CSV at ``path`` and add derived date, time and location columns.

    Processing steps, in order:

    1. Column names are lower-cased and spaces are replaced by underscores.
    2. The columns listed in ``sentinel_cols`` are passed to ``fix_longlat``
       so that their -1 missing-value marker becomes NaN.
    3. These columns are added:

       - ``month`` / ``day``: taken from ``date`` parsed as ``%Y-%m-%d``
       - ``hour`` / ``minute``: taken from ``time`` parsed as ``%H:%M``
       - ``latlong``: one ``(latitude, longitude)`` tuple per row

    4. The index is reset to a fresh 0..n-1 range.

    Args:
        path: Anything ``pd.read_csv`` accepts as its first argument.

    Returns:
        The cleaned ``pandas.DataFrame``.

    Raises:
        KeyError: If one of the columns used here is missing from the file.
        ValueError: If a ``date`` / ``time`` value does not match its format
            (raised by ``pd.to_datetime``).
    """
    # Columns that use -1 as a missing-value marker.
    sentinel_cols = [
        "junction_detail",
        "junction_control",
        "2nd_road_class",
        "pedestrian_crossing-human_control",
        "pedestrian_crossing-physical_facilities",
        "light_conditions",
        "road_surface_conditions",
        "special_conditions_at_site",
        "carriageway_hazards",
        "lsoa_of_accident_location",
    ]

    def add_derived(frame):
        # Parse each column once, vectorised, rather than once per row and
        # once per derived field.
        dates = pd.to_datetime(frame["date"], format="%Y-%m-%d")
        times = pd.to_datetime(frame["time"], format="%H:%M")
        return frame.assign(
            month=dates.dt.month,
            day=dates.dt.day,
            hour=times.dt.hour,
            minute=times.dt.minute,
            latlong=frame[["latitude", "longitude"]].apply(tuple, axis=1),
        )

    return (
        pd.read_csv(path)
        .rename(str.lower, axis="columns")
        .rename(lambda name: name.replace(" ", "_"), axis="columns")
        .pipe(fix_longlat, cols=sentinel_cols)
        .pipe(add_derived)
        .reset_index(drop=True)
    )


def fix_longlat(df, cols):
    """Return a copy of ``df`` with the -1 missing-value marker set to NaN.

    Despite its name, the function does not touch coordinates: it only
    rewrites the columns named in ``cols``.

    Both the string ``"-1"`` and the number ``-1`` are treated as markers.
    ``pd.read_csv`` infers numeric dtypes for all-numeric columns, so matching
    the string form alone would leave those columns unchanged.

    Args:
        df: Input frame; it is not modified.
        cols: Iterable of column labels to clean.

    Returns:
        A new ``pandas.DataFrame`` with the markers replaced by NaN.

    Raises:
        KeyError: If a label in ``cols`` is not a column of ``df``.
    """
    markers = {"-1": np.nan, -1: np.nan}

    cleaned = df.copy()
    for col in cols:
        cleaned[col] = cleaned[col].replace(markers)

    return cleaned

In [198]:
import pandas as pd
import numpy as np

from scipy.spatial.distance import cdist

In [177]:
def read(path, na=np.nan):
    """Read a CSV file and tidy its rows and column names.

    Rows in which every value is missing are dropped, and each column name is
    lower-cased with its spaces turned into underscores.

    Args:
        path: Anything ``pd.read_csv`` accepts as its first argument.
        na: Forwarded to ``pd.read_csv`` as ``na_values``, i.e. extra value(s)
            to treat as missing.

    Returns:
        The resulting ``pandas.DataFrame``. The index is not reset, so it can
        have gaps where empty rows were removed.
    """

    def normalise(name):
        return str.lower(name).replace(" ", "_")

    raw = pd.read_csv(path, na_values=na)
    non_empty = raw.dropna(axis=0, how="all")
    return non_empty.rename(normalise, axis="columns")

In [161]:
def fetch_locales(lat: float, lon: float, ndigits: int = 3):
    """Count premises flagged active / inactive in a box around a coordinate.

    Original description (translated): inventory of premises in the city of
    Barcelona, with the aim of identifying all ground-floor premises with or
    without economic activity.

    The coordinate is rounded to ``ndigits`` decimals. The box spans one unit
    of the last kept decimal place on each side of it, bounds included.
    Rows whose ``id_princip`` equals 1 are counted as active and rows whose
    ``id_princip`` equals 0 as inactive.

    Note that the CSV is re-read through ``read`` on every call.

    Args:
        lat: Latitude of the box centre.
        lon: Longitude of the box centre.
        ndigits: Number of decimals kept for the centre and the box bounds.

    Returns:
        ``{"active": int, "inactive": int}``; both are 0 when no row falls
        inside the box.
    """
    # Half-width of the bounding box: 10 ** -ndigits.
    add = round(1 * 10 ** -(ndigits), ndigits=ndigits + 1)
    lat = round(lat, ndigits=ndigits)
    lon = round(lon, ndigits=ndigits)

    lat_min = round(lat - add, ndigits=ndigits)
    lon_min = round(lon - add, ndigits=ndigits)

    lat_max = round(lat + add, ndigits=ndigits)
    lon_max = round(lon + add, ndigits=ndigits)

    df = read("data/2016_cens_locals_plantabaixa.csv")
    df = df[["latitud", "longitud", "id_princip"]]

    # Explicit comparisons instead of Series.between(..., inclusive=True):
    # the boolean form of `inclusive` is rejected by recent pandas releases.
    in_box = (
        (df["longitud"] >= lon_min)
        & (df["longitud"] <= lon_max)
        & (df["latitud"] >= lat_min)
        & (df["latitud"] <= lat_max)
    )
    flags = df.loc[in_box, "id_princip"]

    # Counting matches directly yields 0 for an absent value, so no exception
    # handling is needed for the "value not present" case.
    return {
        "active": int((flags == 1).sum()),
        "inactive": int((flags == 0).sum()),
    }

In [162]:
# Try fetch_locales on a random coordinate drawn from the extent of `locales`.
for _ in range(1000):
    lat_low, lat_high = locales.latitud.min(), locales.latitud.max()
    lat = np.random.uniform(low=lat_low, high=lat_high)
    lon_low, lon_high = locales.longitud.min(), locales.longitud.max()
    lon = np.random.uniform(low=lon_low, high=lon_high)

    a = fetch_locales(lat, lon)
    print(dict(a))
    # Stop after the first sample; remove to run all 1000 iterations.
    break

{'active': 10, 'inactive': 0}


In [199]:
def closest_point(point, points):
    """Return the entry of ``points`` that lies nearest to ``point``.

    Distances are computed with ``cdist`` using its default metric. When
    several entries are equally near, the first one in ``points`` wins.
    """
    distances = cdist([point], points)
    nearest_idx = distances.argmin()
    return points[nearest_idx]

In [206]:
def fetch_qualitat(lat: float, lon: float, ndigits: int = 3):
    """Find the closest station and count its air-quality readings.

    Original description (translated): the returned values are the number of
    1-hour intervals in which the air quality was X, e.g. ``'qa_high': 450``
    means that during that month there were 450 one-hour intervals in which
    the quality was high.

    The station coordinates come from ``qa``, which is not defined in this
    function and must exist in the enclosing scope with ``latitud`` and
    ``longitud`` columns. The CSV is re-read through ``read`` on every call.

    Args:
        lat: Latitude of the query point.
        lon: Longitude of the query point.
        ndigits: Number of decimals the query point is rounded to before the
            nearest-station search.

    Returns:
        ``{"qa_high": int, "qa_medium": int, "qa_low": int}``: the number of
        rows for the closest station whose ``qualitat_aire`` is ``"Bona"``,
        ``"Regular"`` and ``"Pobra"`` respectively.

    Raises:
        ValueError: If the CSV holds no row at the closest station's
            coordinates.
    """
    # Round the query coordinate before searching for the nearest station.
    lat = round(lat, ndigits=ndigits)
    lon = round(lon, ndigits=ndigits)

    stations = list(zip(qa["latitud"], qa["longitud"]))
    closest = closest_point((lat, lon), stations)

    df = read("data/2019_01_Gener_qualitat_aire_BCN.csv", na="--")
    df = df[["latitud", "longitud", "qualitat_aire"]]

    # Exact float equality: this relies on `qa` and the CSV holding identical
    # coordinate values for each station.
    df = df.loc[(df["longitud"] == closest[1]) & (df["latitud"] == closest[0])]

    # The previous `assert df.shape != (0, 0)` could never fail because the
    # frame always has three columns; check for missing rows explicitly.
    if df.empty:
        raise ValueError(
            f"no air-quality rows found for closest station at {closest}"
        )

    quality = df["qualitat_aire"]

    # Counting matches directly yields 0 for a category that never occurs.
    return {
        "qa_high": int((quality == "Bona").sum()),
        "qa_medium": int((quality == "Regular").sum()),
        "qa_low": int((quality == "Pobra").sum()),
    }

In [207]:
# Print the air-quality counts for 1000 random coordinates drawn from the
# extent of `locales`.
for _ in range(1000):
    lat_low, lat_high = locales.latitud.min(), locales.latitud.max()
    lat = np.random.uniform(low=lat_low, high=lat_high)
    lon_low, lon_high = locales.longitud.min(), locales.longitud.max()
    lon = np.random.uniform(low=lon_low, high=lon_high)

    a = fetch_qualitat(lat, lon)
    print(dict(a))

(41.3875, 2.1151)
{'qa_high': 676, 'qa_medium': 15, 'qa_low': 0}
(41.3875, 2.1151)
{'qa_high': 676, 'qa_medium': 15, 'qa_low': 0}
(41.4039, 2.2045)
{'qa_high': 693, 'qa_medium': 40, 'qa_low': 0}
(41.4039, 2.2045)
{'qa_high': 693, 'qa_medium': 40, 'qa_low': 0}
(41.4039, 2.2045)
{'qa_high': 693, 'qa_medium': 40, 'qa_low': 0}
(41.3853, 2.1538)
{'qa_high': 688, 'qa_medium': 42, 'qa_low': 5}
(41.3788, 2.1331)
{'qa_high': 678, 'qa_medium': 13, 'qa_low': 0}
(41.4039, 2.2045)
{'qa_high': 693, 'qa_medium': 40, 'qa_low': 0}
(41.4039, 2.2045)
{'qa_high': 693, 'qa_medium': 40, 'qa_low': 0}
(41.3875, 2.1151)
{'qa_high': 676, 'qa_medium': 15, 'qa_low': 0}
(41.3875, 2.1151)
{'qa_high': 676, 'qa_medium': 15, 'qa_low': 0}
(41.3987, 2.1534)
{'qa_high': 690, 'qa_medium': 43, 'qa_low': 0}
(41.4183, 2.1239)
{'qa_high': 735, 'qa_medium': 1, 'qa_low': 0}
(41.4261, 2.148)
{'qa_high': 727, 'qa_medium': 7, 'qa_low': 0}
(41.3875, 2.1151)
{'qa_high': 676, 'qa_medium': 15, 'qa_low': 0}
(41.4261, 2.148)
{'qa_high':

KeyboardInterrupt: 