In [54]:
import requests
import pandas as pd
import h3
import json
import os
import h3pandas

In [55]:
def get_metadata():
    """Load the POI category metadata from ``../../data/poi/metadata.json``.

    The file is decoded explicitly as UTF-8 (the encoding JSON documents are
    exchanged in) instead of relying on the platform's locale encoding, so
    the notebook reads the same content on every operating system.

    Returns:
        The parsed JSON document.
    """
    with open("../../data/poi/metadata.json", encoding="utf-8") as f:
        return json.load(f)

In [56]:
# Load the category metadata once into a module-level variable. The helper
# functions in this notebook index it by category name and read each entry's
# "filename" and "query" values.
metadata = get_metadata()

In [57]:
def get_data_file_path(category):
    """Return the relative path of the cached JSON file for ``category``.

    The file name is taken from the ``"filename"`` entry of the category's
    record in the module-level ``metadata``.
    """
    filename = metadata[category]["filename"]
    return "../../data/poi/{}".format(filename)

def is_data_file_exists(category):
    """Tell whether the cached data file for ``category`` is already on disk."""
    cache_path = get_data_file_path(category)
    return os.path.isfile(cache_path)

def read_data_file(category):
    """Read and parse the cached JSON file belonging to ``category``.

    Returns:
        The parsed JSON content of the file.
    """
    cache_path = get_data_file_path(category)
    with open(cache_path) as cache_file:
        return json.load(cache_file)

def save_data_file(category, data):
    """Write ``data`` as JSON to the cache file of ``category``.

    The JSON is first written to a sibling ``<path>.tmp`` file and only moved
    over the real cache file once serialisation has succeeded. Writing in
    place would truncate the existing file before ``json.dump`` runs, so a
    failure half-way (e.g. a non-serialisable value) would leave a corrupt
    file that later looks like a valid cache.

    Args:
        category: Category whose cache file should be written.
        data: JSON-serialisable object to store.

    Raises:
        TypeError: If ``data`` is not JSON-serialisable (raised by ``json``).
        OSError: If the file cannot be written or moved into place.
    """
    path = get_data_file_path(category)
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w") as f:
            json.dump(data, f)
    except BaseException:
        # Do not leave a half-written temp file behind; the previous cache
        # file (if any) is untouched at this point.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    # Replace the destination in a single step.
    os.replace(tmp_path, path)

def fetch_data(category, timeout=180):
    """Download the POI data of ``category`` from the Overpass API and cache it.

    The Overpass query is read from the ``"query"`` entry of the category's
    record in the module-level ``metadata``. The parsed response is stored
    via ``save_data_file`` and returned.

    Args:
        category: Key into ``metadata`` identifying the query to run.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(
        "http://overpass-api.de/api/interpreter",
        params={"data": metadata[category]["query"]},
        timeout=timeout,
    )
    # Fail loudly on an error status so that an error response is never
    # parsed or written to the cache as if it were POI data.
    response.raise_for_status()
    data = response.json()
    save_data_file(category, data)
    return data

def get_data(category):
    """Return the data for ``category``, preferring the on-disk cache.

    When no cache file exists yet, the data is fetched with ``fetch_data``;
    otherwise it is read with ``read_data_file``.
    """
    if not is_data_file_exists(category):
        return fetch_data(category)
    return read_data_file(category)

In [59]:
# Each entry is [category name, whether the category's elements carry an
# "amenity" tag that should be extracted into its own column].
categories = [
    ['sustenance', True],
    ['public_transport', False],
    ['education', True],
    ['arts_and_culture', True],
    ['sports', False],
]

# One DataFrame per category, built from the "elements" list of its data.
poi_dfs = []
for category, amenity in categories:
    payload = get_data(category)
    df = pd.DataFrame(payload["elements"]).assign(category=category)
    if amenity:
        # Pull the "amenity" value out of every element's tag dictionary.
        df["amenity"] = [tags["amenity"] for tags in df["tags"]]
    poi_dfs.append(df)

In [60]:
# Stack the per-category frames, discard the columns that are not needed
# downstream and mark every missing value as "Not Specified".
all_poi_df = (
    pd.concat(poi_dfs)
    .drop(columns=["type", "id", "tags"])
    .fillna("Not Specified")
)
all_poi_df

Unnamed: 0,lat,lon,category,amenity
0,41.925574,-87.649212,sustenance,fast_food
1,41.879673,-87.626422,sustenance,fast_food
2,41.880136,-87.627940,sustenance,fast_food
3,41.880226,-87.624665,sustenance,fast_food
4,41.880648,-87.626422,sustenance,cafe
...,...,...,...,...
106,41.898906,-87.686700,sports,Not Specified
107,41.855305,-87.626832,sports,Not Specified
108,41.937489,-87.644507,sports,Not Specified
109,42.008272,-87.659333,sports,Not Specified


In [61]:
# Persist the combined POI table as a Parquet file.
all_poi_df.to_parquet("../../data/poi/poi_data.parquet")

In [62]:
# For H3 resolutions 7, 8 and 9: tag every POI with its H3 cell and count
# the POIs per (cell, category). One long-format frame per resolution is
# collected in `dfs`.
dfs = []
for h3_res in range(7, 10):
    h3_col = f"h3_{h3_res:02d}"
    # Adds the `h3_col` column to the POI table (kept on `all_poi_df`).
    all_poi_df = all_poi_df.h3.geo_to_h3(h3_res, "lat", "lon", set_index=False)
    counts_at_res = (
        all_poi_df.groupby([h3_col, "category"])
        .size()
        .reset_index(name="pois")
        .assign(h3_res=h3_res)
        .rename(columns={h3_col: "h3"})
    )
    dfs.append(counts_at_res)

In [63]:
# Stack the per-resolution counts row-wise under a fresh 0..n-1 index.
all_h3_with_poi = pd.concat(dfs, ignore_index=True)
all_h3_with_poi

Unnamed: 0,h3,category,pois,h3_res
0,872664190ffffff,public_transport,9,7
1,872664191ffffff,public_transport,16,7
2,872664191ffffff,sustenance,1,7
3,872664193ffffff,public_transport,11,7
4,872664194ffffff,public_transport,48,7
...,...,...,...,...
5066,89275936d4fffff,sustenance,3,9
5067,89275936d63ffff,sustenance,1,9
5068,89275936dc3ffff,sustenance,2,9
5069,89275936dd3ffff,sustenance,2,9


In [64]:
# One row per distinct (cell, resolution) pair that contains at least one POI.
hexagons_df = all_h3_with_poi[["h3", "h3_res"]].drop_duplicates().copy()
# k_ring(cell, 1) yields the cell itself plus its directly adjacent cells.
# Mapping over the "h3" column avoids a row-wise DataFrame.apply(axis=1) that
# only reads a single column, and always yields a Series, even when the
# frame is empty.
hexagons_df["h3_incl_neighbors"] = hexagons_df["h3"].map(
    lambda cell: list(h3.k_ring(cell, 1))
)
hexagons_df

Unnamed: 0,h3,h3_res,h3_incl_neighbors
0,872664190ffffff,7,"[872664191ffffff, 872664195ffffff, 872664193ff..."
1,872664191ffffff,7,"[872664191ffffff, 872664182ffffff, 872664195ff..."
3,872664193ffffff,7,"[872664191ffffff, 87266456dffffff, 872664193ff..."
4,872664194ffffff,7,"[872664cc9ffffff, 872664195ffffff, 8726641b2ff..."
6,872664195ffffff,7,"[872664191ffffff, 872664195ffffff, 872664182ff..."
...,...,...,...
5065,89275936d4fffff,9,"[89275936d7bffff, 89275936d4bffff, 892759368b7..."
5067,89275936d63ffff,9,"[89275936d7bffff, 89275936d73ffff, 89275936d6b..."
5068,89275936dc3ffff,9,"[89275936dd3ffff, 89275936dd7ffff, 89275936dcb..."
5069,89275936dd3ffff,9,"[892664d936bffff, 89275936dd3ffff, 89275936ca7..."


In [65]:
def calculate_poi(hex_and_neighbors, category):
    """Sum the POI counts of one category over a set of H3 cells.

    Reads the module-level ``all_h3_with_poi`` frame and adds up its
    ``"pois"`` values for the rows whose ``"h3"`` is in ``hex_and_neighbors``
    and whose ``"category"`` equals ``category``.

    Args:
        hex_and_neighbors: Collection of H3 cell ids to include.
        category: Category name to count.

    Returns:
        The summed POI count (0 when no row matches).
    """
    in_neighborhood = all_h3_with_poi["h3"].isin(hex_and_neighbors)
    same_category = all_h3_with_poi["category"] == category
    return all_h3_with_poi.loc[in_neighborhood & same_category, "pois"].sum()

In [66]:
# Add one "<category>_poi" column per category: the POI count summed over
# each hexagon's "h3_incl_neighbors" list.
for category, _ in categories:
    poi_column = f"{category}_poi"
    hexagons_df[poi_column] = hexagons_df["h3_incl_neighbors"].apply(
        calculate_poi, category=category
    )

hexagons_df

Unnamed: 0,h3,h3_res,h3_incl_neighbors,sustenance_poi,public_transport_poi,education_poi,arts_and_culture_poi,sports_poi
0,872664190ffffff,7,"[872664191ffffff, 872664195ffffff, 872664193ff...",8,93,0,0,1
1,872664191ffffff,7,"[872664191ffffff, 872664182ffffff, 872664195ff...",3,55,0,0,1
3,872664193ffffff,7,"[872664191ffffff, 87266456dffffff, 872664193ff...",2,64,2,0,0
4,872664194ffffff,7,"[872664cc9ffffff, 872664195ffffff, 8726641b2ff...",13,191,1,0,1
6,872664195ffffff,7,"[872664191ffffff, 872664195ffffff, 872664182ff...",9,93,0,0,1
...,...,...,...,...,...,...,...,...
5065,89275936d4fffff,9,"[89275936d7bffff, 89275936d4bffff, 892759368b7...",5,5,0,0,0
5067,89275936d63ffff,9,"[89275936d7bffff, 89275936d73ffff, 89275936d6b...",1,0,0,0,0
5068,89275936dc3ffff,9,"[89275936dd3ffff, 89275936dd7ffff, 89275936dcb...",4,0,0,0,0
5069,89275936dd3ffff,9,"[892664d936bffff, 89275936dd3ffff, 89275936ca7...",4,0,0,0,0


In [67]:
# Persist the per-hexagon table (cells, neighbour lists and per-category POI
# counts) as a Parquet file.
hexagons_df.to_parquet("../../data/poi/poi_hexagon_data.parquet")