In [1]:
import requests
import pandas as pd
import h3
import json
import os
import h3pandas

In [2]:
def get_metadata():
    """Load the POI metadata JSON file and return the parsed object.

    The file is read from ``../../data/poi/metadata.json``, resolved relative
    to the current working directory.
    """
    with open("../../data/poi/metadata.json") as metadata_file:
        raw_text = metadata_file.read()
    return json.loads(raw_text)

In [3]:
# Parsed POI metadata, loaded once; the helper functions in this notebook read
# each category's "filename" and "query" entries from it.
metadata = get_metadata()

In [4]:
def get_data_file_path(category):
    """Return the path of the data file that belongs to ``category``.

    The file name comes from the ``"filename"`` entry of the category's record
    in the module-level ``metadata`` mapping and is placed under
    ``../../data/poi/``.
    """
    filename = metadata[category]["filename"]
    return f"../../data/poi/{filename}"

def is_data_file_exists(category):
    """Report whether the data file for ``category`` is present on disk.

    Returns ``True`` only when the path exists and is a regular file.
    """
    data_file = get_data_file_path(category)
    return os.path.isfile(data_file)

def read_data_file(category):
    """Read the JSON data file for ``category`` and return its parsed content."""
    with open(get_data_file_path(category)) as data_file:
        return json.load(data_file)

def save_data_file(category, data):
    """Serialize ``data`` as JSON into the data file for ``category``.

    The file is opened in write mode, so any existing content is replaced.
    """
    destination = get_data_file_path(category)
    with open(destination, "w") as data_file:
        json.dump(data, data_file)

def fetch_data(category, timeout=180):
    """Download the data for ``category`` from the Overpass API and cache it.

    The query text is taken from ``metadata[category]["query"]``. The decoded
    JSON payload is written to disk with ``save_data_file`` and then returned.

    Args:
        category: Key into the module-level ``metadata`` mapping.
        timeout: Seconds to wait on the server before giving up (passed to
            ``requests.get``). Without a timeout a stalled connection would
            block the notebook indefinitely.

    Returns:
        The decoded JSON response (a dict containing an ``"elements"`` key).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        ValueError: If the decoded payload has no ``"elements"`` entry.
    """
    response = requests.get(
        "http://overpass-api.de/api/interpreter",
        params={"data": metadata[category]["query"]},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()
    # Validate before caching: get_data() prefers the file on disk, so a bad
    # payload saved here would be served again on every later run.
    if not isinstance(data, dict) or "elements" not in data:
        raise ValueError(
            f"Unexpected response for category {category!r}: no 'elements' entry"
        )
    save_data_file(category, data)
    return data

def get_data(category):
    """Return the data for ``category``, preferring the copy on disk.

    When no data file exists yet for the category, the data is fetched
    through ``fetch_data`` instead.
    """
    if not is_data_file_exists(category):
        return fetch_data(category)
    return read_data_file(category)

In [5]:
# Each entry is [category name, flag]; the flag says whether the "amenity" tag
# of every element should be extracted into its own column.
categories = [
    ['sustenance', True],
    ['public_transport', False],
    ['education', True],
    ['arts_and_culture', True],
    ['sports', False],
]

# One DataFrame per category, built from the "elements" list of its data.
poi_dfs = []
for category, amenity in categories:
    data = get_data(category)
    df = pd.DataFrame(data["elements"]).assign(category=category)
    if amenity:
        # Every element is expected to carry an "amenity" tag here; a missing
        # key raises KeyError.
        df["amenity"] = df["tags"].map(lambda tags: tags["amenity"])
    poi_dfs.append(df)

In [6]:
# Stack the per-category frames and drop the raw columns that are not needed
# any further.
all_poi_df = pd.concat(poi_dfs)
all_poi_df = all_poi_df.drop(columns=["type", "id", "tags"])
# Only "amenity" is expected to be missing (categories loaded without the
# amenity flag). Restricting the fill to that column keeps a missing lat/lon
# from being replaced by a string, which would turn the coordinate columns
# into object dtype.
all_poi_df = all_poi_df.fillna({"amenity": "Not Specified"})
all_poi_df

Unnamed: 0,lat,lon,category,amenity
0,41.925574,-87.649212,sustenance,fast_food
1,41.879673,-87.626422,sustenance,fast_food
2,41.880136,-87.627940,sustenance,fast_food
3,41.880226,-87.624665,sustenance,fast_food
4,41.880648,-87.626422,sustenance,cafe
...,...,...,...,...
106,41.898906,-87.686700,sports,Not Specified
107,41.855305,-87.626832,sports,Not Specified
108,41.937489,-87.644507,sports,Not Specified
109,42.008272,-87.659333,sports,Not Specified


In [7]:
# Persist the combined POI table as Parquet. This runs before the H3 index
# columns are added to all_poi_df in the following cell.
all_poi_df.to_parquet("../../data/poi/poi_data.parquet")

In [8]:
# Per-resolution POI counts: one frame per H3 resolution 5..9, each with the
# columns h3, category, pois, h3_res.
dfs = []
for h3_res in range(5, 10):
    # Name of the column holding the H3 index at this resolution (e.g. "h3_05");
    # geo_to_h3 is expected to add a column with exactly this name.
    h3_col = f"h3_{h3_res:02d}"
    all_poi_df = all_poi_df.h3.geo_to_h3(h3_res, "lat", "lon", set_index=False)
    # Count rows per (cell, category); size() yields an unnamed column 0,
    # which is renamed to "pois" below.
    all_h3_with_poi = (
        all_poi_df.groupby([h3_col, "category"])
        .size()
        .to_frame()
        .reset_index()
        .assign(h3_res=h3_res)
        .rename(columns={0: "pois", h3_col: "h3"})
    )
    dfs.append(all_h3_with_poi)

In [9]:
# Stack the per-resolution count frames row-wise into one long table with a
# fresh 0..n-1 index.
all_h3_with_poi = pd.concat(dfs, ignore_index=True)
all_h3_with_poi

Unnamed: 0,h3,category,pois,h3_res
0,8526641bfffffff,public_transport,200,5
1,8526641bfffffff,sports,1,5
2,8526641bfffffff,sustenance,15,5
3,85266453fffffff,education,9,5
4,85266453fffffff,public_transport,44,5
...,...,...,...,...
5214,89275936d4fffff,sustenance,3,9
5215,89275936d63ffff,sustenance,1,9
5216,89275936dc3ffff,sustenance,2,9
5217,89275936dd3ffff,sustenance,2,9


In [10]:
# One row per distinct (cell, resolution) pair that appears in the counts.
hexagons_df = all_h3_with_poi[["h3", "h3_res"]].drop_duplicates().copy()
# k_ring with distance 1 gives the cell itself plus its adjacent cells.
# Map over the "h3" column directly rather than applying row-wise with axis=1:
# only that one column is needed, so no per-row Series has to be built.
hexagons_df["h3_incl_neighbors"] = hexagons_df["h3"].map(
    lambda cell: list(h3.k_ring(cell, 1))
)
hexagons_df

Unnamed: 0,h3,h3_res,h3_incl_neighbors
0,8526641bfffffff,5,"[852664c7fffffff, 8526640bfffffff, 852664cffff..."
3,85266453fffffff,5,"[852664cbfffffff, 8526645bfffffff, 852664cffff..."
6,85266457fffffff,5,"[8526640bfffffff, 852664cffffffff, 85266453fff..."
9,852664c3fffffff,5,"[852664c7fffffff, 852664cbfffffff, 852664cffff..."
14,852664cbfffffff,5,"[852664cbfffffff, 852664cffffffff, 85266453fff..."
...,...,...,...
5213,89275936d4fffff,9,"[892759368b7ffff, 89275936d43ffff, 89275936d4b..."
5215,89275936d63ffff,9,"[89275936d7bffff, 89275936d67ffff, 89275936d6b..."
5216,89275936dc3ffff,9,"[89275936dd3ffff, 89275936dcfffff, 89275936dc3..."
5217,89275936dd3ffff,9,"[89275936ca7ffff, 89275936dd3ffff, 892664d936b..."


In [11]:
def calculate_poi(hex_and_neighbors, category):
    """Sum the POI counts of one category over a collection of H3 cells.

    Looks at the rows of the module-level ``all_h3_with_poi`` frame whose
    ``h3`` value is in ``hex_and_neighbors`` and whose ``category`` equals
    ``category``, and returns the sum of their ``pois`` column (0 when no row
    matches).
    """
    in_neighborhood = all_h3_with_poi["h3"].isin(hex_and_neighbors)
    same_category = all_h3_with_poi["category"] == category
    return all_h3_with_poi.loc[in_neighborhood & same_category, "pois"].sum()

In [12]:
# For every category add a "<category>_poi" column holding the number of POIs
# of that category found across each hexagon's h3_incl_neighbors list.
for category, _ in categories:
    poi_column = f"{category}_poi"
    hexagons_df[poi_column] = hexagons_df["h3_incl_neighbors"].map(
        lambda cells: calculate_poi(cells, category)
    )

hexagons_df

Unnamed: 0,h3,h3_res,h3_incl_neighbors,sustenance_poi,public_transport_poi,education_poi,arts_and_culture_poi,sports_poi
0,8526641bfffffff,5,"[852664c7fffffff, 8526640bfffffff, 852664cffff...",543,3351,178,14,15
3,85266453fffffff,5,"[852664cbfffffff, 8526645bfffffff, 852664cffff...",1697,4330,333,33,49
6,85266457fffffff,5,"[8526640bfffffff, 852664cffffffff, 85266453fff...",609,3395,187,14,15
9,852664c3fffffff,5,"[852664c7fffffff, 852664cbfffffff, 852664cffff...",4094,5173,386,72,105
14,852664cbfffffff,5,"[852664cbfffffff, 852664cffffffff, 85266453fff...",4285,5262,396,72,110
...,...,...,...,...,...,...,...,...
5213,89275936d4fffff,9,"[892759368b7ffff, 89275936d43ffff, 89275936d4b...",5,5,0,0,0
5215,89275936d63ffff,9,"[89275936d7bffff, 89275936d67ffff, 89275936d6b...",1,0,0,0,0
5216,89275936dc3ffff,9,"[89275936dd3ffff, 89275936dcfffff, 89275936dc3...",4,0,0,0,0
5217,89275936dd3ffff,9,"[89275936ca7ffff, 89275936dd3ffff, 892664d936b...",4,0,0,0,0


In [13]:
# Persist the per-hexagon table (cell, resolution, neighbour list and the
# per-category POI counts) as Parquet.
hexagons_df.to_parquet("../../data/poi/poi_hexagon_data.parquet")