In [1]:
import os
import time

import geopandas as gpd
import osm2geojson
import osmnx as ox
import pandas as pd
import requests


def get_osm_data(tag_query, extent, area_type, max_tries=5, wait_s=30):
    """Download OSM elements matching a tag filter from the Overpass API.

    Parameters
    ----------
    tag_query : str
        Overpass tag filter placed after each element type, e.g. '["shop"]'.
    extent : geopandas.GeoSeries or geopandas.GeoDataFrame
        Area of interest. It is reprojected to EPSG:4326 for the query when
        its CRS differs.
    area_type : str
        'bbox' queries the total bounds of `extent`; 'convexhull' queries the
        convex hull of the first geometry in `extent`.
    max_tries : int, optional
        Maximum number of requests sent while the server answers 429 or 504.
    wait_s : int or float, optional
        Back-off step in seconds; the pause before retry k is `k * wait_s`.

    Returns
    -------
    geopandas.GeoDataFrame
        Tagged features reprojected to the CRS `extent` had on input, or an
        empty, column-less GeoDataFrame when the query matched nothing.

    Raises
    ------
    ValueError
        For an unknown `area_type`, for any HTTP error other than 429/504,
        or when every attempt was answered with 429/504.
    """
    osm_crs = 'EPSG:4326'
    url = "http://overpass-api.de/api/interpreter"

    my_crs = extent.crs
    if my_crs != osm_crs:
        extent = extent.to_crs(osm_crs)

    if area_type == 'bbox':
        # Overpass expects (south, west, north, east); total_bounds is
        # (minx, miny, maxx, maxy), i.e. lon/lat order.
        minx, miny, maxx, maxy = extent.total_bounds
        area = '{},{},{},{}'.format(miny, minx, maxy, maxx)
    elif area_type == 'convexhull':
        # Positional access: the first geometry need not carry index label 0.
        hull = extent.convex_hull.iloc[0]
        lons, lats = hull.exterior.coords.xy
        # Overpass polygons are a space-separated list of "lat lon" pairs.
        pairs = ' '.join('{} {}'.format(lat, lon) for lat, lon in zip(lats, lons))
        area = 'poly:"{}"'.format(pairs)
    else:
        raise ValueError('Unknown type {}: please use [bbox] or [convexhull]'.format(area_type))

    data_request = """
        [out:json];
        (
            node{0}({1});
            way{0}({1});
            relation{0}({1});
        );
        out geom;
        >;
        out skel qt;
    """.format(tag_query, area)

    for attempt in range(max_tries):
        response = requests.get(url, params={'data': data_request})

        if response.ok:
            geojson = osm2geojson.json2geojson(response.json())
            if not geojson['features']:
                return gpd.GeoDataFrame()            # empty dataframe
            gdf = gpd.GeoDataFrame.from_features(geojson)
            gdf = gdf[gdf['tags'].notna()]      # to avoid including both ways and their waypoints
            gdf.crs = osm_crs
            return gdf.to_crs(my_crs)            # CRS of our input dataset

        # Only "too many requests" (429) and "gateway timeout" (504) are
        # worth retrying; anything else is reported immediately.
        if response.status_code not in (429, 504):
            raise ValueError('Response {}\nAborting'.format(response))

        remaining = max_tries - attempt - 1
        if remaining == 0:
            break
        wait = (attempt + 1) * wait_s
        print('Response {}\nWaiting {}sec, and trying again max {} more times'.format(response, wait, remaining))
        time.sleep(wait)

    raise ValueError('No valid response to OSM query after trying {} times'.format(max_tries))



def get_osm_pois_for_city(city_name, buffer_m, query, category, cols_to_keep):
    """Fetch OSM features for a buffered city outline and expand selected tags.

    Parameters
    ----------
    city_name : str
        Place name passed to `ox.geocode_to_gdf`.
    buffer_m : int or float
        Buffer distance applied to the geocoded outline, in the units of
        EPSG:28992.
    query : str
        Overpass tag filter forwarded to `get_osm_data`, e.g. '["shop"]'.
    category : str
        Value written to the `osm_category` column of every returned row.
    cols_to_keep : list of str
        Tag keys to expose as columns; keys that no feature carries yield
        all-NaN columns.

    Returns
    -------
    geopandas.GeoDataFrame
        The `get_osm_data` result plus `osm_category` and one column per
        entry in `cols_to_keep`. When the query matched nothing, the empty
        frame from `get_osm_data` is returned unchanged.
    """
    area_type = 'convexhull'
    # Geocode once and reproject to EPSG:28992 before buffering, so that the
    # buffer distance is expressed in that CRS's units rather than degrees.
    extent = ox.geocode_to_gdf(city_name).to_crs('epsg:28992')
    extent = extent.buffer(buffer_m, join_style=2)

    gdf_res = get_osm_data(query, extent, area_type)
    if gdf_res.empty:
        # Nothing matched: there is no `tags` column to expand.
        return gdf_res

    gdf_res["osm_category"] = category
    print(gdf_res.head())

    # Convert the tags dict to columns; reindex so a requested key that is
    # absent from every feature becomes a NaN column instead of a KeyError.
    gdf_tags = gdf_res['tags'].apply(pd.Series).reindex(columns=cols_to_keep)
    return pd.concat([gdf_res, gdf_tags], axis=1)

In [2]:
import pandas as pd
import geopandas as gpd
from sqlalchemy import create_engine
from geoalchemy2 import Geometry, WKTElement
import geopandas as gpd
import time


###########################
# Connection String to DB #
###########################
# Read the connection URL from the AGE_SEGREGATION_DB_URL environment variable
# so that real credentials need not be stored in the notebook. The fallback is
# the local development database this notebook was originally written against.
db_connection_string = os.environ.get(
    'AGE_SEGREGATION_DB_URL',
    'postgresql://postgres:postgres@localhost/age_segregation',
)
engine = create_engine(db_connection_string)

# ---------------------------------------------------------------
# Tag values to keep, grouped by the OSM key they belong to
# ---------------------------------------------------------------
leisure_cats = [
    "beach_resort",
    "dog_park",
    "park",
    "playground",
    "swimming_area",
]
amenity_cats = [
    "cafe",
    "fast_food",
    "food_court",
    "ice_cream",
    "pub",
    "restaurant",
    "social_facility",
    "arts_centre",
    "cinema",
    "community_centre",
    "social_centre",
    "theatre",
    "bench",
    "library",
    "marketplace",
]
tourism_cats = [
    "attraction",
    "gallery",
    "museum",
]
place_cats = ["square"]

# ---------------------------------------------------------------
# Mapping of tag values to the coarse categories:
# sustenance, culture, public, shop
# ---------------------------------------------------------------
amenity_to_sustenance = [
    "cafe",
    "fast_food",
    "food_court",
    "ice_cream",
    "pub",
    "restaurant",
    "marketplace",
]
amenity_to_culture = [
    "social_facility",
    "arts_centre",
    "cinema",
    "community_centre",
    "social_centre",
    "theatre",
]
# The tourism, leisure and place groups are independent copies of the
# corresponding selection lists above.
tourism_to_culture = list(tourism_cats)
leisure_to_public = list(leisure_cats)
amenity_to_public = ["bench", "library"]
place_to_public = list(place_cats)

# Shop sub-types, following the OSM shop taxonomy. Each group is kept as one
# comma-separated string (split across source lines) and turned into a list.
food = (
    'alcohol,bakery,beverages,brewing_supplies,butcher,cheese,chocolate,'
    'coffee,confectionery,convenience,deli,dairy,farm,frozen_food,'
    'greengrocer,health_food,ice_cream,organic,pasta,pastry,seafood,'
    'spices,tea,wine,water'
).split(",")
general = 'general,kiosk,mall,supermarket,wholesale'.split(',')
clothes = (
    'baby_goods,bag,boutique,clothes,fabric,fashion,fashion_accessories,'
    'jewelry,leather,sewing,shoes,tailor,watches,wool'
).split(",")
discount = 'charity,second_hand,variety_store'.split(",")
beauty = (
    'beauty,chemist,cosmetics,drugstore,erotic,hairdresser,'
    'hairdresser_supply,hearing_aids,herbalist,massage,medical_supply,'
    'nutrition_supplements,optician,perfumery,tattoo'
).split(",")
diy = (
    'agrarian,appliance,bathroom_furnishing,doityourself,electrical,energy,'
    'fireplace,florist,garden_centre,garden_furniture,gas,glaziery,'
    'groundskeeping,hardware,houseware,locksmith,paint,security,trade,'
    'windows'
).split(",")
furniture = (
    'antiques,bed,candles,carpet,curtain,doors,flooring,furniture,'
    'household_linen,interior_decoration,kitchen,lamps,lighting,tiles,'
    'window_blind'
).split(",")
electronics = (
    'computer,electronics,hifi,mobile_phone,radiotechnics,vacuum_cleaner'
).split(",")
sports_vehicles = (
    'atv,bicycle,boat,car,car_repair,car_parts,caravan,fuel,fishing,golf,'
    'hunting,jetski,military_surplus,motorcycle,outdoor,scuba_diving,ski,'
    'snowmobile,sports,swimming_pool,trailer,tyres'
).split(",")
art = (
    'art,collector,craft,frame,games,model,music,musical_instrument,photo,'
    'camera,trophy,video,video_games'
).split(",")
stationery = 'anime,books,gift,lottery,newsagent,stationery,ticket'.split(",")
other = (
    'bookmaker,cannabis,copyshop,dry_cleaning,e-cigarette,'
    'funeral_directors,laundry,money_lender,party,pawnbroker,pet,'
    'pet_grooming,pest_control,pyrotechnics,religion,storage_rental,'
    'tobacco,toys,travel_agency,vacant,weapons,outpost,user defined'
).split(",")

# For each city
# 'hague',  "rotterdam", "amsterdam", "eindhoven", "utrecht", "hague"
buffer_m = 1000
tag_keys = ['leisure', 'amenity', 'place', 'tourism', 'shop']

# (source OSM key, tag values, resulting category). Rules are keyed on the
# OSM key the row was downloaded for, because some values occur under more
# than one key (e.g. 'ice_cream' is both an amenity and a shop type).
category_rules = [
    ('amenity', amenity_to_sustenance, 'sustenance'),
    ('amenity', amenity_to_culture, 'culture'),
    ('tourism', tourism_to_culture, 'culture'),
    ('leisure', leisure_to_public, 'public'),
    ('amenity', amenity_to_public, 'public'),
    ('place', place_to_public, 'public'),
    ('shop', food, 'food_shop'),
    ('shop', general, 'general_shop'),
    ('shop', clothes, 'clothes_shop'),
    ('shop', discount, 'discount_shop'),
    ('shop', beauty, 'beauty_shop'),
    ('shop', diy, 'diy_shop'),
    ('shop', furniture, 'furniture_shop'),
    ('shop', electronics, 'electronics_shop'),
    ('shop', sports_vehicles, 'sportsvehicles_shop'),
    ('shop', art, 'art_shop'),
    ('shop', stationery, 'stationery_shop'),
    ('shop', other, 'other_shop'),
]

for city_name in ["hague, netherlands"]:
    print(city_name)

    # Download one frame per OSM key and combine them in a single concat
    # (DataFrame.append was removed in pandas 2.0).
    frames = []
    for query in ['["amenity"]', '["shop"]', '["place"]', '["tourism"]', '["leisure"]']:
        print(query)
        cat = query.split('"')[1]
        cols_to_keep = [cat, 'name']
        # cols_to_keep = [cat, 'name', 'opening_hours', 'max_age', 'min_age', 'type' ]
        gdf_new = get_osm_pois_for_city(city_name, buffer_m, query, cat, cols_to_keep)
        if not gdf_new.empty:
            frames.append(gdf_new)

    if not frames:
        print('No OSM features found for {}'.format(city_name))
        continue

    gdf = pd.concat(frames, ignore_index=True)
    # A key whose query returned nothing has no column yet; add it so the
    # filters below can be applied uniformly.
    for key in tag_keys:
        if key not in gdf.columns:
            gdf[key] = None

    ##########################################
    # Filter - keep only specific categories #
    ##########################################
    gdf_filter_leis = gdf[gdf['leisure'].isin(leisure_cats)]
    gdf_filter_amen = gdf[gdf['amenity'].isin(amenity_cats)]
    gdf_filter_tourism = gdf[gdf['tourism'].isin(tourism_cats)]
    gdf_filter_shop = gdf[gdf['osm_category'] == 'shop']
    gdf_filter_square = gdf[gdf['place'].isin(place_cats)]

    # Combine the filtered subsets into a single geodataframe. The tourism
    # subset is included here; it used to be computed but never added.
    gdf_res = pd.concat(
        [gdf_filter_leis, gdf_filter_amen, gdf_filter_tourism, gdf_filter_shop, gdf_filter_square],
        ignore_index=True,
    )

    # create categories
    # Each row carries a value under exactly one tag key, so after replacing
    # missing values with '' the concatenation yields that single value.
    gdf_res = gdf_res.fillna('')
    gdf_res['subcategory'] = (
        gdf_res['leisure'] + gdf_res['amenity'] + gdf_res['place'] + gdf_res['tourism'] + gdf_res['shop']
    )
    gdf_res = gdf_res.drop(columns=tag_keys)
    for osm_key, values, label in category_rules:
        matches = (gdf_res['osm_category'] == osm_key) & gdf_res['subcategory'].isin(values)
        gdf_res.loc[matches, 'category'] = label

    # store
    # Schema and table names are derived from the city being processed
    # ("hague, netherlands" -> schema "hague", table "pois_buff_1000_hag"),
    # so additional cities do not all write to the same table.
    # to_postgis is called with its default if_exists behaviour, as before.
    city_key = city_name.split(',')[0].strip().lower().replace(' ', '_')
    table_name = "pois_" + "buff_" + str(buffer_m) + "_" + city_key[0:3]
    gdf_res.to_postgis(table_name, schema=city_key, con=engine)
    print(gdf_res.shape)

hague, netherlands
["amenity"]
                       geometry  type        id  \
0  POINT (84015.830 447906.006)  node  26113742   
1  POINT (79980.833 455024.863)  node  27298196   
2  POINT (85790.268 456162.014)  node  27339866   
3  POINT (78381.736 456749.638)  node  27371903   
4  POINT (78104.318 456818.882)  node  27374886   

                                                tags osm_category  
0  {'amenity': 'post_box', 'brand': 'PostNL', 'br...      amenity  
1  {'amenity': 'restaurant', 'diet:vegan': 'yes',...      amenity  
2                              {'amenity': 'police'}      amenity  
3      {'amenity': 'post_box', 'operator': 'PostNL'}      amenity  
4      {'amenity': 'post_box', 'operator': 'PostNL'}      amenity  
["shop"]
                       geometry  type        id  \
0  POINT (78010.957 450184.168)  node  34044390   
1  POINT (77743.610 451114.431)  node  34044395   
2  POINT (80874.957 454938.012)  node  34044402   
3  POINT (80027.280 455404.992)  node  34