In [4]:
import osmnx as ox
import pandas as pd
from shapely.geometry import Point

# Places data collection from OpenStreetMap

In [6]:
# Define the area of interest (New York City)
place_name = "New York City, USA"
gdf = ox.geocode_to_gdf(place_name)

In [8]:
tags = {
    "amenity": [
        "train_station", "bus_station", "taxi", "cafe", "restaurant", "school",
        "university", "hospital", "pharmacy", "library", "post_office",
        "police", "fire_station", "bank", "atm", "place_of_worship",
        "theatre", "cinema", "nightclub", "pub", "bar", "fast_food",
        "marketplace", "fuel", "parking", "bicycle_parking",
        "car_wash", "kindergarten", "college", "clinic", "dentist",
        "doctors", "arts_centre", "community_centre", "coworking",
        "internet_cafe", "social_facility", "veterinary",
        "childcare", "retirement_home" 
    ],
    "railway": ["station", "halt"],
    "bus": ["yes"],
    "taxi": ["yes"],
    "shop": [
        "bakery", "butcher", "convenience", "supermarket", "greengrocer",
        "kiosk", "books", "clothes", "shoes", "electronics", "furniture",
        "jewelry", "mall",
        "doityourself", "hardware", "garden_centre", "florist", 
        "laundry" 
    ],
    "office": ["travel_agency", "estate_agent"],
    "tourism": ["attraction", "museum", "artwork", "viewpoint"],
    "leisure": ["park", "playground", "stadium", "sports_centre", "swimming_pool"],
    "building": [
        "apartments", "house", "residential", "dormitory", "detached",
        "semidetached_house", "terraced_house", "bungalow" 
    ],
    "landuse": ["residential"] 
}

In [10]:
pois = ox.features_from_place(place_name, tags=tags)

In [17]:
places_data = pois[[
    'name',
    'amenity',
    'railway',
    'bus',
    'taxi',
    'shop',
    'office',
    'tourism',
    'leisure',
    'building',
    'landuse',
    'opening_hours',
    'geometry'
]].copy()

In [19]:
print(f"Number of places collected: {len(places_data)}")

Number of places collected: 266682


In [21]:
places_data['place_type'] = None
places_data['place_subtype'] = None

# Fill 'place_type' and 'place_subtype' based on the original tags
for index, row in places_data.iterrows():
    if pd.notna(row['amenity']):
        places_data.loc[index, 'place_type'] = 'amenity'
        places_data.loc[index, 'place_subtype'] = row['amenity']
    elif pd.notna(row['railway']):
        places_data.loc[index, 'place_type'] = 'transport'
        places_data.loc[index, 'place_subtype'] = row['railway'] + (' station' if row['railway'] in ['station', 'halt'] else ' stop')
    elif pd.notna(row['bus']):
        places_data.loc[index, 'place_type'] = 'transport'
        places_data.loc[index, 'place_subtype'] = 'bus stop'
    elif pd.notna(row['taxi']):
        places_data.loc[index, 'place_type'] = 'transport'
        places_data.loc[index, 'place_subtype'] = 'taxi stand'
    elif pd.notna(row['shop']):
        places_data.loc[index, 'place_type'] = 'shop'
        places_data.loc[index, 'place_subtype'] = row['shop']
    elif pd.notna(row['office']):
        places_data.loc[index, 'place_type'] = 'office'
        places_data.loc[index, 'place_subtype'] = row['office']
    elif pd.notna(row['tourism']):
        places_data.loc[index, 'place_type'] = 'tourism'
        places_data.loc[index, 'place_subtype'] = row['tourism']
    elif pd.notna(row['leisure']):
        places_data.loc[index, 'place_type'] = 'leisure'
        places_data.loc[index, 'place_subtype'] = row['leisure']
    elif pd.notna(row['building']):
        if row['building'] in ['apartments', 'house', 'residential', 'dormitory', 'detached',
                               'semidetached_house', 'terraced_house', 'bungalow']:
            places_data.loc[index, 'place_type'] = 'residential'
            places_data.loc[index, 'place_subtype'] = row['building']
        elif row['building'] in ['train_station', 'bus_station']:
            places_data.loc[index, 'place_type'] = 'transport'
            places_data.loc[index, 'place_subtype'] = f'{row["building"].replace("_", " ")}'
    elif pd.notna(row['landuse']) and row['landuse'] == 'residential':
        places_data.loc[index, 'place_type'] = 'residential'
        places_data.loc[index, 'place_subtype'] = 'residential'

In [27]:
places_data.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
MultiIndex: 266682 entries, ('node', 42432939) to ('relation', 18995909)
Data columns (total 15 columns):
 #   Column         Non-Null Count   Dtype   
---  ------         --------------   -----   
 0   name           55576 non-null   object  
 1   amenity        42441 non-null   object  
 2   railway        551 non-null     object  
 3   bus            18083 non-null   object  
 4   taxi           31 non-null      object  
 5   shop           9226 non-null    object  
 6   office         436 non-null     object  
 7   tourism        1400 non-null    object  
 8   leisure        50930 non-null   object  
 9   building       147831 non-null  object  
 10  landuse        1122 non-null    object  
 11  opening_hours  14017 non-null   object  
 12  geometry       266682 non-null  geometry
 13  place_type     266681 non-null  object  
 14  place_subtype  266681 non-null  object  
dtypes: geometry(1), object(14)
memory usage: 50.0+ MB


In [29]:
final_places_data = places_data[[
    'place_type',
    'place_subtype',
    #'name',
    'geometry',
    'opening_hours'
]].copy()


final_places_data

Unnamed: 0_level_0,Unnamed: 1_level_0,place_type,place_subtype,geometry,opening_hours
element_type,osmid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
node,42432939,transport,bus stop,POINT (-73.94962 40.81410),
node,42448838,transport,bus stop,POINT (-73.94984 40.76168),
node,42503134,transport,bus stop,POINT (-73.98250 40.58210),
node,42535961,transport,bus stop,POINT (-73.95397 40.58801),
node,42736282,transport,station station,POINT (-73.90387 40.85841),
...,...,...,...,...,...
relation,18874034,amenity,parking,"POLYGON ((-74.23580 40.52792, -74.23572 40.527...",
relation,18903072,amenity,parking,"POLYGON ((-74.14015 40.61581, -74.14059 40.615...",
relation,18903581,amenity,parking,"POLYGON ((-74.07558 40.63431, -74.07557 40.634...",
relation,18995634,amenity,parking,"POLYGON ((-74.17702 40.60537, -74.17701 40.605...",


In [33]:
final_places_data.to_csv("../data/raw/nyc_places.csv", index=False, encoding='utf-8')