In [None]:
import requests
import pandas as pd

In [3]:
# Define the Overpass API endpoint
overpass_url = "https://overpass-api.de/api/interpreter"

# Define the query to retrieve places of interest in Manhattan
query = """
[out:json];
(
  node["tourism"](40.6997,-74.0183,40.8774,-73.9105);
  way["tourism"](40.6997,-74.0183,40.8774,-73.9105);
  relation["tourism"](40.6997,-74.0183,40.8774,-73.9105);
);
out body;
>;
out skel qt;
"""

# Send the query to the Overpass API
response = requests.post(overpass_url, data=query)

# Parse the JSON response
data = response.json()['elements']

# Extract all information for each node
nodes_info = []
for node in data:
    node_info = {
        'id': node['id'],
        'type': node['type'],
        'latitude': node.get('lat'),
        'longitude': node.get('lon'),
    }
    tags = node.get('tags', {})
    for key, value in tags.items():
        node_info[key] = value
    nodes_info.append(node_info)

# Create a DataFrame from the extracted data
df = pd.DataFrame(nodes_info)

# Display the DataFrame
df

Unnamed: 0,id,type,latitude,longitude,alt_name,ele,gnis:feature_id,name,name:ru,name:sr,...,admin_level,name:bg,name:ml,seamark:platform:category,seamark:type,closed,tunnel,addr:place,bicycle,old_railway_operator
0,158801311,node,40.725102,-73.979583,Loisaida,4,2062665,Alphabet City,Алфабет-сити,Алфабет сити,...,,,,,,,,,,
1,158809984,node,40.722880,-73.998750,,9,2045148,SoHo,Сохо,Сохо,...,,,,,,,,,,
2,158862484,node,40.748623,-73.971389,,18,2083304,Tudor City,Тьюдор-сити,Тудор сити,...,,,,,,,,,,
3,320948228,node,40.705751,-74.002906,,,,South Street Seaport,,,...,,,,,,,,,,
4,357618558,node,40.732267,-73.997255,,7,2080738,"New York Studio School of Drawing, Painting an...",,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10051,104917339,way,,,,,,,,,...,,,,,,,,,,
10052,104917349,way,,,,,,,,,...,,,,,,,,,,
10053,104917358,way,,,,,,,,,...,,,,,,,,,,
10054,285900676,way,,,,,,,,,...,,,,,,,,,,


In [4]:
df.isnull().sum()

id                          0
type                        0
latitude                  580
longitude                 580
alt_name                 9987
                        ...  
closed                  10055
tunnel                  10055
addr:place              10055
bicycle                 10055
old_railway_operator    10055
Length: 335, dtype: int64

In [5]:
for col in df.columns:
    print(col)

id
type
latitude
longitude
alt_name
ele
gnis:feature_id
name
name:ru
name:sr
name:sr-Latn
place
tourism
wikidata
name:zh
name:zh-Hant
wikipedia
wheelchair
amenity
attraction
architect
artist_name
artwork_type
historic
start_date
website
website:alternate
artwork_subject
material
addr:state
gnis:county_name
gnis:import_uuid
gnis:reviewed
source
addr:housenumber
addr:postcode
addr:street
phone
addr:city
addr:country
lgbtq
sport
building
building:colour
building:levels
building:material
height
nycdoitt:bin
roof:material
roof:shape
addr:floor
opening_hours
name:es
email
internet_access
internet_access:fee
smoking
fee
name:fr
toilets:wheelchair
leisure
artist:wikidata
image
memorial
note
wikimedia_commons
artist:wikipedia
subject:wikidata
subject:wikipedia
official_name
disused:amenity
brand
brand:wikidata
noexit
branch
operator
rooms
old_name
air_conditioning
bar
reservation
board_type
information
species:wikidata
species:wikipedia
description
name:hr
internet_access:ssid
emergency
operato

In [32]:
df = df[['id', 'type', 'name', 'latitude', 'longitude', 'tourism', 'wikidata', 'website', 'opening_hours', 'email']]

In [33]:
df

Unnamed: 0,id,type,name,latitude,longitude,tourism,wikidata,website,opening_hours,email
0,158801311,node,Alphabet City,40.725102,-73.979583,yes,Q1156938,,,
1,158809984,node,SoHo,40.722880,-73.998750,attraction,Q461572,,,
2,158862484,node,Tudor City,40.748623,-73.971389,yes,Q549206,,,
3,320948228,node,South Street Seaport,40.705751,-74.002906,attraction,Q223000,,,
4,357618558,node,"New York Studio School of Drawing, Painting an...",40.732267,-73.997255,yes,Q7014742,,,
...,...,...,...,...,...,...,...,...,...,...
10051,104917339,way,,,,,,,,
10052,104917349,way,,,,,,,,
10053,104917358,way,,,,,,,,
10054,285900676,way,,,,,,,,


In [30]:
df['name'].nunique()

1256

In [36]:
df['name'].unique().tolist()

['Alphabet City',
 'SoHo',
 'Tudor City',
 'South Street Seaport',
 'New York Studio School of Drawing, Painting and Sculpture',
 'Richard Morris Hunt',
 'Joan of Arc Memorial',
 'Fort Lee Museum at the Judge Moore House',
 'Storefront for Art and Architecture',
 'Stonewall Inn',
 'Paula Cooper Gallery',
 'Fraunces Tavern Museum',
 'National Museum of the American Indian',
 'Times Square Museum & Visitor Center',
 'Hotel Belleclaire',
 'Hotel Churchill',
 'Bronx Museum of the Arts',
 'Yeshiva University Museum',
 'Dahesh Museum of Art',
 'Museum of the Moving Image',
 'Museum of African Art',
 'Lower East Side Tenement Museum',
 'Greenpoint Branch YMCA',
 'West Side YMCA',
 'Charging Bull',
 'Janet Borden',
 'Sir Walter Scott',
 'Robert Burns',
 'Christopher Columbus',
 'The Sphere',
 'William Shakespeare',
 'Indian Hunter',
 'Fitz-Greene Halleck',
 'Alice in Wonderland',
 'International Center of Photography',
 'Burnett Memorial Fountain',
 nan,
 'L-Hostels',
 'Country Inn & Suites by

In [37]:
df[df['name'] == 'Grizzly Bear']

Unnamed: 0,id,type,name,latitude,longitude,tourism,wikidata,website,opening_hours,email
72,1240900963,node,Grizzly Bear,40.768411,-73.971913,attraction,,,,


In [39]:
df.isnull().sum()

id                  0
type                0
name             8613
latitude          580
longitude         580
tourism          7968
wikidata         9702
website          9219
opening_hours    9806
email            9821
dtype: int64