# Tutorial for the U.S. Census Geocoding API and OpenStreetMap

In [1]:
import warnings
# Install an "ignore" filter with no category/module restriction, so every
# warning raised after this cell runs is suppressed.
warnings.filterwarnings('ignore')

In [14]:
# importing necessary modules
import geopandas as gpd # used for handling geospatial data
import pandas as pd # used for creating and manipulating data in tabular format 
import folium # used to create interactive maps
import requests # used to send an API call, i.e., U.S Census Geocoder
import numpy as np
from geopy.geocoders import Nominatim
from time import sleep
from datetime import datetime

## Part 1 - Converting Street Address to Lat/Lon

#### The Geocoding API is a service that converts street addresses into geographic coordinates, which you can then use to place markers on a map. This service is currently available for the USA only. 
#### This can also be done in batch mode, i.e., converting multiple addresses to geographic coordinates at once. Compatible file formats for this task include .csv, .txt, .dat, and .xlsx files. 

In [5]:
def get_coordinates(address):
    """Geocode a one-line street address with the U.S. Census Geocoder.

    Args:
        address: Full address as a single string, e.g.
            "1000 5th Ave, New York, NY 10028".

    Returns:
        A ``(latitude, longitude)`` tuple taken from the first address match,
        or ``(None, None)`` when the request fails, the service answers with a
        non-200 status, the body is not valid JSON, or nothing matched.
    """
    base_url = "https://geocoding.geo.census.gov/geocoder/locations/onelineaddress"
    params = {
        "address": address,
        "benchmark": "4", # Public_AR_Current
        "format": "json"
    }

    # A timeout keeps one stalled lookup from hanging a whole batch, and a
    # network failure is reported as "no match" instead of aborting the caller.
    try:
        response = requests.get(base_url, params=params, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Request error for address {address!r}: {e}")
        return None, None

    if response.status_code != 200:
        return None, None

    try:
        data = response.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML error page).
        print(f"Error decoding JSON response for address {address!r}.")
        return None, None

    # "result" / "addressMatches" may be absent or null in the payload.
    matches = (data.get("result") or {}).get("addressMatches") or []
    if not matches:
        return None, None

    # In the response, x is the longitude and y is the latitude.
    coordinates = matches[0]["coordinates"]
    return coordinates["y"], coordinates["x"]

### Using a .txt file with the names of random museums in the United States as an example. 

#### Note: this museum text file was randomly generated. You need to find a directory that lists the name and the appropriate address of each destination you would like to map. 

In [11]:
# Read the text file into a DataFrame. header=None means the first line is
# treated as data, not as column names.
file_path = 'museums.txt'
df = pd.read_csv(file_path, delimiter='\t', header=None)  # assumes a tab-delimited file; use delimiter=',' if it is comma-separated
# Label the first and second columns (this assignment raises if the file does
# not contain exactly two columns)
df.columns = ["Name", "Address"]

In [13]:
df

Unnamed: 0,Name,Address
0,The Metropolitan Museum of Art,"1000 5th Ave, New York, NY 10028"
1,The Museum of Modern Art (MoMA),"11 W 53rd St, New York, NY 10019"
2,The Art Institute of Chicago,"111 S Michigan Ave, Chicago, IL 60603"
3,Smithsonian National Museum of Natural History,"10th St. & Constitution Ave. NW, Washington, D..."
4,The Getty Center,"1200 Getty Center Dr, Los Angeles, CA 90049"
5,The National Gallery of Art,"Constitution Ave NW, Washington, DC 20565"
6,The Solomon R. Guggenheim Museum,"1071 5th Ave, New York, NY 10128"
7,The American Museum of Natural History,"Central Park West & 79th St, New York, NY 10024"
8,The Field Museum,"1400 S Lake Shore Dr, Chicago, IL 60605"
9,"The Museum of Fine Arts, Boston","465 Huntington Ave, Boston, MA 02115"


In [15]:
# Get coordinates for each address. get_coordinates returns one (lat, lon)
# pair per row (or (None, None) when no match is found); zip(*...) transposes
# those pairs into one sequence of latitudes and one of longitudes.
df["Latitude"], df["Longitude"] = zip(*df["Address"].apply(get_coordinates))

# Convert to GeoDataFrame: points_from_xy takes x=longitude, y=latitude, and
# the CRS is declared as EPSG:4326.
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.Longitude, df.Latitude), crs="EPSG:4326")

In [16]:
df

Unnamed: 0,Name,Address,Latitude,Longitude
0,The Metropolitan Museum of Art,"1000 5th Ave, New York, NY 10028",40.778445,-73.962587
1,The Museum of Modern Art (MoMA),"11 W 53rd St, New York, NY 10019",40.760627,-73.976177
2,The Art Institute of Chicago,"111 S Michigan Ave, Chicago, IL 60603",41.880649,-87.624172
3,Smithsonian National Museum of Natural History,"10th St. & Constitution Ave. NW, Washington, D...",38.892079,-77.025989
4,The Getty Center,"1200 Getty Center Dr, Los Angeles, CA 90049",34.088523,-118.475584
5,The National Gallery of Art,"Constitution Ave NW, Washington, DC 20565",,
6,The Solomon R. Guggenheim Museum,"1071 5th Ave, New York, NY 10128",40.782857,-73.959367
7,The American Museum of Natural History,"Central Park West & 79th St, New York, NY 10024",40.775644,-73.960514
8,The Field Museum,"1400 S Lake Shore Dr, Chicago, IL 60605",41.865007,-87.614119
9,"The Museum of Fine Arts, Boston","465 Huntington Ave, Boston, MA 02115",42.339058,-71.092566


#### It is important to have the correct and specific street address. Otherwise the conversion would not work. This is one downside of geocoding. 

### Notice that some museums have no Lat/Lon (e.g. row 5, whose address has no street number). This happens when the street address is incomplete or incorrect. 

In [19]:
# Keep only the rows that were actually geocoded. get_coordinates returns None
# for failed lookups; if every lookup fails the columns hold None objects
# rather than NaN floats, so coerce to numeric before dropping missing rows.
located = df.assign(
    Latitude=pd.to_numeric(df["Latitude"], errors="coerce"),
    Longitude=pd.to_numeric(df["Longitude"], errors="coerce"),
).dropna(subset=["Latitude", "Longitude"])

# Center the map on the mean of the geocoded points. With no geocoded points
# the mean would be NaN, so fall back to the approximate geographic center of
# the contiguous United States (the geocoder only covers the USA).
if located.empty:
    map_center = [39.8283, -98.5795]
else:
    map_center = [located["Latitude"].mean(), located["Longitude"].mean()]

# Create map
m = folium.Map(location=map_center, zoom_start=4)

# Add markers for each location
for row in located.itertuples(index=False):
    folium.Marker(
        location=[row.Latitude, row.Longitude],
        popup=row.Address,
        tooltip=row.Address
    ).add_to(m)

In [21]:
m

## Part 2

### Using Open Street Map to map out Points of Interest Using User Input Query

### Function to automate queries in OpenStreetMap using the Overpass API. 
More information about how to geocode with OSM can be found here. https://wiki.openstreetmap.org/wiki/Overpass_API?form=MG0AV3#Introduction

#### Function to pass the query to Overpass API

#### For correct Query Input visit: https://wiki.openstreetmap.org/wiki/Map_features

##### Store names like "Target" or "Walmart" won't work as the query value in this case. We have to use a generic category value, or specify the store name through the brand parameter. 

In [5]:
def get_locations(categories, queries, cities, state, country, brand=None):
    """Fetch locations from OpenStreetMap using Overpass API.

    One Overpass request is sent for every (category, city, query)
    combination and the returned elements are concatenated.

    Args:
        categories: Iterable of OSM tag keys, e.g. ``["shop"]``.
        queries: Iterable of OSM tag values, e.g. ``["supermarket"]``.
        cities: Iterable of area names to search in, matched on the OSM
            ``name`` tag of the area.
        state: Accepted for interface compatibility; not used in the query.
        country: Accepted for interface compatibility; not used in the query.
        brand: Optional value for an additional ``brand`` tag filter.

    Returns:
        A list of Overpass element dicts (nodes, ways and relations; ways and
        relations carry a ``center``). Combinations whose request fails are
        reported with ``print`` and skipped. An empty list is returned when
        any of ``categories``, ``queries`` or ``cities`` is empty.
    """
    # HTTPS so the query is not sent in clear text.
    overpass_url = "https://overpass-api.de/api/interpreter"
    all_locations = []

    # Handle multiple categories, queries, and cities
    if not categories or not queries or not cities:
        print("No categories, queries, or cities provided.")
        return []

    def _quote(text):
        """Escape backslashes and double quotes for an Overpass QL string."""
        return str(text).replace('\\', '\\\\').replace('"', '\\"')

    # Construct the optional brand filter
    brand_filter = f'["brand"="{_quote(brand)}"]' if brand else ''

    for category in categories:
        for city in cities:
            for query in queries:
                tag_filter = f'["{_quote(category)}"="{_quote(query)}"]{brand_filter}'
                overpass_query = f"""
                [out:json];
                area[name="{_quote(city)}"]->.searchArea;
                (
                  node{tag_filter}(area.searchArea);
                  way{tag_filter}(area.searchArea);
                  relation{tag_filter}(area.searchArea);
                );
                out center;
                """

                try:
                    # Without a timeout a stalled server would block forever.
                    response = requests.get(
                        overpass_url,
                        params={'data': overpass_query},
                        timeout=180,
                    )
                    response.raise_for_status()
                except requests.exceptions.RequestException as e:
                    print(f"Request error for {category}={query} in {city}: {e}")
                    continue

                # Decoding is handled separately: requests' JSON error is a
                # RequestException subclass, so a handler placed after the
                # generic one above would never run.
                try:
                    data = response.json()
                except ValueError:
                    print(f"Error decoding JSON response from API for {category}={query} in {city}.")
                    continue

                all_locations.extend(data.get("elements", []))

    return all_locations

In [7]:
def plot_locations(data, city, state, country):
    """Plot locations on a Folium map.

    Args:
        data: List of Overpass element dicts. Coordinates are read from the
            element's own ``lat``/``lon`` or, when absent, from its ``center``.
        city: Accepted for interface compatibility; not used.
        state: Accepted for interface compatibility; not used.
        country: Accepted for interface compatibility; not used.

    Returns:
        A ``folium.Map`` centered on the first element that has coordinates,
        with one marker per located element, or ``None`` (after printing a
        message) when ``data`` is empty or no element has coordinates.
    """
    if not data:
        print("No locations found.")
        return None

    def _coordinate(place, axis):
        """Return place[axis], falling back to place['center'][axis]."""
        value = place.get(axis)
        if value is None:
            value = (place.get('center') or {}).get(axis)
        return value

    # Collect every element that has coordinates. The test is "is None", not
    # truthiness, so a latitude or longitude of exactly 0 is still plotted.
    points = []
    for place in data:
        lat = _coordinate(place, 'lat')
        lon = _coordinate(place, 'lon')
        if lat is None or lon is None:
            continue
        name = place.get('tags', {}).get('name', 'Unknown')
        points.append((lat, lon, name))

    if not points:
        print("No valid coordinates found.")
        return None

    # Center the map on the first valid location
    first_lat, first_lon, _ = points[0]
    m = folium.Map(location=[first_lat, first_lon], zoom_start=12)

    # Add markers
    for lat, lon, name in points:
        folium.Marker([lat, lon], popup=f"{name} ({lat}, {lon})").add_to(m)

    return m

In [9]:
def display_locations(data):
    """Display location names with coordinates in a DataFrame.

    Args:
        data: List of Overpass element dicts (``None`` is treated as empty).
            Coordinates are read from the element's own ``lat``/``lon`` or,
            when absent, from its ``center``.

    Returns:
        A DataFrame with columns ``Name``, ``Latitude`` and ``Longitude``,
        one row per element that has coordinates. Elements without a
        ``name`` tag are listed as ``'Unknown'``.
    """
    def _coordinate(place, axis):
        """Return place[axis], falling back to place['center'][axis]."""
        value = place.get(axis)
        if value is None:
            value = (place.get('center') or {}).get(axis)
        return value

    locations = []
    for place in data or []:
        lat = _coordinate(place, 'lat')
        lon = _coordinate(place, 'lon')
        # Test against None, not truthiness, so a coordinate of exactly 0
        # is not mistaken for a missing value.
        if lat is None or lon is None:
            continue
        name = place.get('tags', {}).get('name', 'Unknown')
        locations.append([name, lat, lon])

    return pd.DataFrame(locations, columns=['Name', 'Latitude', 'Longitude'])

### Please refer to the OSM Map Features page (https://wiki.openstreetmap.org/wiki/Map_features) to see which specific keys and values to use in a query

In [12]:
# Example Query Parameters
category = ["shop"]  # General category
#queries = ["supermarket", "department_store", "greengrocer", "farm", "health_food", "retail"]  # Multiple specific queries
queries = ["supermarket"]
#cities = ["Indianapolis", "Lawrence"]  # Cities in Marion County, IN
cities = ["Indianapolis"]
state = "Indiana"
country = "USA"
brand = None  # Change to "Walmart" or "Target" if needed or None 

# Fetch Data
data = get_locations(category, queries, cities, state, country, brand)

# Plot Data on Map
map_result = plot_locations(data, cities[0] if cities else None, state, country)

# Display DataFrame of Locations
df_locations = display_locations(data)

# Display the map and data
if map_result:
    display(map_result)

display(df_locations)

Unnamed: 0,Name,Latitude,Longitude
0,Kroger,39.913798,-86.205519
1,Trader Joe's,39.912493,-86.212464
2,Kroger,39.875170,-86.119302
3,Hana Market,39.754988,-86.242418
4,Needler's Fresh Market,39.771972,-86.151670
...,...,...,...
114,El Rancho Grande,39.808881,-86.240642
115,Apna Bazaar,39.822927,-86.268925
116,Safeway,39.789167,-86.083754
117,Saraga International Grocery,39.650968,-86.120974


In [18]:
def get_bbox(city, state, country):
    """Look up the bounding box of a city through geopy's Nominatim geocoder.

    The place is searched as "city, state, country". The ``boundingbox``
    entry of the raw result is read as (south, north, west, east) and
    reordered on return.

    Returns:
        A ``(west, south, east, north)`` tuple of floats.

    Raises:
        ValueError: If no place is found or it carries no bounding box.
    """
    geocoder = Nominatim(user_agent="osm-date-filter")
    place = geocoder.geocode(f"{city}, {state}, {country}", exactly_one=True)

    raw_box = place.raw.get('boundingbox', None) if place else None
    if not raw_box:
        raise ValueError("Bounding box could not be found.")

    south = float(raw_box[0])
    north = float(raw_box[1])
    west = float(raw_box[2])
    east = float(raw_box[3])
    return west, south, east, north

def get_ohsome_elements(bbox, key="shop", value="supermarket", date="2019-12-31"):
    """Query Ohsome API for features with given tag that existed on a date.

    Args:
        bbox: Sequence of four numbers, joined with commas into the
            ``bboxes`` parameter (get_bbox supplies west, south, east, north).
        key: OSM tag key to filter on.
        value: OSM tag value to filter on.
        date: Snapshot date sent as the ``time`` parameter.

    Returns:
        The list under ``features`` in the JSON response (empty if absent).
        Only nodes are requested (``type:node`` in the filter).

    Raises:
        requests.exceptions.HTTPError: For any error status other than 429.
        requests.exceptions.RequestException: For connection errors/timeouts.
    """
    bbox_str = ",".join(map(str, bbox))
    ohsome_url = "https://api.ohsome.org/v1/elements/geometry"
    # NOTE(review): the run recorded in this notebook ended with a 400 for
    # exactly these parameters. The response body printed below should name
    # the rejected one - confirm whether this endpoint accepts `format`.
    params = {
        "bboxes": bbox_str,
        "time": date,
        "filter": f"type:node and {key}={value}",
        "format": "json"
    }

    # Retry if rate-limited
    while True:
        response = requests.get(ohsome_url, params=params, timeout=120)
        if response.status_code == 429:
            print("Rate limited. Sleeping for 10 seconds...")
            # The file imports `sleep` directly (from time import sleep);
            # the `time` module itself is never imported.
            sleep(10)
            continue
        if response.status_code >= 400:
            # The HTTPError message alone may carry no reason text, so show
            # the server's explanation before raising.
            print(f"Ohsome API error {response.status_code}: {response.text}")
        response.raise_for_status()
        return response.json().get('features', [])

def plot_ohsome_data(features):
    """Draw one Folium marker per Ohsome feature.

    Each feature's ``geometry.coordinates`` is read as ``[lon, lat]``. The
    map is centered on the first feature.

    Returns:
        The ``folium.Map``, or ``None`` (after printing a message) when
        ``features`` is empty.
    """
    if not features:
        print("No features found.")
        return None

    # Center the map on the first feature
    anchor = features[0]['geometry']['coordinates']
    fmap = folium.Map(location=[anchor[1], anchor[0]], zoom_start=12)

    for feature in features:
        point = feature['geometry']['coordinates']
        latitude = point[1]
        longitude = point[0]
        # NOTE(review): the name is looked up under properties['tags'];
        # confirm the API nests tags there, otherwise every label is 'Unknown'.
        tags = feature['properties'].get('tags', {})
        label = tags.get('name', 'Unknown')
        marker = folium.Marker(
            location=[latitude, longitude],
            popup=f"{label} ({latitude:.5f}, {longitude:.5f})",
        )
        marker.add_to(fmap)

    return fmap

def display_ohsome_data(features):
    """Tabulate Ohsome features as Name / Latitude / Longitude rows.

    Each feature's ``geometry.coordinates`` is read as ``[lon, lat]``; the
    name comes from ``properties['tags']['name']`` and defaults to
    ``'Unknown'``.
    """
    def _as_row(feature):
        lon_lat = feature['geometry']['coordinates']
        latitude, longitude = lon_lat[1], lon_lat[0]
        # NOTE(review): confirm the API nests tags under properties['tags'];
        # if not, every name falls back to 'Unknown'.
        label = feature['properties'].get('tags', {}).get('name', 'Unknown')
        return [label, latitude, longitude]

    rows = [_as_row(feature) for feature in features]
    return pd.DataFrame(rows, columns=["Name", "Latitude", "Longitude"])

# ========== USAGE ==========
# Query parameters: the place to search, the OSM tag (key=value) to look for,
# and the snapshot date the features must have existed on.
city = "Indianapolis"
state = "Indiana"
country = "USA"
key = "shop"
value = "supermarket"
cutoff_date = "2019-12-31"

# Step 1: Get bounding box as (west, south, east, north); raises ValueError
# when the city cannot be resolved
bbox = get_bbox(city, state, country)

# Step 2: Fetch historical features from Ohsome
features = get_ohsome_elements(bbox, key, value, cutoff_date)

# Step 3: Plot map and show data.
# Note: this rebinds `map_result` and `df`, which earlier cells also assign.
map_result = plot_ohsome_data(features)
df = display_ohsome_data(features)

# Display results (if in Jupyter or similar); plot_ohsome_data returns None
# when there are no features
if map_result:
    display(map_result)

print(df)


HTTPError: 400 Client Error:  for url: https://api.ohsome.org/v1/elements/geometry?bboxes=-86.3281207%2C39.6321626%2C-85.9380401%2C39.9275253&time=2019-12-31&filter=type%3Anode+and+shop%3Dsupermarket&format=json