# Find Nearest ATM

Use this notebook to enrich the DSK office list with geographic coordinates so that follow-up nearest-ATM lookups can rely on latitude/longitude instead of free-form addresses. The steps below load the office JSON, geocode each address, and persist the results.

> ⚠️ Geocoding calls an external (or locally hosted) Nominatim service. Respect the service usage policy, throttle requests, and prefer a self-hosted instance when possible.

In [None]:
# Optional: install dependencies in the active kernel (uncomment if needed)
# %pip install geopy pandas


In [None]:
import json
from pathlib import Path
from typing import Dict, Optional, Tuple

import pandas as pd

try:
    from geopy.geocoders import Nominatim
    from geopy.extra.rate_limiter import RateLimiter
except ModuleNotFoundError as exc:
    raise ModuleNotFoundError('Install geopy in this environment: `pip install geopy`.') from exc


In [None]:
DATA_PATH = Path('dsk_offices_parsed.json')

# Read and parse the office payload in one step; the top-level object is kept
# intact and the office records live under its 'entries' key.
offices_payload = json.loads(DATA_PATH.read_text(encoding='utf-8'))
entries = offices_payload['entries']

# Tabular view of the raw records, shown as a quick sanity check of the load.
offices_df = pd.DataFrame(entries)
offices_df.head()


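Configure the geocoding client. Update `GEOCODER_DOMAIN` if you run a local Nominatim instance (for example `localhost:8080`). The URL scheme is derived from the domain automatically: `https` for hosts ending in `openstreetmap.org`, plain `http` for anything else.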

In [None]:
# --- Service endpoint -------------------------------------------------------
GEOCODER_DOMAIN = 'nominatim.openstreetmap.org'  # set to 'localhost:8080' when using a local instance
# Hosts under openstreetmap.org are reached over HTTPS; every other domain
# falls back to plain HTTP.
if GEOCODER_DOMAIN.endswith('openstreetmap.org'):
    GEOCODER_SCHEME = 'https'
else:
    GEOCODER_SCHEME = 'http'

# --- Client behaviour -------------------------------------------------------
USER_AGENT = 'dsk-office-geocoder'
REQUEST_TIMEOUT = 10
MIN_DELAY_SECONDS = 1.1  # follow the public Nominatim usage policy

geolocator = Nominatim(
    domain=GEOCODER_DOMAIN,
    scheme=GEOCODER_SCHEME,
    user_agent=USER_AGENT,
    timeout=REQUEST_TIMEOUT,
)

# Throttled entry point used by the rest of the notebook. Exceptions are not
# swallowed, so a geocoder failure surfaces in the calling cell.
geocode = RateLimiter(
    geolocator.geocode,
    swallow_exceptions=False,
    min_delay_seconds=MIN_DELAY_SECONDS,
)


In [None]:
def build_query(entry: Dict[str, str]) -> str:
    """Compose the free-form address string sent to the geocoder.

    The query is ``"<address_line>, <city>, Bulgaria"``, skipping whichever
    of the two local parts is missing or blank.

    Args:
        entry: Office record; only the ``address_line`` and ``city`` keys are
            read, and either may be absent, ``None`` or empty.

    Returns:
        The comma-separated query, or ``''`` when the entry carries neither an
        address line nor a city. Returning an empty string in that case (rather
        than the bare country name) lets callers skip the lookup instead of
        geocoding the whole country and storing its centroid as the office
        location.
    """
    local_parts = []
    for key in ('address_line', 'city'):
        value = entry.get(key)
        if isinstance(value, str):
            # Stray padding would otherwise leak into the query and make
            # otherwise identical addresses look different.
            value = value.strip()
        if value:
            local_parts.append(str(value))

    if not local_parts:
        # Nothing office-specific to search for.
        return ''
    return ', '.join(local_parts + ['Bulgaria'])


def geocode_entry(entry: Dict[str, str]) -> Tuple[Optional[float], Optional[float], str]:
    """Look up coordinates for a single office entry.

    Args:
        entry: Office record passed to ``build_query`` to obtain the search
            string.

    Returns:
        A ``(latitude, longitude, resolved_address)`` tuple. When the entry
        yields no query, or the geocoder returns no match, the result is
        ``(None, None, '')``. ``resolved_address`` is ``''`` if the match has
        no address text.

    Raises:
        Whatever the module-level ``geocode`` callable raises; nothing is
        caught here.
    """
    unresolved = (None, None, '')

    query = build_query(entry)
    if not query:
        # No searchable text: skip the network round-trip entirely.
        return unresolved

    location = geocode(query)
    if not location:
        return unresolved
    return (location.latitude, location.longitude, location.address or '')


In [None]:
# Results are memoised per query string so that offices sharing an address
# trigger only one geocoder request.
geocode_cache: Dict[str, Tuple[float, float, str]] = {}
enriched_entries = []
for entry in entries:
    cache_key = build_query(entry)
    if cache_key not in geocode_cache:
        # First time this query is seen: resolve it and remember the outcome,
        # including unsuccessful lookups.
        geocode_cache[cache_key] = geocode_entry(entry)
    lat, lon, display_address = geocode_cache[cache_key]

    # Copy the record so the originally loaded entries stay untouched.
    record = {**entry, 'latitude': lat, 'longitude': lon}
    if display_address:
        # The resolved address is only attached when the geocoder supplied one.
        record['geocoded_address'] = display_address
    enriched_entries.append(record)

enriched_df = pd.DataFrame(enriched_entries)
preview_columns = ['office_name', 'latitude', 'longitude']
enriched_df[preview_columns].head()


Persist the enriched payload. By default we keep the original file untouched and write a sibling file. Change `OUTPUT_PATH` to `DATA_PATH` if you want to overwrite in place once you are happy with the results.

In [None]:
OUTPUT_PATH = Path('dsk_offices_parsed_with_coords.json')

# Swap the enriched records into the loaded payload so every other top-level
# field is written back unchanged.
offices_payload.update(entries=enriched_entries)
with open(OUTPUT_PATH, mode='w', encoding='utf-8') as output_file:
    # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes.
    json.dump(offices_payload, output_file, ensure_ascii=False, indent=2)
print(f'Wrote {len(enriched_entries)} entries with coordinates to {OUTPUT_PATH}')
