# Geolocating Filevine Contacts

In [1]:
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
import pandas as pd
import time

In [2]:
# Load provider data
# Path is relative to the notebook's working directory.
EXCEL_PATH = './data/cleaned_outbound_referrals.xlsx'
df = pd.read_excel(EXCEL_PATH)
# Last expression in the cell: displays the first five rows as the cell output.
df.head(5)

Unnamed: 0,Person ID,Full Name,Street,City,State,Zip,Referral Count
0,996701857,Waldorf Total Health Chiropractic & Physical T...,12102 Old Line Center,Waldorf,MD,20602,60
1,996605635,Bezak Chiropractic And Rehabilitation,7500 Hanover Parkway,Greenbelt,MD,20770,37
2,996356411,Dunkirk Chiropractic & Wellness Center,10020 Southern Maryland Blvd,Dunkirk,MD,20754,24
3,996605523,"Effective Integrative Healthcare - Lanham, Md",7400 Riverdale Rd,Lanham,MD,20706,21
4,996616309,Rebound Integrative Health & Sports Clinic,7701 Greenbelt Rd,Greenbelt,MD,20770-6521,18


In [3]:
# Inspect column dtypes. Zip is `object` rather than numeric; the sample rows
# include ZIP+4 values such as "20770-6521", so it must be treated as text.
df.dtypes

Person ID          int64
Full Name         object
Street            object
City              object
State             object
Zip               object
Referral Count     int64
dtype: object

In [4]:
# if 'Full Address' not in df.columns:
#     df['Full Address'] = (
#         df['Street'].fillna('') + ', '
#         + df['City'].fillna('') + ', '
#         + df['State'].fillna('') + ' '
#         + df['Zip'].fillna('')
#     )
#     df['Full Address'] = df['Full Address'].str.replace(r',\s*,', ',', regex=True).str.replace(r',\s*$', '', regex=True)

# df

In [5]:
# Summary:
# - Free-first, DMV-focused async geocoding cascade: US Census (structured) → US Census (oneline) → Nominatim (DMV-bounded).
# - Aggressive fallbacks: Full Address → Street+City+State → Street+Zip → Street-only.
# - Adds: Latitude, Longitude, GeocodeSource, GeocodeQuality, ConfidenceScore (normalized 0–1), GeocodeStatus.
# - Enforces Nominatim etiquette (at most 1 request per second; we wait 1.2 s between calls) and filters out results outside the DMV bounding box.

import asyncio
import time
import random
from typing import Dict, Optional, Tuple, List

import pandas as pd
import httpx

# -------------------- CONFIG --------------------
# US Census endpoints (free)
# Structured endpoint takes street/city/state/zip as separate parameters;
# the oneline endpoint takes a single free-text "address" parameter.
CENSUS_STRUCT_URL = "https://geocoding.geo.census.gov/geocoder/locations/address"
CENSUS_ONE_URL    = "https://geocoding.geo.census.gov/geocoder/locations/onelineaddress"
# Query parameters sent with every Census request (copied, then extended per call).
CENSUS_PARAMS_BASE = {
    "benchmark": "Public_AR_Current",
    "format": "json"
}

# Nominatim (free; follow usage policy)
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
# Sent as the User-Agent header on every Nominatim request.
NOMINATIM_USER_AGENT = "my-firm-provider-geocoder (benjamin@jaklitschlawgroup.com)"  # <-- put your contact
NOMINATIM_MIN_INTERVAL = 1.2  # minimum seconds between Nominatim requests, i.e. under 1 req/sec (safer than 1.0)

# DMV bounding box (approx): SW & NE corners (lat, lng)
# Used both to bound Nominatim searches and to reject any result outside the box.
DMV_BBOX = {
    "sw_lat": 38.40, "sw_lng": -77.60,
    "ne_lat": 39.40, "ne_lng": -76.00
}

# Concurrency controls
# Upper bound on rows processed at once and on pooled HTTP connections.
MAX_CONCURRENCY = 6  # tasks can be scheduled; per-source limiter still enforces pacing

# Column mapping: adapt to your DataFrame schema
# Keys are the logical names the geocoder uses; values are column names in the DataFrame.
DEFAULT_COLS = {
    "street": "Street",              # e.g., "Address 1 Line 1" if that's your field
    "city":   "City",
    "state":  "State",
    "zip":    "Zip",
    "full":   "Full Address"         # built from the four parts above when absent
}

# -------------------- HELPERS --------------------
class AsyncRateLimiter:
    """Enforce a minimum interval between calls across all coroutines.

    Every coroutine that awaits :meth:`wait` is serialized through a lock and
    released no sooner than ``min_interval`` seconds after the previous one.

    The lock is created lazily, once per running event loop. An
    ``asyncio.Lock`` becomes tied to the loop it is first contended in, so a
    limiter that lives at module level (and therefore outlives a single
    ``asyncio.run`` call) would otherwise fail with "bound to a different
    event loop" on the next run.

    Args:
        min_interval_seconds: Minimum number of seconds between two releases.
    """

    def __init__(self, min_interval_seconds: float = 1.0):
        self.min_interval = float(min_interval_seconds)
        # Lock and the loop it belongs to; both are (re)created on demand.
        self._lock = None
        self._lock_loop = None
        # Monotonic timestamp of the most recent release; survives loop changes
        # so pacing is still honoured across consecutive runs.
        self._last = 0.0

    def _get_lock(self) -> asyncio.Lock:
        """Return a lock that belongs to the currently running event loop."""
        loop = asyncio.get_running_loop()
        if self._lock is None or self._lock_loop is not loop:
            self._lock = asyncio.Lock()
            self._lock_loop = loop
        return self._lock

    async def wait(self):
        """Sleep as long as needed so calls are at least ``min_interval`` apart."""
        async with self._get_lock():
            now = time.monotonic()
            wait_for = self.min_interval - (now - self._last)
            if wait_for > 0:
                await asyncio.sleep(wait_for)
            # Stamp after sleeping so the next caller measures from the release.
            self._last = time.monotonic()

def _normalize_columns(df: pd.DataFrame, cols: Dict[str, str]):
    """Ensure address columns exist, are strings, and build a clean Full Address when missing."""
    for key in ("street", "city", "state", "zip"):
        c = cols[key]
        if c not in df.columns:
            raise ValueError(f"Missing required column: {c!r} mapped from {key!r}")
        df[c] = df[c].astype(str).fillna("").str.strip()

    # Build Full Address if not present
    if cols["full"] not in df.columns:
        df[cols["full"]] = (
            df[cols["street"]] + ", " +
            df[cols["city"]]   + ", " +
            df[cols["state"]]  + " " +
            df[cols["zip"]]
        )
    # Clean punctuation/spacing
    df[cols["full"]] = (
        df[cols["full"]]
          .str.replace(r"\s+", " ", regex=True)
          .str.replace(r",\s*,", ",", regex=True)
          .str.replace(r",\s*$", "", regex=True)
          .str.strip()
    )

    # Ensure output columns exist
    for c in ("Latitude", "Longitude", "GeocodeSource", "GeocodeQuality", "ConfidenceScore", "GeocodeStatus"):
        if c not in df.columns:
            df[c] = pd.NA

def _in_dmv_bbox(lat: float, lng: float) -> bool:
    """Return whether the point lies inside ``DMV_BBOX`` (edges inclusive)."""
    south, north = DMV_BBOX["sw_lat"], DMV_BBOX["ne_lat"]
    lat_inside = south <= lat <= north
    # The longitude test is only evaluated when the latitude already fits.
    return lat_inside and DMV_BBOX["sw_lng"] <= lng <= DMV_BBOX["ne_lng"]

# Quality helpers
def _score_census() -> Tuple[str, float]:
    # Census often returns a good street/parcel-level point (TIGER-based)
    return ("census:match", 0.85)  # normalized to 0–1

def _score_nominatim(n_type: Optional[str], importance: Optional[float]) -> Tuple[str, float]:
    # Favor house/building/address types; scale by "importance".
    t = (n_type or "").lower()
    base = 0.0
    if t in {"house", "building", "address"}:
        base = 0.75
    elif t in {"residential", "road"}:
        base = 0.55
    else:
        base = 0.45
    imp = float(importance) if importance is not None else 0.3
    # Blend and clamp to [0,1]
    score = max(0.0, min(1.0, 0.5 * base + 0.5 * min(imp, 1.0)))
    return (f"nominatim:{t or 'unknown'}", score)

# -------------------- SOURCE CALLS --------------------
async def _census_structured(
    client: httpx.AsyncClient, street: str, city: str, state: str, zip_code: str
) -> Optional[Dict]:
    """Geocode one address through the Census structured-address endpoint.

    Args:
        client: Shared HTTP client used for the request.
        street, city, state, zip_code: Address parts sent as separate parameters.

    Returns:
        A dict with ``lat``, ``lng``, ``source``, ``quality`` and ``score`` for
        the first address match, or ``None`` when ``street`` is empty, the
        response is not HTTP 200, there is no match, or anything raises.
    """
    if not street:
        return None

    query = {
        **CENSUS_PARAMS_BASE,
        "street": street, "city": city, "state": state, "zip": zip_code,
    }
    try:
        response = await client.get(CENSUS_STRUCT_URL, params=query, timeout=30)
        if response.status_code != 200:
            return None

        candidates = response.json().get("result", {}).get("addressMatches", [])
        if not candidates:
            return None

        # The first entry is taken as the best match; Census reports x = lng, y = lat.
        point = candidates[0].get("coordinates", {})
        latitude = float(point.get("y"))
        longitude = float(point.get("x"))

        quality, score = _score_census()
        return {
            "lat": latitude, "lng": longitude,
            "source": "census_struct", "quality": quality, "score": score,
        }
    except Exception:
        # Best effort: network, decoding and missing-coordinate errors all mean "no result".
        return None

async def _census_oneline(client: httpx.AsyncClient, one_line: str) -> Optional[Dict]:
    """Geocode a free-text address through the Census one-line endpoint.

    Args:
        client: Shared HTTP client used for the request.
        one_line: The complete address as a single string.

    Returns:
        A dict with ``lat``, ``lng``, ``source``, ``quality`` and ``score`` for
        the first address match, or ``None`` when ``one_line`` is empty, the
        response is not HTTP 200, there is no match, or anything raises.
    """
    if not one_line:
        return None

    query = {**CENSUS_PARAMS_BASE, "address": one_line}
    try:
        response = await client.get(CENSUS_ONE_URL, params=query, timeout=30)
        if response.status_code != 200:
            return None

        candidates = response.json().get("result", {}).get("addressMatches", [])
        if not candidates:
            return None

        # Census reports x = lng, y = lat.
        point = candidates[0].get("coordinates", {})
        latitude = float(point.get("y"))
        longitude = float(point.get("x"))

        quality, score = _score_census()
        return {
            "lat": latitude, "lng": longitude,
            "source": "census_oneline", "quality": quality, "score": score,
        }
    except Exception:
        # Best effort: any failure is reported as "no result".
        return None

class _NominatimWrapper:
    """Rate-limited Nominatim search restricted to the DMV bounding box.

    Each instance owns one ``AsyncRateLimiter``; every ``search`` call made
    through the same instance is paced by it.
    """

    def __init__(self, min_interval=NOMINATIM_MIN_INTERVAL, user_agent=NOMINATIM_USER_AGENT):
        self.headers = {"User-Agent": user_agent, "Accept": "application/json"}
        self.limiter = AsyncRateLimiter(min_interval)

    async def search(self, client: httpx.AsyncClient, q: str) -> Optional[Dict]:
        """Look up free-text query ``q`` and return the top hit.

        Returns:
            A dict with ``lat``, ``lng``, ``source``, ``quality`` and ``score``,
            or ``None`` when ``q`` is empty, the response is not HTTP 200, the
            result list is empty, or the request/parsing raises.
        """
        if not q:
            return None

        # viewbox lists longitude before latitude for each corner.
        corners = (
            DMV_BBOX["sw_lng"], DMV_BBOX["ne_lat"],
            DMV_BBOX["ne_lng"], DMV_BBOX["sw_lat"],
        )
        query = dict(
            q=q,
            format="json",
            limit=1,
            addressdetails=0,
            countrycodes="us",
            viewbox="{},{},{},{}".format(*corners),
            bounded=1,
        )

        # Pace the call before touching the network.
        await self.limiter.wait()
        try:
            response = await client.get(
                NOMINATIM_URL, params=query, headers=self.headers, timeout=30
            )
            if response.status_code != 200:
                return None

            hits = response.json()
            if not hits:
                return None

            top = hits[0]
            latitude = float(top.get("lat"))
            longitude = float(top.get("lon"))
            quality, score = _score_nominatim(top.get("type"), top.get("importance"))
            return {
                "lat": latitude, "lng": longitude,
                "source": "nominatim", "quality": quality, "score": score,
            }
        except Exception:
            # Best effort: any failure is reported as "no result".
            return None

# Module-level singleton: every row goes through this one instance, so all
# Nominatim requests share a single rate limiter.
NOMI = _NominatimWrapper()

# -------------------- PER-ROW GEOCODING --------------------
async def _geocode_one_row(
    idx: int, df: pd.DataFrame, client: httpx.AsyncClient, cols: Dict[str, str], cache: Dict[str, Optional[Dict]]
):
    """Geocode the row labelled ``idx`` and write the outcome back into ``df``.

    Sources are tried in order: Census structured, Census one-line, then three
    Nominatim queries (street+city+state, street+zip, street only). A result
    only counts when it falls inside the DMV bounding box. Rows that already
    have both coordinates are left alone.

    Args:
        idx: Index label of the row to process.
        df: DataFrame holding the address and output columns; modified in place.
        client: Shared HTTP client.
        cols: Logical-name to column-name mapping (street/city/state/zip/full).
        cache: Per-run memo of query key -> result (``None`` = no result).
    """
    # Already geocoded: only fill in a status if none was recorded.
    if pd.notna(df.at[idx, "Latitude"]) and pd.notna(df.at[idx, "Longitude"]):
        if pd.isna(df.at[idx, "GeocodeStatus"]):
            df.at[idx, "GeocodeStatus"] = "skipped_has_coords"
        return

    def _cell(part: str) -> str:
        return str(df.at[idx, cols[part]]).strip()

    street, city, state, zipc, full = (
        _cell(part) for part in ("street", "city", "state", "zip", "full")
    )

    async def _memoized(cache_key: str, fetch):
        # Reuse an earlier answer (including a cached "no result") when available.
        if cache_key in cache:
            return cache[cache_key]
        fetched = await fetch()
        cache[cache_key] = fetched
        return fetched

    def _usable(hit: Optional[Dict]) -> bool:
        return bool(hit and _in_dmv_bbox(hit["lat"], hit["lng"]))

    best = None

    # 1) Census structured
    hit = await _memoized(
        f"CENSUS_STRUCT::{street}|{city}|{state}|{zipc}",
        lambda: _census_structured(client, street, city, state, zipc),
    )
    if _usable(hit):
        best = hit

    # 2) Census oneline, only when the structured lookup gave nothing usable
    if best is None and full:
        hit = await _memoized(f"CENSUS_ONE::{full}", lambda: _census_oneline(client, full))
        if _usable(hit):
            best = hit

    # 3) Nominatim fallbacks, in descending specificity
    if best is None:
        fallbacks = (
            ("street_city_state", f"{street}, {city}, {state}"),
            ("street_zip", f"{street}, {zipc}"),
            ("street_only", street),
        )
        in_box = []
        for label, text in fallbacks:
            hit = await _memoized(
                f"NOMI::{label}::{text}",
                lambda text=text: NOMI.search(client, text),
            )
            if _usable(hit):
                in_box.append(hit)
        # Highest score wins; on a tie the earlier (more specific) query is kept.
        best = max(in_box, key=lambda h: h["score"], default=None)

    # Write back
    if best is None:
        outcome = {
            "GeocodeSource": "no_result",
            "GeocodeQuality": pd.NA,
            "ConfidenceScore": pd.NA,
            "GeocodeStatus": "no_result",
        }
    else:
        outcome = {
            "Latitude": best["lat"],
            "Longitude": best["lng"],
            "GeocodeSource": best["source"],
            "GeocodeQuality": best["quality"],
            "ConfidenceScore": round(float(best["score"]), 3),
            "GeocodeStatus": "ok",
        }
    for column, value in outcome.items():
        df.at[idx, column] = value

# -------------------- PUBLIC API --------------------
async def geocode_addresses_free_async(
    df: pd.DataFrame,
    col_map: Optional[Dict[str, str]] = None,
    max_concurrency: int = MAX_CONCURRENCY
) -> pd.DataFrame:
    """
    Free-first, DMV-biased async geocoder.
    df must contain: Street, City, State, Zip (or map them via col_map).

    Args:
        df: Addresses to geocode. Modified in place and also returned.
        col_map: Optional overrides for ``DEFAULT_COLS``. It may be partial;
            any key that is left out falls back to its default column name.
        max_concurrency: Maximum number of rows processed at once (also the
            HTTP connection pool size). Must be at least 1.

    Returns:
        ``df`` with Latitude, Longitude, GeocodeSource, GeocodeQuality,
        ConfidenceScore and GeocodeStatus filled for rows that lacked
        coordinates.

    Raises:
        ValueError: If ``max_concurrency`` is below 1, or a mapped
            street/city/state/zip column is missing from ``df``.
    """
    # Semaphore(0) would make every worker wait forever; fail loudly instead.
    if max_concurrency < 1:
        raise ValueError(f"max_concurrency must be >= 1, got {max_concurrency!r}")

    # Merge over the defaults so a partial mapping (e.g. without "full") works.
    cols = {**DEFAULT_COLS, **(col_map or {})}
    _normalize_columns(df, cols)

    # Only process rows missing coordinates
    need_idx = df.index[(df["Latitude"].isna() | df["Longitude"].isna())].tolist()

    limits = httpx.Limits(max_connections=max_concurrency, max_keepalive_connections=10)
    # Per-run memo shared by all rows so identical queries are sent only once.
    cache: Dict[str, Optional[Dict]] = {}

    async with httpx.AsyncClient(timeout=30, limits=limits, headers={"Accept": "application/json"}) as client:
        sem = asyncio.Semaphore(max_concurrency)

        async def worker(i: int):
            # The semaphore caps how many rows are in flight at the same time.
            async with sem:
                await _geocode_one_row(i, df, client, cols, cache)

        await asyncio.gather(*(worker(i) for i in need_idx))

    return df

def geocode_addresses_free(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
    """Synchronous convenience wrapper for scripts/legacy notebooks.

    Works both from plain scripts and from environments that already run an
    event loop in the calling thread (e.g. Jupyter), where a bare
    ``asyncio.run`` would raise ``RuntimeError``.

    Args:
        df: Addresses to geocode; modified in place and returned.
        **kwargs: Forwarded to ``geocode_addresses_free_async``.

    Returns:
        The geocoded DataFrame.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: the simple path is safe.
        return asyncio.run(geocode_addresses_free_async(df, **kwargs))

    # A loop is already running here, so asyncio.run() is not allowed in this
    # thread. Run the coroutine on its own loop in a worker thread and block
    # until it finishes.
    from concurrent.futures import ThreadPoolExecutor

    def _run_in_fresh_loop() -> pd.DataFrame:
        return asyncio.run(geocode_addresses_free_async(df, **kwargs))

    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(_run_in_fresh_loop).result()


In [6]:
# Summary:
# - Maps your existing columns to what the geocoder expects.
# - Runs the async geocoder with a DMV bias and free sources (Census → Nominatim).
# - Returns `df` with new lat/lon and quality/source columns added.

# Tell the geocoder which columns in *your* df contain address parts
# (keys are the geocoder's logical names, values are column names in df)
col_map = {
    "street": "Street",
    "city":   "City",
    "state":  "State",
    "zip":    "Zip",
    "full":   "Full Address"   # This will be created if it doesn't exist
}

# Run the async geocoding (works in modern Jupyter with top-level `await`)
# The function mutates df in place and returns the same object.
df = await geocode_addresses_free_async(
    df,
    col_map=col_map,
    max_concurrency=6  # OK to adjust; DMV filters & Nominatim limiter are built-in
)

## Review Results
# NOTE(review): in the captured output below, row 0 fell through to Nominatim and
# matched a post_office with ConfidenceScore 0.225 — review low-confidence rows
# before relying on their coordinates.
df


Unnamed: 0,Person ID,Full Name,Street,City,State,Zip,Referral Count,Full Address,Latitude,Longitude,GeocodeSource,GeocodeQuality,ConfidenceScore,GeocodeStatus
0,996701857,Waldorf Total Health Chiropractic & Physical T...,12102 Old Line Center,Waldorf,MD,20602,60,"12102 Old Line Center, Waldorf, MD 20602",39.36482,-76.969956,nominatim,nominatim:post_office,0.225,ok
1,996605635,Bezak Chiropractic And Rehabilitation,7500 Hanover Parkway,Greenbelt,MD,20770,37,"7500 Hanover Parkway, Greenbelt, MD 20770",38.992615,-76.875414,census_struct,census:match,0.85,ok
2,996356411,Dunkirk Chiropractic & Wellness Center,10020 Southern Maryland Blvd,Dunkirk,MD,20754,24,"10020 Southern Maryland Blvd, Dunkirk, MD 20754",38.715339,-76.659473,census_struct,census:match,0.85,ok
3,996605523,"Effective Integrative Healthcare - Lanham, Md",7400 Riverdale Rd,Lanham,MD,20706,21,"7400 Riverdale Rd, Lanham, MD 20706",38.962083,-76.884654,census_struct,census:match,0.85,ok
4,996616309,Rebound Integrative Health & Sports Clinic,7701 Greenbelt Rd,Greenbelt,MD,20770-6521,18,"7701 Greenbelt Rd, Greenbelt, MD 20770-6521",38.994033,-76.874777,census_struct,census:match,0.85,ok
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,996593018,Neuropro Concussion Clinic,1110 Benfield Blvd,Millersville,MD,21108,1,"1110 Benfield Blvd, Millersville, MD 21108",39.094464,-76.631838,census_struct,census:match,0.85,ok
57,996668361,Rebound Integrative Health & Sports Clinic - T...,3611 Branch Avenue,Temple Hills,MD,20748,1,"3611 Branch Avenue, Temple Hills, MD 20748",38.844634,-76.950739,census_struct,census:match,0.85,ok
58,996613952,"Excelsia Injury Care - Suitland, Md","5801 Allentown Road, #302",Suitland,MD,20746,1,"5801 Allentown Road, #302, Suitland, MD 20746",38.806706,-76.902575,census_struct,census:match,0.85,ok
59,996675194,Rx Wellness--Alexandria,3543 W Braddock Rd,Alexandria,VA,22302,1,"3543 W Braddock Rd, Alexandria, VA 22302",38.827099,-77.089397,census_struct,census:match,0.85,ok


In [7]:
# Save updated file
# Writes the geocoded frame to a new workbook; index=False keeps the
# DataFrame index out of the sheet.
print('Saving updated Excel file...')
df.to_excel('./data/outbound_geolocated.xlsx', index=False)
print('Done.')

Saving updated Excel file...
Done.


In [None]:
# Deliberate stop: raising here halts a top-to-bottom run of the notebook so
# the commented-out legacy cells below are never reached.
raise Exception('End of Functional Code')

In [None]:
# def get_latitude_and_longitude(df, str_col_names = ['Street', 'City', 'State', 'Zip']):

# for col in df.columns:

#     if col in str_col_names:
#         df[col] = df[col].astype(str)
#     else:
#         print(f'Skipping column {col} - not included for conversion.')

#     # Build full address if not present
#     if 'Full Address' not in df.columns:
#         df['Full Address'] = (
#             df['Street'].fillna('') + ', '
#             + df['City'].fillna('') + ', '
#             + df['State'].fillna('') + ' '
#             + df['Zip'].fillna('')
#         )
#         df['Full Address'] = df['Full Address'].str.replace(r',\s*,', ',', regex=True).str.replace(r',\s*$', '', regex=True)

#     # Set up geocoder
#     geolocator = Nominatim(user_agent="provider_geocoder")
#     geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1, max_retries=3)

#     # Add Latitude/Longitude columns if missing
#     if 'Latitude' not in df.columns:
#         df['Latitude'] = None
#     if 'Longitude' not in df.columns:
#         df['Longitude'] = None

#     # Geocode only missing lat/lon
#     for idx, row in df.iterrows():
#         if pd.isnull(row['Latitude']) or pd.isnull(row['Longitude']):
#             address = row['Full Address']
#             try:
#                 location = geocode(address, timeout=10)
#                 if location:
#                     df.at[idx, 'Latitude'] = location.latitude
#                     df.at[idx, 'Longitude'] = location.longitude
#                 else:
#                     print(f"No result for: {address}")
#             except Exception as e:
#                 print(f"Error geocoding {address}: {e}")
#             time.sleep(1)  # Be nice to the API

#     return df

In [None]:
# # Summary:
# # - Geocodes addresses in a DataFrame using geopy + Nominatim.
# # - If the full address fails, retries using only the Street value.
# # - Adds Latitude, Longitude, and GeocodeStatus columns.
# # - Skips rows that already have coordinates.
# # - Caches results to avoid duplicate API calls in the same run.

# def get_latitude_and_longitude(
#     df: pd.DataFrame,
#     str_col_names=('Street', 'City', 'State', 'Zip'),
#     address_col='Full Address',
#     user_agent='my-firm-provider-geocoder (you@firm.com)',  # <-- replace with your info
#     min_delay_seconds=1.0,      # Nominatim-friendly rate limit
#     max_retries=2,              # Retries on network/geocoder errors
#     error_wait_seconds=2.0,     # Wait between retries on error
#     timeout=10,                 # Seconds for each geocode call
#     verbose=True
# ) -> pd.DataFrame:
#     """
#     Geocode a DataFrame of addresses using Nominatim (OpenStreetMap) with a fallback:
#     1) Try the full address (Street, City, State, Zip)
#     2) If no result, retry using only Street.

#     Adds/updates columns:
#       - 'Full Address' (if not present)
#       - 'Latitude', 'Longitude'
#       - 'GeocodeStatus' in {'full_address','street_fallback','no_result','error','skipped_has_coords'}

#     Parameters
#     ----------
#     df : pd.DataFrame
#         Input DataFrame with at least the columns in str_col_names, or an existing 'Full Address'.
#     str_col_names : tuple
#         The names of the columns containing Street/City/State/Zip.
#     address_col : str
#         Name of a prebuilt full-address column (created if missing).
#     user_agent : str
#         Required by Nominatim ToS; include contact info.
#     min_delay_seconds : float
#         Minimum delay between geocoding calls (per Nominatim etiquette).
#     max_retries : int
#         Max retries on transient errors.
#     error_wait_seconds : float
#         Wait time between retries.
#     timeout : int
#         Timeout (seconds) for each geocode call.
#     verbose : bool
#         Print progress messages.

#     Returns
#     -------
#     pd.DataFrame
#         The same DataFrame with Latitude/Longitude/GeocodeStatus populated/updated.
#     """

#     # ---- 1) Ensure required columns are strings (when present) ----------------
#     for col in df.columns:
#         if col in str_col_names:
#             df[col] = df[col].astype(str).fillna('').str.strip()

#     # ---- 2) Build Full Address if not present --------------------------------
#     if address_col not in df.columns:
#         missing = [c for c in ('Street','City','State','Zip') if c not in df.columns]
#         if missing:
#             raise ValueError(
#                 f"Missing required columns to build '{address_col}': {missing}"
#             )
#         df[address_col] = (
#             df['Street'].fillna('').str.strip() + ', ' +
#             df['City'].fillna('').str.strip() + ', ' +
#             df['State'].fillna('').str.strip() + ' ' +
#             df['Zip'].fillna('').str.strip()
#         )
#         # Clean up accidental extra commas/spaces
#         df[address_col] = (
#             df[address_col]
#               .str.replace(r'\s+', ' ', regex=True)
#               .str.replace(r',\s*,', ',', regex=True)
#               .str.replace(r',\s*$', '', regex=True)
#               .str.strip()
#         )

#     # ---- 3) Add coordinate/status columns if missing -------------------------
#     if 'Latitude' not in df.columns:
#         df['Latitude'] = pd.NA
#     if 'Longitude' not in df.columns:
#         df['Longitude'] = pd.NA
#     if 'GeocodeStatus' not in df.columns:
#         df['GeocodeStatus'] = pd.NA

#     # ---- 4) Set up geocoder with a rate limiter ------------------------------
#     geolocator = Nominatim(user_agent=user_agent)
#     geocode = RateLimiter(
#         geolocator.geocode,
#         min_delay_seconds=min_delay_seconds,
#         max_retries=max_retries,
#         error_wait_seconds=error_wait_seconds,
#         swallow_exceptions=True  # we handle errors via None checks
#     )

#     # ---- 5) Cache to avoid duplicate lookups in this run ---------------------
#     # Cache stores: query_text -> (lat, lon) or None if no result
#     cache = {}

#     # ---- 6) Iterate rows needing geocoding -----------------------------------
#     # Only attempt for rows missing either Latitude or Longitude
#     mask_need = df['Latitude'].isna() | df['Longitude'].isna()
#     rows_to_process = df.loc[mask_need].index.tolist()

#     if verbose:
#         print(f"Geocoding {len(rows_to_process)} rows (rows with missing lat/lon).")

#     for idx in rows_to_process:
#         street = str(df.at[idx, 'Street']) if 'Street' in df.columns else ''
#         full_addr = str(df.at[idx, address_col])

#         # If both are empty, skip
#         if not (street or full_addr):
#             df.at[idx, 'GeocodeStatus'] = 'no_result'
#             continue

#         # --- Try full address first ------------------------------------------
#         lat, lon, status = None, None, None
#         query = full_addr.strip()

#         # Check cache
#         if query in cache:
#             cached = cache[query]
#             if cached is not None:
#                 lat, lon = cached
#                 status = 'full_address'
#         else:
#             # Call geocoder
#             try:
#                 loc = geocode(query, timeout=timeout, exactly_one=True, addressdetails=False)
#                 if loc:
#                     lat, lon = loc.latitude, loc.longitude
#                     status = 'full_address'
#                     cache[query] = (lat, lon)
#                 else:
#                     cache[query] = None  # no result for this query
#             except Exception as _:
#                 # swallow_exceptions=True should prevent raising, but keep defensive
#                 cache[query] = None

#         # --- Fallback: Street-only if needed ---------------------------------
#         if lat is None or lon is None:
#             street_query = street.strip()
#             if street_query:
#                 if street_query in cache:
#                     cached = cache[street_query]
#                     if cached is not None:
#                         lat, lon = cached
#                         status = 'street_fallback'
#                 else:
#                     try:
#                         loc2 = geocode(street_query, timeout=timeout, exactly_one=True, addressdetails=False)
#                         if loc2:
#                             lat, lon = loc2.latitude, loc2.longitude
#                             status = 'street_fallback'
#                             cache[street_query] = (lat, lon)
#                         else:
#                             cache[street_query] = None
#                     except Exception as _:
#                         cache[street_query] = None

#         # --- Persist results back to df --------------------------------------
#         if lat is not None and lon is not None:
#             df.at[idx, 'Latitude'] = lat
#             df.at[idx, 'Longitude'] = lon
#             df.at[idx, 'GeocodeStatus'] = status
#         else:
#             # Keep as missing if no result
#             df.at[idx, 'GeocodeStatus'] = 'no_result'

#         # Optional extra delay beyond RateLimiter (usually unnecessary)
#         # time.sleep(0.1)

#     # Mark rows that were already populated before this function
#     already_had = (~mask_need)
#     df.loc[already_had & df['GeocodeStatus'].isna(), 'GeocodeStatus'] = 'skipped_has_coords'

#     if verbose:
#         done = df['GeocodeStatus'].value_counts(dropna=False).to_dict()
#         print(f"Geocoding complete. Status counts: {done}")

#     return df

In [None]:
# get_latitude_and_longitude(df)