# In [None]:  (cell marker left over from the notebook export)
#!/usr/bin/env python3

import ssl
import csv
import json
import re
import time
import urllib.parse
import urllib.request
from html import escape

# ---------------- CONFIG ----------------
# Path of the input CSV; main() treats the first column of each row as one entry.
INPUT_CSV = r""
# Path of the results CSV written by main().
OUTPUT_CSV = ""
# Path of the Leaflet HTML map written by main().
OUTPUT_HTML = "data_centres_map2.html"
# Sent as the User-Agent header on every geocoding request.
# NOTE(review): blank here — public geocoders generally expect an identifying
# User-Agent; fill in before running.
USER_AGENT = ""
# Endpoint used for both the structured and the free-form Nominatim lookups.
NOMINATIM_BASE = "https://nominatim.openstreetmap.org/search"
# Access token placed in the Mapbox request URL. Treat as a secret.
MAPBOX_TOKEN = ""
# URL prefix for Mapbox lookups; the quoted query plus ".json" is appended.
MAPBOX_BASE = "https://api.mapbox.com/geocoding/v5/mapbox.places/"
# Seconds slept between geocoding attempts and between entries.
DELAY_SECONDS = 1.0 # delay due to API throttling
# Passed to Nominatim as the `countrycodes` parameter.
COUNTRYCODE = "gb"
# Upper bound on how many loaded entries main() geocodes.
MAX_ENTRIES = 1000

# Case-insensitive pattern for a UK-style postcode: 1-2 letters, 1-2 digits,
# an optional letter, optional whitespace, then a digit and two letters.
# There are no word-boundary anchors, so it can also match inside longer tokens.
POSTCODE_RE = re.compile(r'([A-Z]{1,2}\d{1,2}[A-Z]?\s*\d[A-Z]{2})', re.IGNORECASE)

# --------- INSECURE SSL CONTEXT (temporary workaround) ----------------
# WARNING: both lines below disable TLS certificate verification, so HTTPS
# responses are not authenticated. They also rely on private `ssl` APIs.
# The first line swaps the process-wide default HTTPS context factory; the
# second builds the unverified context that this file passes explicitly to
# urllib.request.urlopen().
# TODO(review): replace with ssl.create_default_context() once the local
# certificate store is fixed.
ssl._create_default_https_context = ssl._create_unverified_context
ssl_ctx = ssl._create_unverified_context()

# ---------------- HELPERS ----------------
def normalize_postcode(pc: str) -> str:
    """Return *pc* in canonical spacing: upper-case, one space before the last three characters.

    Every character other than ``A-Z`` / ``0-9`` (after upper-casing) is
    discarded. Values of three characters or fewer are returned without a
    space; a falsy input yields ``""``.
    """
    if not pc:
        return ""
    compact = "".join(re.findall(r"[A-Z0-9]", pc.upper()))
    if len(compact) > 3:
        outward, inward = compact[:-3], compact[-3:]
        return f"{outward} {inward}"
    return compact

def extract_fields(entry):
    """Split a ``*``-delimited entry into ``(org, street, town, postcode)``.

    ``org``, ``street`` and ``town`` are the stripped fields at positions 6, 7
    and 11; each is ``""`` when the entry has too few fields. The postcode is
    the first ``POSTCODE_RE`` match found while scanning the fields from last
    to first (upper-cased and stripped), or ``""`` when nothing matches.
    """
    fields = entry.split("*")

    def field_at(index):
        # Missing positions are reported as an empty string.
        return fields[index].strip() if index < len(fields) else ""

    # Scan backwards and stop at the first field that contains a postcode.
    candidates = (POSTCODE_RE.search(chunk) for chunk in fields[::-1] if chunk)
    hit = next((found for found in candidates if found), None)
    postcode = hit.group(1).upper().strip() if hit else ""

    return field_at(6), field_at(7), field_at(11), postcode

def do_request(params, base_url):
    """GET ``base_url`` with ``params`` as the query string and decode the JSON reply.

    Returns:
        ``(data, url)``: the parsed JSON body and the full URL requested.
        If anything raises while fetching or decoding, ``data`` is
        ``{"_error": <message>}`` instead; the exception is not propagated.
    """
    # Commas are left unescaped in the query string.
    query = urllib.parse.urlencode(params, safe=",")
    url = "?".join((base_url, query))
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    try:
        with urllib.request.urlopen(request, context=ssl_ctx, timeout=30) as response:
            payload = response.read().decode("utf-8")
            return json.loads(payload), url
    except Exception as exc:
        # Best-effort by design: callers inspect the "_error" key.
        return {"_error": str(exc)}, url

# ---------------- GEOCODE FUNCTIONS ----------------
def geocode_structured(street, town, postcode):
    """Geocode an address with a structured Nominatim search.

    Args:
        street: Street line, sent as ``street`` when non-empty.
        town: Town name, sent as ``city`` when non-empty.
        postcode: Postcode, sent as ``postalcode`` when non-empty.

    Returns:
        A ``(lat, lon, status, url)`` tuple. On success ``lat``/``lon`` are
        floats and ``status`` is ``"structured"``. Otherwise ``lat`` and
        ``lon`` are ``None`` and ``status`` is ``"no_results_structured"`` or
        ``"error_structured: <detail>"``. ``url`` is the requested URL, or
        ``""`` when no request was made.
    """
    if not (street or town or postcode):
        # Nothing to search for: skip a wasted call to a rate-limited API.
        return None, None, "no_results_structured", ""

    params = {}
    if street:
        params["street"] = street
    if town:
        params["city"] = town
    if postcode:
        params["postalcode"] = postcode
    if COUNTRYCODE:
        params["countrycodes"] = COUNTRYCODE

    data, url = do_request(params, NOMINATIM_BASE)
    if isinstance(data, dict) and "_error" in data:
        return None, None, f"error_structured: {data['_error']}", url
    if not data:
        return None, None, "no_results_structured", url
    if not isinstance(data, list):
        # A non-list payload cannot be indexed as a result list.
        return None, None, "error_structured: unexpected response", url
    try:
        lat = float(data[0]["lat"])
        lon = float(data[0]["lon"])
    except (KeyError, TypeError, ValueError) as exc:
        # Malformed first result: report it instead of aborting the whole run.
        return None, None, f"error_structured: {exc!r}", url
    return lat, lon, "structured", url

def geocode_freeform(street, town, postcode):
    """Geocode an address with a free-form Nominatim ``q`` search.

    The non-empty parts of ``street``, ``town`` and ``postcode`` are joined
    with ``", "`` to form the query.

    Returns:
        A ``(lat, lon, status, url)`` tuple. On success ``lat``/``lon`` are
        floats and ``status`` is ``"freeform"``. Otherwise ``lat`` and ``lon``
        are ``None`` and ``status`` is ``"no_results_freeform"`` or
        ``"error_freeform: <detail>"``. ``url`` is the requested URL, or
        ``""`` when no request was made.
    """
    q = ", ".join(part for part in (street, town, postcode) if part)
    if not q:
        # Nothing to search for: skip a wasted call to a rate-limited API.
        return None, None, "no_results_freeform", ""

    params = {"q": q}
    if COUNTRYCODE:
        # Same guard as the structured lookup: omit the filter when unset.
        params["countrycodes"] = COUNTRYCODE

    data, url = do_request(params, NOMINATIM_BASE)
    if isinstance(data, dict) and "_error" in data:
        return None, None, f"error_freeform: {data['_error']}", url
    if not data:
        return None, None, "no_results_freeform", url
    if not isinstance(data, list):
        # A non-list payload cannot be indexed as a result list.
        return None, None, "error_freeform: unexpected response", url
    try:
        lat = float(data[0]["lat"])
        lon = float(data[0]["lon"])
    except (KeyError, TypeError, ValueError) as exc:
        # Malformed first result: report it instead of aborting the whole run.
        return None, None, f"error_freeform: {exc!r}", url
    return lat, lon, "freeform", url

def geocode_mapbox(postcode):
    """Geocode a postcode with the Mapbox places endpoint (GB only, one result).

    Args:
        postcode: Postcode text used as the search query.

    Returns:
        A ``(lat, lon, status, url)`` tuple. On success ``lat``/``lon`` are
        floats and ``status`` is ``"mapbox_fallback"``. Otherwise ``lat`` and
        ``lon`` are ``None`` and ``status`` is ``"no_results_mapbox"`` or
        ``"error_mapbox: <detail>"``. ``url`` is the requested URL (it
        contains the access token), or ``""`` when no request was made.
    """
    if not postcode:
        # An empty query would hit ".../.json"; skip the request entirely.
        return None, None, "no_results_mapbox", ""

    # safe="" also escapes "/", which would otherwise change the URL path.
    query = urllib.parse.quote(postcode, safe="")
    params = {"access_token": MAPBOX_TOKEN, "limit": 1, "country": "GB"}
    # Reuse the shared request helper so headers, timeout, SSL context and
    # error reporting match the Nominatim lookups.
    data, url = do_request(params, f"{MAPBOX_BASE}{query}.json")

    if isinstance(data, dict) and "_error" in data:
        return None, None, f"error_mapbox: {data['_error']}", url
    features = data.get("features") if isinstance(data, dict) else None
    if not features:
        return None, None, "no_results_mapbox", url
    try:
        # The first two "center" values are read as longitude, then latitude.
        lon, lat = features[0]["center"][:2]
        return float(lat), float(lon), "mapbox_fallback", url
    except (KeyError, IndexError, TypeError, ValueError) as exc:
        return None, None, f"error_mapbox: {exc}", url

def geocode_with_fallback(entry):
    """Geocode one raw entry, trying progressively looser lookups.

    Order of attempts: structured Nominatim search, free-form Nominatim
    search, then a Mapbox postcode lookup (only when a postcode was found).
    The URL of each attempt is printed, and ``DELAY_SECONDS`` is slept after
    each failed Nominatim attempt.

    Args:
        entry: Raw ``*``-delimited record as accepted by ``extract_fields``.

    Returns:
        ``(used, lat, lon, status, note)`` where ``used`` is the
        ``"org, street, town, postcode"`` label. On success ``status`` is
        ``"ok"`` or ``"ok_mapbox"`` and ``note`` names the method used. On
        failure ``lat``/``lon`` are ``None``, ``status`` is ``"failed"`` and
        ``note`` is ``"all_attempts_failed"``.
    """
    org, street, town, raw_pc = extract_fields(entry)
    pc = normalize_postcode(raw_pc) if raw_pc else ""
    used = f"{org}, {street}, {town}, {pc}"

    def found(lat, lon):
        # Compare against None: 0.0 is a valid coordinate (e.g. the Greenwich
        # meridian), so a truthiness test would wrongly reject it.
        return lat is not None and lon is not None

    # 1) structured
    lat, lon, _status, url = geocode_structured(street, town, pc)
    print("STRUCTURED attempt ->", url)
    if found(lat, lon):
        return used, lat, lon, "ok", "structured"
    time.sleep(DELAY_SECONDS)

    # 2) freeform
    lat, lon, _status, url = geocode_freeform(street, town, pc)
    print("FREEFORM attempt ->", url)
    if found(lat, lon):
        return used, lat, lon, "ok", "freeform"
    time.sleep(DELAY_SECONDS)

    # 3) Mapbox fallback (postcode only)
    if pc:
        lat, lon, _status, url = geocode_mapbox(pc)
        # Keep the access token out of console / notebook output.
        shown_url = url.replace(MAPBOX_TOKEN, "<redacted>") if MAPBOX_TOKEN else url
        print("MAPBOX fallback ->", shown_url)
        if found(lat, lon):
            return used, lat, lon, "ok_mapbox", "mapbox_fallback"

    return used, None, None, "failed", "all_attempts_failed"

# ---------------- MAIN ----------------
def _load_entries(path):
    """Return the stripped first column of every row in the CSV at *path*, skipping blanks."""
    with open(path, newline="", encoding="utf-8") as fh:
        return [row[0].strip() for row in csv.reader(fh) if row and row[0].strip()]


def _write_results_csv(path, results):
    """Write the header row followed by the result tuples to the CSV at *path*."""
    with open(path, "w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(["original_entry", "used_address_or_display", "lat", "lon", "status", "note"])
        writer.writerows(results)


def _build_map_html(results):
    """Return a Leaflet page with one marker per result that has coordinates."""
    located = [r for r in results if r[2] != "" and r[3] != ""]
    if located:
        # Centre the view on the mean of the located points.
        center_lat = sum(float(r[2]) for r in located) / len(located)
        center_lon = sum(float(r[3]) for r in located) / len(located)
    else:
        # Default view used when nothing was geocoded.
        center_lat, center_lon = 54.5, -4.0

    head = f"""<!doctype html>
<html>
<head>
<meta charset="utf-8"/>
<title>VOA Map</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
<style>#map{{position:absolute;top:0;bottom:0;left:0;right:0}}</style>
</head><body><div id="map"></div>
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
<script>
var map = L.map('map').setView([{center_lat}, {center_lon}], 7);
L.tileLayer('https://tile.openstreetmap.org/{{z}}/{{x}}/{{y}}.png', {{maxZoom:19, attribution: '&copy; OpenStreetMap contributors'}}).addTo(map);
"""

    markers = []
    for entry, used, lat, lon, _status, _note in located:
        popup = escape(used) + "<br/>" + escape(entry)
        # escape() handles HTML; json.dumps() then makes a valid JS string
        # literal, so a backslash or newline in the data cannot break the script.
        markers.append(f"L.marker([{lat}, {lon}]).addTo(map).bindPopup({json.dumps(popup)});\n")

    return head + "".join(markers) + "</script></body></html>"


def main():
    """Geocode the entries in ``INPUT_CSV`` and write the CSV and HTML map outputs.

    Reads the first column of ``INPUT_CSV``, geocodes at most ``MAX_ENTRIES``
    entries (sleeping ``DELAY_SECONDS`` after each), writes every result to
    ``OUTPUT_CSV`` and writes a Leaflet map of the located ones to
    ``OUTPUT_HTML``. Progress is printed to stdout.
    """
    rows = _load_entries(INPUT_CSV)
    print(f"Loaded {len(rows)} entries (first {MAX_ENTRIES}).")

    results = []
    for idx, entry in enumerate(rows[:MAX_ENTRIES], start=1):
        print("\n==== Entry", idx, "====")
        print(entry)
        used, lat, lon, status, note = geocode_with_fallback(entry)
        print("Result:", status, note, "->", used, lat, lon)
        # Only None means "no coordinate"; `lat or ""` would also blank out 0.0.
        results.append((
            entry,
            used,
            "" if lat is None else lat,
            "" if lon is None else lon,
            status,
            note,
        ))
        time.sleep(DELAY_SECONDS)

    _write_results_csv(OUTPUT_CSV, results)
    print("Wrote", OUTPUT_CSV)

    with open(OUTPUT_HTML, "w", encoding="utf-8") as fh:
        fh.write(_build_map_html(results))

    print("Wrote", OUTPUT_HTML, " — open in browser to inspect markers & popups.")

if __name__ == "__main__":
    main()
