In [None]:
#!/usr/bin/env python3
# map_combined_postcodes_scrollable_search.py — plot Ofcom + VOA + EPC postcode data with scrollable, color-coded popups + search

import csv, json, os, re, time, urllib.parse, urllib.request
from html import escape

# ---------------- CONFIG ----------------
# File paths are left blank here and must be filled in before running.
INPUT_CSV = r""    # CSV read by main(); its "postcode" column is geocoded
OUTPUT_CSV = r""   # geocoded rows (input columns plus lat/lon/method) are written here
OUTPUT_HTML = r""  # the generated Leaflet map page is written here

USER_AGENT = ""    # sent as the User-Agent header by do_request() when no headers are passed
MAPBOX_TOKEN = ""  # appended as access_token to every geocoding URL
MAPBOX_BASE = "https://api.mapbox.com/geocoding/v5/mapbox.places/"
DELAY_SECONDS = 0.6  # pause after each geocoding attempt in main()

# ---- Visual styles ----
RADIUS = 5  # circle-marker radius passed to Leaflet
DOT_COLOUR = "#2A7AE2"  # blue for all markers
# Heading colours for the three popup sections built in build_popup()
COLOUR_OFCOM = "#2A7AE2"
COLOUR_VOA = "#FF8C00"
COLOUR_EPC = "#E02424"

# ---------------- HELPERS ----------------
def normalize_postcode(pc):
    """Return *pc* in upper case with a single space before its last three characters.

    Every character other than A-Z and 0-9 is removed first.  Falsy input
    gives ``""``; a cleaned value of three characters or fewer is returned
    as-is, without a space.
    """
    if not pc:
        return ""
    cleaned = re.sub(r"[^A-Z0-9]", "", pc.upper())
    if len(cleaned) <= 3:
        return cleaned
    head, tail = cleaned[:-3], cleaned[-3:]
    return f"{head.strip()} {tail}"

def do_request(url, headers=None, timeout=30):
    """Fetch *url* and return ``(body, error)``.

    Args:
        url: Address to request.
        headers: Optional mapping of request headers.  When omitted (or
            empty) a single ``User-Agent`` header built from ``USER_AGENT``
            is sent.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        ``(text, None)`` on success, where *text* is the response body decoded
        as UTF-8, or ``(None, message)`` when anything goes wrong.  The
        function never raises for request failures.
    """
    if not headers:
        headers = {"User-Agent": USER_AGENT}
    try:
        # Request() raises ValueError for a malformed URL, so it is built inside
        # the try to honour the "(None, error) instead of raising" contract.
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return resp.read().decode("utf-8"), None
    except Exception as e:
        # Deliberately broad: this is the best-effort boundary for network,
        # HTTP, protocol and decoding errors alike.
        return None, str(e)

def mapbox_search(query):
    """Look up *query* with the Mapbox geocoding endpoint, restricted to GB.

    Returns a ``(feature, url, error)`` triple: the first feature of the
    response and ``None`` as error on success; ``None`` and an error string
    (``"no_results"`` when the feature list is empty) otherwise.  The request
    URL is always returned in the middle slot.
    """
    encoded = urllib.parse.quote(query)
    url = f"{MAPBOX_BASE}{encoded}.json?access_token={MAPBOX_TOKEN}&limit=1&country=GB"
    body, err = do_request(url)
    if err:
        return None, url, err
    try:
        features = json.loads(body).get("features", [])
        if not features:
            return None, url, "no_results"
        return features[0], url, None
    except Exception as exc:
        # Any decoding or shape problem is reported as the error string
        return None, url, str(exc)

def geocode_postcode(postcode):
    """Resolve *postcode* to ``(lat, lon, method)`` via ``mapbox_search``.

    *method* is ``"mapbox_postcode"`` when coordinates were found.  With a
    falsy postcode the result is ``(None, None, "no_postcode")``; when the
    search yields no feature with a ``"center"`` entry it is
    ``(None, None, "no_result")``.
    """
    if not postcode:
        return None, None, "no_postcode"
    feature, _url, _err = mapbox_search(postcode)
    if not feature or "center" not in feature:
        return None, None, "no_result"
    # "center" is unpacked as (lon, lat); the return order is swapped to (lat, lon)
    lon, lat = feature["center"]
    return lat, lon, "mapbox_postcode"

# ---------------- Formatting helpers ----------------
def format_epc_dict(epc_str):
    """Render the EPC record(s) embedded in *epc_str* as small HTML tables.

    Every ``{...}`` fragment found in the text is decoded as one record and
    rendered as a two-column table restricted to ``allowed_fields``; values
    equal to ``""``, ``" "`` or ``None`` are left out and known fields get a
    unit suffix.  All labels and values are HTML-escaped.

    Args:
        epc_str: Text containing zero or more brace-delimited records, written
            either as JSON or as Python dict literals.

    Returns:
        The concatenated tables.  If some fragments could not be decoded a
        short note with their count is appended.  If nothing could be
        rendered, the result is an error ``<pre>`` block (when decoding
        failed) or the "No EPC data" placeholder.
    """
    import ast  # local import: ast is not among the module-level imports

    placeholder = "<p><i>No EPC data</i></p>"
    if not epc_str or "{" not in epc_str:
        return placeholder

    allowed_fields = [
        "ADDRESS", "FLOOR_AREA", "STANDARD_EMISSIONS", "TARGET_EMISSIONS",
        "TYPICAL_EMISSIONS", "BUILDING_EMISSIONS", "ASSET_RATING_BAND",
        "ASSET_RATING", "MAIN_HEATING_FUEL", "BUILDING_ENVIRONMENT",
        "PROPERTY_TYPE", "TRANSACTION_TYPE", "INSPECTION_DATE",
    ]
    units = {
        "FLOOR_AREA": "m²",
        "STANDARD_EMISSIONS": "kg CO₂/m²/year",
        "TARGET_EMISSIONS": "kg CO₂/m²/year",
        "TYPICAL_EMISSIONS": "kg CO₂/m²/year",
        "BUILDING_EMISSIONS": "kg CO₂/m²/year",
    }

    def parse_record(text):
        """Decode one ``{...}`` fragment into a dict or raise ValueError."""
        # 1) Genuine JSON.
        try:
            return json.loads(text)
        except ValueError:
            pass
        # 2) Python dict literal: copes with apostrophes inside values and
        #    with None/True/False, which the quote replacement below cannot.
        try:
            parsed = ast.literal_eval(text)
            if isinstance(parsed, dict):
                return parsed
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            pass
        # 3) Legacy behaviour: single-quoted pseudo-JSON (e.g. {'A': null}).
        return json.loads(text.replace("'", '"'))

    tables = []
    first_error = None
    failed = 0
    for fragment in re.findall(r"\{[^\}]+\}", epc_str):
        # Decode per record so one bad fragment cannot discard the others
        try:
            epc = parse_record(fragment)
        except (ValueError, RecursionError) as e:
            failed += 1
            if first_error is None:
                first_error = e
            continue

        rows = []
        for key in allowed_fields:
            if key in epc and epc[key] not in ("", None, " "):
                label = key.replace("_", " ").title()
                unit = units.get(key, "")
                value = f"{epc[key]} {unit}".strip()
                rows.append(
                    f"<tr><td style='border:1px solid #ccc;padding:3px;width:45%;'><b>{escape(label)}</b></td>"
                    f"<td style='border:1px solid #ccc;padding:3px;'>{escape(value)}</td></tr>"
                )
        if rows:
            tables.append("<table style='width:100%;border-collapse:collapse;font-size:11px;margin-top:4px;'>"
                          + "".join(rows) + "</table><br>")

    if not tables:
        if first_error is not None:
            return f"<pre>Error parsing EPC data: {escape(str(first_error))}</pre>"
        return placeholder
    if failed:
        # Surface the loss instead of silently dropping broken records
        tables.append(f"<p><i>{failed} EPC record(s) could not be parsed</i></p>")
    return "".join(tables)

def build_voa_table(voa_raw):
    """Render ``||``-separated, ``*``-delimited VOA records as HTML tables.

    Each record is stripped of leading/trailing ``*`` and split on ``*``;
    records with fewer than ten fields are skipped.  Field positions 5, 7, 17
    and 15 are shown as Description, Address, Rateable Value and Effective
    Date respectively (positions beyond the end of a record render empty).
    NOTE(review): these positions are taken from the code as written —
    confirm against the VOA list layout.

    Args:
        voa_raw: Raw record text; may be empty.

    Returns:
        One table per usable record, concatenated, or the "No VOA data"
        placeholder when the input is empty or no record is usable.
    """
    placeholder = "<p><i>No VOA data</i></p>"
    if not voa_raw:
        return placeholder

    def field_at(fields, index):
        """Return fields[index], or "" when the record is too short."""
        return fields[index] if index < len(fields) else ""

    tables = []
    for raw in voa_raw.split("||"):
        fields = raw.strip("*").split("*")
        if len(fields) < 10:
            continue
        # Indices 5 and 7 are guaranteed by the length check above
        desc, addr = fields[5], fields[7]
        eff_date = field_at(fields, 15)
        rv = field_at(fields, 17)
        # Avoid printing a bare currency sign when no value is present
        rv_cell = f"£{escape(rv)}" if rv.strip() else ""
        tables.append(
            "<table style='width:100%;border-collapse:collapse;font-size:11px;margin-top:6px;'>"
            f"<tr><td><b>Description</b></td><td>{escape(desc)}</td></tr>"
            f"<tr><td><b>Address</b></td><td>{escape(addr)}</td></tr>"
            f"<tr><td><b>Rateable Value</b></td><td>{rv_cell}</td></tr>"
            f"<tr><td><b>Effective Date</b></td><td>{escape(eff_date)}</td></tr>"
            "</table>"
        )
    # A heading with nothing beneath it is confusing; fall back to the placeholder
    return "".join(tables) if tables else placeholder

def build_popup(rec):
    """Build the scrollable popup HTML for one geocoded record.

    Args:
        rec: Mapping with optional keys ``"postcode"``, ``"ofcom_data"``,
            ``"voa_raw"`` and ``"epc_raw"``.  Missing or ``None`` values are
            treated as empty.

    Returns:
        An HTML fragment: a fixed-width scrollable ``<div>`` with a heading
        for the postcode followed by one colour-coded section per non-empty
        data source.  Only single quotes are used for attributes, so the
        fragment contains no literal double quote and stays intact when the
        caller embeds it in a double-quoted JavaScript string.
    """
    # "or ''" also covers keys that are present with a None value
    pc = rec.get("postcode") or ""
    ofcom = rec.get("ofcom_data") or ""
    voa_raw = rec.get("voa_raw") or ""
    epc_raw = rec.get("epc_raw") or ""

    parts = [
        # container: fixed width, scrollable, allow word wrap, box-sizing so padding behaves predictably
        ("<div style='max-height:350px;overflow-y:auto;width:340px;"
         "font-family:sans-serif;font-size:12px;word-wrap:break-word;box-sizing:border-box;padding-right:10px;'>"),
        # heading: block so padding/margin applies to the whole line, allow wrapping,
        # and append a small inline-block spacer so the right side always has breathing room.
        # The spacer uses single-quoted attributes like the rest of the markup.
        (f"<h3 style='margin:0;font-size:15px;display:block;overflow-wrap:break-word;"
         f"word-break:break-word;padding-right:6px;'>"
         f"Data centre information for postcode: {escape(pc)}"
         f"<span style='display:inline-block;width:18px;vertical-align:middle;'></span>"
         f"</h3>")
    ]

    if ofcom:
        parts.append(f"<h4 style='margin-top:8px;color:{COLOUR_OFCOM};'>Enriched Ofcom List</h4>")
        parts.append(f"<p style='white-space:pre-wrap;margin:0 0 6px 0;'>{escape(ofcom)}</p>")

    if voa_raw:
        parts.append(f"<h4 style='margin-top:8px;color:{COLOUR_VOA};'>VOA Valuation</h4>")
        parts.append(build_voa_table(voa_raw))

    if epc_raw:
        parts.append(f"<h4 style='margin-top:8px;color:{COLOUR_EPC};'>EPC Records</h4>")
        parts.append(format_epc_dict(epc_raw))

    parts.append("</div>")
    return "".join(parts)

# ---------------- MAIN ----------------
def main():
    """Geocode every postcode in INPUT_CSV and write a CSV plus a Leaflet map.

    Steps:
      1. Read INPUT_CSV and normalise each row's ``postcode`` column.
      2. Geocode each postcode, pausing DELAY_SECONDS after every attempt;
         rows without coordinates are reported and dropped.
      3. Write the surviving rows, extended with ``lat``, ``lon`` and
         ``method``, to OUTPUT_CSV.
      4. Write OUTPUT_HTML: a map centred on the mean coordinate with one
         circle marker and popup per row, plus a search box that filters and
         zooms by popup text.

    Returns early, writing nothing, when no row could be geocoded.
    """
    print("Reading:", INPUT_CSV)
    # utf-8-sig drops a leading BOM if present, which would otherwise become
    # part of the first header name; plain UTF-8 files read identically.
    with open(INPUT_CSV, newline="", encoding="utf-8-sig", errors="replace") as f:
        rows = list(csv.DictReader(f))
    print("Loaded", len(rows), "rows")

    results = []
    for idx, r in enumerate(rows, start=1):
        pc = normalize_postcode(r.get("postcode", ""))
        if not pc:
            continue
        print(f"[{idx}/{len(rows)}] Geocoding {pc}...")
        lat, lon, method = geocode_postcode(pc)
        # Compare against None: a longitude of exactly 0.0 is a valid
        # coordinate and must not be discarded as falsy.
        if lat is not None and lon is not None:
            r.update({"lat": lat, "lon": lon, "method": method})
            results.append(r)
        else:
            print(f"  ❌ No coords for {pc}")
        time.sleep(DELAY_SECONDS)

    if not results:
        print("No geocoded results found.")
        return

    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as out:
        fieldnames = list(results[0].keys())
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)
    print("Wrote geocoded CSV:", OUTPUT_CSV)

    lats = [float(r["lat"]) for r in results]
    lons = [float(r["lon"]) for r in results]
    center_lat, center_lon = sum(lats)/len(lats), sum(lons)/len(lons)

    # Doubled braces are literal braces for CSS/JS; single braces are interpolated.
    html_head = f"""<!doctype html>
<html><head>
<meta charset="utf-8"/>
<title>UK Data Centres by Postcode</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
<style>
#map{{position:absolute;top:0;bottom:0;left:0;right:0}}
.search-container{{position:absolute;top:10px;left:50%;transform:translateX(-50%);z-index:1000;}}
.search-container input{{padding:6px 10px;font-size:14px;width:240px;border:1px solid #aaa;border-radius:4px;}}
</style>
</head><body>
<div id="map"></div>
<div class="search-container"><input id="searchBox" type="text" placeholder="Search postcode..."></div>
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
<script>
var map = L.map('map').setView([{center_lat}, {center_lon}], 7);
L.tileLayer('https://tile.openstreetmap.org/{{z}}/{{x}}/{{y}}.png', {{maxZoom:19, attribution: '&copy; OpenStreetMap contributors'}}).addTo(map);
"""

    # Add markers
    marker_lines = []
    for r in results:
        lat, lon = r["lat"], r["lon"]
        # json.dumps yields a valid JavaScript string literal: quotes,
        # backslashes, newlines, carriage returns and non-ASCII characters
        # are all escaped, so the HTML reaches Leaflet unchanged (multi-line
        # text keeps its line breaks).  "</" is additionally split so that no
        # popup text can ever terminate the surrounding <script> element.
        popup_js = json.dumps(build_popup(r)).replace("</", "<\\/")
        marker_lines.append(
            f"var marker = L.circleMarker([{lat}, {lon}], {{radius:{RADIUS}, color:'{DOT_COLOUR}', fillColor:'{DOT_COLOUR}', fillOpacity:0.9}})"
            f".addTo(map).bindPopup({popup_js});"
        )

    # Add search JS
    html_tail = """
var searchBox = document.getElementById('searchBox');
searchBox.addEventListener('input', function(){
  var q = this.value.trim().toLowerCase();
  var found = null;
  map.eachLayer(function(layer){
    if (layer.getPopup && layer.getPopup()){
      var content = layer.getPopup().getContent().toLowerCase();
      if (content.includes(q) && q){
        found = layer;
        layer.setStyle({opacity:1, fillOpacity:0.9});
      } else {
        layer.setStyle({opacity:q?0.2:1, fillOpacity:q?0.2:0.9});
      }
    }
  });
  if (found && q.length > 2){
    map.setView(found.getLatLng(), 15, {animate:true});
    found.openPopup();
  }
});
</script></body></html>
"""

    with open(OUTPUT_HTML, "w", encoding="utf-8") as f:
        f.write(html_head)
        for line in marker_lines:
            f.write(line + "\n")
        f.write(html_tail)

    print("✅ Wrote HTML map:", OUTPUT_HTML)

# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
