# EV Charging – Fixing unmapped postcodes with Google reverse geocoding

This notebook fixes a subset of OpenChargeMap sites whose postcodes could not be matched to the ONS Postcode Directory. It uses the Google Maps Geocoding API to infer a usable postcode from each site’s latitude/longitude.

Input:
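- `sites_unmapped_postcodes.csv`: sites with missing, null or invalid `pcds_clean` values. The notebook reads the `site_id`, `latitude`, `longitude` and `current_pcds_clean` columns from this file.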

Output:
- `site_postcode_fixes.csv` with:
  - `site_id`
  - `old_pcds_clean` (current value in the database)
  - `new_pcds_clean` (cleaned postcode inferred from latitude/longitude)

The output is loaded back into Postgres and used to update `sites.pcds_clean`. 

In [None]:
%run "secrets/api_keys.ipynb"

import time
import requests
import pandas as pd

# Load the sites whose postcodes could not be matched.
INPUT_CSV = "../Data/clean_data/sites_unmapped_postcodes.csv"
df_unmapped = pd.read_csv(INPUT_CSV)

print("Unmapped sites:", len(df_unmapped))
# Only a cell's last expression is rendered automatically, so the preview has
# to be displayed explicitly or it is silently dropped.
display(df_unmapped.head())
df_unmapped.info()


In [None]:
# Probe the API with the first unmapped site to inspect the raw response.
row0 = df_unmapped.iloc[0]
lat = row0["latitude"]
lon = row0["longitude"]

print("Testing with:", lat, lon)

BASE_URL = "https://maps.googleapis.com/maps/api/geocode/json"

params = {
    "latlng": f"{lat},{lon}",
    "key": GOOGLE_MAPS_API_KEY,
}

# A timeout stops a stalled connection from hanging the kernel indefinitely.
response = requests.get(BASE_URL, params=params, timeout=10)
data = response.json()

print("Status:", data.get("status"))

# "results" may be empty or missing when the status is not OK, so guard the
# [0] lookup instead of letting the cell fail with IndexError/KeyError.
results = data.get("results", [])
if results:
    first_result = results[0]
    print("Formatted address:", first_result.get("formatted_address"))

    print("Address components:")
    for comp in first_result.get("address_components", []):
        print(comp.get("long_name"), "→", comp.get("types"))
else:
    print("No results returned:", data.get("error_message"))


In [None]:
def extract_postcode_from_data(data):
    """Return the most specific postal code found in a geocoding response.

    Args:
        data: Decoded JSON response (a dict with "status" and "results").

    Returns:
        The ``long_name`` of the first address component typed
        ``postal_code`` that is not also a ``postal_code_prefix``. If only
        prefix components exist, the first prefix is returned. Returns None
        when the payload is not a dict, the status is not "OK", or no postal
        code component is present.
    """
    if not isinstance(data, dict) or data.get("status") != "OK":
        return None

    prefix_fallback = None
    for result in data.get("results", []):
        for comp in result.get("address_components", []):
            types = comp.get("types", [])
            if "postal_code" not in types:
                continue
            # A component that is only a prefix (e.g. "SE24") is a partial
            # postcode; keep looking for a full one in later results.
            if "postal_code_prefix" not in types:
                return comp.get("long_name")
            if prefix_fallback is None:
                prefix_fallback = comp.get("long_name")

    return prefix_fallback


def clean_postcode_for_pcds(raw_postcode):
    """Normalise a raw postcode into the compact upper-case form.

    The value is trimmed, upper-cased and stripped of space characters.

    Returns:
        The normalised string, or None when the input is not a string or the
        normalised value is shorter than five characters.
    """
    if isinstance(raw_postcode, str):
        compact = raw_postcode.strip().upper().replace(" ", "")
        if len(compact) >= 5:
            return compact
    return None


def get_clean_postcode_from_latlon(lat, lon, api_key, timeout=10):
    """Reverse-geocode one coordinate pair and return its cleaned postcode.

    Sends a single GET request to ``BASE_URL`` and passes the decoded JSON
    through ``extract_postcode_from_data`` and ``clean_postcode_for_pcds``.

    Args:
        lat: Latitude of the site.
        lon: Longitude of the site.
        api_key: Key sent as the ``key`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The cleaned postcode, or None when a coordinate is missing or the
        response yields no usable postcode.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    # A missing coordinate cannot be geocoded; skip the pointless request
    # (it would otherwise be sent as "nan,nan").
    if pd.isna(lat) or pd.isna(lon):
        return None

    params = {
        "latlng": f"{lat},{lon}",
        "key": api_key,
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    resp = requests.get(BASE_URL, params=params, timeout=timeout)
    data = resp.json()

    raw_pc = extract_postcode_from_data(data)
    return clean_postcode_for_pcds(raw_pc)


In [None]:

# Repeats the probe request using the `lat` / `lon` values already set by an
# earlier cell, then prints the first result's address components.
BASE_URL = "https://maps.googleapis.com/maps/api/geocode/json"

params = {
    "latlng": f"{lat},{lon}",
    "key": GOOGLE_MAPS_API_KEY,
}

# A timeout stops a stalled connection from hanging the kernel indefinitely.
response = requests.get(BASE_URL, params=params, timeout=10)
data = response.json()


print("Status:", data.get("status"))

# "results" may be empty or missing when the status is not OK, so guard the
# [0] lookup instead of letting the cell fail with IndexError/KeyError.
results = data.get("results", [])
if results:
    first_result = results[0]
    print("Formatted address:", first_result.get("formatted_address"))

    print("Address components:")
    for comp in first_result.get("address_components", []):
        print(comp.get("long_name"), "→", comp.get("types"))
else:
    print("No results returned:", data.get("error_message"))


In [None]:
def extract_postcode_from_data(data):
    """Return the most specific postal code found in a geocoding response.

    Args:
        data: Decoded JSON response (a dict with "status" and "results").

    Returns:
        The ``long_name`` of the first address component typed
        ``postal_code`` that is not also a ``postal_code_prefix``. If only
        prefix components exist, the first prefix is returned. Returns None
        when the payload is not a dict, the status is not "OK", or no postal
        code component is present.
    """
    if not isinstance(data, dict) or data.get("status") != "OK":
        return None

    prefix_fallback = None
    for result in data.get("results", []):
        for comp in result.get("address_components", []):
            types = comp.get("types", [])
            if "postal_code" not in types:
                continue
            # A component that is only a prefix (e.g. "SE24") is a partial
            # postcode; keep looking for a full one in later results.
            if "postal_code_prefix" not in types:
                return comp.get("long_name")
            if prefix_fallback is None:
                prefix_fallback = comp.get("long_name")

    return prefix_fallback


In [None]:
# Run the extractor on the probe response held in `data` and show the
# untouched postcode string before any cleaning is applied.
postcode_raw = extract_postcode_from_data(data)
print("Raw postcode from API:", postcode_raw)


In [None]:
def clean_postcode_for_pcds(raw_postcode):
    """Normalise a raw postcode into the compact upper-case form.

    The value is trimmed, upper-cased and stripped of space characters.

    Returns:
        The normalised string, or None when the input is not a string or the
        normalised value is shorter than five characters.
    """
    if isinstance(raw_postcode, str):
        compact = raw_postcode.strip().upper().replace(" ", "")
        if len(compact) >= 5:
            return compact
    return None

In [None]:
# Spot-check the cleaner on a full postcode and on an outward-code-only value.
for sample in ("SE24 0AA", "SE24"):
    print(clean_postcode_for_pcds(sample))


In [None]:
# Chain extraction and cleaning on the probe response and show both stages.
raw_pc = extract_postcode_from_data(data)
clean_pc = clean_postcode_for_pcds(raw_pc)

for label, value in (("Raw:", raw_pc), ("Cleaned:", clean_pc)):
    print(label, value)


In [None]:
def get_clean_postcode_from_latlon(lat, lon, api_key, timeout=10):
    """Reverse-geocode one coordinate pair and return its cleaned postcode.

    Sends a single GET request to ``BASE_URL`` and passes the decoded JSON
    through ``extract_postcode_from_data`` and ``clean_postcode_for_pcds``.

    Args:
        lat: Latitude of the site.
        lon: Longitude of the site.
        api_key: Key sent as the ``key`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The cleaned postcode, or None when a coordinate is missing or the
        response yields no usable postcode.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    # A missing coordinate cannot be geocoded; skip the pointless request
    # (it would otherwise be sent as "nan,nan").
    if pd.isna(lat) or pd.isna(lon):
        return None

    params = {
        "latlng": f"{lat},{lon}",
        "key": api_key,
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    resp = requests.get(BASE_URL, params=params, timeout=timeout)
    data = resp.json()

    raw_pc = extract_postcode_from_data(data)
    return clean_postcode_for_pcds(raw_pc)


In [None]:
# End-to-end check of the helper on the first unmapped site.
row0 = df_unmapped.iloc[0]
lat, lon = row0["latitude"], row0["longitude"]

clean_pc = get_clean_postcode_from_latlon(lat, lon, GOOGLE_MAPS_API_KEY)
print("Clean postcode for row 0:", clean_pc)


In [None]:
# Try the helper on the first few sites. The count is capped at the frame
# length so a short input file does not raise IndexError.
for i in range(min(5, len(df_unmapped))):
    row = df_unmapped.iloc[i]
    lat, lon = row["latitude"], row["longitude"]
    print(i, lat, lon, "→", get_clean_postcode_from_latlon(lat, lon, GOOGLE_MAPS_API_KEY))


In [None]:
# Reverse-geocode every unmapped site. A failed request is recorded as None and
# the loop carries on, so one network error does not discard earlier results.
new_postcodes = []
failed_indices = []
total_rows = len(df_unmapped)

for position, (i, row) in enumerate(df_unmapped.iterrows()):
    lat = row["latitude"]
    lon = row["longitude"]

    try:
        clean_pc = get_clean_postcode_from_latlon(lat, lon, GOOGLE_MAPS_API_KEY)
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a response body that is not valid JSON.
        # Only the exception type is printed: the full message can include
        # the request URL, which carries the API key.
        print(f"Row {i}: request failed ({type(exc).__name__}); recording no postcode")
        failed_indices.append(i)
        clean_pc = None
    new_postcodes.append(clean_pc)

    # Count by position rather than index label so the progress figure is
    # right even when the frame's index is not 0..n-1.
    if position % 50 == 0:
        print(f"Processed {position} / {total_rows} rows")

print(f"Done: {total_rows} rows processed, {len(failed_indices)} request failures")


In [None]:
# Attach the geocoded postcodes (one per row, in row order) and preview them
# next to the existing values.
preview_columns = ["site_id", "current_pcds_clean", "new_pcds_clean"]
df_unmapped["new_pcds_clean"] = new_postcodes
df_unmapped.loc[:, preview_columns].head(10)


In [None]:

# Total number of sites that were sent for reverse geocoding.
df_unmapped.shape[0]

In [None]:
# Sites for which a usable postcode was found.
num_success = (~df_unmapped["new_pcds_clean"].isna()).sum()

In [None]:
# Sites that still have no postcode after reverse geocoding.
num_missing = df_unmapped["new_pcds_clean"].isnull().sum()

In [None]:
# Show both counts together: (resolved, still missing).
(num_success, num_missing)

In [None]:
# Build the fixes table: keep the site id plus the old and new postcode, and
# rename the existing value to the column name used in the output file.
fix_columns = ["site_id", "current_pcds_clean", "new_pcds_clean"]
output_names = {"current_pcds_clean": "old_pcds_clean"}
df_fixes = df_unmapped.loc[:, fix_columns].rename(columns=output_names)

df_fixes.head()


In [None]:
# Write the fixes table without the index column and echo the destination path.
OUTPUT_CSV = "../Data/clean_data/site_postcode_fixes.csv"
df_fixes.to_csv(path_or_buf=OUTPUT_CSV, index=False)
OUTPUT_CSV