In [3]:
# 1. Import Libraries

import pandas as pd
import numpy as np

# 2. Load Cleaned Dataset (After EDA)

# Prefer the cleaned file; fall back to the backup file only when the cleaned
# one is absent. Any other failure (parse error, permissions, interrupt) is
# allowed to surface instead of being silently masked by the fallback.
try:
    df = pd.read_csv("cleaned_uk_stations_enriched.csv")
except FileNotFoundError:
    print("cleaned_uk_stations_enriched.csv not found; loading uk_stations_enriched.csv instead")
    df = pd.read_csv("uk_stations_enriched.csv")  # backup load

print("Dataset loaded. Shape:", df.shape)
df.head()
   

Dataset loaded. Shape: (10000, 29)


Unnamed: 0,ocm_id,operator,usage_type,status,is_operational,address1,address2,town,state_province,postcode,...,borough,borough_density_km2,uk_avg_util_pct,uk_avg_energy_kWh,uk_usage_rows,priority_score,last_status_update,last_verified,submission_status,data_provider
0,253415,Ev Dot,Public - Pay At Location,Not Operational,False,Rainsford Road,,Chelmsford,England,CM1 2XB,...,Outer London,9318.948649,50.216667,36.881026,78,0.131437,2023-05-04 08:44:00+00:00,2023-05-04 08:44:00+00:00,Imported and Published,UK National Charge Point Registry
1,4396,Independent Operator,Public,Operational,True,Spring Garden,Westminster,London,London,SW1A 2BN,...,Westminster,13608.4,50.216667,36.881026,78,0.209204,2011-05-17 17:23:00+00:00,2011-05-17 17:23:00+00:00,Submission Published,Open Charge Map Contributors
2,52877,Bp Pulse (Uk),Public - Membership Required,Operational,True,Spring Gardens,City of Westminster,London,London,SW1A 2TS,...,Westminster,13608.4,50.216667,36.881026,78,0.22101,2023-04-03 16:58:00+00:00,2023-04-03 16:58:00+00:00,Submission Published,Open Charge Map Contributors
3,146490,Virta,"Private - For Staff, Visitors Or Customers",Operational,True,440 Strand,Covent Garden,London,London,WC2R 0QS,...,Westminster,9318.948649,50.216667,36.881026,78,0.131437,2020-01-10 10:18:00+00:00,2020-01-10 10:18:00+00:00,Submission Published,Open Charge Map Contributors
4,4399,Bp Pulse (Uk),Public - Membership Required,Operational,True,Whitcomb Street,Westminster,London,London,WC2H 7DT,...,Westminster,13608.4,50.216667,36.881026,78,0.212507,2023-04-03 17:00:00+00:00,2023-04-03 17:00:00+00:00,Submission Published,Open Charge Map Contributors


In [6]:
# 3. Feature: Power Category
# Slow (<7 kW), Fast (7 to <22 kW), Rapid (22 to <50 kW), Ultra-Rapid (>=50 kW)
def categorize_power(power):
    """Map a charger's maximum power (in kW) to a coarse speed band.

    Bands are inclusive at the lower bound and exclusive at the upper bound:
    "Slow" (< 7), "Fast" (7 to < 22), "Rapid" (22 to < 50) and
    "Ultra-Rapid" (>= 50).

    Parameters
    ----------
    power : Any
        Value convertible with ``float()`` (number or numeric string).

    Returns
    -------
    str
        One of the four band labels, or "Unknown" when the value is missing
        (None / NaN) or cannot be converted to a number.
    """
    try:
        p = float(power)
    except (TypeError, ValueError):
        # None, non-numeric strings and other unconvertible objects.
        return "Unknown"

    # NaN compares False against every threshold, so without this guard a
    # missing power reading would fall through to the "Ultra-Rapid" branch.
    if np.isnan(p):
        return "Unknown"

    if p < 7:
        return "Slow"
    elif p < 22:
        return "Fast"
    elif p < 50:
        return "Rapid"
    else:
        return "Ultra-Rapid"
# Label every station with its power band, then preview the raw power next to
# the derived label.
power_band = df["max_power_kw"].apply(categorize_power)
df["power_category"] = power_band
df.loc[:, ["max_power_kw", "power_category"]].head()

Unnamed: 0,max_power_kw,power_category
0,7.0,Fast
1,4.8,Slow
2,7.0,Fast
3,7.0,Fast
4,4.0,Slow


In [9]:
# 4. Feature: Borough Station Density
# Number of stations present in each borough
borough_counts = df.groupby("borough").size().rename("stations_per_borough")
# Look the count up per row instead of merging: assigning a column is safe to
# re-run, whereas a repeated merge would produce stations_per_borough_x/_y
# columns. Rows whose borough is NaN get NaN, as they did with the left merge.
df["stations_per_borough"] = df["borough"].map(borough_counts)
# Preview
df[["borough", "stations_per_borough"]].head()

        borough  stations_per_borough
0  Outer London                  7950
1   Westminster                   476
2   Westminster                   476
3   Westminster                   476
4   Westminster                   476


In [11]:
# 5. Feature: Operational Flag
# Numeric copy of the boolean column (1 = operational, 0 = not operational).
# When is_operational is absent, every row receives 0.
df["operational_flag"] = (
    df["is_operational"].astype(int) if "is_operational" in df.columns else 0
)

# 6. One-Hot Encoding for Usage Type
# Convert usage_type column to machine-learning-friendly format

if "usage_type" in df.columns:
    # Collapse missing / placeholder labels into an explicit "Unknown" category
    # before encoding. With drop_first=True a NaN usage_type would otherwise be
    # encoded as all-False, which is indistinguishable from the dropped
    # baseline category. This has to happen here because get_dummies removes
    # the usage_type column, so it cannot be cleaned afterwards.
    df["usage_type"] = (
        df["usage_type"]
        .replace({"unknown": "Unknown", "Unspecified": "Unknown", "": "Unknown"})
        .fillna("Unknown")
    )
    df = pd.get_dummies(df, columns=["usage_type"], prefix="usage", drop_first=True)



In [13]:
# 6. Cleaning Remaining Unknown / Unspecified Values
# 'Unknown' here is not a genuine category: it marks a value that is missing / not available.
# Every placeholder spelling, the empty string and NaN are collapsed to "Unknown".
replacement_map = dict.fromkeys(
    ["Unknown", "unknown", "Unspecified", "", np.nan],
    "Unknown",
)

cols_to_clean = ["operator", "usage_type", "borough", "town", "state_province"]

# Columns that are not present in df are skipped (usage_type, for instance, is
# no longer a column once it has been one-hot encoded).
for col in [name for name in cols_to_clean if name in df.columns]:
    df[col] = df[col].replace(replacement_map)

# Borough: Unknown → Missing
if "borough" in df.columns:
    df["borough"] = df["borough"].replace("Unknown", "Missing")



In [15]:
# 7. Feature: Connector Count
# Count the number of connectors in the comma-separated connector_types list.
# Instead of giving this long text to the model, extract the number of
# connectors: a numeric connector_count feature is more meaningful.
# Rows whose connector_types is missing get NaN (count unknown).
try:
    df["connector_count"] = df["connector_types"].str.count(",") + 1
except (KeyError, AttributeError):
    # KeyError: the column is absent. AttributeError: the column is not
    # string-typed, so the .str accessor is unavailable.
    # Best-effort fallback: assume a single connector per station.
    df["connector_count"] = 1

In [18]:
# 8. AVERAGE POWER PER CONNECTOR
# Gives a measure of charger capacity.
# This helps the model understand which stations actually deliver higher
# per-connector charging power.

# Vectorised division. .where() turns any connector_count that is not > 0
# (including NaN) into NaN, so those rows yield NaN instead of dividing by
# zero. This gives the same result as the row-by-row version without the
# Python-level loop.
df["avg_power_per_connector"] = df["max_power_kw"] / df["connector_count"].where(
    df["connector_count"] > 0
)
#print(df[["max_power_kw", "connector_count", "avg_power_per_connector"]].head())


In [20]:
# 9. FAST CHARGER FLAG
# 1 when the station's max power is at least 22 kW, otherwise 0.
# Note: 22 kW is the lower bound of the "Rapid" band in categorize_power, so
# despite its name this flag marks Rapid and Ultra-Rapid stations, not the
# 7 to <22 kW "Fast" band.

# Convert the whole column once; unparseable or missing values become NaN,
# which compares False against the threshold and therefore maps to 0.
df["has_fast_charger"] = (
    pd.to_numeric(df["max_power_kw"], errors="coerce").ge(22).astype(int)
)

df[df["max_power_kw"] >= 22].head()



Unnamed: 0,ocm_id,operator,status,is_operational,address1,address2,town,state_province,postcode,country,...,stations_per_borough,operational_flag,usage_Private - Restricted Access,usage_Privately Owned - Notice Required,usage_Public,usage_Public - Membership Required,usage_Public - Pay At Location,connector_count,avg_power_per_connector,has_fast_charger
6,104898,Bp Pulse (Uk),Operational,True,Saint Martins Lane Hotel,,45 Saint Martin's Lane,London,WC2N 4HX,GB,...,476,1,False,False,False,True,False,1.0,50.0,1
13,128396,Bp Pulse (Uk),Operational,True,Q-Park Chinatown,,20 Newport Place,London,WC2H 7PR,GB,...,476,1,False,False,False,True,False,1.0,50.0,1
14,46743,Total Energies (Uk),Operational,True,Southampton Street,City of Westminster,London,London,WC2E 7HE,GB,...,476,1,False,False,False,False,True,1.0,22.0,1
17,253610,Bp Pulse (Uk),Not Operational,False,20 Newport Place,,London,London,WC2H 7PR,GB,...,476,0,False,False,False,True,False,1.0,22.0,1
24,132780,Total Energies (Uk),Operational,True,33 Golden Square,,London,London,W1F 9JT,GB,...,476,1,False,False,False,True,False,1.0,22.0,1


In [22]:
# Save Final Engineered Dataset
# The row index is not written to the CSV.
output_path = "uk_stations_final_engineered.csv"
df.to_csv(path_or_buf=output_path, index=False)
print(f"Final engineered dataset saved as: {output_path}")

Final engineered dataset saved as: uk_stations_final_engineered.csv


In [24]:
# Preview the first 10 rows of the engineered DataFrame.
df.head(10)

Unnamed: 0,ocm_id,operator,status,is_operational,address1,address2,town,state_province,postcode,country,...,stations_per_borough,operational_flag,usage_Private - Restricted Access,usage_Privately Owned - Notice Required,usage_Public,usage_Public - Membership Required,usage_Public - Pay At Location,connector_count,avg_power_per_connector,has_fast_charger
0,253415,Ev Dot,Not Operational,False,Rainsford Road,,Chelmsford,England,CM1 2XB,GB,...,7950,0,False,False,False,False,True,1.0,7.0,0
1,4396,Independent Operator,Operational,True,Spring Garden,Westminster,London,London,SW1A 2BN,GB,...,476,1,False,False,True,False,False,,,0
2,52877,Bp Pulse (Uk),Operational,True,Spring Gardens,City of Westminster,London,London,SW1A 2TS,GB,...,476,1,False,False,False,True,False,1.0,7.0,0
3,146490,Virta,Operational,True,440 Strand,Covent Garden,London,London,WC2R 0QS,GB,...,476,1,False,False,False,False,False,1.0,7.0,0
4,4399,Bp Pulse (Uk),Operational,True,Whitcomb Street,Westminster,London,London,WC2H 7DT,GB,...,476,1,False,False,False,True,False,1.0,4.0,0
5,170689,Pod Point (Uk),Operational,True,Whitehall Place,,Westminster,London,SW1A 2BD,GB,...,476,1,False,False,False,True,False,1.0,7.0,0
6,104898,Bp Pulse (Uk),Operational,True,Saint Martins Lane Hotel,,45 Saint Martin's Lane,London,WC2N 4HX,GB,...,476,1,False,False,False,True,False,1.0,50.0,1
7,253530,Shell Recharge Solutions (Uk),Not Operational,False,Junc. Bedfordbury,,London,London,WC2N 4DQ,GB,...,476,0,False,False,False,False,True,1.0,5.0,0
8,99712,Pod Point (Uk),Operational,True,5 - 7 Carlton Gardens,,London,London,SW1Y 5AD,GB,...,476,1,False,False,False,False,False,1.0,7.0,0
9,107862,Bp Pulse (Uk),Operational,True,St James Square,Westminster,London,London,SW1Y 4PD,GB,...,476,1,False,False,False,True,False,1.0,7.0,0


In [30]:
# List every column currently in df (original fields plus engineered features).
df.columns

Index(['ocm_id', 'operator', 'status', 'is_operational', 'address1',
       'address2', 'town', 'state_province', 'postcode', 'country', 'latitude',
       'longitude', 'title', 'num_points', 'connector_types', 'max_power_kw',
       'all_connector_powers_kw', 'connection_statuses', 'borough',
       'borough_density_km2', 'uk_avg_util_pct', 'uk_avg_energy_kWh',
       'uk_usage_rows', 'priority_score', 'last_status_update',
       'last_verified', 'submission_status', 'data_provider', 'power_category',
       'stations_per_borough', 'operational_flag',
       'usage_Private - Restricted Access',
       'usage_Privately Owned - Notice Required', 'usage_Public',
       'usage_Public - Membership Required', 'usage_Public - Pay At Location',
       'connector_count', 'avg_power_per_connector', 'has_fast_charger'],
      dtype='object')

In [34]:
# Display the first 10 rows of df again.
df.head(10)

Unnamed: 0,ocm_id,operator,status,is_operational,address1,address2,town,state_province,postcode,country,...,stations_per_borough,operational_flag,usage_Private - Restricted Access,usage_Privately Owned - Notice Required,usage_Public,usage_Public - Membership Required,usage_Public - Pay At Location,connector_count,avg_power_per_connector,has_fast_charger
0,253415,Ev Dot,Not Operational,False,Rainsford Road,,Chelmsford,England,CM1 2XB,GB,...,7950,0,False,False,False,False,True,1.0,7.0,0
1,4396,Independent Operator,Operational,True,Spring Garden,Westminster,London,London,SW1A 2BN,GB,...,476,1,False,False,True,False,False,,,0
2,52877,Bp Pulse (Uk),Operational,True,Spring Gardens,City of Westminster,London,London,SW1A 2TS,GB,...,476,1,False,False,False,True,False,1.0,7.0,0
3,146490,Virta,Operational,True,440 Strand,Covent Garden,London,London,WC2R 0QS,GB,...,476,1,False,False,False,False,False,1.0,7.0,0
4,4399,Bp Pulse (Uk),Operational,True,Whitcomb Street,Westminster,London,London,WC2H 7DT,GB,...,476,1,False,False,False,True,False,1.0,4.0,0
5,170689,Pod Point (Uk),Operational,True,Whitehall Place,,Westminster,London,SW1A 2BD,GB,...,476,1,False,False,False,True,False,1.0,7.0,0
6,104898,Bp Pulse (Uk),Operational,True,Saint Martins Lane Hotel,,45 Saint Martin's Lane,London,WC2N 4HX,GB,...,476,1,False,False,False,True,False,1.0,50.0,1
7,253530,Shell Recharge Solutions (Uk),Not Operational,False,Junc. Bedfordbury,,London,London,WC2N 4DQ,GB,...,476,0,False,False,False,False,True,1.0,5.0,0
8,99712,Pod Point (Uk),Operational,True,5 - 7 Carlton Gardens,,London,London,SW1Y 5AD,GB,...,476,1,False,False,False,False,False,1.0,7.0,0
9,107862,Bp Pulse (Uk),Operational,True,St James Square,Westminster,London,London,SW1Y 4PD,GB,...,476,1,False,False,False,True,False,1.0,7.0,0
