In [11]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [13]:
pip install xgboost

Collecting xgboost
  Downloading xgboost-3.1.2-py3-none-macosx_12_0_arm64.whl.metadata (2.1 kB)
Downloading xgboost-3.1.2-py3-none-macosx_12_0_arm64.whl (2.2 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.2/2.2 MB 8.7 MB/s eta 0:00:00
Installing collected packages: xgboost
Successfully installed xgboost-3.1.2
Note: you may need to restart the kernel to use updated packages.


In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor


In [17]:
# Read the clustered UK stations CSV; the path is relative to the notebook's
# working directory. The shape is echoed as a quick sanity check on the load.
df = pd.read_csv("uk_stations_clustered.csv")
print(f"Dataset loaded: {df.shape}")

Dataset loaded: (10000, 23)


In [19]:
# Column the regressor is trained to predict.
target = "priority_score"

# Input columns (the same set the DNN uses). The order of this list fixes the
# column order of X, and therefore the column order the scaler and model see.
features = [
    "is_operational", "latitude", "longitude",
    "num_points", "max_power_kw",
    "borough_density_km2",
    "uk_avg_util_pct", "uk_avg_energy_kWh",
    "stations_per_borough",
    "operational_flag",
    "avg_power_per_connector", "has_fast_charger",
    "cluster",
]

# Design matrix and target vector, selected by label from the loaded frame.
X = df.loc[:, features]
y = df.loc[:, target]

print(f"Using features: {features}")

Using features: ['is_operational', 'latitude', 'longitude', 'num_points', 'max_power_kw', 'borough_density_km2', 'uk_avg_util_pct', 'uk_avg_energy_kWh', 'stations_per_borough', 'operational_flag', 'avg_power_per_connector', 'has_fast_charger', 'cluster']


In [21]:
# 3. TRAIN–TEST SPLIT
# Hold out 20% of the rows for evaluation; the fixed seed makes the partition
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [23]:
# Standardize the inputs. Tree ensembles such as XGBoost do not need this,
# but it keeps the preprocessing identical across all models being compared.
# The scaler learns its statistics from the training rows only, and those same
# statistics are then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [25]:
# 4. BUILD XGBOOST MODEL
# Gradient-boosted tree regressor; nothing is fitted in this cell.
model_xgb = XGBRegressor(
    # Learning task
    objective='reg:squarederror',
    # Ensemble size and shrinkage
    n_estimators=500,       # boosting rounds (one tree each)
    learning_rate=0.05,     # shrinkage applied to every tree's contribution
    max_depth=6,            # maximum depth of an individual tree
    # Per-tree random sampling
    subsample=0.8,          # fraction of training rows drawn for each tree
    colsample_bytree=0.8,   # fraction of feature columns drawn for each tree
    # Reproducibility and parallelism
    random_state=42,
    n_jobs=-1,              # use every available core
)

In [29]:
# 5. TRAIN MODEL
# Fit the regressor on the standardized training rows. No validation set or
# early stopping is passed, so all configured boosting rounds are built.
model_xgb.fit(X_train_scaled, y_train)
print("\nModel training completed!")
# 6. PREDICTIONS
# Score the held-out rows; they must go through the same fitted scaler as the
# training data, hence X_test_scaled rather than X_test.
y_pred = model_xgb.predict(X_test_scaled)


Model training completed!


In [31]:
# 7. METRICS
# Held-out regression metrics; RMSE is derived from MSE so it is reported in
# the same units as the target.
# NOTE(review): an R² of ~0.999 is unusually high — priority_score may be a
# near-deterministic function of the selected features. Confirm how the target
# was constructed before reading this as real predictive skill.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\n XGBoost RESULTS")
for label, value in (
    ("Test MSE :", mse),
    ("Test RMSE:", rmse),
    ("Test MAE :", mae),
    ("Test R²  :", r2),
):
    print(label, value)



 XGBoost RESULTS
Test MSE : 7.382024098824905e-07
Test RMSE: 0.0008591870633817123
Test MAE : 0.00016928227837775394
Test R²  : 0.999257122704489


In [33]:
# 8. SAMPLE PREDICTIONS
# Show only the first ten test-set predictions as a spot check.
print("\nSample Predictions:", y_pred[:10], sep="\n")


Sample Predictions:
[0.12812704 0.12812704 0.1294331  0.12816507 0.1294811  0.12923764
 0.12813191 0.12817077 0.17402242 0.12812306]


In [35]:
# 9. SAVE MODEL
# The booster was fitted on StandardScaler-transformed inputs, so the JSON file
# on its own cannot score raw rows: the fitted mean/scale and the feature order
# have to travel with it. They are written alongside the model as a NumPy
# archive (plain arrays only, so it loads without pickle).
model_xgb.save_model("xgboost_priority_model.json")
np.savez(
    "xgboost_priority_scaler.npz",
    mean=scaler.mean_,
    scale=scaler.scale_,
    features=np.array(features),
)
print("\nXGBoost model saved as xgboost_priority_model.json")
print("Scaler statistics saved as xgboost_priority_scaler.npz")


XGBoost model saved as xgboost_priority_model.json
