In [4]:
pip install lightgbm

Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl.metadata (17 kB)
Downloading lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: lightgbm
Successfully installed lightgbm-4.6.0
Note: you may need to restart the kernel to use updated packages.


In [38]:
# 1. IMPORTS
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import lightgbm as lgb

In [40]:
# 2. LOAD DATA
df = pd.read_csv("uk_stations_clustered.csv")
print("Dataset loaded:", df.shape)

Dataset loaded: (10000, 23)


In [42]:
# Target variable
target = "priority_score"

# Same features as DNN & XGBoost
features = [
    "is_operational",
    "latitude",
    "longitude",
    "num_points",
    "max_power_kw",
    "borough_density_km2",
    "uk_avg_util_pct",
    "uk_avg_energy_kWh",
    "stations_per_borough",
    "operational_flag",
    "avg_power_per_connector",
    "has_fast_charger",
    "cluster"
]

X = df[features]
y = df[target]

print("Using features:", features)

Using features: ['is_operational', 'latitude', 'longitude', 'num_points', 'max_power_kw', 'borough_density_km2', 'uk_avg_util_pct', 'uk_avg_energy_kWh', 'stations_per_borough', 'operational_flag', 'avg_power_per_connector', 'has_fast_charger', 'cluster']


In [44]:
#3. TRAIN-TEST SPLIT (80/20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [46]:
# 4. OPTIONAL SCALING (LightGBM works without scaling)
# But kept for consistency 
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [48]:
# 5. BUILD LIGHTGBM MODEL
model_lgb = lgb.LGBMRegressor(
    boosting_type='gbdt',      
    num_leaves=31,             # complexity of tree leaves
    max_depth=-1,              # model decide best depth
    learning_rate=0.05,        
    n_estimators=500,          # number of trees
    subsample=0.8,             # row sampling per tree
    colsample_bytree=0.8,      # feature sampling per tree
    random_state=42
)

In [50]:
#  6. TRAIN THE MODEL
model_lgb.fit(X_train_scaled, y_train)
print("\nLightGBM training completed")

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001332 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 646
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 11
[LightGBM] [Info] Start training from score 0.136597

LightGBM training completed


In [22]:
#7. PREDICT
y_pred = model_lgb.predict(X_test_scaled)



In [52]:
# 8. METRICS
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\n LightGBM results")
print("Test MSE :", mse)
print("Test RMSE:", rmse)
print("Test MAE :", mae)
print("Test R²  :", r2)


 LightGBM results
Test MSE : 4.6551739181653615e-06
Test RMSE: 0.002157585205308324
Test MAE : 0.00037820856839420556
Test R²  : 0.9953153458127958


In [32]:
#9. SAMPLE PREDICTIONS
print("\nSample Predictions:")
print(y_pred[:10])


Sample Predictions:
[0.12812552 0.12812163 0.12898016 0.12816014 0.12950784 0.12921806
 0.12819883 0.12811247 0.1756902  0.12812646]


In [34]:
# 10. SAVE MODEL
model_lgb.booster_.save_model("lightgbm_priority_model.txt")
print("\nLightGBM model saved as lightgbm_priority_model.txt")


LightGBM model saved as lightgbm_priority_model.txt


In [58]:
#final comparison
Model         MSE                     RMSE                      MAE                     R²

DNN           9.096550638787448e-05    0.009537583886282442     0.005014899652451277    0.9084584137166204

XGBoost       7.382024098824905e-07    0.0008591870633817123    0.00016928227837775394   0.999257122704489

LightGBM      4.6551739181653615e-06    0.002157585205308324     0.00037820856839420556   0.9953153458127958