In [1]:
# imported libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from sklearn.metrics import r2_score
import joblib
import json

In [2]:
df = pd.read_csv("uk_stations_clustered.csv")

print("Dataset loaded:", df.shape)
df.head()

Dataset loaded: (10000, 23)


Unnamed: 0,operator,status,is_operational,latitude,longitude,num_points,connector_types,max_power_kw,borough,borough_density_km2,...,stations_per_borough,operational_flag,usage_Private - Restricted Access,usage_Privately Owned - Notice Required,usage_Public,usage_Public - Membership Required,usage_Public - Pay At Location,avg_power_per_connector,has_fast_charger,cluster
0,Ev Dot,Not Operational,False,51.507351,-0.127758,1.0,Type 2 (Socket Only); Type 2 (Socket Only),7.0,Outer London,9318.948649,...,7950,0,False,False,False,False,True,7.0,0,0
1,Independent Operator,Operational,True,51.507291,-0.128896,1.0,0,4.8,Westminster,13608.4,...,476,1,False,False,True,False,False,4.8,0,2
2,Bp Pulse (Uk),Operational,True,51.507099,-0.130117,8.0,BS1363 3 Pin 13 Amp; Type 2 (Socket Only),7.0,Westminster,13608.4,...,476,1,False,False,False,True,False,7.0,0,2
3,Virta,Operational,True,51.508903,-0.125534,1.0,Type 2 (Socket Only),7.0,Westminster,9318.948649,...,476,1,False,False,False,False,False,7.0,0,1
4,Bp Pulse (Uk),Operational,True,51.509162,-0.13065,4.0,Type 2 (Socket Only),4.0,Westminster,13608.4,...,476,1,False,False,False,True,False,4.0,0,2


In [13]:
target = "priority_score"

features = [
    "is_operational",
    "latitude",
    "longitude",
    "num_points",
    "max_power_kw",
    "borough_density_km2",
    "uk_avg_util_pct",
    "uk_avg_energy_kWh",
    "stations_per_borough",
    "operational_flag",
    "avg_power_per_connector",
    "has_fast_charger",
    "cluster"  
]

X = df[features]
y = df[target]
print("Using features:", features)

Using features: ['is_operational', 'latitude', 'longitude', 'num_points', 'max_power_kw', 'borough_density_km2', 'uk_avg_util_pct', 'uk_avg_energy_kWh', 'stations_per_borough', 'operational_flag', 'avg_power_per_connector', 'has_fast_charger', 'cluster']


In [15]:
# 3. TRAIN-TEST SPLIT
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [17]:
# 4. SCALE FEATURES
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the scaler
joblib.dump(scaler, "priority_scaler.pkl")
print("Scaler saved as priority_scaler.pkl")

# Save feature names
with open("dnn_features.json", "w") as f:
    json.dump(features, f)
print("Feature list saved as dnn_features.json")

Scaler saved as priority_scaler.pkl
Feature list saved as dnn_features.json


In [19]:
# 5. BUILD DNN MODEL

model = Sequential([
    Dense(128, input_shape=(X_train_scaled.shape[1],)),
    BatchNormalization(),
    LeakyReLU(alpha=0.1),
    Dropout(0.3),

    Dense(64),
    BatchNormalization(),
    LeakyReLU(alpha=0.1),
    Dropout(0.3),

    Dense(32),
    BatchNormalization(),
    LeakyReLU(alpha=0.1),

    Dense(16),
    BatchNormalization(),
    LeakyReLU(alpha=0.1),

    Dense(1, activation='linear')
])

model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [21]:
# 6. TRAIN MODEL
history = model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    verbose=1
)

Epoch 1/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.2055 - mae: 0.3040 - val_loss: 0.0071 - val_mae: 0.0473
Epoch 2/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 893us/step - loss: 0.0129 - mae: 0.0817 - val_loss: 0.0017 - val_mae: 0.0256
Epoch 3/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 876us/step - loss: 0.0061 - mae: 0.0555 - val_loss: 0.0015 - val_mae: 0.0226
Epoch 4/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 872us/step - loss: 0.0043 - mae: 0.0487 - val_loss: 4.5757e-04 - val_mae: 0.0145
Epoch 5/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 868us/step - loss: 0.0030 - mae: 0.0409 - val_loss: 4.3724e-04 - val_mae: 0.0137
Epoch 6/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 867us/step - loss: 0.0024 - mae: 0.0361 - val_loss: 4.3927e-04 - val_mae: 0.0122
Epoch 7/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [23]:
# 7. EVALUATE MODEL
loss, mae = model.evaluate(X_test_scaled, y_test)

print("\nTest MSE:", loss)
print("Test MAE:", mae)

# R-SQUARE
y_pred = model.predict(X_test_scaled).flatten()
r2 = r2_score(y_test, y_pred)

print("R² Score:", r2)

# RMSE
rmse = np.sqrt(loss)
print("RMSE:", rmse)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 539us/step - loss: 9.2184e-05 - mae: 0.0052

Test MSE: 9.096550638787448e-05
Test MAE: 0.005014899652451277
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 936us/step
R² Score: 0.9084584137166204
RMSE: 0.009537583886282442


In [25]:
# 8. SAMPLE PREDICTIONS
print("\nSample Predictions:")
print(y_pred[:10])


Sample Predictions:
[0.12795599 0.12781554 0.13897064 0.12717122 0.12714753 0.12795481
 0.13665207 0.1383043  0.18334466 0.13724932]


In [27]:
# 9. SAVE MODEL
model.save("dnn_priority_model.h5")
print("Model saved as dnn_priority_model_improved.h5")

metadata = {
    "model_name": "DNN Priority Score Model",
    "version": "1.0",
    "description": "Modified original DNN with LeakyReLU, BatchNorm, wider layers, and scaler saving.",
    "features_used": features
}

with open("model_metadata.json", "w") as f:
    json.dump(metadata, f)

print("Metadata saved as model_metadata.json")



Model saved as dnn_priority_model_improved.h5
Metadata saved as model_metadata.json
