In [None]:
# Libraries used
import random
import numpy as np
import pandas as pd
import torch

def set_seeds(seed: int = 48):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Value handed to Python's ``random``, NumPy's global generator
            and PyTorch (plus every CUDA device when CUDA is available).
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Seed once up front so the whole session starts from a known state
set_seeds(48)

In [None]:
# Load the EV charging reports (semicolon-separated CSV).
df1 = pd.read_csv('Dataset 1_EV charging reports.csv', sep=';')

# info() prints its summary itself, so it can run anywhere in the cell.
# head() is only rendered when it is the cell's LAST expression; placed
# before info() its result was silently discarded.
df1.info()
df1.head()

In [None]:
# Load the local traffic distribution data (semicolon-separated CSV).
df2 = pd.read_csv('Dataset 6_Local traffic distribution.csv', sep=';')

# info() prints its summary itself; head() must be the cell's last
# expression to be rendered, otherwise its result is discarded.
df2.info()
df2.head()

In [None]:
# Parse the timestamp columns (day.month.year hour:minute) into proper datetimes
df1['Start_plugin'] = pd.to_datetime(df1['Start_plugin'], format='%d.%m.%Y %H:%M')
df2['Date_from'] = pd.to_datetime(df2['Date_from'], format='%d.%m.%Y %H:%M')

# merge_asof requires both frames to be sorted by their join key
df1_sorted = df1.sort_values('Start_plugin')
df2_sorted = df2.sort_values('Date_from')

# Attach to every charging session the traffic row whose Date_from is nearest
# to the session's Start_plugin. The 1-hour tolerance prevents distant matches:
# sessions with no traffic row within that window keep NaN in the traffic columns.
# Timedelta(hours=1) is used instead of the string '1H', whose upper-case unit
# alias is deprecated in recent pandas releases.
tolerance = pd.Timedelta(hours=1)
ev_charging_traffic = pd.merge_asof(
    df1_sorted,
    df2_sorted,
    left_on='Start_plugin',
    right_on='Date_from',
    direction='nearest',
    tolerance=tolerance
)

ev_charging_traffic.head()

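What does merge_asof do?
    Instead of matching exact key values (like merge), it:
        Matches each left row to the closest timestamp in the right table
        By default that is the previous one (direction='backward'); this notebook uses direction='nearest' with a 1-hour tolerance, so rows without a match inside that window get NaN
        Works only on data sorted by the merge key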

In [57]:
# These will be dropped from the dataframe since they are not needed for the analysis
drop_clos = ['session_ID', 'Garage_ID', 'User_ID', 'Shared_ID', 'Plugin_category',
 'Duration_category', 'Start_plugin_hour', 'End_plugout_hour', 'End_plugout', 
 'Start_plugin', 'Date_from' , 'Date_to']

# errors='ignore' keeps the cell re-runnable: on a second execution the columns
# are already gone and a strict drop would raise KeyError.
ev_charging_traffic = ev_charging_traffic.drop(columns=drop_clos, errors='ignore')

# Identify numeric columns that need conversion (exclude categorical columns)
numeric_cols = ['El_kWh', 'Duration_hours', 'KROPPAN BRU', 'MOHOLTLIA', 'SELSBAKK', 
                'MOHOLT RAMPE 2', 'Jonsvannsveien vest for Steinanvegen']

rows_before_cleaning = len(ev_charging_traffic)

# Text columns use a decimal comma: swap it for a dot, then let pd.to_numeric
# turn anything unparsable (like '-') into NaN via errors='coerce'.
for col in numeric_cols:
    if col not in ev_charging_traffic.columns:
        continue
    column = ev_charging_traffic[col]
    # Cover both classic object columns and pandas' dedicated string dtypes;
    # checking only for dtype == 'object' would send string-dtype columns to
    # astype(float), which cannot parse a decimal comma.
    if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
        # regex=False: the comma is a literal character, not a pattern
        column = column.str.replace(',', '.', regex=False)
        ev_charging_traffic[col] = pd.to_numeric(column, errors='coerce')
    else:
        # Already numeric, just ensure it's float
        ev_charging_traffic[col] = column.astype(float)

# Drop every row that has a missing value in ANY remaining column. That covers
# values coerced to NaN above, and also NaN that was already present (e.g.
# traffic columns left empty when merge_asof found no match within its tolerance).
# No rows are removed before this point, so before_dropna equals rows_before_cleaning.
before_dropna = len(ev_charging_traffic)
ev_charging_traffic = ev_charging_traffic.dropna()
rows_after_cleaning = len(ev_charging_traffic)
print(f"Rows before cleaning: {rows_before_cleaning}")
print(f"Rows before dropna: {before_dropna}")
print(f"Rows after cleaning: {rows_after_cleaning} (dropped {before_dropna - rows_after_cleaning})")


Rows before cleaning: 6878
Rows before dropna: 6878
Rows after cleaning: 6833 (dropped 45)


In [58]:
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Integer-encode the categorical columns in place, keeping one fitted encoder
# per column so the codes can be mapped back to the original labels.
categorical_cols = ['User_type', 'month_plugin', 'weekdays_plugin']
encoders = {col: LabelEncoder() for col in categorical_cols}
for col, le in encoders.items():
    ev_charging_traffic[col] = le.fit_transform(ev_charging_traffic[col])

# Every column except the target is a model input
analysis_features = ev_charging_traffic.columns.drop(['El_kWh'])

X = ev_charging_traffic.loc[:, analysis_features]
y = ev_charging_traffic.loc[:, 'El_kWh']

# 80/20 split with a fixed random_state so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise features: statistics are learned on the training split only
# and then applied unchanged to the test split.
scaler_X = StandardScaler().fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Standardise the target the same way; the scaler expects a 2-D column, and
# the result is flattened back to 1-D afterwards.
y_train_column = y_train.to_numpy().reshape(-1, 1)
scaler_y = StandardScaler().fit(y_train_column)
y_train_scaled = scaler_y.transform(y_train_column).flatten()
y_test_scaled = scaler_y.transform(y_test.to_numpy().reshape(-1, 1)).flatten()



In [59]:
import torch
import torch.nn as nn
from torch import optim

# Wrap the scaled arrays as float32 tensors. Targets are reshaped to column
# vectors (n_samples, 1) so they line up with the network's single output unit.
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float)
y_train_tensor = torch.tensor(y_train_scaled, dtype=torch.float).view(-1,1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float)
y_test_tensor = torch.tensor(y_test_scaled, dtype=torch.float).view(-1,1)

# Take the input width from the data instead of hard-coding it, so the network
# still fits if the set of feature columns changes.
n_features = X_train_tensor.shape[1]

# Re-seed right before the weights are created so that re-running this cell on
# its own starts from the same initial weights as a fresh Run All (same seed
# as the session default set at the top of the notebook).
torch.manual_seed(48)

# Two hidden layers with ReLU activations and a single linear output for regression
model = nn.Sequential(
    nn.Linear(n_features, 56),
    nn.ReLU(), 
    nn.Linear(56, 28),
    nn.ReLU(),
    nn.Linear(28,1)
)

loss = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.007)

num_epochs = 3000 # number of training iterations
for epoch in range(num_epochs):
    optimizer.zero_grad() # start every iteration from zeroed gradients
    outputs = model(X_train_tensor) # forward pass on the full training set
    mse = loss(outputs, y_train_tensor) # calculate the loss 
    mse.backward() # backward pass
    optimizer.step() # update the weights and biases

    # keep track of the loss during training
    if (epoch + 1) % 500 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], MSE Loss: {mse.item()}')

Epoch [500/3000], MSE Loss: 0.6058117747306824
Epoch [1000/3000], MSE Loss: 0.576505720615387
Epoch [1500/3000], MSE Loss: 0.5625731348991394
Epoch [2000/3000], MSE Loss: 0.5512992143630981
Epoch [2500/3000], MSE Loss: 0.532048761844635
Epoch [3000/3000], MSE Loss: 0.5218437314033508


In [60]:

torch.manual_seed(48)
class nn_Regretion(nn.Module):
    """Fully connected regression network with two hidden ReLU layers.

    Architecture: in_features -> hidden1 -> ReLU -> hidden2 -> ReLU -> 1.
    With the default arguments it has the same layer sizes as the
    ``nn.Sequential`` model built earlier in the notebook (9 -> 56 -> 28 -> 1).

    Args:
        in_features: Number of input features per sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
    """

    def __init__(self, in_features=9, hidden1=56, hidden2=28):
        super().__init__()
        self.layer1 = nn.Linear(in_features, hidden1)
        self.layer2 = nn.Linear(hidden1, hidden2)
        self.layer3 = nn.Linear(hidden2, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1 (the raw, unactivated prediction).
        """
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        return self.layer3(x)
# Optional class-based model (not used for training/evaluation below).
# It is instantiated with freshly initialised weights and never trained in this
# notebook; the single forward pass on the training tensor only demonstrates
# that the class works, and its result is shown as the cell output.
optional_model = nn_Regretion()
optional_model(X_train_tensor)

tensor([[-0.0144],
        [ 0.0818],
        [ 0.1194],
        ...,
        [ 0.1135],
        [ 0.0553],
        [ 0.0845]], grad_fn=<AddmmBackward0>)

This is an optional class-based implementation of the same neural network. The `nn.Sequential` code above is simpler; however, the class offers reusability if needed. For the purpose of this project, the class is not used.

In [61]:
def _regression_metrics(y_true, y_pred):
    """Return (MSE, MAE, R^2) of ``y_pred`` measured against ``y_true``."""
    return (
        mean_squared_error(y_true, y_pred),
        mean_absolute_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )


model.eval()  # switch the model to evaluation mode
with torch.no_grad():  # gradients are not needed outside of training
    # Predictions and loss on the standardised scale the model was trained on
    predictions_scaled = model(X_test_tensor)
    test_MSE_pytorch = loss(predictions_scaled, y_test_tensor)

# Move to NumPy and undo the target scaling so errors read in kWh
predictions_np = predictions_scaled.detach().numpy()
y_test_np = y_test_tensor.detach().numpy()
predictions_original = scaler_y.inverse_transform(predictions_np)
y_test_original = scaler_y.inverse_transform(y_test_np)

# The metric functions get 1-D arrays
y_test_original_flat = y_test_original.reshape(-1)
predictions_original_flat = predictions_original.reshape(-1)

# Reference predictor: always answer with the mean of the training target
baseline_pred = np.full(
    y_test_original_flat.shape,
    y_train.mean(),
    dtype=y_test_original_flat.dtype,
)

# Metrics for the network and for the baseline
mse_scaled = test_MSE_pytorch.item()
mse_original, mae_original, r2_original = _regression_metrics(
    y_test_original_flat, predictions_original_flat
)
baseline_mse, baseline_mae, baseline_r2 = _regression_metrics(
    y_test_original_flat, baseline_pred
)

report = [
    ('Neural Network - Test Set MSE (scaled):', mse_scaled),
    ('Neural Network - Test Set MSE (kWh):', mse_original),
    ('Neural Network - Test Set MAE (kWh):', mae_original),
    ('Neural Network - Test Set R^2:', r2_original),
    ('Baseline (mean) - MSE (kWh):', baseline_mse),
    ('Baseline (mean) - MAE (kWh):', baseline_mae),
    ('Baseline (mean) - R^2:', baseline_r2),
]
for label, value in report:
    print(label, float(value))

Neural Network - Test Set MSE (scaled): 0.8194160461425781
Neural Network - Test Set MSE (kWh): 115.8997802734375
Neural Network - Test Set MAE (kWh): 7.198432445526123
Neural Network - Test Set R^2: 0.10924720764160156
Baseline (mean) - MSE (kWh): 130.11444091796875
Baseline (mean) - MAE (kWh): 8.006174087524414
Baseline (mean) - R^2: -1.1920928955078125e-07


In [62]:
# Persist just the learned parameters (the state_dict), not the whole pickled
# model object, so the file can be loaded into any matching architecture.
trained_weights = model.state_dict()
torch.save(trained_weights, 'model_state_dict.pth')