In [1]:
import sys
print(sys.executable)

C:\Users\Johanna\anaconda3\envs\AI_project\python.exe


In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error,r2_score
from sklearn.model_selection import GridSearchCV, train_test_split, RandomizedSearchCV
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from tensorflow.keras.layers import  Dropout
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from scikeras.wrappers import KerasRegressor
from tensorflow.keras.optimizers import Adam
from keras.models import load_model
from tensorflow.keras.layers import Input
import joblib
import matplotlib.pyplot as plt
from datetime import time


In [5]:
#read the necessary dataframes

#df_lagged_all = pd.read_parquet("data/processed/df_lagged_30_all.parquet")
df_lagged_all = pd.read_parquet("data/processed/df_lagged_all_v2.parquet")
df_2_full = pd.read_parquet("data/processed/df_2_full_v2.parquet")

df_all=pd.read_parquet("data/processed/df_all_v2.parquet")
df_all_peak= df_all.set_index("Datetime").between_time("07:15", "08:30").reset_index()



Variables

In [6]:
##define target sensor, sensor in same and neighbouring portal
target_sensor = 1076
same_portal="55620"
neighbour_portal = "56160"

same_portal_sensors = df_2_full[df_2_full['PORTAL_clean'] == same_portal]['DP_ID'].unique()
same_sensors = [s for s in same_portal_sensors if s != target_sensor]


neighbour_sensors = df_2_full[df_2_full['PORTAL_clean'] == neighbour_portal]['DP_ID'].unique()
bothportals_sensors = df_2_full[df_2_full['PORTAL_clean'].isin([neighbour_portal, same_portal])]['DP_ID'].unique()
except_target_sensors = [s for s in bothportals_sensors if s != target_sensor]




In [7]:
nlags=15
#nlags=30

In [9]:
#group features into speed vs flow and same portal vs neighbour portal (not lagged)
flow_features_same_nl = [
    f'SENSOR_{sensor}_FLOW'
    for sensor in same_sensors

]
flow_features_neighbour_nl = [
    f'SENSOR_{sensor}_FLOW'
    for sensor in neighbour_sensors

]
speed_features_same_nl = [
    f'SENSOR_{sensor}_SPEED'
    for sensor in same_sensors
]
speed_features_neighbour_nl = [
    f'SENSOR_{sensor}_SPEED'
    for sensor in neighbour_sensors
]

In [10]:
#group features into speed vs flow and same portal vs neighbour portal (lagged)
flow_features_same = [
    f'SENSOR_{sensor}_FLOW_lag_{i+1}'
    for sensor in same_sensors
    for i in range(nlags)
]
flow_features_neighbour = [
    f'SENSOR_{sensor}_FLOW_lag_{i+1}'
    for sensor in neighbour_sensors
    for i in range(nlags)
]
speed_features_same = [
    f'SENSOR_{sensor}_SPEED_lag_{i+1}'
    for sensor in same_sensors
    for i in range(nlags)
]
speed_features_neighbour = [
    f'SENSOR_{sensor}_SPEED_lag_{i+1}'
    for sensor in neighbour_sensors
    for i in range(nlags)
]        

Train -Test Split

In [12]:
#randomly split data for training and test
df_train, df_test = train_test_split(df_lagged_all, test_size=0.2, random_state=42)

Scaling

In [13]:
# FLOW same
scaler_flow_same = StandardScaler()
X_train_scaled_flow_same = scaler_flow_same.fit_transform(df_train[flow_features_same])
X_test_scaled_flow_same  = scaler_flow_same.transform(df_test[flow_features_same])
joblib.dump(scaler_flow_same, "model/scaler_flow_same.pkl")

# SPEED same
scaler_speed_same = StandardScaler()
X_train_scaled_speed_same = scaler_speed_same.fit_transform(df_train[speed_features_same])
X_test_scaled_speed_same  = scaler_speed_same.transform(df_test[speed_features_same])
joblib.dump(scaler_flow_same, "model/scaler_speed_same.pkl")


# FLOW neighbour
scaler_flow_neigh = StandardScaler()
X_train_scaled_flow_neighbour = scaler_flow_neigh.fit_transform(df_train[flow_features_neighbour])
X_test_scaled_flow_neighbour  = scaler_flow_neigh.transform(df_test[flow_features_neighbour])
joblib.dump(scaler_flow_same, "model/scaler_flow_neighbour.pkl")


# SPEED neighbour
scaler_speed_neighbour = StandardScaler()
X_train_scaled_speed_neighbour = scaler_speed_neighbour.fit_transform(df_train[speed_features_neighbour])
X_test_scaled_speed_neighbour  = scaler_speed_neighbour.transform(df_test[speed_features_neighbour])
joblib.dump(scaler_flow_same, "model/scaler_speed_neighbour.pkl")



['model/scaler_speed_neighbour.pkl']

Linear Regression

In [18]:
#prediction from sensors in the same portal

#flow model
model_flow = LinearRegression()
model_flow.fit(df_train[flow_features_same], df_train['FLOW_future_sum'])
#speed model
model_speed = LinearRegression()
model_speed.fit(df_train[speed_features_same], df_train['SPEED_future_mean'])


# prediciton flow
y_pred = model_flow.predict(df_test[flow_features_same])
y_test=df_test['FLOW_future_sum']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Baseline Linear Regression (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")

#rediction speed
y_pred = model_speed.predict(df_test[speed_features_same])
y_test=df_test['SPEED_future_mean']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Baseline Linear Regression (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")

Baseline Linear Regression (FLOW) -> RMSE: 33.543, MAE: 23.809, R2: 0.836
Baseline Linear Regression (SPEED) -> RMSE: 0.861, MAE: 0.462, R2: 0.709




In [19]:
#prediction from sensors in the neighbour portal
#flow model
model_flow = LinearRegression()
model_flow.fit(df_train[flow_features_neighbour], df_train['FLOW_future_sum'])
#speed model
model_speed = LinearRegression()
model_speed.fit(df_train[speed_features_neighbour], df_train['SPEED_future_mean'])


#prediction flow
y_pred = model_flow.predict(df_test[flow_features_neighbour])
y_test=df_test['FLOW_future_sum']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Baseline Linear Regression (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")

#prediction speed
y_pred = model_speed.predict(df_test[speed_features_neighbour])
y_test=df_test['SPEED_future_mean']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Baseline Linear Regression (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")

Baseline Linear Regression (FLOW) -> RMSE: 28.474, MAE: 19.559, R2: 0.882
Baseline Linear Regression (SPEED) -> RMSE: 1.051, MAE: 0.513, R2: 0.566




XGBoost

In [20]:
#prediction from sensors in the same portal

#flow model
model_flow = XGBRegressor(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
model_flow.fit(df_train[flow_features_same], df_train['FLOW_future_sum'])
#speed model
model_speed = XGBRegressor(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
model_speed.fit(df_train[speed_features_same], df_train['SPEED_future_mean'])



#prediction flow
y_pred = model_flow.predict(df_test[flow_features_same])
y_test = df_test['FLOW_future_sum']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"XGBoost Regression (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")

#prediction speed
y_pred = model_speed.predict(df_test[speed_features_same])
y_test = df_test['SPEED_future_mean']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"XGBoost Regression (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")

XGBoost Regression (FLOW) -> RMSE: 28.647, MAE: 20.634, R²: 0.880
XGBoost Regression (SPEED) -> RMSE: 0.810, MAE: 0.401, R²: 0.742




In [21]:
#prediction from sensors in the neighbour portal

#flow model
model_flow = XGBRegressor(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
model_flow.fit(df_train[flow_features_neighbour], df_train['FLOW_future_sum'])
#speed model
model_speed = XGBRegressor(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
model_speed.fit(df_train[speed_features_neighbour], df_train['SPEED_future_mean'])



#prediction flow
y_pred = model_flow.predict(df_test[flow_features_neighbour])
y_test = df_test['FLOW_future_sum']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"XGBoost Regression (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")

#prediction speed
y_pred = model_speed.predict(df_test[speed_features_neighbour])
y_test = df_test['SPEED_future_mean']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"XGBoost Regression (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")

XGBoost Regression (FLOW) -> RMSE: 24.460, MAE: 17.192, R²: 0.913
XGBoost Regression (SPEED) -> RMSE: 0.936, MAE: 0.424, R²: 0.655




xgboost with hyperparameter tuning

In [220]:
#parameters that should be tested
param_grid = {
    "n_estimators": [100, 200, 300],
    "max_depth": [3, 5, 7],
    "learning_rate": [0.001,0.005,0.01, 0.05],
    "subsample": [0.6, 0.8, 1.0],
    "colsample_bytree": [0.6, 0.8, 1.0]
}

In [251]:
#empty dataframe for results
results_df = pd.DataFrame(columns=["Type", "MAE", "RMSE", "R²"])

In [253]:
#function for adding results
def add_result(results_df, type, y_true, y_pred):
    mae  = mean_absolute_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred, squared=False)
    r2   = r2_score(y_true, y_pred)

    new_row = {
        "Type": type,
        "MAE": mae,
        "RMSE": rmse,
        "R²": r2
    }

    return pd.concat([results_df, pd.DataFrame([new_row])], ignore_index=True)

In [257]:
#prediction from sensors in the same portal
#random search flow
random_search_flow = RandomizedSearchCV(
    estimator=XGBRegressor(random_state=42),
    param_distributions=param_grid,
    n_iter=30, 
    scoring="neg_root_mean_squared_error",
    cv=3,
    verbose=2,
    n_jobs=-2
)

random_search_flow.fit(df_train[flow_features_same], df_train["FLOW_future_sum"])
#extract best parameters
print("Beste Parameter (FLOW):", random_search_flow.best_params_)
model_flow = random_search_flow.best_estimator_
#save best model
model_flow.save_model("model/xgb_flow_same.json")
#save features (because in evaluation they have to be put in in same order)
features_used = df_train[flow_features_same].columns.tolist()
joblib.dump(features_used, "model/features_flow_same.pkl")
#predection flow
y_pred = model_flow.predict(df_test[flow_features_same])
y_test = df_test['FLOW_future_sum']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
#add results to dataframe
results_df = add_result(results_df, "FLOW -same portal", y_test, y_pred)
print(f"XGBoost Regression (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")

#random search speed
random_search_speed = RandomizedSearchCV(
    estimator=XGBRegressor(random_state=42),
    param_distributions=param_grid,
    n_iter=30,  
    scoring="neg_root_mean_squared_error",
    cv=3,
    verbose=2,
    n_jobs=-2
)

random_search_speed.fit(df_train[speed_features_same], df_train["SPEED_future_mean"])
#extract best parameters
print("Beste Parameter (SPEED):", random_search_speed.best_params_)
#save best model
model_speed = random_search_speed.best_estimator_
model_speed.save_model("model/xgb_speed_same.json")
#save features (to reuse them in same order on evaluation set)
features_used = df_train[speed_features_same].columns.tolist()
joblib.dump(features_used, "model/features_speed_same.pkl")

#prediction speed
y_pred = model_speed.predict(df_test[speed_features_same])
y_test = df_test['SPEED_future_mean']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
#add results to table
results_df = add_result(results_df, "SPEED -same portal", y_test, y_pred)


print(f"XGBoost Regression (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")

Fitting 3 folds for each of 30 candidates, totalling 90 fits
Beste Parameter (FLOW): {'subsample': 1.0, 'n_estimators': 200, 'max_depth': 7, 'learning_rate': 0.05, 'colsample_bytree': 0.8}
XGBoost Regression (FLOW) -> RMSE: 28.311, MAE: 20.292, R²: 0.883
Fitting 3 folds for each of 30 candidates, totalling 90 fits


  return pd.concat([results_df, pd.DataFrame([new_row])], ignore_index=True)


Beste Parameter (FLOW): {'subsample': 0.6, 'n_estimators': 200, 'max_depth': 5, 'learning_rate': 0.05, 'colsample_bytree': 0.6}
XGBoost Regression (SPEED) -> RMSE: 0.812, MAE: 0.401, R²: 0.740




In [258]:
#prediction from sensors in the neighbour portal
#flow random search
random_search_flow = RandomizedSearchCV(
    estimator=XGBRegressor(random_state=42),
    param_distributions=param_grid,
    n_iter=30,  
    scoring="neg_root_mean_squared_error",
    cv=3,
    verbose=2,
    n_jobs=-2
)

random_search_flow.fit(df_train[flow_features_neighbour], df_train["FLOW_future_sum"])
#save best model
print("Beste Parameter (FLOW):", random_search_flow.best_params_)
model_flow = random_search_flow.best_estimator_
model_flow.save_model("model/xgb_flow_neighbour.json")
#save features (to be able to reuse in same order on evaluation set)
features_used = df_train[flow_features_neighbour].columns.tolist()
joblib.dump(features_used, "model/features_flow_neighbour.pkl")

#prediction flwo
y_pred = model_flow.predict(df_test[flow_features_neighbour])
y_test = df_test['FLOW_future_sum']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
#add results to dataframe
results_df = add_result(results_df, "FLOW -neighbour portal", y_test, y_pred)


print(f"XGBoost Regression (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")

#speed random search
random_search_speed = RandomizedSearchCV(
    estimator=XGBRegressor(random_state=42),
    param_distributions=param_grid,
    n_iter=30, 
    scoring="neg_root_mean_squared_error",
    cv=3,
    verbose=2,
    n_jobs=-2
)


random_search_speed.fit(df_train[speed_features_neighbour], df_train["SPEED_future_mean"])
print("Beste Parameter (FLOW):", random_search_speed.best_params_)
#save best model
model_speed = random_search_speed.best_estimator_
model_speed.save_model("model/xgb_speed_neighbour.json")
#save features order
features_used = df_train[speed_features_neighbour].columns.tolist()
joblib.dump(features_used, "model/features_speed_neighbour.pkl")

#prediction speed
y_pred = model_speed.predict(df_test[speed_features_neighbour])
y_test = df_test['SPEED_future_mean']
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
#add to dataframe
results_df = add_result(results_df, "SPEED -neighbour portal", y_test, y_pred)


print(f"XGBoost Regression (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")

Fitting 3 folds for each of 30 candidates, totalling 90 fits
Beste Parameter (FLOW): {'subsample': 0.8, 'n_estimators': 200, 'max_depth': 7, 'learning_rate': 0.05, 'colsample_bytree': 0.6}
XGBoost Regression (FLOW) -> RMSE: 24.096, MAE: 16.871, R²: 0.915
Fitting 3 folds for each of 30 candidates, totalling 90 fits




Beste Parameter (FLOW): {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 7, 'learning_rate': 0.05, 'colsample_bytree': 0.6}
XGBoost Regression (SPEED) -> RMSE: 0.930, MAE: 0.420, R²: 0.660




In [260]:
results_df.sort_values("Type")

Unnamed: 0,Type,MAE,RMSE,R²
2,FLOW -neighbour portal,16.871492,24.09554,0.915146
0,FLOW -same portal,20.29155,28.310582,0.882863
3,SPEED -neighbour portal,0.4198,0.930045,0.659704
1,SPEED -same portal,0.401327,0.812237,0.740454


Feedforward Neural Network (NN)

In [278]:
#prediction from sensors in the same portal
#flow mdoel
model_flow= Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled_flow_same.shape[1],)),  
    Dense(32, activation='relu'),                                           
    Dense(16, activation='relu'),                                         
    Dense(1)                                                              
])
#compile model with mse as loss and mae as metrics
model_flow.compile(optimizer='adam', loss='mse', metrics=['mae'])

#train model on training data with 20% for validation
history_flow = model_flow.fit(
    X_train_scaled_flow_same, df_train["FLOW_future_sum"],
    validation_split=0.2,  
    epochs=50,
    batch_size=32,
    verbose=0
)


#prediction flow
y_pred = model_flow.predict(X_test_scaled_flow_same)
y_test=df_test["FLOW_future_sum"]
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)

print(f"Fast forward NN (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")

#speed model
model_speed = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled_speed_same.shape[1],)),  
    Dense(32, activation='relu'),                                          
    Dense(16, activation='relu'),                                           
    Dense(1)                                                                
])
#compile model with mse as loss and mae as metrics
model_speed.compile(optimizer='adam', loss='mse', metrics=['mae'])

#train model on training data with 20% validation
history_speed = model_speed.fit(
    X_train_scaled_speed_same, df_train["SPEED_future_mean"],
    validation_split=0.2,  
    epochs=50,
    batch_size=32,
    verbose=0
)


y_pred = model_speed.predict(X_test_scaled_speed_same)
y_test=df_test["SPEED_future_mean"]
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)

print(f"Fast forward NN (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")



[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Fast forward NN (FLOW) -> RMSE: 29.248, MAE: 20.778, R²: 0.875
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Fast forward NN (SPEED) -> RMSE: 0.819, MAE: 0.433, R²: 0.736




In [279]:
#prediction from sensors in the neighbour portal
#flow model
model_flow= Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled_flow_neighbour.shape[1],)),  
    Dense(32, activation='relu'),                                           
    Dense(16, activation='relu'),                                           
    Dense(1)                                                                 
])
#compile model with mse as loss and mea as metrics
model_flow.compile(optimizer='adam', loss='mse', metrics=['mae'])

#train model with 20% for validation
history_flow = model_flow.fit(
    X_train_scaled_flow_neighbour, df_train["FLOW_future_sum"],
    validation_split=0.2,  
    epochs=50,
    batch_size=32,
    verbose=0
)


#prediction flow
y_pred = model_flow.predict(X_test_scaled_flow_neighbour)
y_test=df_test["FLOW_future_sum"]
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)

print(f"Fast forward NN (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")


#model speed
model_speed = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled_speed_neighbour.shape[1],)), 
    Dense(32, activation='relu'),                          
    Dense(16, activation='relu'),         
    Dense(1)                                                   
])
#compile using adam optimiser
model_speed.compile(optimizer='adam', loss='mse', metrics=['mae'])

#train model using 20% for validation
history_speed = model_speed.fit(
    X_train_scaled_speed_neighbour, df_train["SPEED_future_mean"],
    validation_split=0.2,  
    epochs=50,
    batch_size=32,
    verbose=0
)

#prediction speed
y_pred = model_speed.predict(X_test_scaled_speed_neighbour)
y_test=df_test["SPEED_future_mean"]
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)

print(f"Fast forward NN (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Fast forward NN (FLOW) -> RMSE: 24.794, MAE: 17.303, R²: 0.910


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Fast forward NN (SPEED) -> RMSE: 0.969, MAE: 0.458, R²: 0.631




NN with GridSearch

In [87]:
#prediction from sensors in the same portal -flow

y_train = df_train["FLOW_future_sum"]
y_test = df_test["FLOW_future_sum"]

#different parameters that will be tried during gridsearch
number_neurons = [16, 32, 64, 128]
number_layers = [2, 3,4]
dropout_rates = [0.0, 0.05, 0.1]
#dataframe for results
grid_search_df = pd.DataFrame(columns=["number_neurons","number_layers","dropout_rate","train_rmse", "val_rmse"])
#self-made gridsearch
#loop over number neurons
for neurons in number_neurons:
    #loop over number of layers
    for num_layer in number_layers:
        #loop over drop out rate
        for dropout_rate in dropout_rates:
            #built model
            model = Sequential()
            #loop over number of layers
            for _ in range(num_layer):
                #construct model
                model.add(Dense(neurons, activation='relu'))
                if dropout_rate > 0:
                    model.add(Dropout(dropout_rate))
            #finalise model
            model.add(Dense(1))
            optimizer = Adam(learning_rate=0.01)
            model.compile(optimizer=optimizer, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

            #early stop to avoid overfitting to training data
            early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
            reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
            #train model
            hist = model.fit(
                X_train_scaled_flow_same, y_train,
                validation_split=0.2,
                epochs=100,
                batch_size=32,
                callbacks=[early_stop, reduce_lr],
                verbose=0
            )
            #calculate metrics and add to table
            min_err = min(hist.history['rmse'])
            min_val_err = min(hist.history['val_rmse'])
            this_df = pd.DataFrame({
                "number_neurons": [neurons],
                "number_layers": [num_layer],
                "dropout_rate":[dropout_rate],
                "train_rmse": [min_err],
                "val_rmse": [min_val_err]
            })
            grid_search_df = pd.concat([grid_search_df, this_df], axis=0)
            print(neurons, num_layer,dropout_rate, "done")

#print all combinations
grid_search_df.reset_index(drop=True, inplace=True)
print(grid_search_df)
#find best set of parameters
best_config = grid_search_df.loc[grid_search_df['val_rmse'].idxmin()]
best_neurons = int(best_config['number_neurons'])
best_layers = int(best_config['number_layers'])
best_dropout = int(best_config['dropout_rate'])

# retrain model with best parameters
best_model = Sequential()
for _ in range(best_layers):
    best_model.add(Dense(best_neurons, activation='relu'))
    if best_dropout > 0:
        best_model.add(Dropout(best_dropout))
best_model.add(Dense(1))

optimizer = Adam(learning_rate=0.01)
best_model.compile(optimizer=optimizer, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
checkpoint = ModelCheckpoint("model/NN_model_flow_same_best.keras", monitor='val_rmse', save_best_only=True, mode='min')


best_model.fit(
    X_train_scaled_flow_same, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop, reduce_lr, checkpoint],
    verbose=0
)

#prediction with best model
y_pred = best_model.predict(X_test_scaled_flow_same)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)

print(f"Best FLOW NN -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")






  grid_search_df = pd.concat([grid_search_df, it_df], axis=0)


16 2 0.0 done
16 2 0.05 done
16 2 0.1 done
16 3 0.0 done
16 3 0.05 done
16 3 0.1 done
16 4 0.0 done
16 4 0.05 done
16 4 0.1 done
32 2 0.0 done
32 2 0.05 done
32 2 0.1 done
32 3 0.0 done
32 3 0.05 done
32 3 0.1 done
32 4 0.0 done
32 4 0.05 done
32 4 0.1 done
64 2 0.0 done
64 2 0.05 done
64 2 0.1 done
64 3 0.0 done
64 3 0.05 done
64 3 0.1 done
64 4 0.0 done
64 4 0.05 done
64 4 0.1 done
128 2 0.0 done
128 2 0.05 done
128 2 0.1 done
128 3 0.0 done
128 3 0.05 done
128 3 0.1 done
128 4 0.0 done
128 4 0.05 done
128 4 0.1 done
   number_neurons number_layers  dropout_rate  train_rmse   val_rmse
0              16             2          0.00   29.371403  29.733335
1              16             2          0.05   37.075043  33.153324
2              16             2          0.10   42.041279  33.635246
3              16             3          0.00   27.912714  28.976200
4              16             3          0.05   39.897415  33.503639
5              16             3          0.10   43.846443  34



In [89]:
#prediction from sensors in the same portal -speed
y_train = df_train["SPEED_future_mean"]
y_test = df_test["SPEED_future_mean"]

#different parameters that will be tried during gridsearch
number_neurons = [16, 32, 64, 128]
number_layers = [2, 3,4]
dropout_rates = [0.0, 0.05, 0.1]
#dataframe for gridsearch results
grid_search_df = pd.DataFrame(columns=["number_neurons","number_layers","dropout_rate","train_rmse", "val_rmse"])
#manual gridsearch
#loop over number of neurons
for neurons in number_neurons:
    #loop over number of layers
    for num_layer in number_layers:
        #loop over different dropoutrates
        for dropout_rate in dropout_rates:
            #start building model
            model = Sequential()
            #loop over the number of layers
            for _ in range(num_layer):
                #continue building model
                model.add(Dense(neurons, activation='relu'))
                if dropout_rate > 0:
                    model.add(Dropout(dropout_rate))
            #finalise model
            model.add(Dense(1))
            optimizer = Adam(learning_rate=0.01)
            model.compile(optimizer=optimizer, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
            #early stop to prevent overfitting
            early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
            reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
            #training of model with 20% validation
            hist = model.fit(
                X_train_scaled_speed_same, y_train,
                validation_split=0.2,
                epochs=100,
                batch_size=32,
                callbacks=[early_stop, reduce_lr],
                verbose=0
            )
            #extract metrics and add to dataframe
            min_err = min(hist.history['rmse'])
            min_val_err = min(hist.history['val_rmse'])
            this_df = pd.DataFrame({
                "number_neurons": [neurons],
                "number_layers": [num_layer],
                "dropout_rate":[dropout_rate],
                "train_rmse": [min_err],
                "val_rmse": [min_val_err]
            })
            grid_search_df = pd.concat([grid_search_df, this_df], axis=0)
            print(neurons, num_layer,dropout_rate, "done")

#print dtaframe with all results
grid_search_df.reset_index(drop=True, inplace=True)
print(grid_search_df)
#extract best parameters
best_config = grid_search_df.loc[grid_search_df['val_rmse'].idxmin()]
best_neurons = int(best_config['number_neurons'])
best_layers = int(best_config['number_layers'])
best_dropout = int(best_config['dropout_rate'])

#retrain model with best parameters
best_model = Sequential()
for _ in range(best_layers):
    best_model.add(Dense(best_neurons, activation='relu'))
    if best_dropout > 0:
        best_model.add(Dropout(best_dropout))
best_model.add(Dense(1))

optimizer = Adam(learning_rate=0.01)
best_model.compile(optimizer=optimizer, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
checkpoint = ModelCheckpoint("model/NN_model_speed_same_best.keras", monitor='val_rmse', save_best_only=True, mode='min')


best_model.fit(
    X_train_scaled_speed_same, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop, reduce_lr, checkpoint],
    verbose=0
)

# prediction speed with best model
y_pred = best_model.predict(X_test_scaled_speed_same)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)

print(f"Best SPEED NN -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")






  grid_search_df = pd.concat([grid_search_df, it_df], axis=0)


16 2 0.0 done
16 2 0.05 done
16 2 0.1 done
16 3 0.0 done
16 3 0.05 done
16 3 0.1 done
16 4 0.0 done
16 4 0.05 done
16 4 0.1 done
32 2 0.0 done
32 2 0.05 done
32 2 0.1 done
32 3 0.0 done
32 3 0.05 done
32 3 0.1 done
32 4 0.0 done
32 4 0.05 done
32 4 0.1 done
64 2 0.0 done
64 2 0.05 done
64 2 0.1 done
64 3 0.0 done
64 3 0.05 done
64 3 0.1 done
64 4 0.0 done
64 4 0.05 done
64 4 0.1 done
128 2 0.0 done
128 2 0.05 done
128 2 0.1 done
128 3 0.0 done
128 3 0.05 done
128 3 0.1 done
128 4 0.0 done
128 4 0.05 done
128 4 0.1 done
   number_neurons number_layers  dropout_rate  train_rmse  val_rmse
0              16             2          0.00    0.805148  0.790606
1              16             2          0.05    0.973746  0.811842
2              16             2          0.10    0.822249  0.793180
3              16             3          0.00    0.733872  0.737610
4              16             3          0.05    0.812134  0.754808
5              16             3          0.10    0.878491  0.784833



In [91]:
#prediction from sensors in the neighbour portal -flow
y_train = df_train["FLOW_future_sum"]
y_test = df_test["FLOW_future_sum"]
#parameters that are addapted in gridsearch
number_neurons = [16, 32, 64, 128]
number_layers = [2, 3,4]
dropout_rates = [0.0, 0.05, 0.1]
#prepare dataframe for gridsearch
grid_search_df = pd.DataFrame(columns=["number_neurons","number_layers","dropout_rate","train_rmse", "val_rmse"])
#manual gridsearch
#loop over the number of neurons
for neurons in number_neurons:
    #loop over number of layers
    for num_layer in number_layers:
        #loop over drop out rates
        for dropout_rate in dropout_rates:
            #start building model
            model = Sequential()
            #add the number of layers defined
            for _ in range(num_layer):
                model.add(Dense(neurons, activation='relu'))
                if dropout_rate > 0:
                    model.add(Dropout(dropout_rate))
            #finalise model construction
            model.add(Dense(1))
            optimizer = Adam(learning_rate=0.01)
            model.compile(optimizer=optimizer, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
            #add early stop to prevent overfitting
            early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
            reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
            #fit model
            hist = model.fit(
                X_train_scaled_flow_neighbour, y_train,
                validation_split=0.2,
                epochs=100,
                batch_size=32,
                callbacks=[early_stop, reduce_lr],
                verbose=0
            )
            #extract metrics and put in dataframe
            min_err = min(hist.history['rmse'])
            min_val_err = min(hist.history['val_rmse'])
            this_df = pd.DataFrame({
                "number_neurons": [neurons],
                "number_layers": [num_layer],
                "dropout_rate":[dropout_rate],
                "train_rmse": [min_err],
                "val_rmse": [min_val_err]
            })
            grid_search_df = pd.concat([grid_search_df, this_df], axis=0)
            print(neurons, num_layer,dropout_rate, "done")

#print whole dataframe
grid_search_df.reset_index(drop=True, inplace=True)
print(grid_search_df)
best_config = grid_search_df.loc[grid_search_df['val_rmse'].idxmin()]
best_neurons = int(best_config['number_neurons'])
best_layers = int(best_config['number_layers'])
best_dropout = int(best_config['dropout_rate'])
print(best_neurons, best_layers, best_dropout)

# retrain model with best parameters
best_model = Sequential()
for _ in range(best_layers):
    best_model.add(Dense(best_neurons, activation='relu'))
    if best_dropout > 0:
        best_model.add(Dropout(best_dropout))
best_model.add(Dense(1))

optimizer = Adam(learning_rate=0.01)
best_model.compile(optimizer=optimizer, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
checkpoint = ModelCheckpoint("model/NN_model_flow_neighbour_best.keras", monitor='val_rmse', save_best_only=True, mode='min')


best_model.fit(
    X_train_scaled_flow_neighbour, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop, reduce_lr, checkpoint],
    verbose=0
)

#prediction on testset
y_pred = best_model.predict(X_test_scaled_flow_neighbour)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)

print(f"Best FLOW NN -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")






  grid_search_df = pd.concat([grid_search_df, it_df], axis=0)


16 2 0.0 done
16 2 0.05 done
16 2 0.1 done
16 3 0.0 done
16 3 0.05 done
16 3 0.1 done
16 4 0.0 done
16 4 0.05 done
16 4 0.1 done
32 2 0.0 done
32 2 0.05 done
32 2 0.1 done
32 3 0.0 done
32 3 0.05 done
32 3 0.1 done
32 4 0.0 done
32 4 0.05 done
32 4 0.1 done
64 2 0.0 done
64 2 0.05 done
64 2 0.1 done
64 3 0.0 done
64 3 0.05 done
64 3 0.1 done
64 4 0.0 done
64 4 0.05 done
64 4 0.1 done
128 2 0.0 done
128 2 0.05 done
128 2 0.1 done
128 3 0.0 done
128 3 0.05 done
128 3 0.1 done
128 4 0.0 done
128 4 0.05 done
128 4 0.1 done
   number_neurons number_layers  dropout_rate  train_rmse   val_rmse
0              16             2          0.00   24.768108  25.254890
1              16             2          0.05   29.870770  26.075983
2              16             2          0.10   33.876259  25.873125
3              16             3          0.00   23.947803  24.581074
4              16             3          0.05   32.065853  28.567739
5              16             3          0.10   34.205040  26



In [93]:
#prediction from sensors in the neighbour portal -speed

y_train = df_train["SPEED_future_mean"]
y_test = df_test["SPEED_future_mean"]
#values for gridsearch
number_neurons = [16, 32, 64, 128]
number_layers = [2, 3,4]
dropout_rates = [0.0, 0.05, 0.1]
#prerpare dataframe for gridserach
grid_search_df = pd.DataFrame(columns=["number_neurons","number_layers","dropout_rate","train_rmse", "val_rmse"])
#,manual gridsearch
#manual gridsearch
#loop over number of neurons
for neurons in number_neurons:
    #loop over number of laers
    for num_layer in number_layers:
        #loop over different dropoutrates
        for dropout_rate in dropout_rates:
            #start constructi g model
            model = Sequential()
            #add the number of layers specified
            for _ in range(num_layer):
                model.add(Dense(neurons, activation='relu'))
                if dropout_rate > 0:
                    model.add(Dropout(dropout_rate))
            #finalise model
            model.add(Dense(1))
            optimizer = Adam(learning_rate=0.01)
            model.compile(optimizer=optimizer, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
            #earily stop to prevent overfittng
            early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
            reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
    
            #train model with 20% validation
            hist = model.fit(
                X_train_scaled_speed_neighbour, y_train,
                validation_split=0.2,
                epochs=100,
                batch_size=32,
                callbacks=[early_stop, reduce_lr],
                verbose=0
            )
    
            # save metrics in dataframe
            min_err = min(hist.history['rmse'])
            min_val_err = min(hist.history['val_rmse'])
            it_df = pd.DataFrame({
                "number_neurons": [neurons],
                "number_layers": [num_layer],
                "dropout_rate":[dropout_rate],
                "train_rmse": [min_err],
                "val_rmse": [min_val_err]
            })
            grid_search_df = pd.concat([grid_search_df, it_df], axis=0)
            print(neurons, num_layer,dropout_rate, "done")

# print whole dataframe
grid_search_df.reset_index(drop=True, inplace=True)
print(grid_search_df)
#extract best set of parameters
best_config = grid_search_df.loc[grid_search_df['val_rmse'].idxmin()]
best_neurons = int(best_config['number_neurons'])
best_layers = int(best_config['number_layers'])
best_dropout = int(best_config['dropout_rate'])
print(best_neurons, best_layers, best_dropout)

# retrain best model
best_model = Sequential()
for _ in range(best_layers):
    best_model.add(Dense(best_neurons, activation='relu'))
    if best_dropout > 0:
        best_model.add(Dropout(best_dropout))
best_model.add(Dense(1))

optimizer = Adam(learning_rate=0.01)
best_model.compile(optimizer=optimizer, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
checkpoint = ModelCheckpoint("model/NN_model_speed_neighbour_best.keras", monitor='val_rmse', save_best_only=True, mode='min')

best_model.fit(
    X_train_scaled_speed_neighbour, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop, reduce_lr, checkpoint],
    verbose=0
)

# prediction on testset
y_pred = best_model.predict(X_test_scaled_speed_neighbour)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)

print(f"Best SPEED NN -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")






  grid_search_df = pd.concat([grid_search_df, it_df], axis=0)


16 2 0.0 done
16 2 0.05 done
16 2 0.1 done
16 3 0.0 done
16 3 0.05 done
16 3 0.1 done
16 4 0.0 done
16 4 0.05 done
16 4 0.1 done
32 2 0.0 done
32 2 0.05 done
32 2 0.1 done
32 3 0.0 done
32 3 0.05 done
32 3 0.1 done
32 4 0.0 done
32 4 0.05 done
32 4 0.1 done
64 2 0.0 done
64 2 0.05 done
64 2 0.1 done
64 3 0.0 done
64 3 0.05 done
64 3 0.1 done
64 4 0.0 done
64 4 0.05 done
64 4 0.1 done
128 2 0.0 done
128 2 0.05 done
128 2 0.1 done
128 3 0.0 done
128 3 0.05 done
128 3 0.1 done
128 4 0.0 done
128 4 0.05 done
128 4 0.1 done
   number_neurons number_layers  dropout_rate  train_rmse  val_rmse
0              16             2          0.00    0.917047  0.926511
1              16             2          0.05    0.908428  0.900218
2              16             2          0.10    0.936143  0.904106
3              16             3          0.00    0.855502  0.891089
4              16             3          0.05    0.913606  0.893798
5              16             3          0.10    0.961896  0.917284



LSTM -15 output features 

In [23]:
#function to create sequences as needed by lstm model with 15 input and 15 output features
def create_sequences(X, y, seq_length=15, horizon=15):
    Xs, ys = [], []
    for i in range(len(X) - seq_length - horizon + 1):
        Xs.append(X[i:i+seq_length])
        ys.append(y[i+seq_length:i+seq_length+horizon])
    return np.array(Xs), np.array(ys)


In [24]:
#prediction from sensors in the same portal-flow

X = df_all[flow_features_same_nl].values
y = df_all[f'SENSOR_{target_sensor}_FLOW'].values
#create sequences
X_seq, y_seq = create_sequences(X, y, seq_length=15, horizon=15)
X_scaler = StandardScaler()
#scale values
X_scaled = X_scaler.fit_transform(X_seq.reshape(-1, X_seq.shape[2])).reshape(X_seq.shape)
X_seq=X_scaled

n_features = X_seq.shape[2]
#create model
model = Sequential([
    LSTM(64, activation='tanh', input_shape=(15,n_features), return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=False),
    Dense(15)  
])
#compile model
model.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
#split test-training data
split = int(len(X_seq) * 0.8)
X_train, X_test = X_seq[:split], X_seq[split:]
y_train, y_test = y_seq[:split], y_seq[split:]
#fit model
early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
checkpoint = ModelCheckpoint("model/LSTM_model_flow_same_15output.keras", monitor='val_rmse', save_best_only=True, mode='min')


model.fit(X_train, y_train, epochs=20, batch_size=32,callbacks=[early_stop, reduce_lr, checkpoint], validation_split=0.2,verbose=1)

#predict 15 output values
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"LSTM (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")

#sum 15 output values for summed flow
y_test_sum = y_test.sum(axis=1)
y_pred_sum = y_pred.sum(axis=1)
#metrics for summed flow
rmse = mean_squared_error(y_test_sum, y_pred_sum, squared=False)
mae = mean_absolute_error(y_test_sum, y_pred_sum)
r2 = r2_score(y_test_sum, y_pred_sum)

print(f"LSTM Sum Forecast (FLOW) → RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")





  super().__init__(**kwargs)


Epoch 1/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 13ms/step - loss: 31.8710 - rmse: 5.6454 - val_loss: 19.3437 - val_rmse: 4.3981 - learning_rate: 0.0010
Epoch 2/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 13ms/step - loss: 17.1737 - rmse: 4.1441 - val_loss: 19.2898 - val_rmse: 4.3920 - learning_rate: 0.0010
Epoch 3/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 13ms/step - loss: 17.0409 - rmse: 4.1281 - val_loss: 19.3172 - val_rmse: 4.3951 - learning_rate: 0.0010
Epoch 4/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 13ms/step - loss: 16.9772 - rmse: 4.1203 - val_loss: 19.2657 - val_rmse: 4.3893 - learning_rate: 0.0010
Epoch 5/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - loss: 16.9141 - rmse: 4.1127 - val_loss: 19.7873 - val_rmse: 4.4483 - learning_rate: 0.0010
Epoch 6/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 



In [25]:
#prediction from sensors in the same portal-speed

X = df_all[speed_features_same_nl].values
y = df_all[f'SENSOR_{target_sensor}_SPEED'].values


#create sequences
X_seq, y_seq = create_sequences(X, y, seq_length=15, horizon=15)
#scale
X_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X_seq.reshape(-1, X_seq.shape[2])).reshape(X_seq.shape)
X_seq=X_scaled

n_features = X_seq.shape[2]
#create model
model = Sequential([
    LSTM(64, activation='tanh', input_shape=(15,n_features), return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=False),
    Dense(15)  
])
#compile model
model.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
#make train-test-split
split = int(len(X_seq) * 0.8)
X_train, X_test = X_seq[:split], X_seq[split:]
y_train, y_test = y_seq[:split], y_seq[split:]

#train model and save it, early stop to avoid overfitting
early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
checkpoint = ModelCheckpoint("model/LSTM_model_speed_same_15output.keras", monitor='val_rmse', save_best_only=True, mode='min')

model.fit(X_train, y_train, epochs=20, batch_size=32,callbacks=[early_stop, reduce_lr, checkpoint], validation_split=0.2,verbose=1)

#predict 15 output values
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"LSTM (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")

#take mean for average speed
y_test_mean = y_test.mean(axis=1)
y_pred_mean = y_pred.mean(axis=1)

#metrics for average speed
rmse = mean_squared_error(y_test_mean, y_pred_mean, squared=False)
mae = mean_absolute_error(y_test_mean, y_pred_mean)
r2 = r2_score(y_test_mean, y_pred_mean)

print(f"LSTM Mean Forecast (SPEED) → RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")





Epoch 1/20


  super().__init__(**kwargs)


[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 14ms/step - loss: 22.1927 - rmse: 4.7109 - val_loss: 2.9518 - val_rmse: 1.7181 - learning_rate: 0.0010
Epoch 2/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 2.4795 - rmse: 1.5747 - val_loss: 1.6654 - val_rmse: 1.2905 - learning_rate: 0.0010
Epoch 3/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 1.7792 - rmse: 1.3339 - val_loss: 1.6525 - val_rmse: 1.2855 - learning_rate: 0.0010
Epoch 4/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 1.7465 - rmse: 1.3215 - val_loss: 1.6705 - val_rmse: 1.2925 - learning_rate: 0.0010
Epoch 5/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 13ms/step - loss: 1.7276 - rmse: 1.3144 - val_loss: 1.6751 - val_rmse: 1.2943 - learning_rate: 0.0010
Epoch 6/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 1.



In [26]:
#prediction from sensors in the neighbour portal-flow

X = df_all[flow_features_neighbour_nl].values
y = df_all[f'SENSOR_{target_sensor}_FLOW'].values


#create sequence
X_seq, y_seq = create_sequences(X, y, seq_length=15, horizon=15)
X_scaler = StandardScaler()
#scale values
X_scaled = X_scaler.fit_transform(X_seq.reshape(-1, X_seq.shape[2])).reshape(X_seq.shape)
X_seq=X_scaled

n_features = X_seq.shape[2]
#create model
model = Sequential([
    LSTM(64, activation='tanh', input_shape=(15,n_features), return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=False),
    Dense(15)  
])
#compile model
model.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
#split train-test data
split = int(len(X_seq) * 0.8)
X_train, X_test = X_seq[:split], X_seq[split:]
y_train, y_test = y_seq[:split], y_seq[split:]

#train model with early stop and save it
early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
checkpoint = ModelCheckpoint("model/LSTM_model_flow_neighbour_15output.keras", monitor='val_rmse', save_best_only=True, mode='min')
model.fit(X_train, y_train, epochs=20, batch_size=32,callbacks=[early_stop, reduce_lr, checkpoint], validation_split=0.2,verbose=1)

#predict 15 output values
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"LSTM (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")
#sum 15 output values for summed flow
y_test_sum = y_test.sum(axis=1)
y_pred_sum = y_pred.sum(axis=1)

# calculate metrics for summed flow
rmse = mean_squared_error(y_test_sum, y_pred_sum, squared=False)
mae = mean_absolute_error(y_test_sum, y_pred_sum)
r2 = r2_score(y_test_sum, y_pred_sum)

print(f"LSTM Sum Forecast (FLOW) → RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")

print(y_pred.shape)
print(y_pred_sum.shape)



Epoch 1/20


  super().__init__(**kwargs)


[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 14ms/step - loss: 31.2194 - rmse: 5.5874 - val_loss: 18.1018 - val_rmse: 4.2546 - learning_rate: 0.0010
Epoch 2/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 16.2833 - rmse: 4.0353 - val_loss: 18.0064 - val_rmse: 4.2434 - learning_rate: 0.0010
Epoch 3/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 16.2089 - rmse: 4.0260 - val_loss: 18.1152 - val_rmse: 4.2562 - learning_rate: 0.0010
Epoch 4/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 16.1241 - rmse: 4.0155 - val_loss: 17.9899 - val_rmse: 4.2415 - learning_rate: 0.0010
Epoch 5/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 13ms/step - loss: 16.0133 - rmse: 4.0017 - val_loss: 18.1974 - val_rmse: 4.2658 - learning_rate: 0.0010
Epoch 6/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step -



In [27]:
#prediction from sensors in the neighbour portal-speed

X = df_all[speed_features_neighbour_nl].values
y = df_all[f'SENSOR_{target_sensor}_SPEED'].values


#create sequences
X_seq, y_seq = create_sequences(X, y, seq_length=15, horizon=15)
#scale it
X_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X_seq.reshape(-1, X_seq.shape[2])).reshape(X_seq.shape)
X_seq=X_scaled

n_features = X_seq.shape[2]
#create model
model = Sequential([
    LSTM(64, activation='tanh', input_shape=(15,n_features), return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=False),
    Dense(15)  
])
#compile model
model.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
#train test split
split = int(len(X_seq) * 0.8)
X_train, X_test = X_seq[:split], X_seq[split:]
y_train, y_test = y_seq[:split], y_seq[split:]

#fit mdoel with early stop to prevent overfitting and save
early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)
checkpoint = ModelCheckpoint("model/LSTM_model_speed_neighbour_15output.keras", monitor='val_rmse', save_best_only=True, mode='min')
model.fit(X_train, y_train, epochs=20, batch_size=32,callbacks=[early_stop, reduce_lr, checkpoint], validation_split=0.2,verbose=1)

#predict 15 output values
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"LSTM (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")
#take mean of 15 output to get average speed
y_test_mean = y_test.mean(axis=1)
y_pred_mean = y_pred.mean(axis=1)
#compute metrics for mean speed
rmse = mean_squared_error(y_test_mean, y_pred_mean, squared=False)
mae = mean_absolute_error(y_test_mean, y_pred_mean)
r2 = r2_score(y_test_mean, y_pred_mean)

print(f"LSTM Mean Forecast (SPEED) → RMSE: {rmse:.3f}, MAE: {mae:.3f}, R²: {r2:.3f}")





Epoch 1/20


  super().__init__(**kwargs)


[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 14ms/step - loss: 22.7986 - rmse: 4.7748 - val_loss: 2.9560 - val_rmse: 1.7193 - learning_rate: 0.0010
Epoch 2/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 14ms/step - loss: 2.2857 - rmse: 1.5119 - val_loss: 2.5317 - val_rmse: 1.5911 - learning_rate: 0.0010
Epoch 3/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 14ms/step - loss: 2.0106 - rmse: 1.4179 - val_loss: 2.6996 - val_rmse: 1.6430 - learning_rate: 0.0010
Epoch 4/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 1.9894 - rmse: 1.4105 - val_loss: 2.3272 - val_rmse: 1.5255 - learning_rate: 0.0010
Epoch 5/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 13ms/step - loss: 1.9724 - rmse: 1.4044 - val_loss: 2.5430 - val_rmse: 1.5947 - learning_rate: 0.0010
Epoch 6/20
[1m1523/1523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 1.



LSTM -directly one output feature

In [31]:
#adds the target value (summed flow for upcoming 15 mins)
df_all['FLOW_future_sum'] = (
    df_all[f'SENSOR_{target_sensor}_FLOW']
    .rolling(15, min_periods=15)
    .sum()
    .shift(-14)  # so that the sum at  t=07:02 is the sum of the values 07:02–07:16 
    .reset_index(level=0, drop=True)
)
#adds the target value (mean speed for upcoming 15 mins)
df_all['SPEED_future_mean'] = (
    df_all[f'SENSOR_{target_sensor}_SPEED']
    .rolling(15, min_periods=15)
    .mean()
    .shift(-14)  # so that the mean at  t=07:02 is the mean of the values 07:02–07:16 
    .reset_index(level=0, drop=True)
)


print(df_all.shape)
df_all = df_all[df_all['Datetime'].dt.time <= time(9, 45)]
print(df_all.shape)


(73006, 15)
(73006, 15)


In [32]:
def create_sequences_sum(X, y, seq_length=15, horizon=15): #create sequences with target sum flow directly
    Xs, ys = [], []
    for i in range(len(X) - seq_length - horizon + 1):
        Xs.append(X[i:i+seq_length])
        ys.append(y[i+seq_length:i+seq_length+horizon].sum())  
    return np.array(Xs), np.array(ys)

def create_sequences_mean(X, y, seq_length=15, horizon=15): #create sequences with speedmean as target directly
    Xs, ys = [], []
    for i in range(len(X) - seq_length - horizon + 1):
        Xs.append(X[i:i+seq_length])
        # statt array von 15 Werten → Summe
        ys.append(y[i+seq_length:i+seq_length+horizon].mean())  
    return np.array(Xs), np.array(ys)


In [33]:
#early stop to prevent overfitting and reduce learning rate
early_stop = EarlyStopping(monitor='val_rmse', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3)            

In [236]:
#prediction from sensors in the same portal-flow
#split data
split = int(len(X) * 0.8)
df_train, df_test = df_all[:split], df_all[split:]
X_train = df_train[flow_features_same].values
y_train = df_train['FLOW_future_sum'].values
X_test = df_test[flow_features_same].values
y_test = df_test['FLOW_future_sum'].values

#create sequences
X_train_seq, y_train_seq = create_sequences_sum(X_train, y_train, seq_length=15, horizon=15)
X_test_seq, y_test_seq = create_sequences_sum(X_test, y_test, seq_length=15, horizon=15)

#scale data
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train_seq.reshape(-1, X_train_seq.shape[2])).reshape(X_train_seq.shape)
X_test_scaled  = X_scaler.transform(X_test_seq.reshape(-1, X_test_seq.shape[2])).reshape(X_test_seq.shape)
y_train_scaled = y_scaler.fit_transform(y_train_seq.reshape(-1, 1))
y_test_scaled  = y_scaler.transform(y_test_seq.reshape(-1, 1))


n_features = X_train_scaled.shape[2]
#for saving
checkpoint = ModelCheckpoint("model/LSTM_model_flow_same_best.keras", monitor='val_rmse', save_best_only=True, mode='min')
#create model
model = Sequential([
    LSTM(64, activation='tanh', input_shape=(15, n_features), return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=True),
    LSTM(64, activation='tanh'),
    Dense(1)
])
#compile model
model.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
#train model with 20% validation
hist = model.fit(
                X_train_scaled, y_train_scaled,
                validation_split=0.2,
                epochs=100,
                batch_size=32,
                callbacks=[early_stop, reduce_lr, checkpoint],
                verbose=1
            )

#predict (directly 1 output)
y_pred = model.predict(X_test_scaled)
rmse = mean_squared_error(y_test_scaled, y_pred, squared=False)
mae = mean_absolute_error(y_test_scaled, y_pred)
r2 = r2_score(y_test_scaled, y_pred)

print(f"LSTM 1 output (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")

Epoch 1/100


  super().__init__(**kwargs)


[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 16ms/step - loss: 0.2134 - rmse: 0.4619 - val_loss: 0.2563 - val_rmse: 0.5063 - learning_rate: 0.0010
Epoch 2/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 16ms/step - loss: 0.1929 - rmse: 0.4392 - val_loss: 0.2326 - val_rmse: 0.4823 - learning_rate: 0.0010
Epoch 3/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 16ms/step - loss: 0.1865 - rmse: 0.4318 - val_loss: 0.2620 - val_rmse: 0.5119 - learning_rate: 0.0010
Epoch 4/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 15ms/step - loss: 0.1817 - rmse: 0.4263 - val_loss: 0.2755 - val_rmse: 0.5249 - learning_rate: 5.0000e-04
Epoch 5/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 15ms/step - loss: 0.1803 - rmse: 0.4246 - val_loss: 0.2622 - val_rmse: 0.5120 - learning_rate: 5.0000e-04
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step
LSTM 1 output 



In [302]:
#prediction from sensors in the same portal-speed

#split data
split = int(len(X) * 0.8)
df_train, df_test = df_all[:split], df_all[split:]
X_train = df_train[speed_features_same_nl].values
y_train = df_train['SPEED_future_mean'].values
X_test = df_test[speed_features_same_nl].values
y_test = df_test['SPEED_future_mean'].values

#create sequences
X_train_seq, y_train_seq = create_sequences_mean(X_train, y_train, seq_length=15, horizon=15)
X_test_seq, y_test_seq = create_sequences_mean(X_test, y_test, seq_length=15, horizon=15)

#scale data
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train_seq.reshape(-1, X_train_seq.shape[2])).reshape(X_train_seq.shape)
X_test_scaled  = X_scaler.transform(X_test_seq.reshape(-1, X_test_seq.shape[2])).reshape(X_test_seq.shape)
y_train_scaled = y_scaler.fit_transform(y_train_seq.reshape(-1, 1))
y_test_scaled  = y_scaler.transform(y_test_seq.reshape(-1, 1))


n_features = X_train_scaled.shape[2]

#for saving
checkpoint = ModelCheckpoint("model/LSTM_model_speed_same_best.keras", monitor='val_rmse', save_best_only=True, mode='min')
#create model
model = Sequential([
    LSTM(64, activation='tanh', input_shape=(15, n_features), return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=True),
    LSTM(64, activation='tanh'),
    Dense(1)
])
#compile model
model.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
#fit training data wit h20% validation
hist = model.fit(
                X_train_scaled, y_train_scaled,
                validation_split=0.2,
                epochs=100,
                batch_size=32,
                callbacks=[early_stop, reduce_lr, checkpoint],
                verbose=1
            )


#predict (directly 1 output)
y_pred_scaled = model.predict(X_test_scaled)
y_pred = y_scaler.inverse_transform(y_pred_scaled)
y_true = y_scaler.inverse_transform(y_test_scaled)
rmse = mean_squared_error(y_true, y_pred, squared=False)
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print(f"LSTM 1 output (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")

Epoch 1/100


  super().__init__(**kwargs)


[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 18ms/step - loss: 0.4508 - rmse: 0.6714 - val_loss: 0.3082 - val_rmse: 0.5552 - learning_rate: 0.0010
Epoch 2/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 17ms/step - loss: 0.4152 - rmse: 0.6443 - val_loss: 0.3066 - val_rmse: 0.5537 - learning_rate: 0.0010
Epoch 3/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 17ms/step - loss: 0.4037 - rmse: 0.6354 - val_loss: 0.3015 - val_rmse: 0.5491 - learning_rate: 0.0010
Epoch 4/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 17ms/step - loss: 0.3975 - rmse: 0.6304 - val_loss: 0.3394 - val_rmse: 0.5826 - learning_rate: 0.0010
Epoch 5/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 17ms/step - loss: 0.3908 - rmse: 0.6251 - val_loss: 0.2969 - val_rmse: 0.5448 - learning_rate: 0.0010
Epoch 6/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 17ms/step - loss



In [205]:
#prediction from sensors in the neighbour portal-flow

#split data
split = int(len(X) * 0.8)
df_train, df_test = df_all[:split], df_all[split:]
X_train = df_train[flow_features_neighbour_nl].values
y_train = df_train['FLOW_future_sum'].values
X_test = df_test[flow_features_neighbour_nl].values
y_test = df_test['FLOW_future_sum'].values

#create sequence
X_train_seq, y_train_seq = create_sequences_sum(X_train, y_train, seq_length=15, horizon=15)
X_test_seq, y_test_seq = create_sequences_sum(X_test, y_test, seq_length=15, horizon=15)

#scale data
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train_seq.reshape(-1, X_train_seq.shape[2])).reshape(X_train_seq.shape)
X_test_scaled  = X_scaler.transform(X_test_seq.reshape(-1, X_test_seq.shape[2])).reshape(X_test_seq.shape)
y_train_scaled = y_scaler.fit_transform(y_train_seq.reshape(-1, 1))
y_test_scaled  = y_scaler.transform(y_test_seq.reshape(-1, 1))


n_features = X_train_scaled.shape[2]
#for saving
checkpoint = ModelCheckpoint("model/LSTM_model_flow_neighbour_best.keras", monitor='val_rmse', save_best_only=True, mode='min')
#create model
model = Sequential([
    LSTM(64, activation='tanh', input_shape=(15, n_features), return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=True),
    LSTM(64, activation='tanh'),
    Dense(1)
])
#compile model
model.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
#train model 
hist = model.fit(
                X_train_scaled, y_train_scaled,
                validation_split=0.2,
                epochs=100,
                batch_size=32,
                callbacks=[early_stop, reduce_lr, checkpoint],
                verbose=1
            )

#predict model (directly one output)
y_pred = model.predict(X_test_scaled)
rmse = mean_squared_error(y_test_scaled, y_pred, squared=False)
mae = mean_absolute_error(y_test_scaled, y_pred)
r2 = r2_score(y_test_scaled, y_pred)

print(f"LSTM 1 output (FLOW) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")

Epoch 1/100


  super().__init__(**kwargs)


[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - loss: 0.1518 - rmse: 0.3896 - val_loss: 0.2617 - val_rmse: 0.5116 - learning_rate: 0.0010
Epoch 2/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.1194 - rmse: 0.3455 - val_loss: 0.2489 - val_rmse: 0.4989 - learning_rate: 0.0010
Epoch 3/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.1129 - rmse: 0.3361 - val_loss: 0.2693 - val_rmse: 0.5190 - learning_rate: 0.0010
Epoch 4/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.1097 - rmse: 0.3313 - val_loss: 0.2553 - val_rmse: 0.5053 - learning_rate: 0.0010
Epoch 5/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.1082 - rmse: 0.3290 - val_loss: 0.2957 - val_rmse: 0.5438 - learning_rate: 0.0010
Epoch 6/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.1018 - rmse: 0



In [305]:
#prediction from sensors in the neighbour portal-speed
#split data
split = int(len(X) * 0.8)
df_train, df_test = df_all[:split], df_all[split:]
X_train = df_train[speed_features_neighbour_nl].values
y_train = df_train['SPEED_future_mean'].values
X_test = df_test[speed_features_neighbour_nl].values
y_test = df_test['SPEED_future_mean'].values

#create sequence
X_train_seq, y_train_seq = create_sequences_mean(X_train, y_train, seq_length=15, horizon=15)
X_test_seq, y_test_seq = create_sequences_mean(X_test, y_test, seq_length=15, horizon=15)

#scale data
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train_seq.reshape(-1, X_train_seq.shape[2])).reshape(X_train_seq.shape)
X_test_scaled  = X_scaler.transform(X_test_seq.reshape(-1, X_test_seq.shape[2])).reshape(X_test_seq.shape)
y_train_scaled = y_scaler.fit_transform(y_train_seq.reshape(-1, 1))
y_test_scaled  = y_scaler.transform(y_test_seq.reshape(-1, 1))


n_features = X_train_scaled.shape[2]
#for saving
checkpoint = ModelCheckpoint("model/LSTM_model_speed_same_best.keras", monitor='val_rmse', save_best_only=True, mode='min')
#create model
model = Sequential([
    LSTM(64, activation='tanh', input_shape=(15, n_features), return_sequences=True),
    LSTM(64, activation='tanh', return_sequences=True),
    LSTM(64, activation='tanh'),
    Dense(1)
])
#compile model
model.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
#train model
hist = model.fit(
                X_train_scaled, y_train_scaled,
                validation_split=0.2,
                epochs=100,
                batch_size=32,
                callbacks=[early_stop, reduce_lr, checkpoint],
                verbose=1
            )

#predict (directly 1 output)
y_pred_scaled = model.predict(X_test_scaled)
y_pred = y_scaler.inverse_transform(y_pred_scaled)
y_true = y_scaler.inverse_transform(y_test_scaled)
rmse = mean_squared_error(y_true, y_pred, squared=False)
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print(f"LSTM 1 output (SPEED) -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R2: {r2:.3f}")

Epoch 1/100


  super().__init__(**kwargs)


[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 16ms/step - loss: 0.4918 - rmse: 0.7013 - val_loss: 0.3625 - val_rmse: 0.6020 - learning_rate: 0.0010
Epoch 2/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 15ms/step - loss: 0.4606 - rmse: 0.6787 - val_loss: 0.3754 - val_rmse: 0.6127 - learning_rate: 0.0010
Epoch 3/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 15ms/step - loss: 0.4534 - rmse: 0.6733 - val_loss: 0.3723 - val_rmse: 0.6102 - learning_rate: 0.0010
Epoch 4/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 16ms/step - loss: 0.4425 - rmse: 0.6652 - val_loss: 0.4009 - val_rmse: 0.6332 - learning_rate: 5.0000e-04
Epoch 5/100
[1m1460/1460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 16ms/step - loss: 0.4383 - rmse: 0.6620 - val_loss: 0.3892 - val_rmse: 0.6239 - learning_rate: 5.0000e-04
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step
LSTM 1 output 

