In [1]:
import pandas as pd
import numpy as np
import sys
import os
import warnings
import requests
import re
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.optimizers import Adam
warnings.filterwarnings("ignore")
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense
from tensorflow.keras.callbacks import EarlyStopping
import json
# Add the path # ADD TO README
sys.path.append('/Users/parvathyanilkumar/WorkSpace/Visualbasicgithub/TunnelTrafficCongestion/')  # Replace with the correct path
from src.util.helper_functions import read_data,preprocess,save_to_csv,write_to_file


In [2]:
##### ML Models and Experiments #########

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.multioutput import MultiOutputRegressor
def configset(configPath,folderName,tunnelName):
    # Load the configuration
    with open("/Users/parvathyanilkumar/WorkSpace/Visualbasicgithub/TunnelTrafficCongestion/config.json", "r") as f:
   #Uncomment if not jupyter notebook file  #with open("./config.json", "r") as f:
        config = json.load(f)

    # Access the file paths
    os.chdir("/Users/parvathyanilkumar/WorkSpace/Visualbasicgithub/TunnelTrafficCongestion/")
    data_processed_path = config["data_processed_path"]
    print("data_processed_path")
    result_path = config["result_path"]
    #For jupter notebook
    result_path=os.path.join(os.getcwd(),result_path)
    data_processed_path=os.path.join(os.getcwd(), data_processed_path+folderName)
    print("data_processed_path again",data_processed_path)
    os.makedirs(result_path+tunnelName,exist_ok=True) #create a folder with the tunnel name for saving all the plots    
    #tunnel_name = config["tunnel_name"]
    return data_processed_path,result_path


def encoding(df_tunnel_data, categorical_features, target_features):
    # Convert column Dato to datetime format
    df_tunnel_data['Dato'] = pd.to_datetime(df_tunnel_data['Dato'])
    
    # Encoding categorical features
    label_encoders = {}
    for feature in categorical_features:
        encoder = LabelEncoder()
        df_tunnel_data[feature + '_encoded'] = encoder.fit_transform(df_tunnel_data[feature].astype('category'))
        label_encoders[feature] = encoder

    return df_tunnel_data, label_encoders

def split_dataset(df_tunnel_data, features, target, test_size=0.2, random_state=42):
    X_train, X_test, y_train, y_test = train_test_split(df_tunnel_data[features], df_tunnel_data[target], test_size=test_size, random_state=random_state)
    return X_train, X_test, y_train, y_test

def train_model(X_train, y_train, model):
    model.fit(X_train, y_train)
    return model

def model_evaluation(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    return mae, mse, rmse, r2

def create_sequences(df1, numerical_features, target_col,sequence_length=24):
    X, y = [], []
    for i in range(len(df1) - sequence_length):
        seq_X = df1[numerical_features].iloc[i:i + sequence_length].values
        seq_y = df1[target_col].iloc[i + sequence_length - 1]
        X.append(seq_X)
        y.append(seq_y)
    return np.array(X), np.array(y).reshape(-1, 1)
def build_rnn_model(input_shape):
    model = Sequential()
    model.add(SimpleRNN(50, activation='relu', input_shape=input_shape, return_sequences=False))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model


def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(50, activation='tanh', input_shape=input_shape, return_sequences=False),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model


def build_gru_model(input_shape):
    model = Sequential([
        GRU(50, activation='relu', input_shape=input_shape, return_sequences=False),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model



def run_experiment(df, features, target, model,model_name,comment,file_path,test_size=0.2, random_state=42):
    X_train, X_test, y_train, y_test = split_dataset(df, features, target, test_size, random_state)
    trained_model = train_model(X_train, y_train, model)
    mae, mse, rmse, r2 = model_evaluation(trained_model, X_test, y_test)
    print(f'Model: {model.__class__.__name__}')
    print(f'Mean Absolute Error: {mae}')
    print(f'Mean Squared Error: {mse}')
    print(f'Root Mean Squared Error: {rmse}')
    print(f'R2 Score: {r2}\n')
    result_dict={
        "Predicting for ": comment,
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        }
    write_to_file(file_path+"/Experiment_ML",model_name,result_dict)
def multi_tunnelrun(tunnel_files): 
    for tunnel_name, file_path in tunnel_files.items():
    # Gudvangtunnel trafficdata
        data_processed_path,result_path=configset("/Users/parvathyanilkumar/WorkSpace/Visual basic github/TunnelTrafficCongestion/config.json",file_path,tunnel_name)
        #data = read_data(data_raw_path)
    #file_path = '/Users/parvathyanilkumar/WorkSpace/Visualbasicgithub/TunnelTrafficCongestion/data/Processed/GUDVANGATUNNELEN_preprocessed.csv'
        result_path=result_path+tunnel_name
        df_tunnel_data = pd.read_csv(data_processed_path)

        categorical_features = ['Måned', 'Dag','Season']
        target = 'Trafikkmengde'
        features = ['Dag_encoded','Season_encoded']

        df_prepared, _ = encoding(df_tunnel_data, categorical_features, target)
    # apply ML and run experiment for LR:
        print("EXPERIMENTS WITH RUSH HOUR:")


        print("------LINEAR REGRESSION-------")
        df_prepared['IsRushHour'] = df_prepared['Hour'].apply(lambda x: 1 if 7 <= x <= 10 or 15 <= x <= 18 else 0)
        rush_hour_counts_normalized = df_prepared['IsRushHour'].value_counts(normalize=True)

    # features
        rush_features = ['IsRushHour','Måned_encoded', 'Dag_encoded', 'Season_encoded']
    #target variable
        lr_model = LinearRegression()
        run_experiment(df_prepared, rush_features, target, lr_model,"Linear Regression","Experiment with Rush hour",result_path)
    
        print("------RANDOM FOREST-------")
        rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
        run_experiment(df_prepared, rush_features, target, rf_model,"Random Forest","Experiment with Rush Hour",result_path)



        print("-----With features Dag_encoded and Season-----")
    
        print("------LINEAR REGRESSION-------")
        lr_model = LinearRegression()
        run_experiment(df_prepared, features, target, lr_model,"Linear Regression","Experiment with "+', '.join(features),result_path)
        print("------RANDOM FOREST-------")
        rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
        run_experiment(df_prepared, features, target, rf_model,"Random Forest","Experiment with "+', '.join(features),result_path)



        print("MULTIOUTPUT REGRESSOR")

        # target variables are Trafikkmengde and Gjennomsnittshastighet
        features_MR=['Trafikkmengde', 'Gjennomsnittshastighet']
        multi_target = df_prepared[features_MR]

        # Splitting the data into training and testing sets for multivariate regression
        X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(df_prepared[features], multi_target, test_size=0.2, random_state=42)

        # Initializing the MultiOutput Regressor with Random Forest as the base estimator
        multi_rf_model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))

        # Fitting the model to the training data
        multi_rf_model.fit(X_train_multi, y_train_multi)

        # Predicting on the test set
        y_pred_multi = multi_rf_model.predict(X_test_multi)

        # calculatre metrics:
        mae_multi = mean_absolute_error(y_test_multi, y_pred_multi, multioutput='raw_values')
        mse_multi = mean_squared_error(y_test_multi, y_pred_multi, multioutput='raw_values')
        rmse_multi = np.sqrt(mse_multi)
        r2_multi = r2_score(y_test_multi, y_pred_multi, multioutput='raw_values')

        mae_multi, mse_multi, rmse_multi, r2_multi
        print(f'Mean absolute Error: {mae_multi}')
        print(f'Mean squared Error: {mse_multi}')
        print(f'Root Mean Squared Error: {rmse_multi}')
        print(f'R2 score: { r2_multi}')
        result_dict={
        "Features":', '.join(features_MR),
        "MAE": mae_multi,
        "MSE": mse_multi,
        "RMSE": rmse_multi,
        }
        write_to_file(result_path+"/Experiment_ML","Multioutput regressor",result_dict)


        print("Experiments with Deep Neural Networks")
        #Creating sequence to create the train and test data sets

        numerical_features = [
        'Felt', 'Trafikkmengde', 'DayOfWeek', 'IsWeekend', 'Hour',
        'DayOfMonth', 'Måned_encoded', 'Dag_encoded', 'Season_encoded'
        ]

        target_col = 'Gjennomsnittshastighet'

        X, y = create_sequences(df_tunnel_data, numerical_features, target_col=target_col)

        # Splitting the dataset into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

        print(f"X_train shape: {X_train.shape}")
        print(f"y_train shape: {y_train.shape}")
        print(f"X_test shape: {X_test.shape}")
        print(f"y_test shape: {y_test.shape}")

        #Simple RNN
        # The input shape for RNN sequence_length, num_features
        print("--RNN--")
        input_shape = (X_train.shape[1], X_train.shape[2])
        rnn_model = build_rnn_model(input_shape)

        # Training the model
        history_rnn = rnn_model.fit(X_train, y_train, epochs=100, batch_size=64, validation_split=0.2, verbose=2)

        #rnn_model.save("C:/Users/Vinothini Aravindan/Vino-UIS/Vår 2024 - Thesis/Project1/output folder/Model_RNN.h5")

        predictions_rnn = rnn_model.predict(X_test)

        mae = mean_absolute_error(y_test, predictions_rnn)
        print(f"Mean Absolute Error (MAE): {mae}")

        mse = mean_squared_error(y_test, predictions_rnn)
        print(f"Mean Squared Error (MSE): {mse}")

        rmse = np.sqrt(mse)
        print(f"Root Mean Squared Error (RMSE): {rmse}")
        result_dict={
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        }
        write_to_file(result_path+"/Experiment_DL","RNN",result_dict)

        print("---LSTM----")
        input_shape = (X_train.shape[1], X_train.shape[2])
    # creaing lstm model
        lstm_model = build_lstm_model(input_shape)

    # Define the EarlyStopping callback
        early_stopping = EarlyStopping(
        monitor='val_loss', 
        patience=10,         
        verbose=1,           
        mode='min',          
        restore_best_weights=True  
        )

        # Training the model with the EarlyStopping callback
        history_lstm = lstm_model.fit(
        X_train, y_train,
        epochs=100,
        batch_size=64,
        validation_split=0.2,
        verbose=2,
        callbacks=[early_stopping]  
        )
        predictions_lstm = lstm_model.predict(X_test)

        mae = mean_absolute_error(y_test, predictions_lstm)
        print(f"Mean Absolute Error (MAE): {mae}")

        mse = mean_squared_error(y_test, predictions_lstm)
        print(f"Mean Squared Error (MSE): {mse}")

        rmse = np.sqrt(mse)
        print(f"Root Mean Squared Error (RMSE): {rmse}")
        result_dict={
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        }
        write_to_file(result_path+"/Experiment_DL","LSTM",result_dict)
        print("----GRU----")
        input_shape = (X_train.shape[1], X_train.shape[2])
        gru_model = build_gru_model(input_shape)

        # Train the GRU
        history_gru = gru_model.fit(X_train, y_train, epochs=100, batch_size=64, validation_split=0.2, verbose=2)

        predictions_gru = gru_model.predict(X_test)

        #evaluation gru
        mae_gru = mean_absolute_error(y_test, predictions_gru)
        mse_gru = mean_squared_error(y_test, predictions_gru)
        rmse_gru = np.sqrt(mse_gru)
        print(f"GRU Model - MAE: {mae_gru}, MSE: {mse_gru}, RMSE: {rmse_gru}")
        result_dict={
        "MAE": mae_gru,
        "MSE": mse_gru,
        "RMSE": rmse_gru,
        }
        write_to_file(result_path+"/Experiment_DL","GRU",result_dict)



In [None]:
if __name__ == "__main__":
   

     # multiple tunnels
    tunnel_files = {
        #'FANNEFJORDTUNNELEN': 'FANNEFJORDTUNNELEN_preprocessed.csv',
        #'GUDVANGATUNNELEN': 'GUDVANGATUNNELEN_preprocessed.csv',
        'Ilsviktunnelen': 'Ilsviktunnelen_preprocessed.csv',
        #'TUSSENTUNNELEN': 'TUSSENTUNNELEN_preprocessed.csv',
        #'Jondalstunnelen': 'Jondalstunnelen_preprocessed.csv',
        'RYFYLKETUNNELEN MOT STAVANGER': 'RYFYLKETUNNELEN MOT STAVANGER_preprocessed.csv',
        
        
    } 
        
   # for value in tunnel_files.values():
    multi_tunnelrun(tunnel_files)

data_processed_path
data_processed_path again /Users/parvathyanilkumar/WorkSpace/Visualbasicgithub/TunnelTrafficCongestion/data/Processed/Ilsviktunnelen_preprocessed.csv
EXPERIMENTS WITH RUSH HOUR:
------LINEAR REGRESSION-------
Model: LinearRegression
Mean Absolute Error: 60.486808971930714
Mean Squared Error: 5926.560560614049
Root Mean Squared Error: 76.98415785480834
R2 Score: 0.17242940370238558

------RANDOM FOREST-------
Model: RandomForestRegressor
Mean Absolute Error: 60.43299197708735
Mean Squared Error: 5616.27312632048
Root Mean Squared Error: 74.94179825918563
R2 Score: 0.2157571912776094

-----With features Dag_encoded and Season-----
------LINEAR REGRESSION-------
Model: LinearRegression
Mean Absolute Error: 68.4931896623624
Mean Squared Error: 7166.531499336341
Root Mean Squared Error: 84.65536899297257
R2 Score: -0.0007171420310136956

------RANDOM FOREST-------
Model: RandomForestRegressor
Mean Absolute Error: 66.85699112948255
Mean Squared Error: 6865.22878792581
Roo

Epoch 82/100
89/89 - 0s - loss: 1.8929 - val_loss: 10.1652 - 425ms/epoch - 5ms/step
Epoch 83/100
89/89 - 0s - loss: 2.0798 - val_loss: 10.3415 - 440ms/epoch - 5ms/step
Epoch 84/100
89/89 - 0s - loss: 1.9679 - val_loss: 14.9808 - 423ms/epoch - 5ms/step
Epoch 85/100
89/89 - 0s - loss: 2.0516 - val_loss: 12.0085 - 424ms/epoch - 5ms/step
Epoch 86/100
89/89 - 0s - loss: 1.8265 - val_loss: 8.1325 - 429ms/epoch - 5ms/step
Epoch 87/100
89/89 - 0s - loss: 2.1490 - val_loss: 10.5107 - 431ms/epoch - 5ms/step
Epoch 88/100
89/89 - 0s - loss: 1.8433 - val_loss: 12.3810 - 438ms/epoch - 5ms/step
Epoch 89/100
89/89 - 0s - loss: 1.9336 - val_loss: 8.8590 - 422ms/epoch - 5ms/step
Epoch 90/100
89/89 - 0s - loss: 2.0909 - val_loss: 11.5365 - 446ms/epoch - 5ms/step
Epoch 91/100
89/89 - 0s - loss: 1.8962 - val_loss: 9.1694 - 427ms/epoch - 5ms/step
Epoch 92/100
89/89 - 0s - loss: 1.7578 - val_loss: 7.0434 - 425ms/epoch - 5ms/step
Epoch 93/100
89/89 - 0s - loss: 1.8312 - val_loss: 6.9559 - 419ms/epoch - 5ms/st

Epoch 47/100
89/89 - 1s - loss: 1.5866 - val_loss: 7.6986 - 934ms/epoch - 10ms/step
Epoch 48/100
89/89 - 1s - loss: 1.5499 - val_loss: 8.8768 - 926ms/epoch - 10ms/step
Epoch 49/100
89/89 - 1s - loss: 1.5658 - val_loss: 8.6637 - 935ms/epoch - 11ms/step
Epoch 50/100
89/89 - 1s - loss: 1.5248 - val_loss: 8.0513 - 927ms/epoch - 10ms/step
Epoch 51/100
89/89 - 1s - loss: 1.5532 - val_loss: 10.2073 - 923ms/epoch - 10ms/step
Epoch 52/100
89/89 - 1s - loss: 1.5809 - val_loss: 8.3229 - 932ms/epoch - 10ms/step
Epoch 53/100
89/89 - 1s - loss: 1.5156 - val_loss: 7.0559 - 933ms/epoch - 10ms/step
Epoch 54/100
89/89 - 1s - loss: 1.5259 - val_loss: 8.7270 - 928ms/epoch - 10ms/step
Epoch 55/100
89/89 - 1s - loss: 1.4561 - val_loss: 8.2566 - 924ms/epoch - 10ms/step
Epoch 56/100
89/89 - 1s - loss: 1.6185 - val_loss: 7.5322 - 921ms/epoch - 10ms/step
Epoch 57/100
89/89 - 1s - loss: 1.4326 - val_loss: 8.7468 - 928ms/epoch - 10ms/step
Epoch 58/100
89/89 - 1s - loss: 1.4491 - val_loss: 9.3751 - 954ms/epoch - 1

In [68]:


def build_rnn_model(input_shape):
    model = Sequential()
    model.add(SimpleRNN(50, activation='relu', input_shape=input_shape, return_sequences=False))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model

# The input shape for RNN sequence_length, num_features
input_shape = (X_train.shape[1], X_train.shape[2])
rnn_model = build_rnn_model(input_shape)

# Training the model
history_rnn = rnn_model.fit(X_train, y_train, epochs=100, batch_size=64, validation_split=0.2, verbose=2)

rnn_model.save("C:/Users/Vinothini Aravindan/Vino-UIS/Vår 2024 - Thesis/Project1/output folder/Model_RNN.h5")


NameError: name 'X_train' is not defined

(RNN) :
Considerable reduction in the validation loss but at some point there is a fluctuation, epoch 26 with a validation loss of 2.4028 and epoch 37 with a validation loss of 2.4227. also towards end there is dip in the loss but fluctuation in eg epoch 98 with a validation loss of 3.0803 . these Could be due to overfitting


In [87]:
predictions = rnn_model.predict(X_test)

mae = mean_absolute_error(y_test, predictions)
print(f"Mean Absolute Error (MAE): {mae}")

mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error (MSE): {mse}")

rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse}")


Mean Absolute Error (MAE): 1.4732557728378455
Mean Squared Error (MSE): 3.5197166391874175
Root Mean Squared Error (RMSE): 1.8760907864992615


In [88]:
from tensorflow.keras.callbacks import EarlyStopping

def build_rnn_model(input_shape):
    model = Sequential()
    model.add(SimpleRNN(50, activation='tanh', input_shape=input_shape, return_sequences=False))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model

# Define the EarlyStopping callback
early_stopping = EarlyStopping(
    monitor='val_loss', 
    patience=10,         
    verbose=1,           
    mode='min',          
    restore_best_weights=True  
)

# Training the model with the EarlyStopping callback
history_rnn = rnn_model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_split=0.2,
    verbose=2,
    callbacks=[early_stopping]  
)


Epoch 1/100
79/79 - 0s - loss: 0.7608 - val_loss: 2.1654 - 270ms/epoch - 3ms/step
Epoch 2/100
79/79 - 0s - loss: 0.7468 - val_loss: 2.1809 - 255ms/epoch - 3ms/step
Epoch 3/100
79/79 - 0s - loss: 0.7218 - val_loss: 2.0307 - 369ms/epoch - 5ms/step
Epoch 4/100
79/79 - 0s - loss: 0.7468 - val_loss: 2.2265 - 260ms/epoch - 3ms/step
Epoch 5/100
79/79 - 0s - loss: 0.7469 - val_loss: 1.8340 - 252ms/epoch - 3ms/step
Epoch 6/100
79/79 - 0s - loss: 0.7800 - val_loss: 2.0977 - 233ms/epoch - 3ms/step
Epoch 7/100
79/79 - 0s - loss: 0.7392 - val_loss: 1.9504 - 233ms/epoch - 3ms/step
Epoch 8/100
79/79 - 0s - loss: 0.6922 - val_loss: 1.9551 - 232ms/epoch - 3ms/step
Epoch 9/100
79/79 - 0s - loss: 0.7516 - val_loss: 2.0086 - 238ms/epoch - 3ms/step
Epoch 10/100
79/79 - 0s - loss: 0.7193 - val_loss: 1.8225 - 243ms/epoch - 3ms/step
Epoch 11/100
79/79 - 0s - loss: 0.7073 - val_loss: 1.9548 - 239ms/epoch - 3ms/step
Epoch 12/100
79/79 - 0s - loss: 0.7193 - val_loss: 2.1227 - 254ms/epoch - 3ms/step
Epoch 13/100


In [89]:
predictions = rnn_model.predict(X_test)

mae = mean_absolute_error(y_test, predictions)
print(f"Mean Absolute Error (MAE): {mae}")

mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error (MSE): {mse}")

rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse}")


Mean Absolute Error (MAE): 1.36652906094142
Mean Squared Error (MSE): 3.199274813577221
Root Mean Squared Error (RMSE): 1.7886516747475516


In [90]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense
from tensorflow.keras.optimizers import Adam

def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(50, activation='tanh', input_shape=input_shape, return_sequences=False),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model

input_shape = (X_train.shape[1], X_train.shape[2])
# creaing lstm model
lstm_model = build_lstm_model(input_shape)

# Define the EarlyStopping callback
early_stopping = EarlyStopping(
    monitor='val_loss', 
    patience=10,         
    verbose=1,           
    mode='min',          
    restore_best_weights=True  
)

# Training the model with the EarlyStopping callback
history_lstm = lstm_model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_split=0.2,
    verbose=2,
    callbacks=[early_stopping]  
)


Epoch 1/100
79/79 - 2s - loss: 1.7288 - val_loss: 1.3736 - 2s/epoch - 32ms/step
Epoch 2/100
79/79 - 1s - loss: 1.5874 - val_loss: 1.2941 - 617ms/epoch - 8ms/step
Epoch 3/100
79/79 - 1s - loss: 1.5111 - val_loss: 1.1937 - 600ms/epoch - 8ms/step
Epoch 4/100
79/79 - 1s - loss: 1.4084 - val_loss: 1.1654 - 600ms/epoch - 8ms/step
Epoch 5/100
79/79 - 1s - loss: 1.3474 - val_loss: 1.2895 - 616ms/epoch - 8ms/step
Epoch 6/100
79/79 - 1s - loss: 1.2740 - val_loss: 1.1749 - 600ms/epoch - 8ms/step
Epoch 7/100
79/79 - 1s - loss: 1.2447 - val_loss: 1.3026 - 705ms/epoch - 9ms/step
Epoch 8/100
79/79 - 1s - loss: 1.1942 - val_loss: 1.4616 - 584ms/epoch - 7ms/step
Epoch 9/100
79/79 - 1s - loss: 1.1722 - val_loss: 1.2668 - 600ms/epoch - 8ms/step
Epoch 10/100
79/79 - 1s - loss: 1.1652 - val_loss: 1.2662 - 601ms/epoch - 8ms/step
Epoch 11/100
79/79 - 1s - loss: 1.1188 - val_loss: 1.3608 - 613ms/epoch - 8ms/step
Epoch 12/100
79/79 - 1s - loss: 1.1295 - val_loss: 1.3803 - 589ms/epoch - 7ms/step
Epoch 13/100
79

In [91]:
predictions = lstm_model.predict(X_test)

mae = mean_absolute_error(y_test, predictions)
print(f"Mean Absolute Error (MAE): {mae}")

mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error (MSE): {mse}")

rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse}")

Mean Absolute Error (MAE): 0.9348118221490812
Mean Squared Error (MSE): 1.9637535404276347
Root Mean Squared Error (RMSE): 1.4013399089541534


In [92]:
def build_gru_model(input_shape):
    model = Sequential([
        GRU(50, activation='relu', input_shape=input_shape, return_sequences=False),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model

input_shape = (X_train.shape[1], X_train.shape[2])
gru_model = build_gru_model(input_shape)

# Train the GRU
history_gru = gru_model.fit(X_train, y_train, epochs=100, batch_size=64, validation_split=0.2, verbose=2)

predictions_gru = gru_model.predict(X_test)

#evaluation gru
mae_gru = mean_absolute_error(y_test, predictions_gru)
mse_gru = mean_squared_error(y_test, predictions_gru)
rmse_gru = np.sqrt(mse_gru)
print(f"GRU Model - MAE: {mae_gru}, MSE: {mse_gru}, RMSE: {rmse_gru}")

Epoch 1/100
79/79 - 2s - loss: 11.5669 - val_loss: 1.3616 - 2s/epoch - 21ms/step
Epoch 2/100
79/79 - 1s - loss: 1.8016 - val_loss: 1.3972 - 507ms/epoch - 6ms/step
Epoch 3/100
79/79 - 1s - loss: 1.5850 - val_loss: 1.3869 - 522ms/epoch - 7ms/step
Epoch 4/100
79/79 - 0s - loss: 1.4931 - val_loss: 1.2576 - 496ms/epoch - 6ms/step
Epoch 5/100
79/79 - 1s - loss: 1.4039 - val_loss: 1.2027 - 505ms/epoch - 6ms/step
Epoch 6/100
79/79 - 0s - loss: 1.3540 - val_loss: 1.2867 - 480ms/epoch - 6ms/step
Epoch 7/100
79/79 - 1s - loss: 1.3509 - val_loss: 1.1673 - 502ms/epoch - 6ms/step
Epoch 8/100
79/79 - 0s - loss: 1.3167 - val_loss: 1.1643 - 481ms/epoch - 6ms/step
Epoch 9/100
79/79 - 1s - loss: 1.3068 - val_loss: 1.6411 - 535ms/epoch - 7ms/step
Epoch 10/100
79/79 - 0s - loss: 1.2622 - val_loss: 1.2831 - 482ms/epoch - 6ms/step
Epoch 11/100
79/79 - 0s - loss: 1.2289 - val_loss: 1.2735 - 486ms/epoch - 6ms/step
Epoch 12/100
79/79 - 0s - loss: 1.1866 - val_loss: 1.4162 - 497ms/epoch - 6ms/step
Epoch 13/100
7

In [93]:
#### Experiment 2####

###### Experiment 1:##########

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

#feature selection
numeric_features = [
    'Felt', 'Gjennomsnittshastighet', 'DayOfWeek', 'IsWeekend', 'Hour',
    'DayOfMonth', 'Måned_encoded', 'Dag_encoded', 'PartOfDay_encoded', 'Season_encoded'
]

# target variable(avg speed)
target = 'Trafikkmengde'

# filtering features and the target variables
df_features = df_tunnel_data[numeric_features + [target]].copy()

# handling missing vales
missing_values = df_features.isnull().sum()

# Scaling numerical features
scaler = StandardScaler()

df_features[numeric_features] = scaler.fit_transform(df_features[numeric_features])

# spliting dataset into traing and testing
X_train, X_test, y_train, y_test = train_test_split(df_features[numeric_features], df_features[target], test_size=0.2, random_state=42)

missing_values, X_train.head()



(Felt                      0
 Gjennomsnittshastighet    0
 DayOfWeek                 0
 IsWeekend                 0
 Hour                      0
 DayOfMonth                0
 Måned_encoded             0
 Dag_encoded               0
 PartOfDay_encoded         0
 Season_encoded            0
 Trafikkmengde             0
 dtype: int64,
       Felt  Gjennomsnittshastighet  DayOfWeek  IsWeekend      Hour  \
 2968   0.0                0.991519  -0.001657  -0.627140 -0.940258   
 2439   0.0                1.073670  -0.505495  -0.627140 -1.084712   
 1765   0.0                0.071425  -0.505495  -0.627140 -1.373620   
 6417   0.0                2.470241   1.509859   1.594542  1.515461   
 7692   0.0                0.071425   0.502182  -0.627140 -1.518074   
 
       DayOfMonth  Måned_encoded  Dag_encoded  PartOfDay_encoded  \
 2968   -0.295158       0.129256     1.494856           0.448085   
 2439   -0.596490       0.411583     0.001637           1.342558   
 1765   -0.897822      -1.564708  

In [94]:
def create_sequences(df, numerical_features, sequence_length=24, target_col='Trafikkmengde'):
    X, y = [], []
    for i in range(len(df) - sequence_length):
        seq_X = df[numerical_features].iloc[i:i + sequence_length].values
        seq_y = df[target_col].iloc[i + sequence_length - 1]
        X.append(seq_X)
        y.append(seq_y)
    return np.array(X), np.array(y).reshape(-1, 1)

numerical_features = [
    'Felt', 'Gjennomsnittshastighet', 'DayOfWeek', 'IsWeekend', 'Hour',
    'DayOfMonth', 'Måned_encoded', 'Dag_encoded', 'PartOfDay_encoded', 'Season_encoded'
]
sequence_length = 24
target_col = 'Trafikkmengde'

X, y = create_sequences(df_scaled, numerical_features, sequence_length=sequence_length, target_col=target_col)

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")


X_train shape: (6306, 24, 10)
y_train shape: (6306, 1)
X_test shape: (1577, 24, 10)
y_test shape: (1577, 1)


In [95]:
from tensorflow.keras.callbacks import EarlyStopping

def build_rnn_model1(input_shape):
    model = Sequential()
    model.add(SimpleRNN(50, activation='tanh', input_shape=input_shape, return_sequences=False))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model

# Define the EarlyStopping callback
early_stopping = EarlyStopping(
    monitor='val_loss', 
    patience=10,         
    verbose=1,           
    mode='min',          
    restore_best_weights=True  
)

# The input shape for RNN sequence_length, num_features
input_shape = (X_train.shape[1], X_train.shape[2])
rnn_model1 = build_rnn_model1(input_shape)

# Training the model with the EarlyStopping callback
history_rnn1 = rnn_model1.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_split=0.2,
    verbose=2,
    callbacks=[early_stopping]  
)



Epoch 1/100
79/79 - 1s - loss: 6639.8872 - val_loss: 2586.6687 - 1s/epoch - 18ms/step
Epoch 2/100
79/79 - 0s - loss: 5965.4316 - val_loss: 2278.5352 - 267ms/epoch - 3ms/step
Epoch 3/100
79/79 - 0s - loss: 5606.8716 - val_loss: 2074.4548 - 250ms/epoch - 3ms/step
Epoch 4/100
79/79 - 0s - loss: 5313.2119 - val_loss: 1916.0000 - 262ms/epoch - 3ms/step
Epoch 5/100
79/79 - 0s - loss: 5064.3726 - val_loss: 1790.6804 - 255ms/epoch - 3ms/step
Epoch 6/100
79/79 - 0s - loss: 4853.7490 - val_loss: 1694.1896 - 270ms/epoch - 3ms/step
Epoch 7/100
79/79 - 0s - loss: 4675.9053 - val_loss: 1622.8033 - 249ms/epoch - 3ms/step
Epoch 8/100
79/79 - 0s - loss: 4525.7495 - val_loss: 1563.8820 - 247ms/epoch - 3ms/step
Epoch 9/100
79/79 - 0s - loss: 4389.1846 - val_loss: 1509.4821 - 253ms/epoch - 3ms/step
Epoch 10/100
79/79 - 0s - loss: 4250.0747 - val_loss: 1442.5021 - 247ms/epoch - 3ms/step
Epoch 11/100
79/79 - 0s - loss: 4095.9663 - val_loss: 1354.3212 - 253ms/epoch - 3ms/step
Epoch 12/100
79/79 - 0s - loss: 

In [96]:
predictions = rnn_model1.predict(X_test)

mae = mean_absolute_error(y_test, predictions)
print(f"Mean Absolute Error (MAE): {mae}")

mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error (MSE): {mse}")

rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse}")


Mean Absolute Error (MAE): 25.087237820981944
Mean Squared Error (MSE): 1110.3476631876852
Root Mean Squared Error (RMSE): 33.32187964667787


In [97]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense
from tensorflow.keras.optimizers import Adam

def build_lstm_model1(input_shape):
    model = Sequential([
        LSTM(50, activation='tanh', input_shape=input_shape, return_sequences=False),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model

input_shape = (X_train.shape[1], X_train.shape[2])
# creaing lstm model
lstm_model1 = build_lstm_model1(input_shape)

# Define the EarlyStopping callback
early_stopping = EarlyStopping(
    monitor='val_loss', 
    patience=10,         
    verbose=1,           
    mode='min',          
    restore_best_weights=True  
)

# Training the model with the EarlyStopping callback
history_lstm1 = lstm_model1.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_split=0.2,
    verbose=2,
    callbacks=[early_stopping]  
)


predictions = lstm_model1.predict(X_test)

mae = mean_absolute_error(y_test, predictions)
print(f"Mean Absolute Error (MAE): {mae}")

mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error (MSE): {mse}")

rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse}")

Epoch 1/100
79/79 - 3s - loss: 6489.3140 - val_loss: 2415.6106 - 3s/epoch - 32ms/step
Epoch 2/100
79/79 - 1s - loss: 5752.1401 - val_loss: 2139.5710 - 990ms/epoch - 13ms/step
Epoch 3/100
79/79 - 1s - loss: 5400.0938 - val_loss: 1951.8845 - 765ms/epoch - 10ms/step
Epoch 4/100
79/79 - 1s - loss: 5120.9653 - val_loss: 1813.0239 - 788ms/epoch - 10ms/step
Epoch 5/100
79/79 - 1s - loss: 4890.6987 - val_loss: 1706.6835 - 757ms/epoch - 10ms/step
Epoch 6/100
79/79 - 1s - loss: 4698.4204 - val_loss: 1629.4783 - 833ms/epoch - 11ms/step
Epoch 7/100
79/79 - 1s - loss: 4539.3623 - val_loss: 1574.3417 - 746ms/epoch - 9ms/step
Epoch 8/100
79/79 - 1s - loss: 4407.0967 - val_loss: 1539.3895 - 755ms/epoch - 10ms/step
Epoch 9/100
79/79 - 1s - loss: 4299.3076 - val_loss: 1519.5045 - 862ms/epoch - 11ms/step
Epoch 10/100
79/79 - 1s - loss: 4211.9775 - val_loss: 1512.3462 - 775ms/epoch - 10ms/step
Epoch 11/100
79/79 - 1s - loss: 4142.2617 - val_loss: 1515.1305 - 817ms/epoch - 10ms/step
Epoch 12/100
79/79 - 1s

In [98]:
def build_gru_model1(input_shape):
    model = Sequential([
        GRU(50, activation='relu', input_shape=input_shape, return_sequences=False),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model

input_shape = (X_train.shape[1], X_train.shape[2])
gru_model1 = build_gru_model1(input_shape)

# Train the GRU
history_gru1 = gru_model1.fit(X_train, y_train, epochs=100, batch_size=64, validation_split=0.2, verbose=2)

predictions_gru1 = gru_model1.predict(X_test)

#evaluation gru
mae_gru = mean_absolute_error(y_test, predictions_gru1)
mse_gru = mean_squared_error(y_test, predictions_gru1)
rmse_gru = np.sqrt(mse_gru)
print(f"GRU Model - MAE: {mae_gru}, MSE: {mse_gru}, RMSE: {rmse_gru}")

Epoch 1/100
79/79 - 1s - loss: 5319.2114 - val_loss: 1417.6221 - 1s/epoch - 18ms/step
Epoch 2/100
79/79 - 1s - loss: 1955.6138 - val_loss: 968.4203 - 689ms/epoch - 9ms/step
Epoch 3/100
79/79 - 1s - loss: 866.4975 - val_loss: 529.7051 - 705ms/epoch - 9ms/step
Epoch 4/100
79/79 - 1s - loss: 755.6846 - val_loss: 997.3108 - 760ms/epoch - 10ms/step
Epoch 5/100
79/79 - 1s - loss: 658.8499 - val_loss: 360.2706 - 690ms/epoch - 9ms/step
Epoch 6/100
79/79 - 1s - loss: 647.2573 - val_loss: 470.0004 - 662ms/epoch - 8ms/step
Epoch 7/100
79/79 - 1s - loss: 590.4156 - val_loss: 749.2847 - 656ms/epoch - 8ms/step
Epoch 8/100
79/79 - 1s - loss: 622.2477 - val_loss: 404.7096 - 637ms/epoch - 8ms/step
Epoch 9/100
79/79 - 1s - loss: 581.0413 - val_loss: 657.8297 - 686ms/epoch - 9ms/step
Epoch 10/100
79/79 - 1s - loss: 556.6829 - val_loss: 461.3833 - 660ms/epoch - 8ms/step
Epoch 11/100
79/79 - 1s - loss: 557.4713 - val_loss: 524.9714 - 1s/epoch - 14ms/step
Epoch 12/100
79/79 - 1s - loss: 546.3956 - val_loss: