# **Assignment task: Find the best neural network model for bus delay prediction**

How can I make the neural network (NN) perform better than the regression model?
- change number of layers (check)
- change the width (check)
- implement drop-out to prevent over-fitting (check)
- change the batch size (check)
- change the training/test split
- change the input (feature engineering) (check)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.models import load_model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

filepath = ""

# ------------------------------------------------- data preprocessing -------------------------------------------------

# Remote CSV file that holds the exercise data.
url = 'https://raw.githubusercontent.com/zhenliangma/Applied-AI-in-Transportation/master/Exercise_7_Neural_networks/Exercise7data.csv'

# Columns removed before modelling, and the column the models have to predict.
dropped_columns = ['Arrival_time', 'Stop_id', 'Bus_id', 'Line_id']
target_column = 'Arrival_delay'

# Download the CSV into a DataFrame, keep only the first 1000 rows,
# and remove the columns listed above.
df = pd.read_csv(url).head(1000).drop(columns=dropped_columns)

# Features: every remaining column except the target.
x = df.drop(columns=[target_column])

# Target: the 'Arrival_delay' column.
y = df[target_column]

# Hold out 20% of the rows as the test set; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.2)

#-------------------------------------------------network construction------------------------------------------------------------------

#************************************************Here you can change the layer to construct your network(add the dense layers, dropout layer, etc.)***************************
# Grid search over the widths of the two hidden layers: every (first_width, second_width)
# pair from layer_width is built, trained and evaluated on the test set.
import shutil

layer_width = [16, 32, 64, 128]

# Path that holds the checkpoint of the best configuration once the search is done.
# Each configuration is checkpointed to its own file first; writing all of them to this
# single path would leave only the last configuration's model on disk.
filepath = "weights.best.h5.keras"

# Plot styling is global, so it only needs to be set once.
sns.set()

# Derive the input size from the data instead of hard-coding it, so the search keeps
# working when features are added or removed.
n_features = X_train.shape[1]

# Best configuration seen so far, ranked by validation MAE (not test MAE, so that the
# test set is not used for model selection).
best_val_mae = float("inf")
best_checkpoint = None
best_widths = None

for first_width in layer_width:
  for second_width in layer_width:
    # A Sequential model is a linear stack of layers.
    model = Sequential()

    # First hidden layer: first_width units, ReLU activation, one input per feature column.
    model.add(Dense(first_width, activation='relu', input_dim=n_features))

    # Dropout with rate 0.2 to reduce over-fitting.
    model.add(Dropout(0.2))

    # Second hidden layer: second_width units, ReLU activation.
    model.add(Dense(second_width, activation='relu'))

    # Dropout with rate 0.2 to reduce over-fitting.
    model.add(Dropout(0.2))

    # Output layer: a single unit without activation (regression output).
    model.add(Dense(1))

    # Adam optimizer, Mean Absolute Error (MAE) as the loss and as the tracked metric.
    model.compile(optimizer='adam', loss='mae', metrics=['mae'])
#************************************************Here you can change the layer to construct your network(add the layers, dropout layer, etc.)*****************

#-------------------------------------------------model train------------------------------------------------------------------
#************************************************Here you can use the callback function **************
    # Stop training when val_mae has not improved for 5 consecutive epochs and
    # restore the weights of the best epoch.
    early_stop = EarlyStopping(monitor='val_mae', patience=5, restore_best_weights=True)

    # Halve the learning rate when val_mae has not improved for 3 consecutive epochs.
    reduce_lr = ReduceLROnPlateau(monitor='val_mae', factor=0.5, patience=3)

    # Save this configuration's model to its own file whenever val_mae improves.
    config_path = f"weights.best.{first_width}x{second_width}.keras"
    checkpoint = ModelCheckpoint(config_path, monitor='val_mae', verbose=0, save_best_only=True, mode='min')

    # Train with 20% of the training rows held out for validation.
    hist = model.fit(X_train, y_train, validation_split=0.2, epochs=200, batch_size=16, callbacks=[early_stop, reduce_lr, checkpoint], verbose=0)

#************************************************Here you can choose to use the callback function *************

    # Training and validation MAE per epoch, taken from the training history.
    err = hist.history['mae']
    val_err = hist.history['val_mae']

    # Epoch numbers actually run (early stopping can end training before epoch 200).
    epochs = range(1, len(err) + 1)

    # Learning-curve plot, currently disabled. Uncomment to inspect a configuration.
    # plt.plot(epochs, err, '-', label='Training MAE')
    # plt.plot(epochs, val_err, ':', label='Validation MAE')
    # plt.title('Training and Validation MAE')
    # plt.xlabel('Epoch')
    # plt.ylabel('Mean Absolute Error')
    # plt.legend(loc='upper right')
    # plt.plot()

    # Predict on the held-out test data.
    y_pred = model.predict(X_test)

    # Mean Absolute Error (MAE), Mean Squared Error (MSE) and R-squared (R2) on the test set.
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Print the evaluation metrics for this configuration.
    print('----------'+'first layer: ' + str(first_width)+ ' second layer: ' + str(second_width) + '----------')
    print(f"Mean Absolute Error: {mae}")
    print(f"Mean Squared Error: {mse}")
    print(f"R-squared: {r2}")

    # Remember the configuration with the lowest validation MAE. A NaN value never
    # compares as smaller, so a diverged run cannot be selected.
    config_val_mae = min(val_err)
    if config_val_mae < best_val_mae:
      best_val_mae = config_val_mae
      best_checkpoint = config_path
      best_widths = (first_width, second_width)

# Publish the best configuration's checkpoint under the original file name so that
# code loading `filepath` gets the best model of the whole search.
if best_checkpoint is not None:
  shutil.copyfile(best_checkpoint, filepath)
  print(f"Best configuration by validation MAE: first layer: {best_widths[0]} second layer: {best_widths[1]} (val_mae: {best_val_mae})")





  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
----------first layer: 16 second layer: 16----------
Mean Absolute Error: 33.91363559838384
Mean Squared Error: 2010.0310159823339
R-squared: 0.8296293616294861


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
----------first layer: 16 second layer: 32----------
Mean Absolute Error: 30.319218217507004
Mean Squared Error: 1621.945400845749
R-squared: 0.8625235557556152


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
----------first layer: 16 second layer: 64----------
Mean Absolute Error: 32.82623918442056
Mean Squared Error: 1826.5358593403434
R-squared: 0.845182478427887


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
----------first layer: 16 second layer: 128----------
Mean Absolute Error: 31.770772235393522
Mean Squared Error: 1759.3833747868855
R-squared: 0.8508743047714233


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
----------first layer: 32 second layer: 16----------
Mean Absolute Error: 30.518497517704965
Mean Squared Error: 1601.4002492125987
R-squared: 0.864264965057373


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
----------first layer: 32 second layer: 32----------
Mean Absolute Error: 30.133348811864852
Mean Squared Error: 1604.2317737816927
R-squared: 0.8640249967575073


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
----------first layer: 32 second layer: 64----------
Mean Absolute Error: 32.871589564085006
Mean Squared Error: 1869.826984476187
R-squared: 0.8415130972862244


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
----------first layer: 32 second layer: 128----------
Mean Absolute Error: 28.787088957410305
Mean Squared Error: 1445.5153447374864
R-squared: 0.8774778246879578


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
----------first layer: 64 second layer: 16----------
Mean Absolute Error: 30.34771065937355
Mean Squared Error: 1627.0916692915687
R-squared: 0.8620873689651489


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
----------first layer: 64 second layer: 32----------
Mean Absolute Error: 29.516938097029925
Mean Squared Error: 1521.0355184103237
R-squared: 0.8710767030715942


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
----------first layer: 64 second layer: 64----------
Mean Absolute Error: 29.12258728981018
Mean Squared Error: 1505.8872314555815
R-squared: 0.8723607063293457


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
----------first layer: 64 second layer: 128----------
Mean Absolute Error: 30.358121343702077
Mean Squared Error: 1626.632721947566
R-squared: 0.8621262907981873


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
----------first layer: 128 second layer: 16----------
Mean Absolute Error: 28.40508300989866
Mean Squared Error: 1462.4869243257351
R-squared: 0.8760393261909485


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
----------first layer: 128 second layer: 32----------
Mean Absolute Error: 30.044170664697887
Mean Squared Error: 1589.4186612998078
R-squared: 0.8652805685997009


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
----------first layer: 128 second layer: 64----------
Mean Absolute Error: 27.745652836114168
Mean Squared Error: 1403.253361433227
R-squared: 0.8810599446296692


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
----------first layer: 128 second layer: 128----------
Mean Absolute Error: 28.252569402977823
Mean Squared Error: 1415.7491891125005
R-squared: 0.8800008296966553
