# Data preprocessing

In [1]:
import csv
import numpy as np
from sklearn.preprocessing import MinMaxScaler

data = []
scaler = MinMaxScaler()

def create_dataset(dataset, look_back=12):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        aux = dataset[i:(i+look_back)]
        dataX.append(aux)
        dataY.append(dataset[i + look_back])
    return np.array(dataX), np.array(dataY)

with open('./datasets/traffic-prediction-dataset.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader)
    for row in csv_reader:
        row_to_append = [float(x) for x in row[0:4]]
        data.append(row_to_append)

scaler.fit(data)
normalized_data = scaler.transform(data)
                
normalized_data




array([[0.29787234, 0.1589404 , 0.13761468, 0.19871795],
       [0.2751773 , 0.13576159, 0.14678899, 0.17628205],
       [0.21560284, 0.15562914, 0.20183486, 0.18589744],
       ...,
       [0.23546099, 0.11258278, 0.1559633 , 0.19551282],
       [0.25248227, 0.12913907, 0.1146789 , 0.15384615],
       [0.18723404, 0.1192053 , 0.11926606, 0.16025641]])

# Recurrent Neural Networks

## Train and Test Sets

In [2]:
train_size = int(len(normalized_data) * 0.75)
test_size = len(normalized_data) - train_size

train, test = normalized_data[0:train_size,:], normalized_data[train_size:len(normalized_data),:]
x_train, y_train = create_dataset(train)
x_test, y_test = create_dataset(test)

x_train.shape

(12083, 12, 4)

## Training

In [3]:
from keras import Input
from keras.models import Sequential 
from keras.layers import Dense, Dropout
from keras.layers import LSTM

model = Sequential()
model.add(Input(shape=(12, 4)))
model.add(LSTM(64, return_sequences=True, input_shape=(12, 4)))
model.add(LSTM(64))
model.add(Dropout(0.2))
model.add(Dense(4))
model.compile(loss='mean_squared_error', optimizer='rmsprop')
model.summary()

model.fit(x_train, y_train, epochs=50, batch_size=128)


2022-10-17 21:48:12.439359: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-17 21:48:12.604748: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-10-17 21:48:12.611327: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-10-17 21:48:12.611343: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if yo

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 12, 64)            17664     
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 4)                 260       
                                                                 
Total params: 50,948
Trainable params: 50,948
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
E

<keras.callbacks.History at 0x7f069c3dba90>

## Test

In [13]:
y_pred = model.predict(x_test)
y_pred



array([[0.09067962, 0.03969803, 0.06927239, 0.06536179],
       [0.08943988, 0.03405983, 0.06021421, 0.05097923],
       [0.10608035, 0.0367779 , 0.06870245, 0.05145346],
       ...,
       [0.22340705, 0.11582818, 0.16798273, 0.18116175],
       [0.20923865, 0.12448627, 0.16822943, 0.16662633],
       [0.21829917, 0.12371694, 0.1596344 , 0.16696627]], dtype=float32)

In [15]:
from sklearn.metrics import mean_absolute_error


final_y_real = scaler.inverse_transform(y_test)
final_y_pred = scaler.inverse_transform(y_pred)
mean_absolute_error(final_y_real, final_y_pred)

10.939831443013253