BIDIRECTIONAL LSTM FOR 2 FEATURES (TEMPERATURE + ONE-HOT ENCODED MONTH), 24-HOUR INPUT WINDOW

Defining the number of input columns and the input window length

In [None]:
# Columns per time step: 1 temperature value + 12 one-hot month indicators.
num_feature = 1 + 12
# Length of the input window, in hourly steps.
time_step = 24

Importing libraries

In [None]:
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential, load_model
from keras.layers import Bidirectional, LSTM, Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import OneHotEncoder

Importing data

In [3]:
filename = './data/07-09-2023.csv'

# Read every CSV row as a list of strings; column names are assigned below
# rather than taken from the file.
with open(filename, 'r') as file:
    data = list(csv.reader(file))

df = pd.DataFrame(data)
df.columns = ["Record_ID","Time","Timestamp","Temperature","Disolved Oxygen","Salinity","pH","Turbidity"]

# Cast the raw strings to their working dtypes in a single pass.
df = df.astype({
    'Record_ID': int,
    'Time': str,
    'Temperature': float,
    'Disolved Oxygen': float,
    'Salinity': float,
    'pH': float,
    'Turbidity': float,
})

# Record_ID and Timestamp are not used after this point.
df = df.drop(columns=['Timestamp', 'Record_ID'])

# Parse the day-first timestamps, index the frame by time in chronological
# order, and discard rows with missing values.
df['Time'] = pd.to_datetime(df['Time'], format="%d/%m/%Y %H:%M:%S")
df = df.set_index('Time').sort_index().dropna()

TEMPERATURE SECTION

In [4]:
# Keep readings of at least 18 (presumably to discard implausible sensor
# values; TODO confirm the threshold), then average them into hourly bins.
# Hours left without any reading come out as NaN.
temp = df.loc[df['Temperature'] >= 18, 'Temperature'].resample('H').mean()

Splitting train and test data

In [5]:
# Chronological split: everything before September 2022 is training data,
# September 2022 onward is test data.
idx_year, idx_month = temp.index.year, temp.index.month
before_cutoff = (idx_year < 2022) | ((idx_year == 2022) & (idx_month < 9))
train = temp[before_cutoff]
test = temp[~before_cutoff]

In [6]:
# One-hot encode the calendar month and append it to the hourly temperature.
# The 12 categories are fixed up front so every encoded array has exactly 12
# month columns in the same order, even when a split does not contain every
# calendar month. Letting the encoder infer categories per split would make
# the column count (and the reshape below) depend on the data.
encoder = OneHotEncoder(categories=[list(range(1, 13))])

month = np.array(train.index.month).reshape(-1,1)
month = encoder.fit_transform(month).toarray()
# Shape becomes (hours, 1, num_feature) with the temperature in column 0.
train = np.column_stack((train.to_numpy(), month)).reshape(-1,1,num_feature)

# The test split reuses the encoder fitted above instead of fitting a new one.
month = np.array(test.index.month).reshape(-1,1)
month = encoder.transform(month).toarray()
test = np.column_stack((test.to_numpy(), month)).reshape(-1,1,num_feature)

In [7]:
def split_train(array: np.ndarray, duration: int):
    """Cut a series into (input window, next value) pairs for one-step forecasting.

    Parameters
    ----------
    array : np.ndarray
        Series of shape (n_steps, 1, n_features). Feature 0 is the value to
        predict.
    duration : int
        Number of consecutive steps in each input window.

    Returns
    -------
    x : np.ndarray
        Input windows, shape (n_samples, duration, n_features).
    y : np.ndarray
        Value of feature 0 at the step right after each window, shape
        (n_samples, 1).

    Windows whose inputs or target contain NaN are skipped. If the series is
    too short for a single window, both arrays are returned with zero rows.
    """
    n_features = array.shape[2]
    windows, targets = [], []
    # The last usable window starts at n_steps - duration - 1, so that its
    # target is the final element of the series.
    for start in range(array.shape[0] - duration):
        stop = start + duration
        window = array[start:stop, 0, :]
        target = array[stop, 0, 0]
        if np.isnan(window).any() or np.isnan(target):
            continue
        windows.append(window)
        targets.append(target)
    # Stack once at the end instead of re-allocating the arrays per sample.
    x = np.asarray(windows, dtype=float).reshape(len(windows), duration, n_features)
    y = np.asarray(targets, dtype=float).reshape(len(targets), 1)
    return x, y

Create Model

In [9]:
# Bidirectional LSTM over (time_step, num_feature) windows with a single
# regression output.
model = Sequential()
model.add(Bidirectional(LSTM(128,activation='relu', input_shape=(time_step,num_feature))))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.build(input_shape=(1,time_step,num_feature))
model.summary()

# The windowed datasets do not change between training rounds: build them once.
x_train, y_train = split_train(train,time_step)
x_test, y_test = split_train(test,time_step)
y_test = y_test.ravel()

target_mse = 0.17   # keep training until the test MSE is no longer above this
max_rounds = 50     # upper bound so the cell always terminates
mse = float('inf')

# NOTE(review): the stopping rule is evaluated on the test set, so the final
# test metrics are optimistically biased. Consider a separate validation split.
for round_idx in range(max_rounds):
    model.fit(x_train, y_train, epochs=10)
    model.save('./model/Bidirect-LSTM_24h_2feature.h5')
    y_predict = np.array(model.predict(x_test)).ravel()
    mse = mean_squared_error(y_test, y_predict)
    mae = mean_absolute_error(y_test, y_predict)
    print("Mean Squared Error (MSE):", mse)
    print("Mean Absolute Error (MAE):", mae)
    if not mse > target_mse:
        break
else:
    print("Stopped after", max_rounds, "rounds without reaching the target MSE of", target_mse)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_1 (Bidirecti  (1, 256)                  145408    
 onal)                                                           
                                                                 
 dense_1 (Dense)             (1, 1)                    257       
                                                                 
Total params: 145665 (569.00 KB)
Trainable params: 145665 (569.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


Mean Squared Error (MSE): 0.2916567382689566
Mean Absolute Error (MAE): 0.4022655901467619
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


Mean Squared Error (MSE): 0.18612799641708036
Mean Absolute Error (MAE): 0.2632943178723545
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


Mean Squared Error (MSE): 0.18236685836969538
Mean Absolute Error (MAE): 0.2944413743986883
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


Mean Squared Error (MSE): 0.1664684129569363
Mean Absolute Error (MAE): 0.2591720069291833


Load Model

In [None]:
# Optional: uncomment to load the model file written by the training cell
# instead of training again.
# model = load_model('./model/Bidirect-LSTM_24h_2feature.h5')
# model.build(input_shape=(1,time_step,num_feature))
# model.summary()

Predicting 1 hour ahead from the previous 24 hourly data points

In [None]:
# y_test / y_predict hold one value per usable test window (windows containing
# NaN were skipped), so the x-axis is a sample index, not a calendar date.
time = np.arange(len(y_predict))
fig, ax = plt.subplots(figsize=(100,6))
ax.plot(time, y_test, label="Real data", color='blue')
ax.plot(time, y_predict, label="Predict", color='red')
ax.set_xlabel('Test sample index')
ax.set_ylabel('Value')
ax.set_title('Time Series Plot')
ax.legend()
plt.show()

Predicting 3 days (72 hours) ahead from the previous 24 hourly data points

In [None]:
def period_predict(array: np.ndarray, hours_predict: int, time: np.ndarray):
    """Forecast several hours ahead by feeding each prediction back as input.

    Parameters
    ----------
    array : np.ndarray
        Input windows of shape (n_windows, duration, num_feature), with the
        predicted quantity in feature 0 and the one-hot month in the rest.
    hours_predict : int
        Number of one-hour steps to forecast.
    time : np.ndarray
        datetime64 array of shape (n_windows, 1) holding the timestamp of the
        last input step of each window.

    Returns
    -------
    np.ndarray
        Predictions of shape (n_windows, hours_predict, 1).

    Relies on the notebook-level ``model`` and on ``encoder`` being already
    fitted. The encoder is only applied here, never re-fitted, so the month
    columns keep the layout the model was trained with regardless of which
    months occur in a given step.
    """
    time = np.asarray(time)
    y_hat = np.empty((array.shape[0], hours_predict))
    for i in range(hours_predict):
        # `array` grows by one step per iteration, so dropping the first i
        # steps always leaves the most recent window of the original length.
        step = np.array(model.predict(array[:, i:, :])).reshape(-1, 1)
        y_hat[:, i] = step[:, 0]
        # Timestamp, and hence month, of the step that was just predicted.
        time = time + np.timedelta64(1, 'h')
        months = (time.astype('datetime64[M]').astype(int) % 12 + 1).reshape(-1, 1)
        months = encoder.transform(months).toarray()
        # Append the prediction plus its month encoding as a new time step.
        data = np.column_stack((y_hat[:, i].reshape(-1, 1), months)).reshape(-1, 1, num_feature)
        array = np.concatenate((array, data), axis=1)
    return y_hat.reshape(y_hat.shape[0], y_hat.shape[1], 1)

def split_test(array: np.ndarray, duration: int, hours_predict: int, time_index: np.ndarray):
    """Cut a series into (input window, following values, timestamp) triples.

    Parameters
    ----------
    array : np.ndarray
        Series of shape (n_steps, 1, n_features). Feature 0 is the value to
        predict.
    duration : int
        Number of consecutive steps in each input window.
    hours_predict : int
        Number of steps after the window to return as ground truth.
    time_index : np.ndarray
        One timestamp per row of ``array``. The entry at a window's first row
        is returned for that window.

    Returns
    -------
    x : np.ndarray
        Input windows, shape (n_samples, duration, n_features).
    y : np.ndarray
        Feature 0 over the ``hours_predict`` steps after each window, shape
        (n_samples, hours_predict, 1).
    time : np.ndarray
        Timestamps of the kept windows, shape (n_samples, 1).

    Windows whose inputs or targets contain NaN are skipped.
    """
    n_features = array.shape[2]
    stamps = np.asarray(time_index)
    windows, targets, kept = [], [], []
    # The last usable window is the one whose targets end on the final row.
    for start in range(array.shape[0] - duration - hours_predict + 1):
        stop = start + duration
        window = array[start:stop, 0, :]
        target = array[stop:stop + hours_predict, 0, 0]
        if np.isnan(window).any() or np.isnan(target).any():
            continue
        windows.append(window)
        targets.append(target)
        kept.append(start)
    # Stack once at the end instead of re-allocating the arrays per sample.
    x = np.asarray(windows, dtype=float).reshape(len(windows), duration, n_features)
    y = np.asarray(targets, dtype=float).reshape(len(targets), hours_predict, 1)
    time = stamps[np.asarray(kept, dtype=int)].reshape(-1, 1)
    return x, y, time

In [None]:
# Rebuild the test split from the hourly series, this time keeping the
# timestamps alongside the encoded array.
test = temp[(temp.index.year>2022) | ((temp.index.year==2022)&(temp.index.month>=9))]
time = np.array(test.index, dtype='datetime64')
month = np.array(test.index.month).reshape(-1,1)
# Apply the already-fitted encoder; re-fitting it on evaluation data could
# change the month column layout.
month = encoder.transform(month).toarray()
test = np.column_stack((test.to_numpy(), month)).reshape(-1,1,num_feature)
# Shift each timestamp to the last step of the time_step-long window that
# starts at that row.
time = time + np.timedelta64(time_step - 1,'h')

In [None]:
# Build 24-step input windows together with the 72 true values that follow
# each one, then forecast those 72 hours by feeding predictions back in.
new_x_test, new_y_test, new_time = split_test(test, duration=24, hours_predict=72, time_index=time)
new_y_predict = period_predict(new_x_test, hours_predict=72, time=new_time)

In [None]:
# Errors for each consecutive 6-hour slice of the 72-hour forecast horizon.
slice_hours = 6
for i in range(72 // slice_hours):
    lo, hi = slice_hours * i, slice_hours * (i + 1)
    frac_y_predict = new_y_predict[:, lo:hi].ravel()
    frac_y_test = new_y_test[:, lo:hi].ravel()
    print("Errors from " + str(lo) + "h to " + str(hi) + " h:")
    new_mse = mean_squared_error(frac_y_test, frac_y_predict)
    new_mae = mean_absolute_error(frac_y_test, frac_y_predict)
    print("Mean Squared Error (MSE):", new_mse)
    print("Mean Absolute Error (MAE):", new_mae)

# Errors over the whole horizon. The arrays are flattened inside the calls so
# that new_y_test / new_y_predict keep their original shape and this cell can
# be re-run without rebuilding them.
new_mse = mean_squared_error(new_y_test.ravel(), new_y_predict.ravel())
new_mae = mean_absolute_error(new_y_test.ravel(), new_y_predict.ravel())
print("Errors in 72 hours")
print("Mean Squared Error (MSE):", new_mse)
print("Mean Absolute Error (MAE):", new_mae)

In [None]:
# Largest absolute error among the one-hour-ahead test predictions.
diff = np.abs(y_predict - y_test)
print(diff.max())