BIDIRECTIONAL LSTM AND XGBOOST FOR 1 FEATURE, WINDOW DURATION 24 AND 48 HOURS

In [None]:
# Number of input features per time step; used as the last axis of the input
# shape (1, time_step, num_feature) passed to the models' build() calls.
num_feature = 1

Importing libraries

In [None]:
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential, load_model
from keras.layers import Bidirectional, LSTM, Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error
import xgboost as xgb

Importing data

In [None]:
# Load the raw sensor log. Every row of the file is treated as data (there is
# no header row), so the column names are supplied here in file order.
filename = '../data/07-09-2023.csv'
all_columns = ["Record_ID","Time","Temperature","Disolved Oxygen","Salinity","pH","Turbidity","DHT Temperature","DHT Moisture","Longitude","Latitude"]
sensor_columns = ["Temperature","Disolved Oxygen","Salinity","pH","Turbidity"]

# Read only the timestamp and the water-quality columns, typed on load, then
# index the frame by parsed timestamp in chronological order.
df = (
    pd.read_csv(
        filename,
        header=None,
        names=all_columns,
        usecols=["Time"] + sensor_columns,
        dtype={"Time": str, **{column: float for column in sensor_columns}},
    )
    .assign(Time=lambda frame: pd.to_datetime(frame["Time"], format="%d/%m/%Y %H:%M:%S"))
    .set_index("Time")
    .sort_index()
)
df.describe()

In [None]:
# Hourly mean temperature, computed only from readings >= 18. Hours without
# any remaining reading become NaN.
# NOTE(review): the 18 cut-off presumably removes implausible sensor readings — confirm.
temp = (
    df['Temperature']
    .loc[lambda readings: readings >= 18]
    .resample('H')
    .mean()
)

Splitting train and test data

In [None]:
# Chronological hold-out: hours before 1 September 2022 form the training set,
# hours from that date onwards form the test set.
split_date = pd.Timestamp(2022, 9, 1)
train = temp[temp.index < split_date]
test = temp[temp.index >= split_date]

Observe data

In [None]:
# Overview of the hourly training series. The line carries a label so that
# plt.legend() has an entry to show.
plt.figure(figsize=(100,6))
plt.plot(train.index, train.to_numpy().ravel(), label='Train temperature')
plt.xlabel('Time')
plt.ylabel('Temperature')
plt.title('Hourly mean temperature (training set)')
plt.legend()
plt.show()

In [None]:
# train.resample('D').agg(['mean', 'sum', 'std']).plot(subplots = True, title='Temperature resampled over day', color='red')
# plt.show()

In [None]:
# train.resample('M').mean().plot(kind='bar')
# plt.ylabel('Temperature')
# plt.title('Temperature per month (averaged over month)')
# plt.show()

Function to prepare training data

In [None]:
def split_train(array: np.array, duration: int):
    """Turn a 1-D series into supervised samples for one-step-ahead forecasting.

    Each sample pairs ``duration`` consecutive values (the input window) with
    the value that immediately follows them (the target). Samples whose window
    or target contains NaN are discarded.

    Args:
        array: 1-D numeric series ordered in time (an ``(N, 1)`` array is also
            accepted).
        duration: Number of consecutive values in each input window.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape ``(n_samples, duration, 1)`` and
        ``y`` has shape ``(n_samples, 1)``. Both are empty (``n_samples == 0``)
        when the series holds fewer than ``duration + 1`` values.

    Raises:
        ValueError: If ``duration`` is negative or ``array`` holds more than
            one value per time step.
    """
    if duration < 0:
        raise ValueError("duration must be non-negative")
    series = np.asarray(array, dtype=float)
    # Collapses (N,) or (N, 1) to (N,); raises ValueError for wider arrays.
    series = series.reshape(series.shape[0])

    # A series of length N yields N - duration (window, target) pairs; the last
    # pair uses the final value of the series as its target.
    n_samples = series.shape[0] - duration
    if n_samples <= 0:
        return np.empty((0, duration, 1)), np.empty((0, 1))

    # Row i holds positions i .. i + duration: the window followed by its target.
    positions = np.arange(n_samples)[:, np.newaxis] + np.arange(duration + 1)
    samples = series[positions]
    # Drop every sample that touches a missing value.
    samples = samples[~np.isnan(samples).any(axis=1)]

    # The trailing axis of length 1 is the single feature per time step.
    x = samples[:, :duration, np.newaxis]
    y = samples[:, duration:]
    return x, y

Bidirectional LSTM

Bidirect-LSTM_24h_1feature_64node

In [None]:
# Input window length: number of consecutive hourly values fed to the model.
time_step = 24

In [None]:
# Build (window, next value) training samples from the hourly training series.
x_train, y_train = split_train(array=train.to_numpy(), duration=time_step)

Create and train model

In [None]:
# Training code for model_1, kept for reference but disabled: the next code
# cell loads the model saved at the path used in the last line below.
# model_1 = Sequential()
# model_1.add(Bidirectional(LSTM(64,activation='relu', input_shape=(time_step,num_feature))))
# model_1.add(Dense(1))
# model_1.compile(optimizer='adam', loss='mse')
# model_1.build(input_shape=(1,time_step,num_feature))
# model_1.summary()
# model_1.fit(x_train, y_train, epochs=10)
# model_1.save('../model/Bidirect-LSTM_24h_1feature_64node.h5')

Load model

In [None]:
# Restore the saved network from disk.
model_1 = load_model('../model/Bidirect-LSTM_24h_1feature_64node.h5')
# NOTE(review): a model restored with load_model is presumably already built;
# confirm whether this explicit build call is still required.
model_1.build(input_shape=(1,time_step,num_feature))
model_1.summary()

Test model

In [None]:
# Evaluate model_1 on the held-out windows and report the regression errors.
x_test, y_test = split_train(test.to_numpy(), time_step)
y_test = y_test.ravel()
y_predict = np.array(model_1.predict(x_test)).ravel()
mae = mean_absolute_error(y_test, y_predict)
mse = mean_squared_error(y_test, y_predict)
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)

In [None]:
# Overlay predictions on the ground truth. The x axis is the position of each
# sample in the test set, not a calendar date.
time = np.arange(len(y_predict))
fig, ax = plt.subplots(figsize=(100,6))
ax.plot(time, y_test, label="Real data", color='blue')
ax.plot(time, y_predict, label="Predict", color='red')
ax.set_xlabel('Test sample index')
ax.set_ylabel('Temperature')
ax.set_title('Bidirect-LSTM_24h_1feature_64node: predicted vs. real temperature')
ax.legend()
plt.show()

Bidirect-LSTM_48h_1feature_64node

In [None]:
# Input window length in hourly values. This section trains/loads the 48h
# model ('Bidirect-LSTM_48h_1feature_64node'), so the window must be 48 steps.
time_step = 48

In [None]:
# Build (window, next value) training samples from the hourly training series.
x_train, y_train = split_train(array=train.to_numpy(), duration=time_step)

Create and train model

In [None]:
# Training code for model_2, kept for reference but disabled: the next code
# cell loads the model saved at the path used in the last line below.
# model_2 = Sequential()
# model_2.add(Bidirectional(LSTM(64,activation='relu', input_shape=(time_step,num_feature))))
# model_2.add(Dense(1))
# model_2.compile(optimizer='adam', loss='mse')
# model_2.build(input_shape=(1,time_step,num_feature))
# model_2.summary()
# model_2.fit(x_train, y_train, epochs=10)
# model_2.save('../model/Bidirect-LSTM_48h_1feature_64node.h5')

Load model

In [None]:
# Restore the saved network from disk.
model_2 = load_model('../model/Bidirect-LSTM_48h_1feature_64node.h5')
# NOTE(review): a model restored with load_model is presumably already built;
# confirm whether this explicit build call is still required.
model_2.build(input_shape=(1,time_step,num_feature))
model_2.summary()

Test model

In [None]:
# Evaluate model_2 on the held-out windows and report the regression errors.
x_test, y_test = split_train(test.to_numpy(), time_step)
y_test = y_test.ravel()
y_predict = np.array(model_2.predict(x_test)).ravel()
mae = mean_absolute_error(y_test, y_predict)
mse = mean_squared_error(y_test, y_predict)
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)

In [None]:
# Overlay predictions on the ground truth. The x axis is the position of each
# sample in the test set, not a calendar date.
time = np.arange(len(y_predict))
fig, ax = plt.subplots(figsize=(100,6))
ax.plot(time, y_test, label="Real data", color='blue')
ax.plot(time, y_predict, label="Predict", color='red')
ax.set_xlabel('Test sample index')
ax.set_ylabel('Temperature')
ax.set_title('Bidirect-LSTM_48h_1feature_64node: predicted vs. real temperature')
ax.legend()
plt.show()

Bidirect-LSTM_24h_1feature_128node

In [None]:
# Input window length in hourly values for the 128-node model, whose saved
# file name ('Bidirect-LSTM_24h_1feature_128node.h5') refers to 24h.
time_step = 24

In [None]:
# Build (window, next value) training samples from the hourly training series.
x_train, y_train = split_train(array=train.to_numpy(), duration=time_step)

Create and train model

In [None]:
# Training code for model_3, kept for reference but disabled: the next code
# cell loads the model saved at the path used in the last line below.
# model_3 = Sequential()
# model_3.add(Bidirectional(LSTM(128,activation='relu', input_shape=(time_step,num_feature))))
# model_3.add(Dense(1))
# model_3.compile(optimizer='adam', loss='mse')
# model_3.build(input_shape=(1,time_step,num_feature))
# model_3.summary()
# model_3.fit(x_train, y_train, epochs=10)
# model_3.save('../model/Bidirect-LSTM_24h_1feature_128node.h5')

Load model

In [None]:
# Restore the saved network from disk.
model_3 = load_model('../model/Bidirect-LSTM_24h_1feature_128node.h5')
# NOTE(review): a model restored with load_model is presumably already built;
# confirm whether this explicit build call is still required.
model_3.build(input_shape=(1,time_step,num_feature))
model_3.summary()

Test model

In [None]:
# Evaluate model_3 on the held-out windows and report the regression errors.
x_test, y_test = split_train(test.to_numpy(), time_step)
y_test = y_test.ravel()
y_predict = np.array(model_3.predict(x_test)).ravel()
mae = mean_absolute_error(y_test, y_predict)
mse = mean_squared_error(y_test, y_predict)
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)

In [None]:
# Overlay predictions on the ground truth. The x axis is the position of each
# sample in the test set, not a calendar date.
time = np.arange(len(y_predict))
fig, ax = plt.subplots(figsize=(100,6))
ax.plot(time, y_test, label="Real data", color='blue')
ax.plot(time, y_predict, label="Predict", color='red')
ax.set_xlabel('Test sample index')
ax.set_ylabel('Temperature')
ax.set_title('Bidirect-LSTM_24h_1feature_128node: predicted vs. real temperature')
ax.legend()
plt.show()

XGBoost

XGBoost 24 hours 24 max depth

In [None]:
# Input window length: number of consecutive hourly values used as XGBoost features.
time_step = 24

In [None]:
# Build (window, next value) samples for both the training and the test series.
x_train, y_train = split_train(array=train.to_numpy(), duration=time_step)
x_test, y_test = split_train(array=test.to_numpy(), duration=time_step)

In [None]:
# XGBoost takes tabular input: flatten every window to one row of time_step
# columns and every target array to 1-D.
# NOTE(review): this cell cannot be re-run on its own, because x_train/x_test
# are DataFrames afterwards and DataFrame has no reshape method.
x_train = pd.DataFrame(x_train.reshape(-1, time_step))
x_test = pd.DataFrame(x_test.reshape(-1, time_step))
y_train = y_train.reshape(-1)
y_test = y_test.reshape(-1)

In [None]:
# Hyper-parameters forwarded to the XGBoost regressor.
params = {
    'objective': 'reg:squarederror',
    'n_estimators': 100,
    'max_depth': 24,
    'learning_rate': 0.1,
    'subsample': 0.8
}

# Fit on the training windows and predict the held-out targets.
model_4 = xgb.XGBRegressor(**params)
model_4.fit(x_train, y_train)
y_predict = model_4.predict(x_test).ravel()

mse = mean_squared_error(y_test, y_predict)
mae = mean_absolute_error(y_test, y_predict)
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)

# Overlay predictions on the ground truth. The x axis is the position of each
# sample in the test set, not a calendar date.
time = np.arange(len(y_predict))
fig, ax = plt.subplots(figsize=(100,6))
ax.plot(time, y_test, label="Real data", color='blue')
ax.plot(time, y_predict, label="Predict", color='red')
ax.set_xlabel('Test sample index')
ax.set_ylabel('Temperature')
ax.set_title('XGBoost 24 hours 24 max depth: predicted vs. real temperature')
ax.legend()
plt.show()

XGBoost 48 hours 24 max depth

In [None]:
# Input window length: number of consecutive hourly values used as XGBoost features.
time_step = 48

In [None]:
# Build (window, next value) samples for both the training and the test series.
x_train, y_train = split_train(array=train.to_numpy(), duration=time_step)
x_test, y_test = split_train(array=test.to_numpy(), duration=time_step)

In [None]:
# XGBoost takes tabular input: flatten every window to one row of time_step
# columns and every target array to 1-D.
# NOTE(review): this cell cannot be re-run on its own, because x_train/x_test
# are DataFrames afterwards and DataFrame has no reshape method.
x_train = pd.DataFrame(x_train.reshape(-1, time_step))
x_test = pd.DataFrame(x_test.reshape(-1, time_step))
y_train = y_train.reshape(-1)
y_test = y_test.reshape(-1)

In [None]:
# Hyper-parameters forwarded to the XGBoost regressor.
params = {
    'objective': 'reg:squarederror',
    'n_estimators': 100,
    'max_depth': 24,
    'learning_rate': 0.1,
    'subsample': 0.8
}

# Fit on the training windows and predict the held-out targets.
model_4 = xgb.XGBRegressor(**params)
model_4.fit(x_train, y_train)
y_predict = model_4.predict(x_test).ravel()

mse = mean_squared_error(y_test, y_predict)
mae = mean_absolute_error(y_test, y_predict)
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)

# Overlay predictions on the ground truth. The x axis is the position of each
# sample in the test set, not a calendar date.
time = np.arange(len(y_predict))
fig, ax = plt.subplots(figsize=(100,6))
ax.plot(time, y_test, label="Real data", color='blue')
ax.plot(time, y_predict, label="Predict", color='red')
ax.set_xlabel('Test sample index')
ax.set_ylabel('Temperature')
ax.set_title('XGBoost 48 hours 24 max depth: predicted vs. real temperature')
ax.legend()
plt.show()

XGBoost 24 hours 48 max depth

In [None]:
# Input window length: number of consecutive hourly values used as XGBoost features.
time_step = 24

In [None]:
# Build (window, next value) samples for both the training and the test series.
x_train, y_train = split_train(array=train.to_numpy(), duration=time_step)
x_test, y_test = split_train(array=test.to_numpy(), duration=time_step)

In [None]:
# XGBoost takes tabular input: flatten every window to one row of time_step
# columns and every target array to 1-D.
# NOTE(review): this cell cannot be re-run on its own, because x_train/x_test
# are DataFrames afterwards and DataFrame has no reshape method.
x_train = pd.DataFrame(x_train.reshape(-1, time_step))
x_test = pd.DataFrame(x_test.reshape(-1, time_step))
y_train = y_train.reshape(-1)
y_test = y_test.reshape(-1)

In [None]:
# Hyper-parameters forwarded to the XGBoost regressor.
params = {
    'objective': 'reg:squarederror',
    'n_estimators': 100,
    'max_depth': 48,
    'learning_rate': 0.1,
    'subsample': 0.8
}

# Fit on the training windows and predict the held-out targets.
model_4 = xgb.XGBRegressor(**params)
model_4.fit(x_train, y_train)
y_predict = model_4.predict(x_test).ravel()

mse = mean_squared_error(y_test, y_predict)
mae = mean_absolute_error(y_test, y_predict)
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)

# Overlay predictions on the ground truth. The x axis is the position of each
# sample in the test set, not a calendar date.
time = np.arange(len(y_predict))
fig, ax = plt.subplots(figsize=(100,6))
ax.plot(time, y_test, label="Real data", color='blue')
ax.plot(time, y_predict, label="Predict", color='red')
ax.set_xlabel('Test sample index')
ax.set_ylabel('Temperature')
ax.set_title('XGBoost 24 hours 48 max depth: predicted vs. real temperature')
ax.legend()
plt.show()