<a href="https://colab.research.google.com/github/Ditsuhi/Nitrogen_Dioxide_Prediction/blob/main/Comparing_BiConvLSTM_ConvLSTM_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import all required libraries

import zipfile
from glob import glob
import re
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from keras import layers
from keras.regularizers import l2
from keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from scipy.interpolate import NearestNDInterpolator
from keras.models import Sequential
from keras.layers import ConvLSTM2D, BatchNormalization
from keras.layers import Bidirectional
from keras.layers import  Conv2D 
from time import time


In [None]:
# Load the 2019 and 2020 Madrid tables; the CSV column 'Unnamed: 0' becomes
# the row index of each DataFrame.
_read_options = {'index_col': 'Unnamed: 0'}
datafr_2019 = pd.read_csv('/content/Madrid_FIN_WDUpdated_2019.csv', **_read_options)
datafr_2020 = pd.read_csv('/content/Madrid_FIN_WDUpdated_2020.csv', **_read_options)

In [None]:
# Keep only the 16 columns used by the models, in a fixed order:
# six leading measurement columns, temperature and humidity, then the eight
# wind-direction category columns.
_leading_columns = ['NO2', 'intensidad', 'ocupacion', 'windSpeed', 'Pressure', 'SolarRad']
_wind_direction_columns = [
    'windDir_Categ_east',
    'windDir_Categ_north',
    'windDir_Categ_northeast',
    'windDir_Categ_northwest',
    'windDir_Categ_south',
    'windDir_Categ_southeast',
    'windDir_Categ_southwest',
    'windDir_Categ_west',
]

# NOTE(review): the 2019 selection uses ' Temp' / ' Humidity' (leading space)
# while the 2020 selection uses 'Temp' / 'Humidity' -- presumably the two CSV
# headers differ; confirm against the files.
datafr_new_2019 = datafr_2019[_leading_columns + [' Temp', ' Humidity'] + _wind_direction_columns]

datafr_new_2020 = datafr_2020[_leading_columns + ['Temp', 'Humidity'] + _wind_direction_columns]

In [None]:
# Convert both yearly tables to NumPy arrays and fold the flat rows into
# blocks of shape (n_blocks, 340, 16): 340 consecutive rows per block,
# 16 selected columns per row.
# NOTE(review): presumably each block is one time step over a 20x17 grid
# (340 cells) -- confirm against the data description.
data_np_2019 = np.asarray(datafr_new_2019)
data_np_2020 = np.asarray(datafr_new_2020)
round_data_2019 = data_np_2019.reshape((-1, 340, 16))
round_data_2020 = data_np_2020.reshape((-1, 340, 16))

# Full-range slices of the reshaped arrays (no elements are dropped).
mut_data_2019 = round_data_2019[...]
mut_data_2020 = round_data_2020[...]

# The first 2184 blocks of 2020 are used for validation, the rest for testing.
mut_data_2020_Val = mut_data_2020[:2184]
mut_data_2020_test = mut_data_2020[2184:]

In [None]:
# split the dataset into X (independent, model input) and y (dependent, prediction target)

def split_sequence(sequence, time_steps, input_steps=6):
    """Cut a sequence into sliding (input, target) window pairs.

    Window ``i`` uses ``sequence[i : i + input_steps]`` as the input and the
    ``time_steps`` entries that immediately follow it as the target. The
    window advances one position at a time.

    Args:
        sequence: Indexable sequence (list or array) whose first axis is the
            one being windowed.
        time_steps: Number of entries in each target window.
        input_steps: Number of entries in each input window. Defaults to 6,
            the value that was previously hard-coded.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` stacks the input windows
        (first two axes: ``n_windows, input_steps``) and ``y`` stacks the
        target windows (first two axes: ``n_windows, time_steps``). Both are
        empty arrays when the sequence is shorter than one full window pair.
    """
    window_length = input_steps + time_steps
    X, y = [], []
    # The "+ 1" keeps the final window, whose target ends exactly at the end
    # of the sequence; the previous bounds check discarded it.
    for start in range(len(sequence) - window_length + 1):
        split = start + input_steps
        X.append(sequence[start:split])
        y.append(sequence[split:split + time_steps])
    return np.array(X), np.array(y)
 

# Input sequences: 2019 for training, the two 2020 slices for validation and testing.
raw_seq_2019, raw_seq_2020_val, raw_seq_2020_test = (
    mut_data_2019,
    mut_data_2020_Val,
    mut_data_2020_test,
)

# Number of time steps handed to split_sequence for every split (6 hours).
time_steps = 6

# Window each split into (input, target) pairs; inputs are still un-normalised here.
(X_train_notNorm, y_train), (X_val_notNorm, y_val), (X_test_notNorm, y_test) = (
    split_sequence(raw_sequence, time_steps)
    for raw_sequence in (raw_seq_2019, raw_seq_2020_val, raw_seq_2020_test)
)

#X_train_notNorm, X_test_notNorm, y_train_notNorm, y_test_notNorm = train_test_split(X, y, test_size=0.2, shuffle = False)

In [None]:
# Min-max normalise the model inputs to the range [0, 1].
# The scaler is fitted on the training inputs only and then applied unchanged
# to the validation and test inputs. The targets (y_*) are not scaled here.
number_selected_columns = 16

# NOTE(review): copy=False lets the scaler work in place, so the *_notNorm
# arrays may hold scaled values after this cell -- confirm before reusing them.
scaler = MinMaxScaler(feature_range=(0, 1), copy=False)

# The scaler works on 2-D data, so flatten everything to (rows, columns).
_flat_shape = (-1, number_selected_columns)
X_train_Normalised = X_train_notNorm.reshape(_flat_shape)
X_val_Normalised = X_val_notNorm.reshape(_flat_shape)
X_test_Normalised = X_test_notNorm.reshape(_flat_shape)

X_train_scaled = scaler.fit_transform(X_train_Normalised)
X_val_scaled = scaler.transform(X_val_Normalised)
X_test_scaled = scaler.transform(X_test_Normalised)

# Restore the original 4-D window layout of each split.
X_train = X_train_scaled.reshape(X_train_notNorm.shape)
X_val = X_val_scaled.reshape(X_val_notNorm.shape)
X_test = X_test_scaled.reshape(X_test_notNorm.shape)

In [None]:
# ConvLSTM layout: every time step becomes a single-channel frame of shape
# (20, 17 * number_selected_columns, 1); the first two axes (samples, time
# steps) are kept as they are.
# NOTE(review): this folds the column axis into the frame width -- presumably
# intentional so that all columns are predicted jointly; confirm.
_frame_shape = (20, 17 * number_selected_columns, 1)

X_train_reshaped = X_train.reshape(X_train.shape[:2] + _frame_shape)
y_train_reshaped = y_train.reshape(y_train.shape[:2] + _frame_shape)
X_val_reshaped = X_val.reshape(X_val.shape[:2] + _frame_shape)
y_val_reshaped = y_val.reshape(y_val.shape[:2] + _frame_shape)
X_test_reshaped = X_test.reshape(X_test.shape[:2] + _frame_shape)
y_test_reshaped = y_test.reshape(y_test.shape[:2] + _frame_shape)

In [None]:
# define the architecture of the proposed model
from keras.models import Sequential
from keras.layers import ConvLSTM2D, Dropout
# Adam optimizer instance (learning rate 0.01) used as create_model's default.
opt = keras.optimizers.Adam(learning_rate=0.01)


def create_model(number_selected_columns=7, optimizer=opt, kernel_size=(3, 3), filters=16, merge_mode="concat", dropout_rate=0.2):
    """Build and compile the stacked bidirectional ConvLSTM (BiConvLSTM) model.

    The network is three identical stages -- Bidirectional(ConvLSTM2D) ->
    BatchNormalization -> Dropout -- followed by a 1x1 Conv2D with a single
    filter and ELU activation. It is compiled with MSE loss and built for
    inputs of shape (batch, 6, 20, 17 * number_selected_columns, 1).

    Args:
        number_selected_columns: Number of data columns folded into the frame
            width (width = 17 * number_selected_columns).
            NOTE(review): the default of 7 differs from the 16 columns used by
            the arrays prepared in this notebook; callers working with those
            arrays must pass the value explicitly.
        optimizer: Optimizer passed to ``model.compile``. The default is the
            module-level ``opt`` instance, which is therefore shared by every
            model created without an explicit optimizer.
        kernel_size: Convolution kernel size of each ConvLSTM2D layer.
        filters: Number of filters of each ConvLSTM2D layer.
        merge_mode: How each Bidirectional wrapper merges its two directions.
        dropout_rate: Rate of every Dropout layer.

    Returns:
        The compiled and built Sequential model.
    """
    frame_shape = (20, 17 * number_selected_columns, 1)

    model = Sequential()
    for stage in range(3):
        conv_options = dict(filters=filters, kernel_size=kernel_size, padding="same", return_sequences=True)
        if stage == 0:
            # As before, only the first ConvLSTM2D is given an input shape.
            conv_options["input_shape"] = (None,) + frame_shape
        model.add(Bidirectional(ConvLSTM2D(**conv_options), merge_mode=merge_mode))
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))

    # 1x1 convolution collapsing the feature maps to one output channel.
    model.add(Conv2D(filters=1, kernel_size=(1, 1),
                     activation='elu',
                     padding='same', data_format='channels_last'))
    model.compile(optimizer=optimizer, loss='mse')
    model.build(input_shape=(None, 6) + frame_shape)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line.
    model.summary()
    return model

In [None]:
# Build the BiConvLSTM for the same number of columns as the prepared arrays.
# create_model defaults to 7 columns (frame width 17*7), whereas the inputs
# were reshaped with number_selected_columns (frame width 17*16), so the
# value has to be passed explicitly for the shapes to match.
mod = create_model(number_selected_columns=number_selected_columns)

start = time()

# Stop once the validation loss has not improved for 5 consecutive epochs.
# NOTE(review): unlike the ConvLSTM run, restore_best_weights is not set here,
# so the weights of the last epoch are kept -- confirm this is intended.
early_stopping = EarlyStopping(monitor="val_loss", patience=5, verbose=2)
mymodel = mod.fit(X_train_reshaped, y_train_reshaped, epochs=50, verbose=2, validation_data=(X_val_reshaped, y_val_reshaped),  callbacks=[early_stopping])

print(f'Time taken to run: {time() - start} seconds')

In [None]:
# Evaluate the BiConvLSTM on the test split.
yhat = mod.predict(X_test_reshaped, verbose=1)

# Flatten predictions and targets to (rows, 20*17*columns) so the sklearn
# metrics can compare them.
n_outputs = 20 * 17 * number_selected_columns
yhat_reshaped = yhat.reshape(-1, n_outputs)
y_test_reshaped = y_test_reshaped.reshape(-1, n_outputs)

# The score was previously stored as `rsme` but printed as `rmse`, which
# raised a NameError. Arguments follow sklearn's (y_true, y_pred) order.
# NOTE(review): `squared=False` is deprecated in recent scikit-learn releases;
# it is kept so the reported value stays the same -- confirm the installed version.
rmse = mean_squared_error(y_test_reshaped, yhat_reshaped, squared=False)
mae = mean_absolute_error(y_test_reshaped, yhat_reshaped)
print('Test Score: %.2f RMSE' % (rmse))
print('Test Score: %.2f MAE' % (mae))


In [None]:
from keras.models import Sequential
from keras.layers import ConvLSTM2D, Dropout, BatchNormalization
from  keras.regularizers import l2
from keras.layers.convolutional import Conv3D, Conv2D


def create_model_conv(number_selected_columns=16, optimizer='adam', kernel_size=(5, 5), filters=32, dropout_rate=0.2, init_mode="glorot_uniform"):
    """Build and compile the unidirectional ConvLSTM comparison model.

    The network is three stages -- ConvLSTM2D -> Dropout ->
    BatchNormalization -- followed by a 1x1 Conv2D with a single filter and
    ReLU activation, compiled with MSE loss. Only the first ConvLSTM2D
    declares the input shape (None, 20, 17 * number_selected_columns, 1) and
    carries an L2 kernel regulariser (0.01).

    Args:
        number_selected_columns: Number of data columns folded into the frame
            width (width = 17 * number_selected_columns).
        optimizer: Optimizer passed to ``model.compile``.
        kernel_size: Convolution kernel size of each ConvLSTM2D layer.
        filters: Number of filters of each ConvLSTM2D layer.
        dropout_rate: Rate of every Dropout layer.
        init_mode: Kernel initializer of each ConvLSTM2D layer.

    Returns:
        The compiled Sequential model.
    """
    # Options shared by all three ConvLSTM2D layers.
    shared_options = dict(
        filters=filters,
        kernel_initializer=init_mode,
        kernel_size=kernel_size,
        padding="same",
        return_sequences=True,
    )
    # Options that apply to the first layer only.
    first_layer_options = dict(
        input_shape=(None, 20, 17 * number_selected_columns, 1),
        kernel_regularizer=l2(0.01),
    )

    model = Sequential()
    # Two further ConvLSTM2D stages that were commented out here have been
    # removed; three stages are active.
    for extra_options in (first_layer_options, {}, {}):
        model.add(ConvLSTM2D(**shared_options, **extra_options))
        model.add(Dropout(dropout_rate))
        model.add(BatchNormalization())

    # 1x1 convolution collapsing the feature maps to one output channel.
    model.add(Conv2D(filters=1, kernel_size=(1, 1),
                     activation='relu',
                     padding='same', data_format='channels_last'))
    model.compile(optimizer=optimizer, loss='mse')

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line.
    model.summary()

    return model

In [None]:
# Train the ConvLSTM model and report the wall-clock time of the run.
modConv = create_model_conv()
start = time()

# Stop after 5 epochs without a lower validation loss and roll the model back
# to the best weights seen.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5, restore_best_weights=True)
_fit_options = dict(
    epochs=50,
    verbose=2,
    validation_data=(X_val_reshaped, y_val_reshaped),
    callbacks=[es],
)
my_model = modConv.fit(X_train_reshaped, y_train_reshaped, **_fit_options)

print(f'Time taken to run: {time() - start} seconds')

In [None]:
# Evaluate the ConvLSTM on the test split.
yhat = modConv.predict(X_test_reshaped, verbose=1)

# Flatten predictions and targets to (rows, 20*17*columns) so the sklearn
# metrics can compare them.
n_outputs = 20 * 17 * number_selected_columns
yhat_reshaped = yhat.reshape(-1, n_outputs)
y_test_reshaped = y_test_reshaped.reshape(-1, n_outputs)

# The score was previously stored as `rsme` but printed as `rmse`, which
# raised a NameError. Arguments follow sklearn's (y_true, y_pred) order.
# NOTE(review): `squared=False` is deprecated in recent scikit-learn releases;
# it is kept so the reported value stays the same -- confirm the installed version.
rmse = mean_squared_error(y_test_reshaped, yhat_reshaped, squared=False)
mae = mean_absolute_error(y_test_reshaped, yhat_reshaped)
print('Test Score: %.2f RMSE' % (rmse))
print('Test Score: %.2f MAE' % (mae))

In [None]:
# LSTM layout: flatten each time step into one feature vector of length
# 340 * number_selected_columns; the first two axes (samples, time steps)
# are kept as they are.
_flat_features = (340 * number_selected_columns,)

X_train_reshaped = X_train.reshape(X_train.shape[:2] + _flat_features)
y_train_reshaped = y_train.reshape(y_train.shape[:2] + _flat_features)
X_val_reshaped = X_val.reshape(X_val.shape[:2] + _flat_features)
y_val_reshaped = y_val.reshape(y_val.shape[:2] + _flat_features)
X_test_reshaped = X_test.reshape(X_test.shape[:2] + _flat_features)
y_test_reshaped = y_test.reshape(y_test.shape[:2] + _flat_features)

In [None]:
# define model
from keras.layers import LSTM
from keras.layers import Dense,  Dropout
from keras.models import Sequential

# LSTM comparison model: a Dense projection to 128 units, two stacked LSTM
# layers (2048 units each, returning the full sequence) with dropout after
# each, and a Dense output layer with one unit per flattened target value.
_input_features = 20 * 17 * number_selected_columns

model = Sequential([
    Dense(128, input_shape=(X_train.shape[1], _input_features)),
    LSTM(2048, return_sequences=True),
    Dropout(0.2),
    LSTM(2048, return_sequences=True),
    Dropout(0.2),
    Dense(340 * number_selected_columns),
])
model.compile(optimizer='adam', loss='mse')

In [None]:
# Train the LSTM model and report the wall-clock time of the run.
start = time()

# Stop once the validation loss has not improved for 5 consecutive epochs.
early_stopping = EarlyStopping(monitor="val_loss", patience=5, verbose=2)
_fit_options = dict(
    epochs=50,
    verbose=2,
    validation_data=(X_val_reshaped, y_val_reshaped),
    callbacks=[early_stopping],
)
mymodel = model.fit(X_train_reshaped, y_train_reshaped, **_fit_options)

print(f'Time taken to run: {time() - start} seconds')

In [None]:
# Evaluate the LSTM on the test split.
yhat = model.predict(X_test_reshaped, verbose=1)

# Flatten predictions and targets to (rows, 20*17*columns) so the sklearn
# metrics can compare them.
n_outputs = 20 * 17 * number_selected_columns
yhat_reshaped = yhat.reshape(-1, n_outputs)
y_test_reshaped = y_test_reshaped.reshape(-1, n_outputs)

# The score was previously stored as `rsme` but printed as `rmse`, which
# raised a NameError. Arguments follow sklearn's (y_true, y_pred) order.
# NOTE(review): `squared=False` is deprecated in recent scikit-learn releases;
# it is kept so the reported value stays the same -- confirm the installed version.
rmse = mean_squared_error(y_test_reshaped, yhat_reshaped, squared=False)
mae = mean_absolute_error(y_test_reshaped, yhat_reshaped)
print('Test Score: %.2f RMSE' % (rmse))
print('Test Score: %.2f MAE' % (mae))