In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from keras.layers import Dense, Dropout, Activation, Flatten, LSTM, TimeDistributed, RepeatVector
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, roc_auc_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [10]:
# Import data
def load_data(path: str = 'd1.xlsx') -> pd.DataFrame:
  """Read the NOx series from an Excel workbook and drop rows without a reading.

  Parameters
  ----------
  path : str, optional
      Workbook to read. Defaults to ``'d1.xlsx'`` so existing calls without
      arguments keep loading the same file.

  Returns
  -------
  pd.DataFrame
      Frame whose columns are renamed to ``Date`` and ``NOx``, with every row
      whose ``NOx`` value is missing removed. Index labels of the surviving
      rows are left as read (no re-indexing).
  """
  df = pd.read_excel(path)
  # The sheet must hold exactly two columns; pandas raises on a length
  # mismatch when the labels are assigned.
  df.columns = ['Date', 'NOx']

  return df.dropna(subset=['NOx'])

In [11]:
# MinMaxScaler 
# For speeding up the model fitting and improving the accuracy
def minmaxscaler(data: pd.DataFrame, feature_scaler=None) -> pd.DataFrame:
  """Return a copy of ``data`` whose ``NOx`` column has been rescaled.

  The scaler is fitted on the ``NOx`` column here (``fit_transform``), so the
  same scaler object can be used afterwards to map values back with
  ``inverse_transform``.

  Parameters
  ----------
  data : pd.DataFrame
      Frame containing a ``NOx`` column. It is not modified; the scaled
      values are written to a copy.
  feature_scaler : object, optional
      Any object exposing ``fit_transform`` on a 2-D column array. When
      omitted, the module-level ``scaler`` is used, which keeps existing
      ``minmaxscaler(data)`` calls working unchanged.

  Returns
  -------
  pd.DataFrame
      Copy of ``data`` with ``NOx`` replaced by the scaled values.
  """
  # Only look up the global when no scaler was supplied explicitly.
  active_scaler = scaler if feature_scaler is None else feature_scaler

  # Work on a copy so the caller's frame keeps its original-unit values.
  scaled = data.copy()
  # The scaler expects a 2-D (n_samples, 1) array; flatten back afterwards.
  nox_column = scaled['NOx'].values.reshape(-1, 1)
  scaled['NOx'] = active_scaler.fit_transform(nox_column).ravel()

  return scaled

In [12]:
# Build data set
def build_dataset(data, n_steps_in=None, n_steps_out=None):
  """Cut a series into overlapping (input window, target window) pairs.

  Window ``i`` uses rows ``[i, i + steps_in)`` as input and the following
  ``steps_out`` rows as target; consecutive windows are shifted by one row.

  Parameters
  ----------
  data : pd.DataFrame
      Frame with a ``Date`` column (dropped) and one or more value columns.
  n_steps_in, n_steps_out : int, optional
      Input and target window lengths. When omitted, the module-level
      ``n_in`` / ``n_out`` are used, so ``build_dataset(data)`` behaves as
      before.

  Returns
  -------
  (np.ndarray, np.ndarray)
      ``X`` with shape ``(n_windows, steps_in, n_columns)`` and ``Y`` with
      shape ``(n_windows, steps_out, n_columns)``. When the series is too
      short for a single window, both arrays have zero windows but keep the
      trailing dimensions.
  """
  steps_in = n_in if n_steps_in is None else n_steps_in
  steps_out = n_out if n_steps_out is None else n_steps_out

  # Convert to a NumPy array once; slicing it is much cheaper than going
  # through DataFrame.iloc on every iteration.
  values = data.drop(["Date"], axis=1).values
  n_windows = values.shape[0] - steps_in - steps_out + 1

  if n_windows <= 0:
    # Not enough rows for one full pair: return empty, correctly shaped arrays.
    n_cols = values.shape[1]
    return (np.empty((0, steps_in, n_cols), dtype=values.dtype),
            np.empty((0, steps_out, n_cols), dtype=values.dtype))

  X = np.array([values[i:i + steps_in] for i in range(n_windows)])
  Y = np.array([values[i + steps_in:i + steps_in + steps_out] for i in range(n_windows)])

  return X, Y

In [13]:
# Split the train and validation datasets
def split_data(x, y):
  """Split windowed samples into a training part and a trailing validation part.

  The last ``n_val`` windows form the validation set. The training set ends
  ``n_out - 1`` windows earlier than that, so no training target window
  overlaps the rows covered by the validation targets.

  Returns the tuple ``(x_train, y_train, x_val, y_val)``.
  """
  # Negative offsets measured from the end of the window arrays.
  train_end = -(n_val + n_out - 1)
  val_start = -n_val

  return x[:train_end], y[:train_end], x[val_start:], y[val_start:]

In [14]:
# Construct the LSTM
def build_lstm():
  """Create and compile the forecasting network.

  Architecture: one LSTM layer with ``n_neuron`` units reading windows of
  shape ``(n_in, n_features)``, dropout of 0.2, and a dense layer emitting
  ``n_out`` values. Compiled with Adam (learning rate 1e-4) and MAE loss.
  """
  network = Sequential([
      LSTM(n_neuron, input_shape=(n_in, n_features)),
      Dropout(0.2),
      Dense(n_out),
  ])
  network.compile(loss='mae', optimizer=Adam(learning_rate=1e-4))

  return network

In [15]:
class RocAucMetricCallback(Callback):
    """Keras callback that scores the validation data with ROC AUC each epoch.

    After every epoch the score is stored in ``logs['roc_auc']`` (``-inf``
    when no validation data is attached to the callback) so that other
    callbacks can monitor it.

    NOTE(review): ROC AUC is a classification metric; confirm it is
    meaningful for the targets this callback is used with.
    NOTE(review): the name registered in ``params['metrics']`` is
    ``'val_roc_auc'`` while the logged key is ``'roc_auc'`` -- kept as in the
    original, verify which one consumers expect.

    Parameters
    ----------
    predict_batch_size : int, optional
        Batch size used when predicting on the validation data.
    """

    def __init__(self, predict_batch_size=1024):
        super(RocAucMetricCallback, self).__init__()
        self.predict_batch_size = predict_batch_size

    def on_batch_begin(self, batch, logs=None):
        pass

    def on_batch_end(self, batch, logs=None):
        pass

    def on_train_begin(self, logs=None):
        # `params` may be unset or lack a 'metrics' entry (presumably depends
        # on the Keras version); create what is missing instead of raising.
        if self.params is None:
            self.params = {}
        metrics = self.params.setdefault('metrics', [])
        if 'val_roc_auc' not in metrics:
            metrics.append('val_roc_auc')

    def on_train_end(self, logs=None):
        pass

    def on_epoch_begin(self, epoch, logs=None):
        pass

    def on_epoch_end(self, epoch, logs=None):
        # Avoid a shared mutable default: this method writes into `logs`.
        if logs is None:
            logs = {}
        logs['roc_auc'] = float('-inf')
        # The attribute is not guaranteed to exist or be populated; treat a
        # missing/empty value as "no validation data".
        validation_data = getattr(self, 'validation_data', None)
        if validation_data:
            predictions = self.model.predict(validation_data[0], batch_size=self.predict_batch_size)
            logs['roc_auc'] = roc_auc_score(validation_data[1], predictions)
            print('ROC_AUC - epoch:%d - score:%.6f' % (epoch + 1, logs['roc_auc']))

In [16]:
# Train the model
def model_fit(x_train, y_train, x_val, y_val):
    """Build a fresh LSTM, train it with early stopping and plot the loss curves.

    Parameters
    ----------
    x_train, y_train : array-like
        Training input windows and their target windows.
    x_val, y_val : array-like
        Validation windows, evaluated after every epoch.

    Returns
    -------
    The trained Keras model.
    """
    model = build_lstm()

    # Stop once the validation loss has not improved for 20 epochs. The
    # callback list was previously built but never handed to fit(), and it
    # monitored 'roc_auc', which is not defined for this regression target.
    my_callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, verbose=2, mode='min')
    ]
    history = model.fit(
        x_train, y_train,
        batch_size=batchsize,
        epochs=n_epochs,
        verbose=1,
        validation_data=(x_val, y_val),
        callbacks=my_callbacks,
    )

    # Learning curves: training vs. validation loss per epoch.
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='validation')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.show()

    return model

In [17]:
# --- Windowing ---
n_features = 1   # value columns fed to the LSTM per time step
n_in = 672       # time steps in each input window
n_out = 144      # time steps in each target window (size of the Dense output)
n_val = 1        # number of trailing windows held out for validation

# --- Training ---
n_neuron = 150   # units in the LSTM layer
batchsize = 128  # samples per gradient update
n_epochs = 300   # maximum number of training epochs

In [18]:
# Load the series as a frame with columns Date / NOx; rows with a missing NOx
# value are already dropped by load_data().
data = load_data()

# Disabled quick-look plot of the series (restricted to dates before 2015-12-31).
#data = data[data.Date < '20151231']
#values = data.values
#plt.figure(figsize=(20,10))
#plt.plot(values[:, 0], values[:, 1])
#plt.title(data.columns[1], y=0.5, loc='right')
#plt.show()

In [19]:
# Module-level scaler: minmaxscaler() fits it on the NOx column, and later
# cells reuse it through scaler.inverse_transform to get back original units.
scaler = MinMaxScaler(feature_range=(0, 1))
# NOTE(review): `data` is overwritten with its scaled version. Re-running this
# cell without re-running the load cell refits the new scaler on values that
# are already in [0, 1], so inverse_transform would no longer restore the
# original units. The scaler is also fitted on the full series, including the
# rows later used for validation.
data = minmaxscaler(data)

In [20]:
# Window the scaled series and hold out the trailing window(s) for validation.
data_copy = data.copy()
# x: input windows of n_in steps; y: the n_out steps that follow each window.
x, y = build_dataset(data_copy)
x_train, y_train, x_val, y_val = split_data(x, y)

In [21]:
# model_fit() constructs its own network through build_lstm(), so building one
# here first only created a model that was thrown away immediately.
model = model_fit(x_train, y_train, x_val, y_val)

Epoch 1/300
 183/1592 [==>...........................] - ETA: 42:43 - loss: 0.0274

KeyboardInterrupt: ignored

In [None]:
# Validation & visualization
predict = model.predict(x_val)
# The scaler was fitted on a single column, so pass the first predicted window
# as an (n_out, 1) column instead of relying on broadcasting over a
# (1, n_out) row, then flatten back to a 1-D series in original units.
validation = scaler.inverse_transform(predict[0].reshape(-1, 1)).ravel()
validation

In [None]:
# Ground truth for the held-out window, mapped back to original units.
y_val = y_val.reshape(1, n_out)
# Feed the scaler an (n_out, 1) column, matching the single feature it was
# fitted on, and flatten the result to a 1-D series.
true = scaler.inverse_transform(y_val[0].reshape(-1, 1)).ravel()
true

In [None]:
# Predicted vs. actual values over the held-out window.
# Note: `x` is rebound here to the step index used on the horizontal axis.
x = list(range(n_out))
fig, ax = plt.subplots(figsize=(15, 5), dpi=300)
ax.plot(x, validation, label="predict", linewidth=2.0)
ax.plot(x, true, label="true", linewidth=2.0)
ax.legend(loc='upper left')
ax.grid(linestyle='-.')
plt.show()

In [None]:
# Error of the predicted window against the held-out window, in original units.
MSE = mean_squared_error(true, validation)
MAE = mean_absolute_error(true, validation)
RMSE = np.sqrt(MSE)

print('Test RMSE: %.3f' % RMSE)
print('Test MAE: %.3f' % MAE)