In [None]:
!pip install skorch

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import skorch

In [None]:
# Fetch the dataset once. Skipping the download when the file already exists
# keeps "Restart & Run All" cheap and avoids piling up duplicate copies.
import urllib.request

data_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00381/PRSA_data_2010.1.1-2014.12.31.csv'
data_file = os.path.basename(data_url)
if not os.path.exists(data_file):
    # Download under a temporary name and rename afterwards, so an interrupted
    # transfer can never leave a truncated CSV behind under the final name.
    # urlretrieve raises on network/HTTP errors instead of failing silently.
    partial_file = data_file + '.part'
    urllib.request.urlretrieve(data_url, partial_file)
    os.replace(partial_file, data_file)

# Time-Series Forecasting with Long Short-Term Memory Networks

##  Beijing PM2.5 Data Set 

Reference: Liang, X., Zou, T., Guo, B., Li, S., Zhang, H., Zhang, S., Huang, H. and Chen, S. X. (2015). [Assessing Beijing's PM2.5 pollution: severity, weather impact, APEC and winter heating](https://royalsocietypublishing.org/doi/10.1098/rspa.2015.0257). Proceedings of the Royal Society A, 471, 20150257.

This hourly data set contains PM2.5 readings from the US Embassy in Beijing, together with meteorological data from Beijing Capital International Airport.

Dataset Description:
```
No: row number
year: year of data in this row
month: month of data in this row
day: day of data in this row
hour: hour of data in this row
pm2.5: PM2.5 concentration (ug/m^3)
DEWP: Dew Point (℃)
TEMP: Temperature (℃)
PRES: Pressure (hPa)
cbwd: Combined wind direction
Iws: Cumulated wind speed (m/s)
Is: Cumulated hours of snow
Ir: Cumulated hours of rain
``` 

In [None]:
# Read the downloaded CSV into a DataFrame and show it as the cell output.
csv_path = 'PRSA_data_2010.1.1-2014.12.31.csv'
df = pd.read_csv(csv_path)
df

## Visualize time series data

In [None]:
# Make figures created from here on 15 x 5 inches (global matplotlib setting).
fig_size = [15, 5]
plt.rcParams["figure.figsize"] = fig_size

# Label and configure the axes, then draw the PM2.5 column against the
# DataFrame index.
plt.xlabel('Time')
plt.ylabel('pm2.5')
plt.title('pm2.5 vs Time')
plt.grid(True)
plt.autoscale(axis='x', tight=True)
plt.plot(df['pm2.5'])

## Process dataset features

In [None]:
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
# The encoder needs a 2-D input, so select the wind-direction column as an
# (n_rows, 1) array.
cbwd = df[['cbwd']].values

# Learn the categories first, then encode; .toarray() turns the encoder's
# output into a dense numpy array so it can be concatenated with the other
# features later.
ohe = OneHotEncoder()
ohe.fit(cbwd)
cbwd_ohe = ohe.transform(cbwd).toarray()
print(cbwd_ohe.shape)
print(ohe.categories_)

In [None]:
# Target series: the PM2.5 column as a plain numpy array.
pm = df['pm2.5'].values

# Feature matrix: every remaining column except the row counter, the target
# and the categorical wind direction, with the one-hot encoded wind direction
# appended as extra columns.
to_drop = ['No', 'pm2.5', 'cbwd']
X_df = df.drop(columns=to_drop)
X = np.hstack([X_df.values, cbwd_ohe])
print(X.shape, pm.shape)

## Create input/output sequences

In [None]:
import tqdm
def create_sequences(data, window):
    """Slice a 1-D series into sliding input windows and next-step labels.

    Row ``i`` of ``inputs`` holds ``data[i:i + window]`` and row ``i`` of
    ``labels`` holds the value that immediately follows that window,
    ``data[i + window]``.

    Parameters
    ----------
    data : 1-D array-like of numbers
        The series to window. NaN values are carried through unchanged.
    window : int
        Number of consecutive values in each input row.

    Returns
    -------
    inputs : numpy.ndarray of float, shape ``(len(data) - window, window)``
    labels : numpy.ndarray of float, shape ``(len(data) - window, 1)``

    Raises
    ------
    ValueError
        If ``data`` is not one-dimensional, or if ``window`` is negative or
        larger than ``len(data)``.
    """
    series = np.asarray(data, dtype=float)
    if series.ndim != 1:
        raise ValueError(
            "data must be one-dimensional, got {} dimensions".format(series.ndim))

    n_total = len(series)
    if not 0 <= window <= n_total:
        raise ValueError(
            "window must be between 0 and len(data)={}, got {}".format(n_total, window))

    n_samples = n_total - window
    # Broadcasting a column of window start positions against a row of
    # in-window offsets yields an (n_samples, window) index matrix, so all
    # windows are gathered in one vectorised lookup instead of a Python loop.
    index_matrix = np.arange(n_samples)[:, None] + np.arange(window)[None, :]
    inputs = series[index_matrix]
    # The label for window i is the element right after it, i.e. series[i + window].
    # Copy so the result never aliases the caller's array.
    labels = series[window:].reshape(-1, 1).copy()
    return inputs, labels

In [None]:
# Number of consecutive past PM2.5 readings used as lag features per sample.
window = 5
pm_feat, y = create_sequences(data=pm, window=window)
print(pm_feat.shape, y.shape)

In [None]:
# Drop the first `window` feature rows so that row i of X lines up with
# pm_feat[i] / y[i], then append the lagged PM2.5 values as extra columns.
# NOTE: this rebinds X, so the cell must be run exactly once after the
# feature matrix is built.
X = np.concatenate([X[window:], pm_feat], axis=1)
print(X.shape, y.shape)

In [None]:
# Keep only the rows whose target and every feature column are non-NaN.
has_target = ~np.isnan(y).reshape(-1)
has_all_features = ~np.isnan(X).any(axis=1)
keep = has_target & has_all_features

X = X[keep]
y = y[keep].reshape((-1, 1))
print(X.shape, y.shape)

In [None]:
# Ordered split without shuffling: the first 90% of rows are used for
# training and the remaining rows are held out for testing.
n_data_pts = X.shape[0]
train_idx = int(0.9 * n_data_pts)
X_train, X_test = X[:train_idx], X[train_idx:]
y_train, y_test = y[:train_idx], y[train_idx:]

In [None]:
# Fit both scalers on the training split only, then apply the same mapping to
# train and test so no information from the test rows enters the scaling.
X_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X_train)
y_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(y_train)

X_train, X_test = X_scaler.transform(X_train), X_scaler.transform(X_test)
y_train, y_test = y_scaler.transform(y_train), y_scaler.transform(y_test)

In [None]:
# Convert every split to a float32 torch tensor.
X_train, X_test, y_train, y_test = (
    torch.tensor(split).float()
    for split in (X_train, X_test, y_train, y_test)
)

## Create LSTM model

In [None]:
class Net(nn.Module):
    """Single-layer LSTM followed by a linear read-out.

    Parameters
    ----------
    input_size : int
        Number of features per input row.
    hidden_layer_size : int
        Size of the LSTM hidden state.
    output_size : int
        Number of values predicted per input row.

    Attributes
    ----------
    hidden_cell : tuple of two tensors, each of shape (1, 1, hidden_layer_size)
        The (h_0, c_0) state fed to the LSTM at the start of ``forward``.
        It is reset to zeros at the end of every ``forward`` call.
    """

    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        state_shape = (1, 1, self.hidden_layer_size)
        self.hidden_cell = (torch.zeros(state_shape), torch.zeros(state_shape))

    def forward(self, x):
        """Return one prediction per row of ``x``.

        ``x`` is viewed as ``(len(x), 1, -1)``. ``nn.LSTM`` is built with its
        default ``batch_first=False``, so this is one sequence of ``len(x)``
        steps with batch size 1.
        NOTE(review): rows of the same mini-batch are therefore chained
        through the recurrent state -- confirm this is intended.

        Returns a tensor of shape ``(len(x), output_size)``.
        """
        n_rows = len(x)

        # hidden_cell is a plain attribute, not a parameter or buffer, so
        # Module.to()/.cuda()/.double() never convert it. Match it to the input
        # here so the LSTM does not see mismatched devices or dtypes.
        h_0, c_0 = (state.to(device=x.device, dtype=x.dtype)
                    for state in self.hidden_cell)

        lstm_out, _ = self.lstm(x.view(n_rows, 1, -1), (h_0, c_0))
        out = self.linear(lstm_out.view(n_rows, -1))

        # Reset the stored state so every forward call starts from zeros.
        self.hidden_cell = (x.new_zeros(1, 1, self.hidden_layer_size),
                            x.new_zeros(1, 1, self.hidden_layer_size))
        return out

## Train model

In [None]:
from skorch import NeuralNetRegressor
# Seed PyTorch before the network is constructed so the random weight
# initialisation is the same on every run of the notebook.
torch.manual_seed(0)

# One input per feature column, one output per target column.
model = Net(input_size=X.shape[-1], hidden_layer_size=100, output_size=y.shape[-1])
regr = NeuralNetRegressor(model, batch_size=64, max_epochs=15, lr=1e-4)
regr.fit(X_train, y_train)

## Evaluate model

In [None]:
# Score both splits (targets are still in scaled units here) and report them.
train_score = regr.score(X_train, y_train)
test_score = regr.score(X_test, y_test)
print("Train R2 score: {} Test R2 score: {}".format(train_score, test_score))

In [None]:
from sklearn.metrics import r2_score, mean_absolute_error

# Undo the target scaling so predictions and targets are back in the units of
# the original pm2.5 column before computing the metrics.
to_original_units = y_scaler.inverse_transform
train_preds = to_original_units(regr.predict(X_train))
test_preds = to_original_units(regr.predict(X_test))
train_targets = to_original_units(y_train)
test_targets = to_original_units(y_test)

# R2 and mean absolute error on each split.
train_r2, test_r2 = (
    r2_score(train_targets, train_preds),
    r2_score(test_targets, test_preds),
)
train_mae, test_mae = (
    mean_absolute_error(train_targets, train_preds),
    mean_absolute_error(test_targets, test_preds),
)
print(train_r2, test_r2, train_mae, test_mae)