In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot
from numpy import concatenate
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from tensorflow.python.keras.layers import Dense, LSTM , Dropout
from tensorflow.python.keras import Sequential
from math import sqrt;

**DATASET PREPROCESSING**




Load the dataset and drop the rows that contain NA values, then plot each column. Further preprocessing includes:




1. Feature Selection
2. Label Encoding
3. Data Normalization
4. Converting to Time Series data

In [None]:
# Dataset preprocessing: read the Beijing PM2.5 CSV and discard every row
# that contains at least one NA value.
dataset = pd.read_csv('../input/beijing-pm25-data-data-set/PRSA_data_2010.1.1-2014.12.31.csv').dropna()

# **1. Feature Selection**
Eight features are relevant for the forecast: PM2.5, dew point, temperature, pressure, wind direction, wind speed, and the cumulative hours of snow and of rain. The remaining columns (`No`, `year`, `month`, `day`, `hour`) are therefore dropped.

In [None]:
# Remove the row counter and the calendar columns in a single call, then keep
# the remaining feature columns as a raw array for the following cells.
dataset = dataset.drop(columns=['No', 'year', 'month', 'day', 'hour'])
dataset.head();

values = dataset.values

In [None]:
# Column positions to plot; position 4 (the wind-direction column that is
# label encoded in a later cell) is deliberately absent from this list.
groups = [0, 1, 2, 3, 5, 6, 7]
# Draw one stacked subplot per selected column, titled with the column name.
pyplot.figure()
for i, group in enumerate(groups, start=1):
    pyplot.subplot(len(groups), 1, i)
    pyplot.plot(values[:, group])
    pyplot.title(dataset.columns[group], y=0.5, loc='right')
pyplot.show()

# **2.** **Label** **Encoding**
The wind direction column contains categorical (non-numeric) values, so it is label encoded.


In [None]:
# Column 4 holds the wind direction labels: replace them with integer codes.
encoder = LabelEncoder()
wind_codes = encoder.fit_transform(values[:, 4])
values[:, 4] = wind_codes
# Every column is numeric now, so cast the whole array to float32.
values = values.astype('float32')

# **3. Data Normalization**
Data Normalization is done using MinMaxScaler function of sklearn.

In [None]:
# Rescale every feature column into the [0, 1] range. The fitted scaler is
# kept so that later cells can invert the transformation.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit(values).transform(values)

# **4. Converting to Time Series Data**
Because we use an LSTM neural network, the data must be kept in time order. The dataset is reframed as a supervised learning problem: the inputs are all eight features at the previous time step (t-1), and the target is the PM2.5 pollution at the current time step (t). The weather variables for the hour being predicted (t) are removed.

In [None]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table.

    Each row of the result holds the lagged observations followed by the
    current/future observations for one time step.

    Args:
        data: Sequence of observations. Either 1-D (a single variable) or
            2-D with one column per variable; anything ``DataFrame`` accepts.
        n_in: Number of lag steps (t-n_in ... t-1) emitted as input columns.
        n_out: Number of steps (t ... t+n_out-1) emitted as output columns.
        dropnan: If True, drop the rows left incomplete by the shifting.

    Returns:
        DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``, ordered from the oldest lag to the furthest
        forecast step.
    """
    df = DataFrame(data)
    # Count the variables on the frame itself rather than on the raw input:
    # this is correct for 1-D lists/arrays and for lists of rows alike.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop the rows that shifting filled with NaN
    if dropnan:
        agg = agg.dropna()
    return agg

In [None]:
# Build the supervised frame with one lag step and one forecast step.
reframed = series_to_supervised(scaled, 1, 1)
# Among the time-t columns only position 8 is kept as the prediction target;
# positions 9 through 15 are discarded.
unwanted = reframed.columns[list(range(9, 16))]
reframed = reframed.drop(columns=unwanted)
print(reframed.head())

# Splitting into train and test sets
We fit the model on only the first 2 years of data (365 * 24 * 2 hourly rows), then evaluate it on the remaining 3 years.

In [None]:
# Chronological split: the first n_train_hours rows are used for training,
# everything after them for testing.
values = reframed.values
n_train_hours = 365 * 24 * 2
train, test = values[:n_train_hours, :], values[n_train_hours:, :]
# Columns 0..7 are the model inputs; column 8 is the target.
train_X, train_y = train[:, :8], train[:, 8]
test_X, test_y = test[:, :8], test[:, 8]

# reshaping input to be 3D [samples, timesteps, features]


In [None]:
# Insert a single-timestep axis so the inputs become
# [samples, timesteps, features], then report the resulting shapes.
n_train, n_train_feat = train_X.shape
n_test, n_test_feat = test_X.shape
train_X = train_X.reshape((n_train, 1, n_train_feat))
test_X = test_X.reshape((n_test, 1, n_test_feat))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

# **Designing Network**
We define an LSTM with 100 neurons in the first hidden layer followed by a Dropout layer of 0.3. Next comes another hidden layer of 50 neurons with a Dropout of 0.2, and then two more hidden layers of 50 neurons, each with a Dropout of 0.2. The final output layer has 1 neuron for predicting pollution and uses a linear activation, since the target is a continuous value. The batch size corresponds to three days of data (24 * 3 = 72 hours). The optimizer is Adam and the loss function is mean squared error. The intent is to monitor the loss on the test data, stop training when it no longer decreases, and save the current best model; note that the fitting cell below does not pass any callbacks, so all epochs are run.

In [None]:
# design network: four stacked LSTM layers, each followed by dropout, and a
# single linear output unit. Every LSTM except the last returns its full
# sequence so that the next LSTM layer receives 3D input.
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])),
    Dropout(0.3),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(1, activation='linear'),
])

# Mean squared error loss, optimised with Adam.
model.compile(loss='mse', optimizer='adam')

In [None]:
# Print the layer-by-layer summary of the compiled network.
model.summary()

# **Fitting the Network**
The network is trained for 50 epochs with a batch size of 72.

In [None]:
# fit network
# The test split doubles as validation data, and shuffling is disabled so
# the rows are consumed in their original order.
# NOTE(review): no callbacks are passed, so all 50 epochs always run.
history = model.fit(
    train_X,
    train_y,
    epochs=50,
    batch_size=72,
    validation_data=(test_X, test_y),
    verbose=2,
    shuffle=False,
)

# **Plotting the graph of Train Loss and Test Loss**

In [None]:
# Plot the per-epoch training loss and validation ("test") loss curves.
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[history_key], label=curve_label)
pyplot.legend()
pyplot.show()

# **Making Prediction/Forecasting**
Forecast on the test inputs, then invert the scaling of both the predictions and the actual test values so they can be compared in the original units.

In [None]:
# make a prediction
yhat = model.predict(test_X)
# Flatten the timestep axis into a NEW name instead of overwriting test_X:
# the model needs the 3D array, so rebinding it would make this cell fail
# when it is run a second time.
test_X_2d = test_X.reshape((test_X.shape[0], test_X.shape[2]))
# The scaler was fitted on all feature columns at once, so inverse_transform
# needs a full-width row; the remaining feature columns act as padding.
other_features = test_X_2d[:, 1:]

# invert scaling for forecast (column 0 is the pollution value)
inv_yhat = concatenate((yhat, other_features), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)[:, 0]

# invert scaling for actual (again without overwriting test_y)
test_y_col = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y_col, other_features), axis=1)
inv_y = scaler.inverse_transform(inv_y)[:, 0]

# **Calculating the RMSE and MAE values**

In [None]:
# Score the forecast in the original units: root mean squared error and
# mean absolute error between actual and predicted values.
mse = mean_squared_error(inv_y, inv_yhat)
rmse = sqrt(mse)
mae = mean_absolute_error(inv_y, inv_yhat)
print('Test RMSE: %.3f' % rmse)
print('Test MAE: %.3f' % mae)

# **Plotting the Graph of Actual vs Predicted**

In [None]:
# Show the raw arrays, then overlay the actual and predicted series.
print('Actual :', inv_y)
print('Predicted:', inv_yhat)
for series, series_label in ((inv_y, 'Actual'), (inv_yhat, 'Predicted')):
    pyplot.plot(series, label=series_label)
pyplot.legend()
pyplot.show()