## Keras -- MLPs on Air Quality Index

In [1]:
# if you keras is not using tensorflow as backend set "KERAS_BACKEND=tensorflow" use this command
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras import utils
from tensorflow.keras.datasets import mnist 
import seaborn as sns
from tensorflow.keras.initializers import RandomNormal
from sklearn.metrics import mean_squared_error,accuracy_score,mean_absolute_error
from sklearn.preprocessing import StandardScaler,MinMaxScaler

In [2]:
# the data, shuffled and split between train and test sets 
#Read the Dataset
data=pd.read_csv('MultiVariateAQI.csv')
#Split the input and target sets
X=pd.DataFrame(data.iloc[:,:-1])
y=pd.DataFrame(data.iloc[:,-1])

In [3]:
#Preprocessing: Normalize the Data
scalerX = StandardScaler().fit(X)
scalery = StandardScaler().fit(y)
X_scale = scalerX.transform(X)
y_scale = scalery.transform(y)

In [4]:
# Split the Patterns into Train and Test set
from sklearn.model_selection import train_test_split
X_train, X_test,y_train, y_test= train_test_split(X_scale,y_scale,test_size=0.15,shuffle=True)

In [5]:
print("Number of training examples :", X_train.shape[0])
print("Number of training examples :", X_test.shape[0])

Number of training examples : 40963
Number of training examples : 7229


In [6]:
X_train = X_train
X_test = X_test
Y_train=y_train
Y_test=y_test

In [7]:
# An example data point
print(X_train[0])

[-0.42587862 -0.15177649 -0.39597468 -0.00556975 -0.01596386  0.12505375
 -0.40397641  0.32289297 -0.92855424 -0.2091982  -0.6533397  -0.31905745]


In [8]:
# Import model and other libraries
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Activation 


In [9]:
# some model parameters

output_dim = 1
input_dim = X_train.shape[1]

batch_size = 128 
nb_epoch = 20

In [10]:
# start building a model
model = Sequential()

# The model needs to know what input shape it should expect. 
# For this reason, the first layer in a Sequential model 
# (and only the first, because following layers can do automatic shape inference)
# needs to receive information about its input shape. 
# you can use input_shape and input_dim to pass the shape of input

# output_dim represent the number of nodes need in that layer
# here we have 10 nodes

model.add(Dense(output_dim, input_dim=input_dim, activation='linear'))

In [11]:
# Before training a model, you need to configure the learning process, which is done via the compile method

# It receives three arguments:
# An optimizer. This could be the string identifier of an existing optimizer , https://keras.io/optimizers/
# A loss function. This is the objective that the model will try to minimize., https://keras.io/losses/
# A list of metrics. For any classification problem you will want to set this to metrics=['accuracy'].  https://keras.io/metrics/


# Note: when using the categorical_crossentropy loss, your targets should be in categorical format 
# (e.g. if you have 10 classes, the target for each sample should be a 10-dimensional vector that is all-zeros except 
# for a 1 at the index corresponding to the class of the sample).

# that is why we converted out labels into vectors

model.compile(optimizer='sgd', loss='mse', metrics=['mse'])

# Keras models are trained on Numpy arrays of input data and labels. 
# For training a model, you will typically use the  fit function

# fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, 
# validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, 
# validation_steps=None)

# fit() function Trains the model for a fixed number of epochs (iterations on a dataset).

# it returns A History object. Its History.history attribute is a record of training loss values and 
# metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable).

# https://github.com/openai/baselines/issues/20

history = model.fit(X_train, Y_train, steps_per_epoch=500, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [12]:
# Predict using the obtained Model
result_test = pd.DataFrame(model.predict(X_test))
# De-Normalize the Predictions
result_test = pd.DataFrame(scalery.inverse_transform(result_test),columns=['Predicted'])
rmse_test = np.sqrt(mean_squared_error(y_test,result_test))
print(" Test RMSE :",rmse_test)

 Test RMSE : 277.52697288767126


 <h3>  MLP + Sigmoid activation + SGDOptimizer </h3>

In [13]:
# Multilayer perceptron

model_sigmoid = Sequential()
model_sigmoid.add(Dense(512, activation='sigmoid', input_shape=(input_dim,)))
model_sigmoid.add(Dense(128, activation='sigmoid'))
model_sigmoid.add(Dense(output_dim, activation='linear'))

model_sigmoid.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 512)               6656      
                                                                 
 dense_2 (Dense)             (None, 128)               65664     
                                                                 
 dense_3 (Dense)             (None, 1)                 129       
                                                                 
Total params: 72,449
Trainable params: 72,449
Non-trainable params: 0
_________________________________________________________________


In [14]:
model_sigmoid.compile(optimizer='sgd', loss='mse', metrics=['mse'])

history = model_sigmoid.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [15]:
# Predict using the obtained Model
result_test = pd.DataFrame(model_sigmoid.predict(X_test))
# De-Normalize the Predictions
result_test = pd.DataFrame(scalery.inverse_transform(result_test),columns=['Predicted'])
# Computer the Root Mean Square Error on the Test Set
rmse_test = np.sqrt(mean_squared_error(y_test,result_test))
print(" Test RMSE :",rmse_test)

 Test RMSE : 309.0289283537215


<h2>MLP + Sigmoid activation + ADAM </h2>

In [16]:
model_sigmoid = Sequential()
model_sigmoid.add(Dense(512, activation='sigmoid', input_shape=(input_dim,)))
model_sigmoid.add(Dense(128, activation='sigmoid'))
model_sigmoid.add(Dense(output_dim, activation='linear'))

model_sigmoid.summary()

model_sigmoid.compile(optimizer='adam', loss='mse', metrics=['mse'])

history = model_sigmoid.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 512)               6656      
                                                                 
 dense_5 (Dense)             (None, 128)               65664     
                                                                 
 dense_6 (Dense)             (None, 1)                 129       
                                                                 
Total params: 72,449
Trainable params: 72,449
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [17]:
# Predict using the obtained Model
result_test = pd.DataFrame(model_sigmoid.predict(X_test))
# De-Normalize the Predictions
result_test = pd.DataFrame(scalery.inverse_transform(result_test),columns=['Predicted'])
# Computer the Root Mean Square Error on the Test Set
rmse_test = np.sqrt(mean_squared_error(y_test,result_test))
print(" Test RMSE :",rmse_test)

 Test RMSE : 273.1480204629428


<h2> MLP + ReLU +SGD </h2>

In [18]:
# Multilayer perceptron

# https://arxiv.org/pdf/1707.09725.pdf#page=95
# for relu layers
# If we sample weights from a normal distribution N(0,σ) we satisfy this condition with σ=√(2/(ni). 
# h1 =>  σ=√(2/(fan_in) = 0.062  => N(0,σ) = N(0,0.062)
# h2 =>  σ=√(2/(fan_in)  = 0.125  => N(0,σ) = N(0,0.125)
# out =>  σ=√(2/(fan_in+1) = 0.120  => N(0,σ) = N(0,0.120)

model_relu = Sequential()
model_relu.add(Dense(512, activation='relu', input_shape=(input_dim,), kernel_initializer=RandomNormal(mean=0.0, stddev=0.062, seed=None)))
model_relu.add(Dense(128, activation='relu', kernel_initializer=RandomNormal(mean=0.0, stddev=0.125, seed=None)) )
model_relu.add(Dense(output_dim, activation='linear'))

model_relu.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 512)               6656      
                                                                 
 dense_8 (Dense)             (None, 128)               65664     
                                                                 
 dense_9 (Dense)             (None, 1)                 129       
                                                                 
Total params: 72,449
Trainable params: 72,449
Non-trainable params: 0
_________________________________________________________________


In [19]:
model_relu.compile(optimizer='sgd', loss='mse', metrics=['mse'])

history = model_relu.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [20]:
# Predict using the obtained Model
result_test = pd.DataFrame(model_relu.predict(X_test))
# De-Normalize the Predictions
result_test = pd.DataFrame(scalery.inverse_transform(result_test),columns=['Predicted'])
# Computer the Root Mean Square Error on the Test Set
rmse_test = np.sqrt(mean_squared_error(y_test,result_test))
print(" Test RMSE :",rmse_test)

 Test RMSE : 285.1563576533931


<h2> MLP + ReLU + ADAM </h2>

In [21]:
model_relu = Sequential()
model_relu.add(Dense(512, activation='relu', input_shape=(input_dim,), kernel_initializer=RandomNormal(mean=0.0, stddev=0.062, seed=None)))
model_relu.add(Dense(128, activation='relu', kernel_initializer=RandomNormal(mean=0.0, stddev=0.125, seed=None)) )
model_relu.add(Dense(output_dim, activation='linear'))

print(model_relu.summary())

model_relu.compile(optimizer='adam', loss='mse', metrics=['mse'])

history = model_relu.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 512)               6656      
                                                                 
 dense_11 (Dense)            (None, 128)               65664     
                                                                 
 dense_12 (Dense)            (None, 1)                 129       
                                                                 
Total params: 72,449
Trainable params: 72,449
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [22]:
# Predict using the obtained Model
result_test = pd.DataFrame(model_relu.predict(X_test))
# De-Normalize the Predictions
result_test = pd.DataFrame(scalery.inverse_transform(result_test),columns=['Predicted'])
# Computer the Root Mean Square Error on the Test Set
rmse_test = np.sqrt(mean_squared_error(y_test,result_test))
print(" Test RMSE :",rmse_test)

 Test RMSE : 275.8483883588573


<h2> 5. MLP + Dropout + AdamOptimizer </h2>

In [23]:
# https://stackoverflow.com/questions/34716454/where-do-i-call-the-batchnormalization-function-in-keras

from tensorflow.keras.layers import Dropout

model_drop = Sequential()

model_drop.add(Dense(512, activation='sigmoid', input_shape=(input_dim,), kernel_initializer=RandomNormal(mean=0.0, stddev=0.039, seed=None)))
model_drop.add(Dropout(0.5))

model_drop.add(Dense(128, activation='sigmoid', kernel_initializer=RandomNormal(mean=0.0, stddev=0.55, seed=None)) )
model_drop.add(Dropout(0.5))

model_drop.add(Dense(output_dim, activation='linear'))


model_drop.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_13 (Dense)            (None, 512)               6656      
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_14 (Dense)            (None, 128)               65664     
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_15 (Dense)            (None, 1)                 129       
                                                                 
Total params: 72,449
Trainable params: 72,449
Non-trainable params: 0
_________________________________________________________________


In [24]:
model_drop.compile(optimizer='adam', loss='mse', metrics=['mse'])

history = model_drop.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [25]:
# Predict using the obtained Model
result_test = pd.DataFrame(model_drop.predict(X_test))
# De-Normalize the Predictions
result_test = pd.DataFrame(scalery.inverse_transform(result_test),columns=['Predicted'])
# Computer the Root Mean Square Error on the Test Set
rmse_test = np.sqrt(mean_squared_error(y_test,result_test))
print(" Test RMSE :",rmse_test)

 Test RMSE : 278.76706512948317
