# Course: Deep Learning
# Author: Sandro Camargo <sandrocamargo@unipampa.edu.br>
# Non Linear Regression with Multi Layer Perceptron Example
# Dataset: https://archive.ics.uci.edu/ml/datasets/Algerian+Forest+Fires+Dataset

A Python library is a collection of related functions. A library contains bundles of encapsulated code which can be used repeatedly in different programs.

In [None]:
# Import Libraries
import keras # Neural Network Library
from keras import layers # Layers to a neural network
from keras import optimizers # optimizers
import pandas as pd # Data Manipulation library
import numpy as np # Fast Numeric Computing library
import tensorflow as tf # Optimizers
import matplotlib.pyplot as plt # Plot library
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import plot_model # Print the network

In [None]:
# Loading dataset: the Algerian Forest Fires CSV is read directly from the UCI repository.
DATASET_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00547/Algerian_forest_fires_dataset_UPDATE.csv'

data = pd.read_csv(
    DATASET_URL,
    # Column names (day, month, year, ...) are in line 1 of this CSV file.
    header=1,
    # These lines do not contain valid data, so they are not imported.
    # Without this parameter, all lines would be imported.
    skiprows=[124, 125, 126, 170],
    # Only the first 13 columns are kept; the last column, which is named
    # Classes, is not imported. Without this parameter, all columns would be imported.
    usecols=list(range(0, 13)),
)

# Inspect the columns and data types of the "data" dataframe.
data.info()

In [None]:
# Rescale every column with min-max normalization (MinMaxScaler's default
# output range is [0, 1]).
# NOTE(review): the scaler is fitted on the whole dataset, before the
# train/test split done later in this notebook, so statistics of the testing
# rows leak into the scaling. Consider fitting on the training rows only.
scaler = MinMaxScaler()
print(scaler.fit(data))  # fit() learns the per-column min/max and returns the scaler itself

# transform() returns a NumPy array; wrapping it in a DataFrame restores the
# tabular API. The columns are now labelled with integers (0, 1, 2, ...)
# instead of the original CSV names.
data = pd.DataFrame(scaler.transform(data))

The dataset must be randomly split into two parts: a training set and a testing set. The main splitting approaches are holdout and k-fold cross-validation.
*   Training set is used for building (training) the model.
*   Testing set is used for testing the generalization ability of the model built.

Moreover, the inputs ($x$) and outputs ($y$) must be separated within each set.




In [None]:
# Fix the seed so that the same random split is produced on every run.
np.random.seed(1)

# Boolean mask with one entry per row: True sends the row to the training set.
# Each row is drawn independently with probability 0.8, so the training set
# holds roughly 80% of the data.
rnd = np.random.rand(len(data)) < 0.8

# Column 12 is the output (y); columns 0 to 11 are the inputs (x).
target_col = data.columns[12]

# Holdout split: masked rows go to training, the remaining rows to testing.
train_data = data[rnd].copy()
test_data = data[~rnd].copy()

# Separate inputs (every column except the output) from outputs in each set.
train_x, train_y = train_data.drop(columns=target_col), train_data[target_col]
test_x, test_y = test_data.drop(columns=target_col), test_data[target_col]

# Verifying dataset dimensions
for description, part in (
    ('The training dataset (inputs) dimensions are: ', train_x),
    ('The training dataset (outputs) dimensions are: ', train_y),
    ('The testing dataset (inputs) dimensions are: ', test_x),
    ('The testing dataset (outputs) dimensions are: ', test_y),
):
    print(description, part.shape)

After creating the datasets, the next step is defining the architecture of our model.

It must be defined:


*   Architecture: in terms of neurons and layers
*   Optimizer: the algorithm or method used to change the weights in order to minimize the loss function.

The last step is compiling the model. In this step the loss function, the optimizer and the evaluation metrics must be defined.

In [None]:
# Function to define model architecture
def build_model(n_inputs=None, learning_rate=0.001):
    """Build and compile the feedforward network (MLP) used for regression.

    Architecture: an input layer, two hidden ReLU layers with 10 and 5
    neurons, and a single output neuron with no activation, as required
    for predicting one continuous value.

    Args:
        n_inputs: number of input features. When None (the default), the
            number of columns of the global ``train_x`` is used.
        learning_rate: step size of the RMSprop optimizer.

    Returns:
        The compiled Keras model, ready to be fitted.
    """
    if n_inputs is None:
        # Default: one input per column of the training inputs.
        n_inputs = len(train_x.columns)

    # Defining the architecture
    # Sequential = Feedforward Neural Network
    model = keras.Sequential([
        layers.Input(shape=[n_inputs]),
        layers.Dense(10, activation="relu"),
        layers.Dense(5, activation="relu"),
        layers.Dense(1),
    ])

    # Defining the optimizer.
    # The optimizer is taken from the same `keras` package that builds the
    # model: mixing `keras` objects with `tf.keras` ones fails when the
    # standalone Keras and the one bundled with TensorFlow are different
    # versions.
    optimizer = optimizers.RMSprop(learning_rate=learning_rate)

    # Mean Squared Error (MSE) is the default loss function in regression
    # models; MSE and MAE are additionally tracked as metrics during training.
    model.compile(
        loss='mse',
        optimizer=optimizer,
        metrics=['mse', 'mae'],
    )

    return model

Just out of curiosity, you should observe how many parameters ($\theta$) your model has.
At this point, your model is built.

In [None]:
# Create the compiled network defined by build_model().
model = build_model()
# Print a summary of the network, which shows how many parameters it has.
model.summary()

After creating the model, it must be trained (fitted).
Training is done using the training set, and the number of epochs must be defined.

In [None]:
# Number of epochs used for training.
EPOCHS = 200

# Fit the model on the training inputs and outputs; verbose = 1 reports the
# progress of each epoch. The value returned by fit() is kept because its
# `history` attribute is used afterwards to plot the training MSE per epoch.
history = model.fit(
    train_x, train_y, epochs = EPOCHS, verbose = 1
)

This plot is generated just to inspect the learning convergence.
The loss function value is expected to decrease over the epochs.


In [None]:
# Learning curve: the training MSE recorded for each epoch.
mse_per_epoch = history.history['mse']

plt.plot(mse_per_epoch)
plt.xlabel('Epoch')
plt.ylabel('MSE')
plt.title('Training MSE')
plt.legend(['Error'], loc='upper right')

# Write the figure to disk, then display it.
plt.savefig("mlp-regr-lossfunction.png")
plt.show()

In [None]:
# Retrieve the trained parameters and print their container type and values.
weights = model.get_weights() # returns the model weights as a list of NumPy arrays
print(type(weights))
print(weights)

After the training process, the knowledge learnt by a neural network is stored in its weights.

After the training process, the model should be tested in order to measure its quality, that is, how good its predictions are. The model must be evaluated using the testing set, which is composed of samples that are not in the training set. In regression problems, the correlation coefficient is the default metric to measure the model quality.
The correlation coefficient is computed using the real outputs ($y$) and the predicted outputs ($\hat{y}$). It ranges from $-1$ to $1$: values close to 1 indicate predictions that closely follow the real values, while values close to 0 indicate no linear relationship between predictions and real values.

In [None]:
# Predict the (scaled) output for the testing inputs with the trained MLP.
test_predictions = model.predict(test_x)

# Predictions versus real values; a point on the dashed diagonal is a
# perfect prediction.
diagonal = [-0.1, 1.1]
plt.scatter(test_y, test_predictions, marker='o', c='blue')
plt.plot(diagonal, diagonal, color='black', ls='--')
plt.title('Regression with MLP (Testing Set)')
plt.xlabel('Real Values')
plt.ylabel('Predictions')
plt.xlim(-0.1, 1.1)
plt.ylim(-0.1, 1.1)
plt.axis(True)
plt.savefig("mlp-regr-testing.png")
plt.show()

# np.corrcoef returns the correlation matrix of the two series; entry [0, 1]
# is the correlation between predictions and real outputs.
test_corr = np.corrcoef(np.transpose(test_predictions), np.transpose(test_y))[0, 1]
print("Correlation Coefficient in testing set: %.4f" % test_corr)

In [None]:
# Predict the (scaled) output for the training inputs with the trained MLP.
train_predictions = model.predict(train_x)

# Predictions versus real values; a point on the dashed diagonal is a
# perfect prediction.
diagonal = [-0.1, 1.1]
plt.scatter(train_y, train_predictions, marker='o', c='blue')
plt.plot(diagonal, diagonal, color='black', ls='--')
plt.title('Regression with MLP (Training Set)')
plt.xlabel('Real Values')
plt.ylabel('Predictions')
plt.xlim(-0.1, 1.1)
plt.ylim(-0.1, 1.1)
plt.axis(True)
plt.savefig("mlp-regr-training.png")
plt.show()

# np.corrcoef returns the correlation matrix of the two series; entry [0, 1]
# is the correlation between predictions and real outputs.
train_corr = np.corrcoef(np.transpose(train_predictions), np.transpose(train_y))[0, 1]
print("Correlation Coefficient in training set: %.4f" % train_corr)

In [None]:
# Save the model architecture as an image.
# The diagram is written to 'model_architecture.png'; show_shapes and
# show_layer_names ask for the layer shapes and layer names to be drawn.
plot_model(model, to_file='model_architecture.png', show_shapes=True, show_layer_names=True)