# Course: Deep Learning
# Author: Sandro Camargo <sandrocamargo@unipampa.edu.br>
# Classification with Multi Layer Perceptron Example
# Dataset: https://archive.ics.uci.edu/ml/datasets/Statlog+%28Landsat+Satellite%29

A Python library is a collection of related functions. A library contains bundles of encapsulated code which can be reused in different programs.

In [None]:
# Import Libraries
import keras # Neural Network Library
from keras import layers # Layers to a neural network
from keras import optimizers # optimizers
import pandas as pd # Data Manipulation library
import numpy as np # Fast Numeric Computing library
import tensorflow as tf # Optimizers
import matplotlib.pyplot as plt # Plot library
from sklearn.preprocessing import MinMaxScaler, label_binarize
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, ConfusionMatrixDisplay

In [None]:
# Load the Statlog (Landsat Satellite) training file.
# Column names: four groups (r, g, b, a) of nine columns each, followed by the 'class' column.
colnames = [prefix + str(idx) for prefix in ('r', 'g', 'b', 'a') for idx in range(1, 10)]
colnames.append('class')
data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/satimage/sat.trn',
    header=None,
    delimiter=" ",
    names=colnames,
)
# About the parameters
# header=None: the file has no header line, so its first line is read as data.
# delimiter=" ": values in each line are separated by a single space.
# names=colnames: the 37 names built above are assigned to the columns.

# Print the columns and data types of the "data" dataframe
data.info()

In [None]:
# Keep the unscaled class labels (column index 36) before the frame is rescaled;
# they are binarized later to build the network targets.
classes = data.iloc[:, 36]

# Rescale every column with min-max scaling. The label column is rescaled too,
# but it is dropped from the inputs when the training/testing sets are created.
# NOTE(review): the scaler is fitted on all rows before the train/test split, so the
# testing rows influence the scaling statistics - consider fitting on training rows only.
scaler = MinMaxScaler()
print(scaler.fit(data))
# transform() returns a NumPy array, so the result is wrapped in a new dataframe
# (columns become the integer positions 0..36).
data = pd.DataFrame(scaler.transform(data))

The dataset must be randomly split into two parts: a training set and a testing set. The main approaches to splitting are holdout and n-fold cross-validation.
*   Training set is used for building (training) the model.
*   Testing set is used for testing the generalization ability of the model built.

Moreover, the inputs ($x$) and outputs ($y$) must be separated in each set.




In [None]:
np.random.seed(1) # Fixed seed: the same random split is produced on every run
rnd = np.random.rand(len(data)) < 0.8 # Boolean row mask: about 80% of the rows go to the training set

# Column 36 holds the class label, i.e. the output (y), so it must not be part of the inputs (x)
label_column = data.columns[[36]]

# Creating the training dataset (about 80%)
# drop() without inplace returns a new dataframe instead of modifying a filtered
# slice of "data", which avoids pandas' SettingWithCopyWarning.
train_x = data[rnd].drop(label_column, axis=1)
train_y = classes[rnd] # unscaled class labels of the training rows
train_y_bin = label_binarize(train_y, classes=[1, 2, 3, 4, 5, 6, 7]) # one indicator column per class

# Creating the testing dataset (about 20%)
test_x = data[~rnd].drop(label_column, axis=1)
test_y = classes[~rnd] # unscaled class labels of the testing rows
test_y_bin = label_binarize(test_y, classes=[1, 2, 3, 4, 5, 6, 7])

# Verifying dataset dimensions
print('The training dataset (inputs) dimensions are: ', train_x.shape)
print('The training dataset (outputs) dimensions are: ', train_y.shape)
print('The testing dataset (inputs) dimensions are: ', test_x.shape)
print('The testing dataset (outputs) dimensions are: ', test_y.shape)

After creating the datasets, the next step is defining the architecture of our model.

It must be defined:


*   Architecture: in terms of neurons and layers
*   Optimizer: is the algorithm or method used to change the weights in order to minimize the loss function.

The last step is compiling the model. In this step the loss function, the optimizer and the evaluation metrics must be defined.

In [None]:
# Function to define model architecture
def build_model():
  """Build and compile the multilayer perceptron used as the classifier.

  The network is a feedforward (Sequential) stack of fully connected layers:
  two hidden ReLU layers with 21 and 14 neurons, followed by a softmax output
  layer with 7 neurons, one per column of the binarized targets. The number
  of inputs is read from the module-level ``train_x`` dataframe.

  Returns:
    The compiled Keras model, ready to be trained with ``fit``.
  """
  # Defining the architecture
  # Sequential = Feedforward Neural Network
  # input_shape is the amount of columns from training set
  n_inputs = len(train_x.columns)
  model = keras.Sequential([
        layers.Dense(21, input_shape = [n_inputs], activation="relu"),
        layers.Dense(14, activation="relu"),
        layers.Dense(7, activation="softmax")
  ])

  # Defining the optimizer
  # The optimizer is taken from the same keras package that builds the model
  # (instead of tf.keras), so model and optimizer never come from two
  # different Keras implementations.
  optimizer = optimizers.RMSprop(
      learning_rate = 0.001)

  # Categorical cross entropy is the loss for multi-class targets given as
  # indicator (one-hot) rows; it is also tracked as a metric, with accuracy.
  model.compile(loss = 'categorical_crossentropy',
      optimizer = optimizer,
      metrics = ['categorical_crossentropy','accuracy'])

  return model

Just for curiosity, you should observe how many parameters ($\theta$) your model has.
At this point, your model is built.

In [None]:
# Instantiate the network defined by build_model() and print a summary of it
model = build_model()
model.summary()

After creating the model, it must be trained (fitted).
Training is done using training set and the amount of epochs must be defined.

In [None]:
# Number of training epochs
EPOCHS = 200

# Fit the network on the training inputs and their binarized class labels.
# The object returned by fit() is kept in "history"; its per-epoch metric
# values are read by the plotting cells.
history = model.fit(x = train_x, y = train_y_bin, verbose = 1, epochs = EPOCHS)

This plot is generated to inspect the convergence of the learning process.
The loss function value is expected to decrease over the epochs.


In [None]:
# Per-epoch categorical cross entropy recorded while fitting the model
training_loss = history.history['categorical_crossentropy']
plt.plot(training_loss)
plt.xlabel('Epoch')
plt.ylabel('Categorical Cross Entropy')
plt.title('Training Categorical Cross Entropy')
plt.legend(['Error'], loc='upper right')
# Write the figure to a PDF file, then display it
plt.savefig("trainingerror.pdf")
plt.show()

In [None]:
# Plot the per-epoch training accuracy recorded while fitting the model
plt.plot(history.history['accuracy'])
plt.title('Training Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# The curve shows accuracy, not error, so the legend entry is named accordingly
plt.legend(['Accuracy'], loc='lower right')
# Write the figure to a PDF file, then display it
plt.savefig("trainingaccuracy.pdf")
plt.show()

After the training process, the knowledge learnt by a neural network is stored in its weights.

After the training process, the model should be tested in order to measure its quality, that is, how good its predictions are. The model must be evaluated using the testing set, which is composed of samples that are not in the training set. In classification problems, accuracy is the most common metric to measure the model quality, usually complemented by the confusion matrix and by per-class precision and recall.
Accuracy is computed using real outputs ($y$) and predicted outputs ($\hat{y}$). Accuracy can vary between 0 (all predictions are wrong) and 1 (perfect predictions).

In [None]:
# Predict the class scores of the testing samples with the trained network
test_predictions = model.predict(test_x)

# argmax turns each row (indicator targets / softmax scores) into a class index;
# index i corresponds to class i + 1 of the list used in label_binarize.
test_true = test_y_bin.argmax(axis=1)
test_pred = test_predictions.argmax(axis=1)

# Only six classes are named: index 5 (class 6) has no name because, according to
# the UCI dataset description, the data has no class-6 samples - TODO confirm.
# Passing the labels explicitly keeps the six names aligned with their indices
# even if the network happens to predict index 5.
report_labels = [0, 1, 2, 3, 4, 6]
report_names = ['red soil', 'cotton crop', 'grey soil', 'damp grey soil', 'soil with vegetation stubble', 'very damp grey soil']

# scikit-learn metrics expect (y_true, y_pred) in this order: rows of the
# confusion matrix are the real classes, and precision/recall are not swapped.
print(confusion_matrix(test_true, test_pred))
print(classification_report(test_true, test_pred, labels=report_labels, target_names=report_names))
print('The accuracy on the test set is equal to: %.4f ' % accuracy_score(test_true, test_pred))

In [None]:
# Confusion matrix of the testing set: rows are real classes, columns are predicted classes.
# The labels are listed explicitly so the matrix always has one row/column per display label
# (six classes; index 5, i.e. class 6, has no display label), even if the network happens to
# predict index 5.
cm = confusion_matrix(test_y_bin.argmax(axis=1), test_predictions.argmax(axis=1), labels=[0, 1, 2, 3, 4, 6])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['red', 'cotton', 'grey', 'damp', 'w/veg', 'very damp'])
disp.plot(cmap="bwr")
plt.title('Confusion Matrix for Testing Set')
# Write the figure to a PDF file, then display it
plt.savefig("confusionmatrix.pdf")
plt.show()

In [None]:
# Predict the class scores of the training samples, to compare training and testing quality
train_predictions = model.predict(train_x)

# argmax turns each row (indicator targets / softmax scores) into a class index;
# index i corresponds to class i + 1 of the list used in label_binarize.
train_true = train_y_bin.argmax(axis=1)
train_pred = train_predictions.argmax(axis=1)

# Only six classes are named: index 5 (class 6) has no name because, according to
# the UCI dataset description, the data has no class-6 samples - TODO confirm.
# Passing the labels explicitly keeps the six names aligned with their indices
# even if the network happens to predict index 5.
report_labels = [0, 1, 2, 3, 4, 6]
report_names = ['red soil', 'cotton crop', 'grey soil', 'damp grey soil', 'soil with vegetation stubble', 'very damp grey soil']

# scikit-learn metrics expect (y_true, y_pred) in this order: rows of the
# confusion matrix are the real classes, and precision/recall are not swapped.
print(confusion_matrix(train_true, train_pred))
print(classification_report(train_true, train_pred, labels=report_labels, target_names=report_names))
print('The accuracy on the training set is equal to: %.4f ' % accuracy_score(train_true, train_pred))

In [None]:
# Install the ann_visualizer package from inside the notebook
# (the leading "!" runs the line as a shell command in Jupyter/IPython; it is not plain Python)
!pip3 install ann_visualizer
from ann_visualizer.visualize import ann_viz
# Draw a diagram of the network architecture
# NOTE(review): presumably writes the diagram to "my_model" and opens it (view=True) - confirm against the ann_visualizer docs
ann_viz(model, view=True, filename="my_model", title="Simple Architecture")