Importing all the libraries for the project

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from numpy import loadtxt
from keras.callbacks import EarlyStopping
from matplotlib import pyplot
from sklearn.utils import shuffle
from keras import optimizers
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Make runs repeatable: the same seed value is handed to NumPy's global
# generator and to TensorFlow's global generator.
seed = 2
np.random.seed(seed)
tf.random.set_seed(seed)

Loading the training set to train the first neural network

In [None]:
# Read the file as a matrix of strings, drop columns 0 and 8 (the first and
# the last one, which carry no information needed here), and cast what is
# left to integers. After this, column 0 holds the target and the remaining
# six columns hold the network inputs.
tr = np.delete(
    loadtxt("dataset/monk1/monks-1.train", dtype='str', delimiter=' '),
    [0, 8],
    axis=1,
).astype(int)
# Preview the first 20 rows
tr[:20]

At this point we are ready to split the columns of the matrix into target and input. The target is the first column of the matrix, while the inputs are the other 6 columns.

In [None]:
# Column 0 is the target; columns 1..6 are the inputs.
y, x = tr[:, 0], tr[:, 1:7]

# Shuffle inputs and targets together so that rows stay aligned.
x, y = shuffle(x, y)

In [None]:
# One-hot encode every input column and keep the result as a dense array.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` (the old
# name was later removed), so try the new spelling first and fall back to the
# old one on releases that do not know it yet.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    one_hot_encoder = OneHotEncoder(sparse=False)
one_hot_encoder.fit(x)
x = one_hot_encoder.transform(x)

Now we need to implement early stopping to understand when to stop the training of our neural network. For this reason, we need to split our dataset into a training set and a validation set.

In [None]:
# NOTE: this cell is inert. The whole hold-out split below is wrapped in a
# triple-quoted string, so nothing in it is executed and none of the names it
# mentions (vl_x, vl_y, tr_x, tr_y) are defined when the notebook runs; the
# cell only evaluates to the string itself.
# The disabled code would pick 30% of the rows at random (without
# replacement) as a validation set and keep the remaining rows for training.
'''
print("size of the design set", len(x))

# we are going to use 30% of our training set as the validation set
subset_size = int(0.3 * len(x))

# find randomly the indexes of the rows for the validation set
index = np.random.choice(len(x), subset_size, replace=False)
index = np.sort(index)

# create the validation set
vl_x = x[index]
vl_y = y[index]

# create the training set
tr_x = np.delete(x, index, 0)
tr_y = np.delete(y, index, 0)

print("size of the training set", len(tr_x))
print("size of the validation set", len(vl_y))
'''

So we are now ready to build the neural network

In [None]:
# Function to create a model, it is required to perform a grid search over the hyper-parameters
def create_model(params=None):
    """Build and compile the one-hidden-layer network used by the grid search.

    SciKeras does not re-compile a model that the build function has already
    compiled, so `optimizer__*` values set on the wrapper would otherwise be
    ignored and every grid point would train with the default SGD settings.
    To make them effective, the wrapper parameters are read from `params`
    (SciKeras passes this argument when it appears in the signature) and the
    SGD optimizer is built explicitly from them.

    Parameters
    ----------
    params : dict or None
        Wrapper parameters. The keys "optimizer__learning_rate" and
        "optimizer__momentum" are looked up; when they are missing, or when
        the function is called with no argument, 0.01 and 0.0 are used, which
        are the values Keras' SGD uses by default.

    Returns
    -------
    A compiled Sequential model: 17 inputs, 4 ReLU hidden units, 1 sigmoid
    output, mean-squared-error loss, accuracy metric.
    """
    params = params or {}
    sgd = tf.keras.optimizers.SGD(
        learning_rate=params.get("optimizer__learning_rate", 0.01),
        momentum=params.get("optimizer__momentum", 0.0),
    )

    # (From Keras documentation) A Sequential model is appropriate for a plain stack of layers where each layer
    # has exactly one input tensor and one output tensor.
    model = Sequential()
    # Dense implements a fully connected layer. "input_dim" declares the size of the input space and must be
    # given on the first layer; 17 is presumably the number of columns of the one-hot encoded input
    # (verify against x.shape[1]).
    model.add(Dense(4, input_dim=17, activation="relu"))
    # Output layer: a single sigmoid unit for the binary target.
    model.add(Dense(1, activation="sigmoid"))
    # Compile with the mean squared error loss, the SGD optimizer built above and accuracy as the
    # reported metric.
    model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy'])
    return model

Keras models can be used in scikit-learn by wrapping them with the KerasClassifier or KerasRegressor class from the module SciKeras. We will use the KerasClassifier since we have to solve a classification problem.

In [None]:
# Wrap the build function so scikit-learn can drive it; verbose=0 keeps the
# training output silent.
model = KerasClassifier(
    model=create_model,
    verbose=0,
)

Now we need to define the values and the hyperparameters for the grid search. Below there is a list of hyperparameters that we'll consider for the grid search.

1. The learning rate for the gradient descent algorithm
2. The value of alpha for momentum
3. The batch size (the number of epochs is fixed to a single value)

In [None]:
# Candidate values explored by the grid search.
batch_size = [20, 60, 100]
epochs = [1000]
learn_rate = [0.01, 0.1, 0.2]
momentum = [0.0, 0.1, 0.2, 0.3]

# In the SciKeras wrapper, keys carrying the "optimizer__" prefix are routed
# to the optimizer; the remaining keys are settings of the wrapper itself.
param_grid = {
    "optimizer__learning_rate": learn_rate,
    "optimizer__momentum": momentum,
    "epochs": epochs,
    "batch_size": batch_size,
}

In [None]:
# GridSearchCV performs an exhaustive search over the given parameter values for an estimator. It is used here
# for the model selection of the neural network.
# param_grid = dictionary with parameter names (str) as keys and lists of settings to try as values.
# n_jobs = number of jobs to run in parallel, the value -1 uses all the available processors.
# cv = number of folds of the cross validation.
# error_score="raise" makes a failing fit raise instead of being scored silently.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=5, error_score="raise")

# A simple early-stopping condition. No validation data is handed to fit() below, so the monitored quantity is
# the *training* loss: training stops once it has not improved for 10 consecutive epochs.
es = EarlyStopping(monitor='loss', mode='min', patience=10)

# Extra keyword arguments given to GridSearchCV.fit are forwarded to the estimator's fit method.
grid_result = grid.fit(
    x, y,
    callbacks=[es],
    verbose=1
)

Print the best configuration of hyper-parameters found by the grid search and its accuracy

In [None]:
# Headline: best mean cross-validated score and the parameters that produced it.
print(f"Best accuracy: {grid_result.best_score_:f} using {grid_result.best_params_}")

# One line per explored configuration: mean score, its standard deviation
# across folds, and the parameter setting.
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print(f"{mean:f} ({stdev:f}) with: {param!r}")

# THE CELLS BELOW ARE ONLY CODE EXPERIMENTS AND WILL BE REMOVED IN THE FINAL VERSION OF THE PROJECT #

In [None]:
# This cell is a test; remove it later.

# Build the hold-out split here: no earlier cell defines tr_x / tr_y / vl_x / vl_y
# (the split cell above is disabled), so without this the cell fails on a fresh kernel.
# 30% of the encoded design set is set aside, chosen at random without replacement.
holdout_size = int(0.3 * len(x))
holdout_idx = np.sort(np.random.choice(len(x), holdout_size, replace=False))
vl_x, vl_y = x[holdout_idx], y[holdout_idx]
tr_x = np.delete(x, holdout_idx, 0)
tr_y = np.delete(y, holdout_idx, 0)

# define model
model = Sequential()
# The input width follows the encoded data instead of a hard-coded 6, which
# no longer matches x after one-hot encoding.
model.add(Dense(3, input_dim=tr_x.shape[1], activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# simple early stopping on the validation loss
es = EarlyStopping(monitor='val_loss', mode='min', patience=50)#, min_delta=1, verbose=1)
# fit model
history = model.fit(tr_x, tr_y, validation_data=(vl_x, vl_y), epochs=4000, verbose=0, callbacks=[es])
# evaluate the model
_, train_acc = model.evaluate(tr_x, tr_y, verbose=0)
_, test_acc = model.evaluate(vl_x, vl_y, verbose=0)
print('Train: %.3f, Validation: %.3f' % (train_acc, test_acc))
# plot training history
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='validation')
pyplot.legend()
pyplot.show()

In [None]:
# Training vs. validation accuracy per epoch, one curve per recorded series.
for key, label in (('accuracy', 'train'), ('val_accuracy', 'validation')):
    pyplot.plot(history.history[key], label=label)
pyplot.legend()
pyplot.show()

In [None]:
# Evaluate on the training split. No verbose argument is passed here, so
# evaluate() reports with its default verbosity (pass verbose=0 to silence it).
_, accuracy = model.evaluate(tr_x, tr_y)
print(f"Accuracy: {accuracy * 100:.2f}")

In [None]:
# Show the training-loss values stored in the history of the fit call above.
history.history['loss']

In [None]:
# List the names of the series recorded in the training history.
history.history.keys()

## ANOTHER EXPERIMENT ##

In [None]:
# define model
model2 = Sequential()
model2.add(Dense(3, input_dim=17, activation='relu'))
model2.add(Dense(1, activation='sigmoid'))

# Build the optimizer BEFORE compiling and hand it to compile(). Previously
# this object was created after training and discarded, so the network was
# trained with the default settings of the 'SGD' string instead.
sgd = tf.keras.optimizers.SGD(
    learning_rate=0.01, momentum=0.2, nesterov=False, name="SGD"
)
model2.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

history2 = model2.fit(x, y, epochs=1000, batch_size=20, verbose=1)
# evaluate the model
_, train_acc = model2.evaluate(x, y, verbose=0)
print('Train: %.3f' % train_acc)

# plot training history
pyplot.plot(history2.history['loss'], label='train')
pyplot.legend()
pyplot.show()

In [None]:
# Training accuracy per epoch for the second experiment.
train_accuracy = history2.history['accuracy']
pyplot.plot(train_accuracy, label='train')
pyplot.legend()
pyplot.show()

In [None]:
# `model` was rebound to a plain Keras Sequential in the test cell above,
# which has no get_params(); ask the SciKeras wrapper held by the grid search
# for its tunable parameter names instead.
grid.estimator.get_params().keys()

In [None]:
# Accuracy of the last recorded epoch. Index -1 instead of a hard-coded 999,
# so the cell keeps working if the number of epochs changes.
a = history2.history['accuracy'][-1]
a

In [None]:
pip install keras-tuner

In [None]:
!pip install -q -U keras-tuner