# **NEURAL NETWORKS FROM SCRATCH**

.

.

In [1]:
#Import libraries
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
from itertools import product
from scipy.signal import savgol_filter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, r2_score
from sklearn.metrics import precision_score,recall_score,f1_score, accuracy_score
from sklearn.metrics import precision_recall_curve, roc_auc_score, roc_curve

import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

import warnings
warnings.filterwarnings("ignore")

2022-05-18 20:23:53.295711: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-18 20:23:53.295739: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


.

.

## **Define the class NeuralNet**

In [2]:
class NeuralNet :
    """
    The class builds a Neural Network for binary classification and regression
    It is possible to define:
    - The task of the problem ("classification", "regression")
    - The activation function ("sigmoid","tanh","relu","leaky_relu","elu","swish")
    - The hidden layers (a tuple where each element represents the number of nodes in each layer)
    - The Gradient Descent algorithm ("batch","adam")
    - The learning rate
    - The type of regularization ("ridge","lasso")
    - The regularization factor lambda
    - The maximum number of iterations
    - The momentum factor in the weights optimization
    - A random state for reproducible results
    - A flag to display results while processing
    - A flag to apply early stopping to the algorithm
    """ 
    """
    Modules:
    - Training: training the Neural Network
    - Predict: class prediction (for classification) and value (for regression)
    - Predict_proba: probability prediction (only for classification)
    - Score: performance metric (different metrics for classification and regression)
    
    Attributes:
    - last_iter: last iteration of training
    - best_weights: best weights found with training
    - cost_function_tr: cost function values for the training set up to the last iteration
    - cost_function_te: cost function values for the validation set up to the last iteration
    
    """
    
    def __init__(self, task, function = "sigmoid", Hidden_layers = (5,), algo = "batch",
                 alpha = 0.3, regularization = "ridge", Lambda = 0.0, Max_iter = 1000,
                 momentum = 0.8, random_state = None, verbose = 0,
                 early_stopping = True) :
        self.task = task
        self.function = function
        self.Hidden_layers = Hidden_layers
        self.algo = algo
        self.alpha = alpha
        self.regularization = regularization
        self.Lambda = Lambda
        self.Max_iter = Max_iter
        self.momentum = momentum
        self.random_state = random_state
        self.verbose = verbose
        self.early_stopping = early_stopping
    
    def act(function,X) :
        """
        The function returns the transformed values according to the specified activation function
        - function    activation function 
        - X           input matrix (data with no target column)
        """
        
        if function == "sigmoid" :
            return 1. / (1. + np.exp(-X))
        elif function == "tanh" :
            return np.tanh(X)
        elif function == "relu" :
            return np.where(X>=0,X,0)
        elif function == "leaky_relu" :
            return np.where(X>=0,X,0.1*X)
        elif function == "elu" :
            return np.where(X>=0,X, 0.1*(np.exp(X) - 1))           #alpha = 0.1
        elif function == "swish" :
            return X * (1./(1.+np.exp(-X)))
        else :
            raise ValueError("The activation function is not valid")

    
    def derivative(function,X) :
        """
        The function returns the transformed values according to the derivative of the specified activation function
        - function    activation function 
        - X           input matrix (data with no target column)
        """
        
        if function == "sigmoid" :
            return (NeuralNet.act(function,X)) * (1 - NeuralNet.act(function,X))
        elif function == "tanh" :
            return (1 - NeuralNet.act(function,X)**2)
        elif function == "relu" :
            return np.where(X>=0,1,0)
        elif function == "leaky_relu" :
            return np.where(X>=0,1,0.1)
        elif function == "elu" :
            return np.where(X>=0,1, 0.1*np.exp(X)) 
        elif function == "swish" :
            return NeuralNet.act("sigmoid",X) * (1 + X - NeuralNet.act(function,X))
        else :
            raise ValueError("The activation function is not valid")
            
    
    def randw(self, Lin,Lout, function, X) :
        """
        Source: https://machinelearningmastery.com/weight-initialization-for-deep-learning-neural-networks/
        The function returns a matrix of random weights with dimensions Lout x (Lin 1)
        - Lin         dimension of the input layer
        - Lout        dimension of the output layer
        - function    activation function
        - X           input matrix
        """
        
        if self.random_state is not None :
            np.random.seed(self.random_state)
        if (function == "relu") or (function == "leaky_relu") or (function == "elu") :
            return np.random.normal(0.0, np.sqrt(2/X.shape[0]),(Lin+1)*Lout).reshape(Lout,Lin+1)
        elif (function == "sigmoid") or (function == "tanh") or (function == "swish") :
            epsilon = (6 / (Lin+Lout))**0.5;
            return np.random.rand(Lout,Lin+1)*2*epsilon - epsilon
        else :
            raise ValueError("The activation function is not valid")
        
        
    def Bias(m,X) :
        """
        The function returns the matrix with a bias column made of 1s
         - m           number of elements to add (on one row)
         - X           input matrix (data with no target column)
        """
        if m != X.shape[1] :
            raise ValueError("The value of m must match the number of columns of X")
        else :    
            return np.vstack([np.ones((1,m)),X]) 

    
    def Forward_propagation(self, X, T) :        
        """
        The function returns the values obtained in the the last layer through the forward propagation algorithm
        - X           input matrix (data with no target column)
        - T           tuple containing n matrices (i.e. weights)
        """
        
        m = X.shape[0]                                 #Number of observations
        n = len(T)                                     #Number of matrices (i.e. weights)
        X = np.hstack([np.ones((m,1)),X])              #Add bias column
        a = NeuralNet.act(self.function,np.dot(T[0],X.T))            #a1 = X.T

        for i in np.arange(3,n+2) :
            if i==(n+1) :                              #Final layer
                if self.task == "classification" :
                    a = NeuralNet.act('sigmoid',np.dot(T[i-2],NeuralNet.Bias(m,a)))
                else :
                    a = np.dot(T[i-2],NeuralNet.Bias(m,a))
                break
            else :                                     #Other layers
                a = NeuralNet.act(self.function,np.dot(T[i-2],NeuralNet.Bias(m,a)))

        return a.T
    
    
    def ForwardBack_propagation(self, X, y, T) :
        """
        The function returns the values obtained in each layer (through forward propagation) and the errors (through back propagaton)
        - X           input matrix (data with no target column)
        - yy          target variable transformed with OneHotEncoder (number of columns = number of classes)
        - T           tuple containing n matrices (i.e. weights)
        """
        
        m = X.shape[0]                                   #Number of observations
        n = len(T)                                       #Number of matrices (i.e. weights)
        X = np.hstack([np.ones((m,1)),X])                #Add bias column
        a2 = NeuralNet.act(self.function,np.dot(T[0],X.T))    #a1 = X.T
        ANODES = (a2,)
        DNODES = ()

        #Modify target column for regression
        if self.task == "regression" :
            y = np.array(y).reshape(len(y),1)

        #Forward propagation loop
        for i in np.arange(3,n+3) :
            if i==(n+1) :                              #Final layer
                if self.task == "classification" :
                    a = NeuralNet.act('sigmoid',np.dot(T[i-2],NeuralNet.Bias(m,ANODES[i-3])))
                else :
                    a = np.dot(T[i-2],NeuralNet.Bias(m,ANODES[i-3]))
                ANODES = ANODES + (a,)
                DNODES = (a - y.T,) + DNODES
                break
            else :                                     #Other layers
                ANODES = ANODES + (NeuralNet.act(self.function,np.dot(T[i-2],NeuralNet.Bias(m,ANODES[i-3]))),)

        #Back propagation loop
        for i in np.arange(2,n+1)[::-1] :
            DNODES = (( np.dot(T[i-1].T,DNODES[0]) * NeuralNet.derivative(self.function,NeuralNet.Bias(m,ANODES[i-2])) )[1:,:],) + DNODES

        return ANODES, DNODES
    
    
    def J_Grad(self, X, y, labels, T) :
        """
        The function returns the Cost function (with the regularization term) and the Gradient
        - X           input matrix (data with no target column)
        - y           target variable
        - labels      number of differen classes
        - Lambda      regularization factor
        - T           tuple containing n matrices (i.e. weights)
        """
        
        m = X.shape[0]
        n = len(T)

        #Create target array (OneHotEncoder - number of columns = number of classes)
        if self.task == "classification" :
            yy = np.zeros((len(y),labels))
            for i in np.arange(0,m) :
                for k in np.arange(0,labels) :
                    if y[i] == k :
                        yy[i,k] = 1
        else :
            yy = y

        #Forward and Back propagation
        anodes, dnodes = NeuralNet.ForwardBack_propagation(self, X, yy, T)

        #Gradient of the Cost function
        Delta1 = np.dot(dnodes[0],np.hstack([np.ones((m,1)),X]))
        Delta2 = np.dot(dnodes[1],NeuralNet.Bias(m,anodes[0]).T)
        if self.regularization == "ridge" :
            t1grad = (Delta1+(np.hstack([np.zeros((T[0].shape[0],1)), T[0][:,1:]]))*self.Lambda)/m
            t2grad = (Delta2+(np.hstack([np.zeros((T[1].shape[0],1)), T[1][:,1:]]))*self.Lambda)/m  
        else :
            t1grad = (Delta1+(np.hstack([np.zeros((T[0].shape[0],1)), np.sign(T[0][:,1:])]))*self.Lambda)/m
            t2grad = (Delta2+(np.hstack([np.zeros((T[1].shape[0],1)), np.sign(T[1][:,1:])]))*self.Lambda)/m  
            
        TGRAD = (t1grad,) + (t2grad,)
        for i in np.arange(3,n+1) :
            Delta = np.dot(dnodes[i-1],NeuralNet.Bias(m,anodes[i-2]).T)
            if self.regularization == "ridge" :
                tgrad = (Delta+(np.hstack([ np.zeros((T[i-1].shape[0],1)), T[i-1][:,1:]]))*self.Lambda)/m 
            else :
                tgrad = (Delta+(np.hstack([ np.zeros((T[i-1].shape[0],1)), T[i-1][:,1:]]))*self.Lambda)/m 
            TGRAD = TGRAD + (tgrad,)

        #Regularization term and Cost function
        if self.regularization == "ridge" :
            REG = sum([np.sum(i[:,1:]**2) for i in T])*self.Lambda/(2*m)  
        else :
            REG = sum([np.sum(abs(i[:,1:])) for i in T])*self.Lambda/(2*m)  
            
        #Cost function
        if self.task == "classification" :
            J = sum(sum(-np.log10(anodes[-1])*yy.T - np.log10(1-anodes[-1])*(1-yy).T))/m + REG
        else :
            J = (dnodes[-1]**2).sum()/(2*m) + REG

        return J, TGRAD

    
    def Prediction(self, X, T) :
        """
        The function returns the corresponding predicted class using fixed weights.
        This is specific of a binary classification problem. In the case of regression, it return only the predicted values
        - X           input matrix (data with no target column)
        - T           tuple containing n matrices (i.e. weights)
        """

        Final = NeuralNet.Forward_propagation(self, X, T)
        if self.task == "classification" :
            Predictions = []
            for i in np.arange(0,len(Final)) :
                Predictions.append(np.argmax(Final[i,:]))
            return Predictions
        else :
            return Final
    
    
    def Score(self, X, y, metric) :
        """
        The function returns the accuracy (for classification) and RMSE for regression
        - X           input matrix (data with no target column)
        - y           target variable
        - metric      metric to evaluate the performance
        """
        
        if self.task == "classification" :
            if metric == "accuracy" :
                return (self.Predict(X) == y).sum()/len(y)
            elif metric == "precision" :
                return precision_score(y, self.Predict(X))
            elif metric == "recall" :
                return recall_score(y, self.Predict(X))
            elif metric == "f1score" :
                return f1_score(y, self.Predict(X))
            elif metric == "auc" :
                return roc_auc_score(y, self.Predict_proba(X))
            else :
                raise ValueError("Misspelled or inappropriate metric for %s" % self.task)
            
        else :
            if metric == "rmse" :
                return np.sqrt(((self.Predict(X) - np.array(y).reshape(len(y),1))**2).sum()/len(y))
            elif metric == "mae" :
                return (abs(self.Predict(X) - np.array(y).reshape(len(y),1))).sum()/len(y)
            elif metric == "mpe" :
                return 100*( abs((self.Predict(X) - np.array(y).reshape(len(y),1))/np.array(y).reshape(len(y),1)) ).sum()/len(y)
            elif metric == "r2" :
                return r2_score(y, self.Predict(X) )
            else :
                raise ValueError("Misspelled or inappropriate metric for %s" % self.task)
                
                
    def Training(self, X_train, y_train, X_test, y_test) :
        """
        The function returns the last iteration, a tuple containing the weights, the cost function value at each iteration for the training and validation sets
        - X_train               input matrix (data with no target column)
        - y_train               target variable
        - X_test          matrix for cross-validation
        - y_test          target variable for cross-validation
        """
        
        if (len(X_train) != len(y_train)) or (len(X_test) != len(y_test)):
            raise ValueError("Data and target have different dimensions")
         
        else :
            inputs = X_train.shape[1]               #Number of features
            Cost_value = 1e09                       #Value to start early stopping for classification
            tolerance = 1e-07                       #Tolerance value for regression early stopping
            self.flag = "Convergent cost function"  #Initialization of the "convergence flag" (if cost function is divergent, it will be changed)
            
            #Number of labels (in the output)
            if self.task == "classification" :
                labels = len(np.unique(y_train))
            else :
                labels = 1

            #Random initialization of weights
            n = len(self.Hidden_layers)
            THETA = (NeuralNet.randw(self, inputs, self.Hidden_layers[0], self.function, X_train),)
            for i in np.arange(0,n) :
                if i==n-1 :
                    THETA = THETA + (NeuralNet.randw(self, self.Hidden_layers[i], labels, self.function, X_train),)
                    break
                else :
                    THETA = THETA + (NeuralNet.randw(self, self.Hidden_layers[i], self.Hidden_layers[i+1], self.function, X_train),)
            
            #Initialization to zero of the change in the weights (to compute the momentum)
            Change = tuple([0.0*x for x in THETA])
            #Initialization of the momenta for the Adam algorithm
            M_beta = tuple([0.0*x for x in THETA])
            V_beta = tuple([0.0*x for x in THETA])
            #Default hyperparameters of the Adam algorithm
            beta1 = 0.9
            beta2 = 0.999
            epsilon = 1e-08
            
            #Iterative training (i: epoch)
            Cost_tr, Cost_te = [], []
            for i in range(self.Max_iter) :
                #Cost function and gradient
                J_tr, G_tr = NeuralNet.J_Grad(self, X_train, y_train, labels, THETA)
                J_te, G_te = NeuralNet.J_Grad(self, X_test, y_test, labels, THETA)
                #Update cost function lists
                Cost_tr.append(J_tr)
                Cost_te.append(J_te)

                #Show results
                if self.verbose != 0 :
                    if self.task == "classification" :
                        Accuracy_tr = (NeuralNet.Prediction(self, X_train, THETA) == y_train).sum()/len(y_train)
                        Accuracy_te = (NeuralNet.Prediction(self, X_test, THETA) == y_test).sum()/len(y_test)
                        print('\rIteration: {}/{} ----- Training cost: {:.5f} - Validation cost: {:.5f} --- Training accuracy: {:.5f} - Validation accuracy: {:.5f}'.format(i,
                                                                                                                                              self.Max_iter,
                                                                                                                                              J_tr,J_te,
                                                                                                                                              Accuracy_tr,Accuracy_te), end='')
                    else :
                        RMSE_tr = np.sqrt(((NeuralNet.Prediction(self, X_train, THETA) - np.array(y_train).reshape(len(y_train),1))**2).sum()/len(y_train))
                        RMSE_te = np.sqrt(((NeuralNet.Prediction(self, X_test, THETA) - np.array(y_test).reshape(len(y_test),1))**2).sum()/len(y_test))
                        print('\rIteration: {}/{} ----- Training cost: {:.5f} - Validation cost: {:.5f} --- Training RMSE: {:.5f} - Validation RMSE: {:.5f}'.format(i,
                                                                                                                                              self.Max_iter,
                                                                                                                                              J_tr,J_te,
                                                                                                                                              RMSE_tr,RMSE_te), end='')
                #Early stopping
                #The condition i>Iter is added to avoid initial flactuations
                Iter = 50
                broken = 0
                if self.task == "classification" :
                    if (i>Iter) & (self.early_stopping == True) :        
                        diff = 100
                        if (Cost_te[Iter-1] < Cost_tr[Iter-1]) and ((J_tr - J_te) < 0) :
                            broken = 1
                        else :
                            if J_te < Cost_value :
                                Cost_value = J_te
                            else :
                                broken = 1
                else :
                    if (i>Iter) & (self.early_stopping == True) :        
                        if abs(J_tr - Cost_value) > tolerance :
                            Cost_value = J_tr
                        else :
                            broken = 1

                #Stop when the cost function is divergent
                broken2 = 0
                if (i>Iter) and (Cost_tr[-1]>Cost_tr[-10]) and (Cost_tr[-1]>Cost_tr[-50]) :
                    broken, broken2 = 1, 1
                    
                if broken==1 :
                    if broken2==1 :
                        self.flag = "Divergent cost function"
                    else :
                        self.flag = "Convergent cost function"
                        self.last_iter = i
                        self.best_weights = THETA
                        self.cost_function_tr = Cost_tr
                        self.cost_function_te = Cost_te
                    break
                
                #Update weights
                if self.algo == "batch" :
                    #Compute changes in the weights (with momentum)
                    NewChange = ()
                    for k in range(n+1) :
                        NewChange = (NewChange + (self.alpha*G_tr[k] + self.momentum*Change[k],))

                    #Update weights
                    for k in range(n+1) :
                        THETA = (THETA + (THETA[0] - NewChange[k],))[1:]

                    #Update change
                    Change = NewChange
                elif self.algo == "adam":
                    #Adam algorithm without momentum
                    if self.momentum != 0.0 :
                        raise ValueError("Set the momentum to zero for the Adam algorithm")
                    #Compute first and second momenta
                    NewM_beta = ()
                    NewV_beta = ()
                    for k in range(n+1) :
                        NewM_beta = (NewM_beta + (beta1*M_beta[k] + (1-beta1)*G_tr[k],))
                        NewV_beta = (NewV_beta + (beta2*V_beta[k] + (1-beta2)*(G_tr[k]**2),))
                    
                    #Bias correction of the momenta (with static decay of the beta parameters)
                    Mhat = tuple([x/(1 - beta1**(i+1)) for x in NewM_beta])
                    Vhat = tuple([x/(1 - beta2**(i+1)) for x in NewV_beta])
                    
                    #Update weights
                    for k in range(n+1) :
                        THETA = (THETA + (THETA[0] - self.alpha*Mhat[k]/(np.sqrt(Vhat[k]) + epsilon),))[1:]
                    
                    #Update momenta
                    M_beta = NewM_beta
                    V_beta = NewV_beta
                else :
                    raise ValueError("The algorithm used for optimizing the weights is not valid")
                    
            self.last_iter = i
            self.best_weights = THETA
            self.cost_function_tr = Cost_tr
            self.cost_function_te = Cost_te
        
    
    def Predict(self, X) :
        """
        The function returns the corresponding predicted class using the best weights found with training. 
        This is specific of a binary classification problem. In the case of regression, it return only the predicted values
        - X           input matrix (data with no target column)
        - T           tuple containing n matrices (i.e. weights)
        """

        Final = NeuralNet.Forward_propagation(self, X, self.best_weights)
        if self.task == "classification" :
            Predictions = []
            for i in np.arange(0,len(Final)) :
                Predictions.append(np.argmax(Final[i,:]))
            return Predictions
        else :
            return Final
    
    
    def Predict_proba(self, X) :
        """
        The function returns the final probabilities of getting the positive class and the corresponding predicted class
        This is specific of a binary classification problem. 
        - X           input matrix (data with no target column)
        - T           tuple containing n matrices (i.e. weights)
        """
        
        Final = NeuralNet.Forward_propagation(self, X, self.best_weights)
        if self.task == "classification" :
            Predictions = []
            for i in np.arange(0,len(Final)) :
                Predictions.append(np.argmax(Final[i,:]))
            return Final[:,1]
        else :
            raise ValueError("No probabilities for %s" % self.task)

.

.

.

.

.

## **Datasets for classification and regression**

The dataset for classification has been retrieved from https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.names : it is about the *Pima Indians Diabetes*.

The dataset for regression is taken from the repository https://github.com/SamuComqi92/Car_price_prediction ("Car price" - the training and test sets has been already corrected and scaled)

In [3]:
#Import the data
#CLASSIFICATION
Data_class = pd.read_csv("dati2")

#Create a balanced dataset (268 records for each class)
Data_class_bal = pd.concat([Data_class[Data_class.Class==0].sample(268, random_state=0),Data_class[Data_class.Class==1]])

#Creation of X and y
X_class = Data_class_bal.drop(["Class"],axis=1)
y_class = Data_class_bal.Class
X_class = np.array(X_class)
y_class = np.array(y_class)

#Creation of training and test sets
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class,y_class,test_size=0.3,random_state=0)

#Feature scaling
scaler = MinMaxScaler()
scaler.fit(X_train_class)
X_train_class = scaler.transform(X_train_class)
X_test_class = scaler.transform(X_test_class)

In [4]:
#REGRESSION
Data_train_regr = pd.read_csv("Car_Dataset_train.csv")
Data_test_regr = pd.read_csv("Car_Dataset_test.csv")
Data_train_regr = Data_train_regr.iloc[:,1:]
Data_test_regr = Data_test_regr.iloc[:,1:]

#Shape of the datasets
print(Data_train_regr.shape, Data_test_regr.shape)

#Creation of X and y (for training and testing)
X_train_regr = Data_train_regr.drop("CarPrice",axis=1)
y_train_regr = Data_train_regr.CarPrice
X_test_regr = Data_test_regr.drop("CarPrice",axis=1)
y_test_regr = Data_test_regr.CarPrice

(194, 18) (11, 18)


.

.

.

## **Training: classification & regression (example)**

In [5]:
#Classification
X_train_t, X_test_t, y_train_t, y_test_t = train_test_split(X_train_class,y_train_class,test_size = 0.2,random_state=0)



#Batch algorithm
print("Batch GD algorithm")
Model = NeuralNet(task="classification", function = "sigmoid", Hidden_layers = (6,), 
              algo = "batch", alpha = 0.2, regularization = "ridge", Lambda = 0.0,
              Max_iter = 1000, momentum = 0.0, early_stopping = True, 
              random_state = 42, verbose = 1)

Model.Training(X_train_t, y_train_t, X_test_t, y_test_t)

#Batch + Momentum algorithm
print("\nBatch + Momentum GD algorithm")
Model = NeuralNet(task="classification", function = "sigmoid", Hidden_layers = (6,), 
              algo = "batch", alpha = 0.2, regularization = "ridge", Lambda = 0.0,
              Max_iter = 1000, momentum = 0.85, early_stopping = True, 
              random_state = 42, verbose = 1)

Model.Training(X_train_t, y_train_t, X_test_t, y_test_t)

#Adam algorithm
print("\nAdam GD algorithm")
Model = NeuralNet(task="classification", function = "sigmoid", Hidden_layers = (6,), 
              algo = "adam", alpha = 0.2, regularization = "ridge", Lambda = 0.0,
              Max_iter = 1000, momentum = 0.0, early_stopping = True, 
              random_state = 42, verbose = 1)

Model.Training(X_train_t, y_train_t, X_test_t, y_test_t)

Batch GD algorithm
Iteration: 999/1000 ----- Training cost: 0.49454 - Validation cost: 0.53867 --- Training accuracy: 0.74333 - Validation accuracy: 0.66667
Batch + Momentum GD algorithm
Iteration: 748/1000 ----- Training cost: 0.44556 - Validation cost: 0.48758 --- Training accuracy: 0.76000 - Validation accuracy: 0.72000
Adam GD algorithm
Iteration: 76/1000 ----- Training cost: 0.43617 - Validation cost: 0.46066 --- Training accuracy: 0.75000 - Validation accuracy: 0.72000

In [6]:
#Regression
X_train_t, X_test_t, y_train_t, y_test_t = train_test_split(X_train_regr,y_train_regr,test_size = 0.2,random_state=0)



#Batch algorithm
print("Batch GD algorithm")
Model = NeuralNet(task="regression", function = "sigmoid", Hidden_layers = (7,3), 
              algo = "batch", alpha = 0.1, regularization = "ridge", Lambda = 0.0,
              Max_iter = 1000, momentum = 0.0, early_stopping = True, 
              random_state = 42, verbose = 1)

Model.Training(X_train_t, y_train_t, X_test_t, y_test_t)

#Batch + Momentum algorithm
print("\nBatch + Momentum GD algorithm")
Model = NeuralNet(task="regression", function = "sigmoid", Hidden_layers = (7,3), 
              algo = "batch", alpha = 0.1, regularization = "ridge", Lambda = 0.0,
              Max_iter = 1000, momentum = 0.85, early_stopping = True, 
              random_state = 42, verbose = 1)

Model.Training(X_train_t, y_train_t, X_test_t, y_test_t)

#Adam algorithm
print("\nAdam GD algorithm")
Model = NeuralNet(task="regression", function = "relu", Hidden_layers = (7,3), 
              algo = "adam", alpha = 0.1, regularization = "ridge", Lambda = 0.0,
              Max_iter = 1000, momentum = 0.0, early_stopping = True, 
              random_state = 42, verbose = 1)

Model.Training(X_train_t, y_train_t, X_test_t, y_test_t)

Batch GD algorithm
Iteration: 999/1000 ----- Training cost: 0.00389 - Validation cost: 0.00433 --- Training RMSE: 0.08820 - Validation RMSE: 0.09303
Batch + Momentum GD algorithm
Iteration: 999/1000 ----- Training cost: 0.00305 - Validation cost: 0.00330 --- Training RMSE: 0.07812 - Validation RMSE: 0.08122
Adam GD algorithm
Iteration: 452/1000 ----- Training cost: 0.00273 - Validation cost: 0.00286 --- Training RMSE: 0.07391 - Validation RMSE: 0.07560