# Programming Assignment 4
## Neural Networks

Aluno: Francisco Edyvalberty Alenquer Cordeiro \
Matrícula: 518659


# Imports

In [6]:
import numpy as np
import matplotlib.pyplot as plt

# Utility Functions

## Metrics

In [7]:
def accuracy(y_true, y_pred):
    y_true = y_true.reshape(-1, 1)
    y_pred = y_pred.reshape(-1, 1)

    right_prediction = y_true == y_pred
    accuracy = right_prediction.sum() / len(y_true)
    return accuracy

def recall(y_true, y_pred):
    y_true = y_true.reshape(-1, 1)
    y_pred = y_pred.reshape(-1, 1)
    array = np.hstack([y_true, y_pred])
    array = array[array[:,0] == 1]
    
    right_prediction = array[:, 0] == array[:, 1]
    recall = right_prediction.sum() / len(array)
    return recall

def precision(y_true, y_pred):
    y_true = y_true.reshape(-1, 1)
    y_pred = y_pred.reshape(-1, 1)
    array = np.hstack([y_true, y_pred])
    array = array[array[:,1] == 1]
    
    right_prediction = array[:, 0] == array[:, 1]
    precision = right_prediction.sum() / len(array)

    return precision

def f1_score(y_true, y_pred):
    y_true = y_true.reshape(-1, 1)
    y_pred = y_pred.reshape(-1, 1)
    precision_score = precision(y_true, y_pred)
    recall_score = recall(y_true, y_pred)

    f1_score = 2 * (precision_score * recall_score) / (precision_score + recall_score)

    return f1_score

def get_mse(y_real, y_pred):
    return np.mean((y_real - y_pred) ** 2)

def get_rmse(y_real, y_pred):
    return np.sqrt(get_mse(y_real, y_pred))

## MinMaxScaler

In [8]:
class MinMaxScaler:
    def __init__(self):
        self.fitted = False

    def fit_transform(self, data):      
        self.maximum = data.max(axis=0)
        self.minimum = data.min(axis=0)
        self.fitted = True

        scaled_data =  (data - self.minimum) / (self.maximum - self.minimum)
        return scaled_data
    
    def transform(self, data):
        if not self.fitted:
            raise Exception('Scaler not fitted!')

        scaled_data =  (data - self.minimum) / (self.maximum - self.minimum)
        return scaled_data

    def inverse_transform(self, scaled_data):
        if not self.fitted:
            raise Exception('Scaler not fitted!')
        
        original_data = (self.maximum - self.minimum) * scaled_data + self.minimum
        return original_data

## Standardization


In [9]:
class StandardScaler:
    def __init__(self):
        self.fitted = False

    def fit_transform(self, data):
        self.mean = data.mean(axis=0)
        self.std = data.std(axis=0)
        self.fitted = True

        scaled_data = (data - self.mean) / self.std
        return scaled_data

    def transform(self, data):
        if not self.fitted:
            raise Exception('Scaler not fitted!')

        scaled_data = (data - self.mean) / self.std
        return scaled_data

    def inverse_transform(self, scaled_data):
        if not self.fitted:
            raise Exception('Scaler not fitted!')

        original_data = (scaled_data * self.std) + self.mean
        return original_data


## Cross Validation

In [10]:
def get_cv_folds(data, n_folds=10, shuffle=True, random_state=12894):
    indexes = np.arange(data.shape[0])
    if shuffle:
        np.random.seed(random_state)
        np.random.shuffle(indexes)

    slices = np.array_split(indexes, n_folds)
    all_elements = np.hstack(slices)   
    
    splits = []
    for i in range(n_folds):
        train_idx = all_elements[~np.isin(all_elements, slices[i])]
        test_idx = slices[i]

        splits.append((train_idx, test_idx))

    return splits

## Train Test Split

In [11]:
def train_test_split(X, y, train_size_perc, random_seed=264852):
    
    y = y.reshape(-1, 1)
    
    N = X.shape[0]
    train_size = int(train_size_perc * N)

    indexes = np.arange(0, N, 1)

    np.random.seed(random_seed)
    train_idx = np.random.choice(indexes, train_size, replace=False)
    test_idx = np.delete(indexes, train_idx)

    X_train = X[train_idx, :]
    y_train = y[train_idx, :]
    X_test = X[test_idx, :]
    y_test = y[test_idx, :]

    return X_train, X_test, y_train, y_test


## Do Cross Validation and Get Metrics

In [12]:
def do_cv_and_get_metrics(classifier, cv_splits, X_train, y_train, X_test, title='Classifier', scaler=None):

    X_train = X_train.copy()
    y_train = y_train.copy()
    X_test = X_test.copy()

    train_metrics = {
        'accuracy': [],
        'recall': [],
        'precision': [],
        'f1_score': []
    }

    valid_metrics = {
        'accuracy': [],
        'recall': [],
        'precision': [],
        'f1_score': []
    }
    # Reporting results
    print('#' + f'{title}'.center(60, '-') + '#')

    print('\n---> Validation Folds Metrics')
    print('Fold\tAccuracy\tRecall\t\tPrecision\tF1-Score')
    count_fold = 1
    for train_idx, val_idx in cv_splits:
        # Spliting data
        X_train_cv = X_train[train_idx, :]
        y_train_cv = y_train[train_idx, :]
        X_val_cv = X_train[val_idx, :]
        y_val_cv = y_train[val_idx, :]

        # Scaling if have scaler argument
        if scaler is not None:
            X_train_cv = scaler.fit_transform(X_train_cv)
            X_val_cv = scaler.transform(X_val_cv)

        # Training Model
        classifier.fit(X_train_cv, y_train_cv.ravel())

        # Predictions
        y_train_cv_pred = classifier.predict(X_train_cv)
        y_val_cv_pred = classifier.predict(X_val_cv)

        # Storing metrics
        train_metrics['accuracy'].append(accuracy(y_train_cv, y_train_cv_pred))
        train_metrics['recall'].append(recall(y_train_cv, y_train_cv_pred))
        train_metrics['precision'].append(precision(y_train_cv, y_train_cv_pred))
        train_metrics['f1_score'].append(f1_score(y_train_cv, y_train_cv_pred))

        valid_metrics['accuracy'].append(accuracy(y_val_cv, y_val_cv_pred))
        valid_metrics['recall'].append(recall(y_val_cv, y_val_cv_pred))
        valid_metrics['precision'].append(precision(y_val_cv, y_val_cv_pred))
        valid_metrics['f1_score'].append(f1_score(y_val_cv, y_val_cv_pred))

        print('{0:.0f}\t{1:.4f}  \t{2:.4f}\t\t{3:.4f}   \t{4:.4f}'.format(
                count_fold,
                valid_metrics['accuracy'][-1], 
                valid_metrics['recall'][-1],
                valid_metrics['precision'][-1],
                valid_metrics['f1_score'][-1]
            )
        )
        count_fold+=1


    print('\n--->\tTraining Metrics')

    print('Accuracy Mean:     \t{0:.4f} | Accuracy Std:   \t{1:.4f}'.format(
        np.mean(train_metrics['accuracy']), 
        np.std(train_metrics['accuracy']))
    )
    print('Recall Mean:     \t{0:.4f} | Recall Std:       \t{1:.4f}'.format(
        np.mean(train_metrics['recall']), 
        np.std(train_metrics['recall']))
    )
    print('Precision Mean:     \t{0:.4f} | Precision Std:   \t{1:.4f}'.format(
        np.mean(train_metrics['precision']), 
        np.std(train_metrics['precision']))
    )
    print('F1 Score Mean:     \t{0:.4f} | F1 Score Std:   \t{1:.4f}'.format(
        np.mean(train_metrics['f1_score']), 
        np.std(train_metrics['f1_score']))
    )

    print('\n--->\tValidation Metrics')

    print('Accuracy Mean:     \t{0:.4f} | Accuracy Std:   \t{1:.4f}'.format(
        np.mean(valid_metrics['accuracy']), 
        np.std(valid_metrics['accuracy']))
    )
    print('Recall Mean:     \t{0:.4f} | Recall Std:       \t{1:.4f}'.format(
        np.mean(valid_metrics['recall']), 
        np.std(valid_metrics['recall']))
    )
    print('Precision Mean:     \t{0:.4f} | Precision Std:   \t{1:.4f}'.format(
        np.mean(valid_metrics['precision']), 
        np.std(valid_metrics['precision']))
    )
    print('F1 Score Mean:     \t{0:.4f} | F1 Score Std:   \t{1:.4f}'.format(
        np.mean(valid_metrics['f1_score']), 
        np.std(valid_metrics['f1_score']))
    )

    print('\n--->\tTest Metrics')

    if scaler is not None:
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
    
    classifier.fit(X_train, y_train.ravel())
    y_test_pred = classifier.predict(X_test)

    print('Accuracy:     \t{0:.4f}'.format(accuracy(y_test, y_test_pred)))
    print('Recall:     \t{0:.4f}'.format(recall(y_test, y_test_pred)))
    print('Precision:     \t{0:.4f}'.format(precision(y_test, y_test_pred)))
    print('F1 Score:     \t{0:.4f}'.format(f1_score(y_test, y_test_pred)))


# Task 1 - MLP (Regression)

In [13]:
data = np.genfromtxt('../data/concrete.csv', delimiter=',')
X = data[:, :-1]
y = data[:, -1]
print('Shape:', data.shape)
data[:3, :]


Shape: (1030, 9)


array([[ 540.  ,    0.  ,    0.  ,  162.  ,    2.5 , 1040.  ,  676.  ,
          28.  ,   79.99],
       [ 540.  ,    0.  ,    0.  ,  162.  ,    2.5 , 1055.  ,  676.  ,
          28.  ,   61.89],
       [ 332.5 ,  142.5 ,    0.  ,  228.  ,    0.  ,  932.  ,  594.  ,
         270.  ,   40.27]])

In [14]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, 0.8, random_seed=466852
)

X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, 0.75, random_seed=654824
)

print('Number of Rows by Split')
print('X_train: {} ({}%)'.format(X_train.shape[0], X_train.shape[0]/data.shape[0]*100))
print('X_test:  {} ({}%)'.format(X_test.shape[0], X_test.shape[0]/data.shape[0]*100))
print('X_val:   {} ({}%)'.format(X_val.shape[0], X_val.shape[0]/data.shape[0]*100))

Number of Rows by Split
X_train: 618 (60.0%)
X_test:  206 (20.0%)
X_val:   206 (20.0%)


In [505]:
class Sigmoid():
    @staticmethod
    def get_value(x):
        return 1/(1+np.exp(-x))

    @staticmethod
    def get_derivative(x):
        return Sigmoid.get_value(x) - Sigmoid.get_value(x)**2

class ReLU():
    @staticmethod
    def get_value(x):
        return np.maximum(0, x)

    @staticmethod
    def get_derivative(x):
        if x <= 0:
            return 0
        else:
            return 1



In [455]:
class MyMLP():
    def __init__(self, n_inputs, n_outputs):
        self.fitted = False
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs

        
        self.hidden_layers_weights = []
        self.hidden_layers_z = []
        self.hidden_layers_activation_functions = []

    
    def get_n_neurons_of_last_layer(self):
        if len(self.hidden_layers_weights) == 0:
            return self.n_features
        else:
            return len(self.hidden_layers_weights[-1])

    def add_hidden_layer(self, n_neurons, activation_function, random_state=81237):
        seed = np.random.RandomState(random_state)
        n_neurons_of_last_layer = self.get_n_neurons_of_last_layer()
        print(n_neurons_of_last_layer)
        if type(activation_function) == ReLU:
            new_hidden_layer = seed.normal(
                size = (n_neurons_of_last_layer+1, n_neurons)
            ) * np.sqrt(2/n_neurons_of_last_layer)
            new_hidden_layer[:,0] = 0.01
            new_hidden_layer[:, :] = 1

        elif type(activation_function) == Sigmoid:
            new_hidden_layer = seed.normal(
                size = (n_neurons, n_neurons_of_last_layer+1)
            ) * np.sqrt(1/n_neurons_of_last_layer)
            new_hidden_layer[:,0] = 0

        self.hidden_layers_weights.append(new_hidden_layer)
        self.hidden_layers_activation_functions.append(activation_function)
        self.hidden_layers_z.append([])
    
    def set_output_layer(self, activation_function=None, n_outputs=None):

        if activation_function is None:
            self.n_outputs = 1
        if type(activation_function) == Sigmoid:
            self.n_outputs = 1
        elif type(activation_function) == 'Softmax':
            if self.n_outputs is not None:
                self.n_outputs = n_outputs
            else:
                raise Exception('Parameter \'n_outputs\' not found.')
        else:
            raise Exception('Unrecognized activation function.')

        self.output_activation_function = activation_function

    def forward_propagation(self, X):
        X = np.hstack([np.ones((X.shape[0], 1)), X])
        w = self.hidden_layers_weights


        # print(X)
        # print(w)
        for i in range(len(w)):
            if i == 0:
                print(w[i])
                print(X)
                self.hidden_layers_z[i] = X @ w[i]
                print(self.hidden_layers_z[i].shape)
            else:
                print('------------')
                print(self.hidden_layers_z[i-1])
                print(self.hidden_layers_z[i-1].shape[0])
                self.hidden_layers_z[i-1] = np.hstack(
                    [
                        np.ones((self.hidden_layers_z[i-1].shape[0], 1)),
                        self.hidden_layers_z[i-1]
                    ]
                )
                print('NEW')
                print(self.hidden_layers_z[i-1])
                print(self.hidden_layers_z[i-1].shape[0])
                print(w[i])
                self.hidden_layers_z[i] =  self.hidden_layers_z[i-1] @ w[i]
            
            


    def fit(self, X, y):
        pass

    def predict(self, X):
        pass

my_mlp = MyMLP(n_features=1)
my_mlp.add_hidden_layer(n_neurons=3, activation_function=ReLU())
my_mlp.add_hidden_layer(n_neurons=2, activation_function=ReLU())
# my_mlp.add_hidden_layer(n_neurons=3)
# my_mlp.add_hidden_layer(n_neurons=3)
my_mlp.hidden_layers_weights

1
2


[array([[1., 1., 1.],
        [1., 1., 1.]]),
 array([[1., 1.],
        [1., 1.],
        [1., 1.]])]

In [678]:
class MyMLP():
    def __init__(self, n_inputs, n_outputs):
        self.fitted = False
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs

        self.n_hidden = []
        self.hidden_weights = []
        self.hidden_activation_functions = [None]

    def get_n_neurons_of_last_layer(self):
        if len(self.hidden_weights) == 0:
            return self.n_inputs
        else:
            return len(self.hidden_weights[-1])

    def add_hidden_layer(self, n_neurons, activation_function):
        self.n_hidden.append(n_neurons)
        self.hidden_activation_functions = [activation_function] + self.hidden_activation_functions


    def initialize_weights(self, random_state=8776123):
        self.hidden_weights = []
        
        seed = np.random.RandomState(random_state)

        layers = [self.n_inputs] + self.n_hidden + [self.n_outputs]
        print('Layers:', layers)

        for i in range(len(layers)-1):
            # Initialization strategies
            if type(self.hidden_activation_functions[i]) == ReLU:
                w = seed.normal(
                    size = (layers[i]+1, layers[i+1])
                ) * np.sqrt(2/layers[i])  
                w[0, :] = 0.01
                print(w.shape)
            else:
                w = seed.normal(
                    size = (layers[i]+1, layers[i+1])
                ) * np.sqrt(1/layers[i])
                print(w.shape)
                w[0, :] = 0

            self.hidden_weights.append(w)

    #TODO - Insert BIAS terms
    def forward_propagation(self, X):
        activated_values = X
        
        for i, w in enumerate(self.hidden_weights):
            activated_values = np.hstack([np.ones((activated_values.shape[0], 1)), activated_values])
            activation_function = self.hidden_activation_functions[i]            
            hidden_input = np.array(activated_values @ w)
            print('\n-------------------------')
            print('Step', i)
            print('-------------------------')
            print('\nInput', activated_values.shape)
            print(activated_values)
            print('\nWeights', w.shape)
            print(w)
            print('\nZ', hidden_input.shape)
            print(hidden_input)
            if activation_function is not None:
                activated_values = activation_function.get_value(hidden_input)
            else:
                activated_values = hidden_input



        
        return activated_values
            
    def fit(self, X, y):
        pass

    def predict(self, X):
        pass

my_mlp = MyMLP(n_inputs=2, n_outputs=1)
my_mlp.add_hidden_layer(n_neurons=4, activation_function=ReLU())
my_mlp.add_hidden_layer(n_neurons=2, activation_function=ReLU())
# my_mlp.add_hidden_layer(n_neurons=2, activation_function=ReLU())
# my_mlp.add_hidden_layer(n_neurons=3)
# my_mlp.add_hidden_layer(n_neurons=3)
my_mlp.hidden_weights

[]

In [679]:
my_mlp.initialize_weights()
my_mlp.hidden_weights

Layers: [2, 4, 2, 1]
(3, 4)
(5, 2)
(3, 1)


[array([[ 0.01      ,  0.01      ,  0.01      ,  0.01      ],
        [-0.73480594,  1.48559353, -0.52143647, -1.36089508],
        [-0.36556347, -0.0464983 , -0.72774453, -1.54059428]]),
 array([[ 0.01      ,  0.01      ],
        [ 0.59453462,  0.52016974],
        [ 0.28036132, -1.22278891],
        [-1.01657088, -1.08121814],
        [ 0.69353916,  0.0580974 ]]),
 array([[ 0.        ],
        [-1.63628029],
        [-0.13923623]])]

In [682]:
X = np.array([[2, 2], [1, 1]])
my_mlp.forward_propagation(X)


-------------------------
Step 0
-------------------------

Input (2, 3)
[[1. 2. 2.]
 [1. 1. 1.]]

Weights (3, 4)
[[ 0.01        0.01        0.01        0.01      ]
 [-0.73480594  1.48559353 -0.52143647 -1.36089508]
 [-0.36556347 -0.0464983  -0.72774453 -1.54059428]]

Z (2, 4)
[[-2.19073883  2.88819046 -2.488362   -5.7929787 ]
 [-1.09036941  1.44909523 -1.239181   -2.89148935]]

-------------------------
Step 1
-------------------------

Input (2, 5)
[[1.         0.         2.88819046 0.         0.        ]
 [1.         0.         1.44909523 0.         0.        ]]

Weights (5, 2)
[[ 0.01        0.01      ]
 [ 0.59453462  0.52016974]
 [ 0.28036132 -1.22278891]
 [-1.01657088 -1.08121814]
 [ 0.69353916  0.0580974 ]]

Z (2, 2)
[[ 0.81973688 -3.52164727]
 [ 0.41627025 -1.76193758]]

-------------------------
Step 2
-------------------------

Input (2, 3)
[[1.         0.81973688 0.        ]
 [1.         0.41627025 0.        ]]

Weights (3, 1)
[[ 0.        ]
 [-1.63628029]
 [-0.13923623]]



array([[-1.3413193],
       [-0.6811348]])