In [31]:
import numpy as np
import pickle

In [32]:
def xavier_init(size, gain = 1.0):
    """
    Xavier initialization of network weights.

    Arguments:
        - size {tuple} -- size of the network to initialise.
        - gain {float} -- gain for the Xavier initialisation.

    Returns:
        {np.ndarray} -- values of the weights.
    """
    low = -gain * np.sqrt(6.0 / np.sum(size))
    high = gain * np.sqrt(6.0 / np.sum(size))
    return np.random.uniform(low=low, high=high, size=size)

In [33]:
class Layer:
    """
    Abstract layer class.
    """

    def __init__(self, *args, **kwargs):
        raise NotImplementedError()

    def forward(self, *args, **kwargs):
        raise NotImplementedError()

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def backward(self, *args, **kwargs):
        raise NotImplementedError()

    def update_params(self, *args, **kwargs):
        pass

In [34]:
class MSELossLayer(Layer):
    """
    MSELossLayer: Computes mean-squared error between y_pred and y_target.
    """

    def __init__(self):
        self._cache_current = None

    @staticmethod
    def _mse(y_pred, y_target):
        return np.mean((y_pred - y_target) ** 2)

    @staticmethod
    def _mse_grad(y_pred, y_target):
        return 2 * (y_pred - y_target) / len(y_pred)

    def forward(self, y_pred, y_target):
        self._cache_current = y_pred, y_target
        return self._mse(y_pred, y_target)

    def backward(self):
        return self._mse_grad(*self._cache_current)

In [35]:
class CrossEntropyLossLayer(Layer):
    """
    CrossEntropyLossLayer: Computes the softmax followed by the negative 
    log-likelihood loss.
    """

    def __init__(self):
        self._cache_current = None

    @staticmethod
    def softmax(x):
        numer = np.exp(x - x.max(axis=1, keepdims=True))
        denom = numer.sum(axis=1, keepdims=True)
        return numer / denom

    def forward(self, inputs, y_target):
        assert len(inputs) == len(y_target)
        n_obs = len(y_target)
        probs = self.softmax(inputs)
        self._cache_current = y_target, probs

        out = -1 / n_obs * np.sum(y_target * np.log(probs))
        return out

    def backward(self):
        y_target, probs = self._cache_current
        n_obs = len(y_target)
        return -1 / n_obs * (y_target - probs)

In [36]:
class SigmoidLayer(Layer):
    """
    SigmoidLayer: Applies sigmoid function elementwise.
    """

    def __init__(self):
        """ 
        Constructor of the Sigmoid layer.
        """
        self._cache_current = None

    def forward(self, x):
        """ 
        Performs forward pass through the Sigmoid layer.

        Logs information needed to compute gradient at a later stage in
        `_cache_current`.

        Arguments:
            x {np.ndarray} -- Input array of shape (batch_size, n_in).

        Returns:
            {np.ndarray} -- Output array of shape (batch_size, n_out)
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        output = 1/(1 + np.exp(-x))
        self._cache_current = output
        return output

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def backward(self, grad_z):
        """
        Given `grad_z`, the gradient of some scalar (e.g. loss) with respect to
        the output of this layer, performs back pass through the layer (i.e.
        computes gradients of loss with respect to parameters of layer and
        inputs of layer).

        Arguments:
            grad_z {np.ndarray} -- Gradient array of shape (batch_size, n_out).

        Returns:
            {np.ndarray} -- Array containing gradient with respect to layer
                input, of shape (batch_size, n_in).
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        output = self._cache_current
        return grad_z * (1 - output) * output

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

I believe this is how it works. Because, we are given $\frac{\partial Loss}{\partial Z}$, and we want to obtain $\frac{\partial Loss}{\partial X} = \frac{\partial Loss}{\partial Z} \cdot \frac{\partial Z}{\partial X}$. This is equal to multiplying the **grad_z** ($\frac{\partial Loss}{\partial Z}$) by the derivative of the sigmoid function ($\frac{\partial Z}{\partial X}$).

In [37]:
s = SigmoidLayer()
a = np.array([2])
print(s.forward(a))
print(s.backward(1))

[0.88079708]
[0.10499359]


In [38]:
import torch

In [39]:
sigmoid = torch.nn.Sigmoid()
t = torch.Tensor([2])
l = torch.nn.Parameter(t)
sigmoid(l).backward()
print(l.grad)

tensor([0.1050])


  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


In [40]:
sigmoid.

SyntaxError: invalid syntax (3303402067.py, line 1)

In [None]:
a = torch.transpose(t, 0, 1)
a

In [41]:
a * value_loss

NameError: name 'value_loss' is not defined

In [42]:
class ReluLayer(Layer):
    """
    ReluLayer: Applies Relu function elementwise.
    """

    def __init__(self):
        """
        Constructor of the Relu layer.
        """
        self._cache_current = None

    def forward(self, x):
        """ 
        Performs forward pass through the Relu layer.

        Logs information needed to compute gradient at a later stage in
        `_cache_current`.

        Arguments:
            x {np.ndarray} -- Input array of shape (batch_size, n_in).

        Returns:
            {np.ndarray} -- Output array of shape (batch_size, n_out)
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        output =  x
        output[output<=0] = 0
        self._cache_current = output
        return output

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def backward(self, grad_z):
        """
        Given `grad_z`, the gradient of some scalar (e.g. loss) with respect to
        the output of this layer, performs back pass through the layer (i.e.
        computes gradients of loss with respect to parameters of layer and
        inputs of layer).

        Arguments:
            grad_z {np.ndarray} -- Gradient array of shape (batch_size, n_out).

        Returns:
            {np.ndarray} -- Array containing gradient with respect to layer
                input, of shape (batch_size, n_in).
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        relu_derivative = self._cache_current
        relu_derivative[relu_derivative > 0] = 1
        return grad_z * relu_derivative

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

In [43]:
a = np.array([1, -2, 5, 0])
rl = ReluLayer()
print(rl.forward(a))
rl.backward(1)

[1 0 5 0]


array([1, 0, 1, 0])

In [44]:
import torch
l = torch.nn.Linear(5, 3)
a = torch.Tensor([1, 0, 0, 0, 0])
print(l.weight)
l(a)

Parameter containing:
tensor([[ 0.0968,  0.0787,  0.3657, -0.2421,  0.2171],
        [ 0.3219,  0.1358, -0.0431, -0.2225, -0.3177],
        [-0.0509, -0.4332,  0.2034, -0.0082, -0.1672]], requires_grad=True)


tensor([ 0.1197,  0.4997, -0.3725], grad_fn=<ViewBackward0>)

In [45]:
a = np.array([1, 2])
b = np.array([[2], [1]])
np.matmul(a, b)

array([4])

In [46]:
a.shape

(2,)

In [47]:
c = np.array([[1, 2], [3, 4]])
c

array([[1, 2],
       [3, 4]])

In [48]:
c.transpose()

array([[1, 3],
       [2, 4]])

In [49]:
c.shape[1]

2

In [50]:
initial_biases = np.random.rand(1, 5)
np.concatenate([initial_biases] * 5, 0)

array([[0.43880357, 0.95491541, 0.53560563, 0.1189112 , 0.66806565],
       [0.43880357, 0.95491541, 0.53560563, 0.1189112 , 0.66806565],
       [0.43880357, 0.95491541, 0.53560563, 0.1189112 , 0.66806565],
       [0.43880357, 0.95491541, 0.53560563, 0.1189112 , 0.66806565],
       [0.43880357, 0.95491541, 0.53560563, 0.1189112 , 0.66806565]])

In [51]:
type(np.ones(shape=(1, 5)).astype(int))

numpy.ndarray

In [52]:
a = LinearLayer(10, 1)
print(type(a))
print(isinstance(a, SigmoidLayer))

<class '__main__.LinearLayer'>
False


In [114]:
a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([5, 5, 5])
a + b

array([[ 6,  7,  8],
       [ 9, 10, 11]])

In [171]:
class LinearLayer(Layer):
    """
    LinearLayer: Performs affine transformation of input.
    """

    def __init__(self, n_in, n_out):
        """
        Constructor of the linear layer.

        Arguments:
            - n_in {int} -- Number (or dimension) of inputs.
            - n_out {int} -- Number (or dimension) of outputs.
        """
        self.n_in = n_in
        self.n_out = n_out

        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        self._W = None
        self._b = None

        self._cache_current = None
        self._grad_W_current = None
        self._grad_b_current = None
        
        self._W = np.random.rand(n_in, n_out)
        self._b = np.random.rand(1, n_out)
        print(self._b)

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def forward(self, x):
        """
        Performs forward pass through the layer (i.e. returns Wx + b).

        Logs information needed to compute gradient at a later stage in
        `_cache_current`.

        Arguments:
            x {np.ndarray} -- Input array of shape (batch_size, n_in).

        Returns:
            {np.ndarray} -- Output array of shape (batch_size, n_out)
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        self._cache_current = x
        #B_matrix = np.concatenate([self._b] * len(x), 0)
        #print(B_matrix.shape)
        return np.matmul(x, self._W)  + self._b

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def backward(self, grad_z):
        """
        Given `grad_z`, the gradient of some scalar (e.g. loss) with respect to
        the output of this layer, performs back pass through the layer (i.e.
        computes gradients of loss with respect to parameters of layer and
        inputs of layer).

        Arguments:
            grad_z {np.ndarray} -- Gradient array of shape (batch_size, n_out).

        Returns:
            {np.ndarray} -- Array containing gradient with respect to layer
                input, of shape (batch_size, n_in).
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        self._grad_W_current = np.matmul(self._cache_current.transpose(), grad_z)
        self._grad_b_current = np.matmul(np.ones(shape=(1, grad_z.shape[0])), grad_z)
        return np.matmul(grad_z, self._W.transpose())

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def update_params(self, learning_rate):
        """
        Performs one step of gradient descent with given learning rate on the
        layer's parameters using currently stored gradients.

        Arguments:
            learning_rate {float} -- Learning rate of update step.
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        self._b -= learning_rate * self._grad_b_current
        self._W -= learning_rate * self._grad_W_current

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

In [54]:
a = np.array([[2, 2, 2], [3, 3, 3]])
np.ones_like(a)

array([[1, 1, 1],
       [1, 1, 1]])

In [55]:
c = np.array([[1, 2, 3], [4, 5, 6]])
c * np.ones_like(c)

array([[1, 2, 3],
       [4, 5, 6]])

In [74]:
class IdentityLayer(Layer):
    """
    IdentityLayer: Applies Identity function elementwise.
    """
    def __init__(self):
        """
        Constructor of the linear layer.

        Arguments:
            - n_in {int} -- Number (or dimension) of inputs.
            - n_out {int} -- Number (or dimension) of outputs.
        """
    
    def forward(self, x):
        """ 
        Performs forward pass through the Relu layer.

        Logs information needed to compute gradient at a later stage in
        `_cache_current`.

        Arguments:
            x {np.ndarray} -- Input array of shape (batch_size, n_in).

        Returns:
            {np.ndarray} -- Output array of shape (batch_size, n_out)
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        return x

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def backward(self, grad_z):
        """
        Given `grad_z`, the gradient of some scalar (e.g. loss) with respect to
        the output of this layer, performs back pass through the layer (i.e.
        computes gradients of loss with respect to parameters of layer and
        inputs of layer).

        Arguments:
            grad_z {np.ndarray} -- Gradient array of shape (batch_size, n_out).

        Returns:
            {np.ndarray} -- Array containing gradient with respect to layer
                input, of shape (batch_size, n_in).
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        return grad_z

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

In [75]:
def get_act_or_loss_layer(layer_name: str):
    if layer_name == 'relu':
        return ReluLayer()
    elif layer_name == 'sigmoid':
        return SigmoidLayer()
    elif layer_name == 'identity':
        return IdentityLayer()
    elif layer_name == 'cross_entropy':
        return CrossEntropyLossLayer()
    elif layer_name == 'mse':
        return MSELossLayer()

In [76]:
a = [1, 10, 5]
for i in reversed(a):
    print(i)
print(a)

5
10
1
[1, 10, 5]


In [77]:
class MultiLayerNetwork(object):
    """
    MultiLayerNetwork: A network consisting of stacked linear layers and
    activation functions.
    """

    def __init__(self, input_dim, neurons, activations):
        """
        Constructor of the multi layer network.

        Arguments:
            - input_dim {int} -- Number of features in the input (excluding 
                the batch dimension).
            - neurons {list} -- Number of neurons in each linear layer 
                represented as a list. The length of the list determines the 
                number of linear layers.
            - activations {list} -- List of the activation functions to apply 
                to the output of each linear layer.
        """
        self.input_dim = input_dim
        self.neurons = neurons
        self.activations = activations

        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        self._layers = []
        
        # Append all the layers and activation functions
        current_in_dim = self.input_dim
        for i in range (0, len(neurons)):
            self._layers.append(LinearLayer(current_in_dim, neurons[i]))
            self._layers.append(get_act_or_loss_layer(activations[i]))
            current_in_dim = neurons[i]
        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def forward(self, x):
        """
        Performs forward pass through the network.

        Arguments:
            x {np.ndarray} -- Input array of shape (batch_size, input_dim).

        Returns:
            {np.ndarray} -- Output array of shape (batch_size,
                #_neurons_in_final_layer)
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        for layer in self._layers:
            x = layer.forward(x)
        return x # Replace with your own code

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def __call__(self, x):
        return self.forward(x)

    def backward(self, grad_z):
        """
        Performs backward pass through the network.

        Arguments:
            grad_z {np.ndarray} -- Gradient array of shape (batch_size,
                #_neurons_in_final_layer).

        Returns:
            {np.ndarray} -- Array containing gradient with respect to layer
                input, of shape (batch_size, input_dim).
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        for layer in reversed(self._layers):
            grad_z = layer.backward(grad_z)
        
        return grad_z

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def update_params(self, learning_rate):
        """
        Performs one step of gradient descent with given learning rate on the
        parameters of all layers using currently stored gradients.

        Arguments:
            learning_rate {float} -- Learning rate of update step.
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        for layer in self._layers:
            if isinstance(layer, LinearLayer):
                layer.update_params(learning_rate)

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

In [78]:
def save_network(network, fpath):
    """
    Utility function to pickle `network` at file path `fpath`.
    """
    with open(fpath, "wb") as f:
        pickle.dump(network, f)

In [79]:
def load_network(fpath):
    """
    Utility function to load network found at file path `fpath`.
    """
    with open(fpath, "rb") as f:
        network = pickle.load(f)
    return network

In [80]:
import random

In [81]:
indices = random.sample(range(3), k=3)
indices

[2, 1, 0]

In [64]:
print(type(indices))

<class 'list'>


In [65]:
a = np.array([1, 2, 3])
a[[2, 1, 0]]

array([3, 2, 1])

In [128]:
def split_into_batches(input_dataset, target_dataset, batch_size):
    index_start = 0
    index_end = batch_size
    input_batches, target_batches = [], []
    while index_end <= len(input_dataset):
        input_batches.append(input_dataset[index_start:index_end])
        target_batches.append(target_dataset[index_start:index_end])
        index_start += batch_size
        index_end += batch_size
    return input_batches, target_batches

In [129]:
a = np.array([1, 2, 3, 4, 5, 6, 7, 8])
b = np.array([1, 2, 3, 4, 5, 6, 7, 8])
print(len(b))
a_batches, b_batches = split_into_batches(a, b, 2)
print(a_batches)
print(b_batches)

8
[array([1, 2]), array([3, 4]), array([5, 6]), array([7, 8])]
[array([1, 2]), array([3, 4]), array([5, 6]), array([7, 8])]


In [165]:
class Trainer(object):
    """
    Trainer: Object that manages the training of a neural network.
    """

    def __init__(
        self,
        network,
        batch_size,
        nb_epoch,
        learning_rate,
        loss_fun,
        shuffle_flag,
    ):
        """
        Constructor of the Trainer.

        Arguments:
            - network {MultiLayerNetwork} -- MultiLayerNetwork to be trained.
            - batch_size {int} -- Training batch size.
            - nb_epoch {int} -- Number of training epochs.
            - learning_rate {float} -- SGD learning rate to be used in training.
            - loss_fun {str} -- Loss function to be used. Possible values: mse,
                cross_entropy.
            - shuffle_flag {bool} -- If True, training data is shuffled before
                training.
        """
        self.network = network
        self.batch_size = batch_size
        self.nb_epoch = nb_epoch
        self.learning_rate = learning_rate
        self.loss_fun = loss_fun
        self.shuffle_flag = shuffle_flag

        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        self._loss_layer = get_act_or_loss_layer(self.loss_fun)
        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    @staticmethod
    def shuffle(input_dataset, target_dataset):
        """
        Returns shuffled versions of the inputs.

        Arguments:
            - input_dataset {np.ndarray} -- Array of input features, of shape
                (#_data_points, n_features) or (#_data_points,).
            - target_dataset {np.ndarray} -- Array of corresponding targets, of
                shape (#_data_points, #output_neurons).

        Returns: 
            - {np.ndarray} -- shuffled inputs.
            - {np.ndarray} -- shuffled_targets.
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        # Shuffle indices
        shuffled_indices = random.sample(range(len(input_dataset)), k=len(input_dataset))
        
        # Get the dataset in the order of the shuffled indices
        shuffled_inputs = input_dataset[shuffled_indices]
        shuffled_targets = target_dataset[shuffled_indices]
        
        return shuffled_inputs, shuffled_targets 

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def train(self, input_dataset, target_dataset):
        """
        Main training loop. Performs the following steps `nb_epoch` times:
            - Shuffles the input data (if `shuffle` is True)
            - Splits the dataset into batches of size `batch_size`.
            - For each batch:
                - Performs forward pass through the network given the current
                batch of inputs.
                - Computes loss.
                - Performs backward pass to compute gradients of loss with
                respect to parameters of network.
                - Performs one step of gradient descent on the network
                parameters.

        Arguments:
            - input_dataset {np.ndarray} -- Array of input features, of shape
                (#_training_data_points, n_features).
            - target_dataset {np.ndarray} -- Array of corresponding targets, of
                shape (#_training_data_points, #output_neurons).
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        # Shuffle dataset if required
        if self.shuffle_flag:
            input_dataset, target_dataset = self.shuffle(input_dataset, target_dataset)
        
        # Split dataset into batches
        input_batches, target_batches = split_into_batches(input_dataset, target_dataset, self.batch_size)
        
        # Repeat the process for the chosen number of epochs
        for i in range(self.nb_epoch):
            running_loss = 0
            # Obtain the output, compute the loss and perform gradient descent for each batch
            for input_batch, target_batch in zip(input_batches, target_batches):
                output = self.network(input_batch)
                loss = self._loss_layer(output, target_batch)
                self.network.backward(self._loss_layer.backward())
                self.network.update_params(self.learning_rate)
                running_loss += loss
            
            print(f'Epoch {i+1} => Avg Loss = {running_loss/len(input_batches)}')

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def eval_loss(self, input_dataset, target_dataset):
        """
        Function that evaluate the loss function for given data. Returns
        scalar value.

        Arguments:
            - input_dataset {np.ndarray} -- Array of input features, of shape
                (#_evaluation_data_points, n_features).
            - target_dataset {np.ndarray} -- Array of corresponding targets, of
                shape (#_evaluation_data_points, #output_neurons).
        
        Returns:
            a scalar value -- the loss
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        output = self.network(input_dataset)
        loss = self._loss_layer(output, target_dataset)
        return loss

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

In [166]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = np.array([['A'], ['B'], ['C']])
a_s, b_s = Trainer.shuffle(a, b)
print(a_s)
print(b_s)

[[1 2 3]
 [7 8 9]
 [4 5 6]]
[['A']
 ['C']
 ['B']]


In [167]:
class Preprocessor(object):
    """
    Preprocessor: Object used to apply "preprocessing" operation to datasets.
    The object can also be used to revert the changes.
    """

    def __init__(self, data):
        """
        Initializes the Preprocessor according to the provided dataset.
        (Does not modify the dataset.)

        Arguments:
            data {np.ndarray} dataset used to determine the parameters for
            the normalization.
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        self._max_value = data.max()
        self._min_value = data.min()

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def apply(self, data):
        """
        Apply the pre-processing operations to the provided dataset.

        Arguments:
            data {np.ndarray} dataset to be normalized.

        Returns:
            {np.ndarray} normalized dataset.
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        return (data - self._min_value) / (self._max_value - self._min_value)

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

    def revert(self, data):
        """
        Revert the pre-processing operations to retrieve the original dataset.

        Arguments:
            data {np.ndarray} dataset for which to revert normalization.

        Returns:
            {np.ndarray} reverted dataset.
        """
        #######################################################################
        #                       ** START OF YOUR CODE **
        #######################################################################
        return data * (self._max_value - self._min_value) + self._min_value

        #######################################################################
        #                       ** END OF YOUR CODE **
        #######################################################################

In [168]:
a = np.array([[-2,  0], [8, 6]])
prep = Preprocessor(a)
a_preprocessed = prep.apply(a)
print(a_preprocessed)
a_reverted = prep.revert(a_preprocessed)
print(a_reverted)

[[0.  0.2]
 [1.  0.8]]
[[-2.  0.]
 [ 8.  6.]]


In [173]:
def example_main():
    input_dim = 4
    neurons = [16, 3]
    activations = ["relu", "identity"]
    net = MultiLayerNetwork(input_dim, neurons, activations)

    dat = np.loadtxt("iris.dat")
    np.random.shuffle(dat)

    x = dat[:, :4]
    y = dat[:, 4:]

    split_idx = int(0.8 * len(x))

    x_train = x[:split_idx]
    y_train = y[:split_idx]
    x_val = x[split_idx:]
    y_val = y[split_idx:]

    prep_input = Preprocessor(x_train)

    x_train_pre = prep_input.apply(x_train)
    x_val_pre = prep_input.apply(x_val)

    trainer = Trainer(
        network=net,
        batch_size=8,
        nb_epoch=1000,
        learning_rate=0.01,
        loss_fun="cross_entropy",
        shuffle_flag=True,
    )

    trainer.train(x_train_pre, y_train)
    print(f"Train loss = {trainer.eval_loss(x_train_pre, y_train)}")
    print(f"Validation loss = {trainer.eval_loss(x_val_pre, y_val)}")

    preds = net(x_val_pre).argmax(axis=1).squeeze()
    targets = y_val.argmax(axis=1).squeeze()
    accuracy = (preds == targets).mean()
    print(f"Validation accuracy: {accuracy}")

In [176]:
example_main()

[[0.01410092 0.8506121  0.38523912 0.01377887 0.20025755 0.53115905
  0.6210272  0.80000277 0.76799672 0.00503631 0.37628902 0.07971439
  0.49429395 0.32952723 0.40066377 0.88464664]]
[[0.05765671 0.63794632 0.68076062]]
Epoch 1 => Avg Loss = 1.185310100220246
Epoch 2 => Avg Loss = 1.1646673459624408
Epoch 3 => Avg Loss = 1.1555867204139214
Epoch 4 => Avg Loss = 1.1469654529511792
Epoch 5 => Avg Loss = 1.1385010658257713
Epoch 6 => Avg Loss = 1.1301631466886586
Epoch 7 => Avg Loss = 1.1219363809781728
Epoch 8 => Avg Loss = 1.1138079213685037
Epoch 9 => Avg Loss = 1.1057660348814797
Epoch 10 => Avg Loss = 1.097799886505313
Epoch 11 => Avg Loss = 1.0898994729343618
Epoch 12 => Avg Loss = 1.0820555802493106
Epoch 13 => Avg Loss = 1.0742597481942338
Epoch 14 => Avg Loss = 1.0665042382916716
Epoch 15 => Avg Loss = 1.058782005038373
Epoch 16 => Avg Loss = 1.0510866697202303
Epoch 17 => Avg Loss = 1.043412496446596
Epoch 18 => Avg Loss = 1.0357543700322274
Epoch 19 => Avg Loss = 1.02810777537

Epoch 240 => Avg Loss = 0.2961420043047313
Epoch 241 => Avg Loss = 0.2951653922616364
Epoch 242 => Avg Loss = 0.2941943998269547
Epoch 243 => Avg Loss = 0.2932289883263656
Epoch 244 => Avg Loss = 0.29226911952077
Epoch 245 => Avg Loss = 0.29131475559187936
Epoch 246 => Avg Loss = 0.2903658591289569
Epoch 247 => Avg Loss = 0.289422393116442
Epoch 248 => Avg Loss = 0.2884843209222924
Epoch 249 => Avg Loss = 0.28755160628692755
Epoch 250 => Avg Loss = 0.2866236321508983
Epoch 251 => Avg Loss = 0.28570296177581983
Epoch 252 => Avg Loss = 0.2847880256377231
Epoch 253 => Avg Loss = 0.28387807729919234
Epoch 254 => Avg Loss = 0.28297326864020655
Epoch 255 => Avg Loss = 0.282073588179817
Epoch 256 => Avg Loss = 0.2811790034780805
Epoch 257 => Avg Loss = 0.280289480187248
Epoch 258 => Avg Loss = 0.2794049843708358
Epoch 259 => Avg Loss = 0.27852548264844257
Epoch 260 => Avg Loss = 0.2776509421111033
Epoch 261 => Avg Loss = 0.2767813302447092
Epoch 262 => Avg Loss = 0.27591661487781266
Epoch 263

Epoch 504 => Avg Loss = 0.15760603513951102
Epoch 505 => Avg Loss = 0.15734283462426524
Epoch 506 => Avg Loss = 0.15708068483394463
Epoch 507 => Avg Loss = 0.1568195798593713
Epoch 508 => Avg Loss = 0.15655951383629227
Epoch 509 => Avg Loss = 0.15630048094428578
Epoch 510 => Avg Loss = 0.1560424754057733
Epoch 511 => Avg Loss = 0.15578549148516274
Epoch 512 => Avg Loss = 0.15552941811184076
Epoch 513 => Avg Loss = 0.15527464222389176
Epoch 514 => Avg Loss = 0.15502084571928695
Epoch 515 => Avg Loss = 0.15476903970292227
Epoch 516 => Avg Loss = 0.15451630560562116
Epoch 517 => Avg Loss = 0.1542664280618476
Epoch 518 => Avg Loss = 0.1540163665663515
Epoch 519 => Avg Loss = 0.1537666717541103
Epoch 520 => Avg Loss = 0.15351981267355308
Epoch 521 => Avg Loss = 0.15327303441921242
Epoch 522 => Avg Loss = 0.15302615489766372
Epoch 523 => Avg Loss = 0.15278240823868192
Epoch 524 => Avg Loss = 0.15253876353561027
Epoch 525 => Avg Loss = 0.1522949904293151
Epoch 526 => Avg Loss = 0.152054025223

Epoch 763 => Avg Loss = 0.11311802711488926
Epoch 764 => Avg Loss = 0.1130067108673482
Epoch 765 => Avg Loss = 0.11289569987370304
Epoch 766 => Avg Loss = 0.11278499293227198
Epoch 767 => Avg Loss = 0.11267458884759113
Epoch 768 => Avg Loss = 0.11256448643037256
Epoch 769 => Avg Loss = 0.11245468449746328
Epoch 770 => Avg Loss = 0.11234518187180374
Epoch 771 => Avg Loss = 0.11223597738238876
Epoch 772 => Avg Loss = 0.11212706986422687
Epoch 773 => Avg Loss = 0.11201845815830191
Epoch 774 => Avg Loss = 0.11191014111153387
Epoch 775 => Avg Loss = 0.11180211757674091
Epoch 776 => Avg Loss = 0.11169438641260161
Epoch 777 => Avg Loss = 0.11158694648361724
Epoch 778 => Avg Loss = 0.11147979666007472
Epoch 779 => Avg Loss = 0.1113729358180103
Epoch 780 => Avg Loss = 0.11126636283917295
Epoch 781 => Avg Loss = 0.11116026252724429
Epoch 782 => Avg Loss = 0.1110540096773179
Epoch 783 => Avg Loss = 0.11094853432781895
Epoch 784 => Avg Loss = 0.1108428625780005
Epoch 785 => Avg Loss = 0.1107379576