## 0. Import NumPy and Datasets

In [32]:
import numpy as np

from sklearn.datasets import load_digits, load_iris

In [322]:
# Small constant added to the predictions inside np.log(...) in the CrossEntropy
# loss, so that a predicted probability of exactly 0 does not evaluate log(0).
eps = 1e-7

## 1. Utility functions

### 1.1. Activation functions

I implemented 3 different activation functions:
<table>
    <thead>
        <tr>
            <th>Name</th>
            <th>Plot</th>
            <th>Equation</th>
            <th>Derivative</th>
            <th>Range</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>Identity</td>
            <td><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/9/9e/Activation_identity.svg/240px-Activation_identity.svg.png"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f690285952308aa49e3c6aac892df31cad6d1b06"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1f82687d38aa641f513d166b138923a84d7aae86"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0c8c11c44279888c9e395eeb5f45d121348ae10a"></td>
        </tr>
        <tr>
            <td>Rectified linear unit (ReLU)</td>
            <td><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/f/fe/Activation_rectified_linear.svg/240px-Activation_rectified_linear.svg.png"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/824a1cc623637e8a5c041a4ac3fc96aa70ed88ff"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e8723cef7eb5dedf4aa20e174ee281b76a6cbec4"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8dc2d914c2df66bc0f7893bfb8da36766650fe47"></td>
        </tr>
        <tr>
            <td>Logistic (Sigmoid)</td>
            <td><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/5/5b/Activation_logistic.svg/240px-Activation_logistic.svg.png"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/36f792c44c0a7069ad01386452569d6e34fe95d7"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/50a861269c68b1f1b973155fa40531d83c54c562"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c79c6838e423c1ed3c7ea532a56dc9f9dae8290b"></td>
        </tr>
    </tbody>
</table>

Each one is implemented as a class with two methods:
<ul>
    <li>__call__(x) - apply function to input <b>x</b>;</li>
    <li>prime(x) - apply derivative of appropriate function to the input <b>x</b>.</li>
</ul>

Users can also add their own activation function, together with its derivative, by following the same structure.

In [356]:
class Identity:
    """
    Implementation of Identity activation function and its derivative:
     - f(x) = x
     - f'(x) = 1

    Methods
    -------
    __call__(x)
        Evaluates function value at point x.
    prime(x)
        Evaluates function's derivative value at point x.

    """

    def __call__(self, x):
        """Function call method

        f(x) = x

        Parameters
        ----------
        x : np.array
            A numpy array to input to function.

        Returns
        -------
        np.array
            The input itself, returned unchanged (no copy is made).

        """

        return x

    def prime(self, x):
        """Derivative evaluation method

        f'(x) = 1

        Parameters
        ----------
        x : np.array
            A numpy array to input to function's derivative.

        Returns
        -------
        np.array
            A float array of ones with the same shape as x.

        """

        # Return a real array (not the scalar 1) so the result has the documented
        # type and shape, like the derivatives of the other activations.
        return np.ones_like(x, dtype=float)

In [341]:
class ReLU:
    """
    Rectified linear unit activation and its derivative:
    - f(x) = max(0, x)
    - f'(x) = 0 if x < 0 else 1

    Methods
    -------
    __call__(x)
        Evaluates function value at point x.
    prime(x)
        Evaluates function's derivative value at point x.

    """

    def __call__(self, x):
        """Apply the rectifier element-wise.

        f(x) = max(0, x)

        Parameters
        ----------
        x : np.array
            Values to rectify.

        Returns
        -------
        np.array
            Element-wise maximum of 0 and x.

        """

        rectified = np.maximum(0, x)
        return rectified

    def prime(self, x):
        """Evaluate the derivative element-wise.

        f'(x) = 0 if x < 0 else 1
        (the value at x == 0 is taken to be 1).

        Parameters
        ----------
        x : np.array
            Points at which the derivative is evaluated.

        Returns
        -------
        np.array
            Float array holding 1.0 where x >= 0 and 0.0 elsewhere.

        """

        is_non_negative = x >= 0
        return is_non_negative.astype('float')

In [357]:
class Sigmoid:
    """
    Implementation of Sigmoid activation function:
    - f(x) = 1 / (1 + exp(-x))
    - f'(x) = f(x)(1 - f(x))

    Attributes
    ----------
    x : np.array
        Value of the sigmoid returned by the most recent __call__
        (None until the function has been called).

    Methods
    -------
    __call__(x)
        Evaluates function value at point x.
    prime(x)
        Evaluates function's derivative value at point x.

    """

    def __init__(self):
        self.x = None

    @staticmethod
    def _sigmoid(x):
        """Numerically stable logistic function.

        Uses 1 / (1 + exp(-x)) = exp(-log(1 + exp(-x))) = exp(-logaddexp(0, -x)),
        which does not overflow for large negative x.
        """

        return np.exp(-np.logaddexp(0, -x))

    def __call__(self, x):
        """Function call method

        f(x) = 1 / (1 + exp(-x))

        Parameters
        ----------
        x : np.array
            A numpy array to input to function.

        Returns
        -------
        np.array
            A value of the function of x. The same array is stored in self.x.

        """

        self.x = self._sigmoid(x)
        return self.x

    def prime(self, x):
        """Derivative evaluation method

        f'(x) = f(x)(1 - f(x))

        The derivative is computed from the argument itself, so the method works
        before any forward call and never returns values belonging to a
        different input.

        Parameters
        ----------
        x : np.array
            A numpy array to input to function's derivative.

        Returns
        -------
        np.array
            A value of the function's derivative of x.

        """

        value = self._sigmoid(x)
        return value * (1 - value)

<table>
    <thead>
        <tr>
            <th>Name</th>
            <th>Equation</th>
            <th>Derivative</th>
            <th>Range</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>Softmax</td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6d7500d980c313da83e4117da701bf7c8f1982f5"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/81a8feb8f01aaed053c103113e3b4917f936aef0"></td>
            <td><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c79c6838e423c1ed3c7ea532a56dc9f9dae8290b"></td>
        </tr>
    </tbody>
</table>

The implementation follows the same design as the other activation functions.

In [347]:
class Softmax:
    """
    Implementation of Softmax activation function:
    - f_i(x) = exp(x_i) / sum(exp(x_j))
    - f_i'(x) = f_i(x)(delta_ij - f_j(x))

    Methods
    -------
    __call__(x)
        Evaluates function value at point x.
    prime(x)
        Returns 1 (see the method's docstring).

    """

    def __call__(self, x):
        """Function call method

        Function implementation is numerically stable to prevent underflow or overflow.

        f_i(x) = exp(x_i) / sum(exp(x_j))

        The normalisation runs over the last axis, so both a batch of shape
        (n_samples, n_classes) and a single sample of shape (n_classes,) work.

        Parameters
        ----------
        x : np.array
            A numpy array to input to function.

        Returns
        -------
        np.array
            A value of the function of x; entries along the last axis sum to 1.

        """

        # Stabilization: subtracting the maximum does not change the result but
        # keeps every exponent <= 0, so exp() cannot overflow.
        x = x - np.max(x, axis=-1, keepdims=True)
        num = np.exp(x)
        den = np.sum(num, axis=-1, keepdims=True)

        return num / den

    def prime(self, x):
        """Derivative evaluation method

        The derivative returns one because the derivative of the CrossEntropy
        loss is expected to be calculated together with the softmax function
        (CrossEntropy.prime already returns y_pred - y). This is easier to
        implement and more efficient than forming the full Jacobian

        f_i'(x) = f_i(x)(delta_ij - f_j(x))

        Parameters
        ----------
        x : np.array
            A numpy array to input to function's derivative (unused).

        Returns
        -------
        int
            1.

        """

        return 1

### 1.2. Losses

<p>Two loss functions are implemented, but it is easy to implement your own and use it.</p>
<ul>
    <li>
        Regression: <img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e258221518869aa1c6561bb75b99476c4734108e"/>
    </li>
    <li>
        Classification: <img src="https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcQediZV_J9oWckQU6SMM1bwIJUF05pYb3QJQQhJ3t3YoFcax5Ve"/>
    </li>
</ul>

In [350]:
class MSE:
    """
    Implementation of Mean Squared Error loss function:
    - L(y, y_pred) = 1 / 2n * sum((y_i - y_pred_i)^2)
    - L'(y, y_pred) = 1 / n * (y_pred - y)

    Methods
    -------
    __call__(y, y_pred)
        Evaluates loss value.
    prime(y, y_pred)
        Evaluates the loss derivative w.r.t. y_pred.

    """

    def __call__(self, y, y_pred):
        """Function call method

        L(y, y_pred) = 1 / 2n * sum((y_i - y_pred_i)^2)

        Parameters
        ----------
        y : np.array
            A numpy array of real values.
        y_pred : np.array
            A numpy array of predicted values

        Returns
        -------
        float
            Half of the mean (over all elements) of the squared differences.

        """

        return np.mean(np.square(y - y_pred)) / 2

    def prime(self, y, y_pred):
        """Derivative evaluation method

        L'(y, y_pred) = 1 / n * (y_pred - y)

        The derivative is taken with respect to the prediction, so its sign is
        (y_pred - y); with the opposite sign a gradient-descent update
        (W = W - alpha * dW) would increase the loss.

        Parameters
        ----------
        y : np.array
            A numpy array of real values.
        y_pred : np.array
            A numpy array of predicted values

        Returns
        -------
        np.array
            A numpy array with the derivatives, divided by len(y).

        """

        return (y_pred - y) / len(y)

In [352]:
class CrossEntropy:
    """
    Cross-entropy loss function for classification:
    - L(y, y_pred) = -mean(y * log(y_pred + eps)), averaged over every element
    - L'(y, y_pred) = y_pred - y

    Methods
    -------
    __call__(y, y_pred)
        Evaluates loss value.
    prime(y, y_pred)
        Evaluates function's derivative value.

    """

    def __call__(self, y, y_pred):
        """Evaluate the loss.

        L(y, y_pred) = -mean(y * log(y_pred + eps))

        The module-level constant eps is added to the predictions before the
        logarithm is taken, and the mean runs over all elements of the array.

        Parameters
        ----------
        y : np.array
            A numpy array of real values.
        y_pred : np.array
            A numpy array of predicted values

        Returns
        -------
        float
            A value of the loss.

        """

        log_predictions = np.log(y_pred + eps)
        weighted = y * log_predictions
        return -np.mean(weighted)

    def prime(self, y, y_pred):
        """Evaluate the derivative.

        The expression below is the derivative of the loss combined with a
        softmax output layer, which is why Softmax.prime simply returns 1.
        L'(y, y_pred) = y_pred - y

        Parameters
        ----------
        y : np.array
            A numpy array of real values.
        y_pred : np.array
            A numpy array of predicted values

        Returns
        -------
        np.array
            A numpy array with appropriate derivatives.

        """

        difference = y_pred - y
        return difference

### 1.3. Metrics

I created only one metric: Accuracy.

In [355]:
def accuracy(y, y_pred):
    """Fraction of samples whose predicted class equals the true class.

    The class of every row is taken to be the index of its largest entry.

    Parameters
    ----------
    y : np.array
        A 2-D numpy array of real values (one row per sample).
    y_pred : np.array
        A 2-D numpy array of predicted values (one row per sample).

    Returns
    -------
    float
        An accuracy of the prediction.

    """

    true_labels = np.argmax(y, axis=1)
    predicted_labels = np.argmax(y_pred, axis=1)
    hits = true_labels == predicted_labels
    return hits.astype('int').mean()

### 1.4. Other utility functions

I defined function which transforms vector of target values to one-hot matrix.

In [301]:
def one_hot(y, num_classes=None):
    """Function for one-hot transformation of the target values

    This function transforms the input vector of target values to one-hot
    encoded values. This transformation is necessary for our Neural Network
    implementation.

    Parameters
    ----------
    y : np.array
        A numpy array (or sequence) of integer class labels.
    num_classes : int, optional
        Number of columns of the result. When omitted it is inferred as
        max(y) + 1. Pass it explicitly when y may not contain the highest
        class (for example a small test split).

    Returns
    -------
    np.array
        An integer array of shape (len(y), num_classes) with a single 1 per
        row, in the column given by the label.

    Raises
    ------
    ValueError
        If num_classes is given and some label does not fit into it.

    """

    labels = np.asarray(y).reshape(-1)

    if num_classes is None:
        # An empty input has no classes to infer; produce an empty (0, 0) matrix.
        num_classes = int(labels.max()) + 1 if labels.size else 0
    elif labels.size and labels.max() >= num_classes:
        raise ValueError(
            'label %s does not fit into %s classes' % (labels.max(), num_classes)
        )

    # Broadcasting comparison: column j of row i is 1 exactly when labels[i] == j.
    return (labels[:, None] == np.arange(num_classes)).astype(int)

In [380]:
def unison_shuffled_copies(x, y):
    """Shuffle two arrays with one shared random permutation.

    Row i of the shuffled x still corresponds to element i of the shuffled y.
    The permutation is drawn from NumPy's global random state.

    Parameters
    ----------
    x : np.array
        Input x data.
    y : np.array
        Input y data.

    Returns
    -------
    tuple
        A tuple of shuffled x and y arrays.

    """
    order = np.random.permutation(len(x))
    shuffled_x = x[order]
    shuffled_y = y[order]
    return shuffled_x, shuffled_y

In [399]:
def train_test_split(x, y, size=0.7):
    """Returns train/test split data

    The samples are shuffled with one random permutation (NumPy's global
    random state) and then cut into a leading train part and a trailing
    test part.

    Parameters
    ----------
    x : np.array
        Input x data.
    y : np.array
        Input y data
    size : float
        Fraction of the samples that goes into the train part, between 0 and 1.
        The number of train samples is ceil(len(x) * size).

    Returns
    -------
    tuple
        Contains x_train, y_train, x_test, y_test

    Raises
    ------
    ValueError
        If size is not within [0, 1].
    """
    if not 0 <= size <= 1:
        raise ValueError('size must be between 0 and 1, got %r' % (size,))

    # Honour the requested fraction instead of a hard-coded 0.7.
    train_num = int(np.ceil(len(x) * size))
    idx = np.random.permutation(len(x))
    x, y = x[idx], y[idx]

    return x[:train_num], y[:train_num], x[train_num:], y[train_num:]

## 2. Data Structures for Neural Network Model

<p>Our basic data structure for the neural network is a doubly linked list. The implementation uses a termination (sentinel) node that marks both ends of the list: it is connected to the first node and to the last one.</p>
<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/5/5e/Doubly-linked-list.svg/1220px-Doubly-linked-list.svg.png"/>

### 2.1. Nodes

A node represents a single layer of a neural network.
It has connections to the previous layer and to the next layer.
It also has a key value, which is True for a node that represents a layer and False for the termination (nil) node.

In [358]:
class Node:
    """
    Node of a doubly linked list.

    Attributes
    ----------
    key : bool
        Flag that distinguishes regular nodes from the termination node.
        It is False for the termination node.
    prev : Node
        Reference to the previous node.
    next : Node
        Reference to the next node

    """

    def __init__(self, key=True, prev=None, next_=None):
        self.key, self.prev, self.next = key, prev, next_

    def __str__(self):
        # The textual form of a node is the textual form of its key.
        return '{!s}'.format(self.key)

### 2.2. Double-Linked List

This data structure is the basis of our sequential neural network model. It implements only the basic functionality that the network model needs.

In [359]:
class LinkedList:
    """
    Doubly linked list implementation with a termination (sentinel) node.

    Attributes
    ----------
    _nil : Node
        Termination node which begins and ends our list. Its key is False;
        in an empty list it points to itself in both directions.

    Methods
    -------
    insert(node)
        Inserts a node to the end of a list.

    """

    def __init__(self):
        self._nil = Node(False)
        self._nil.prev = self._nil
        self._nil.next = self._nil

    def __str__(self):
        """Return the nodes' string forms joined by ' -> ' ('' for an empty list)."""
        parts = []
        node = self._nil.next
        # Walk forward until the termination node (key == False) is reached.
        while node.key:
            parts.append(str(node))
            node = node.next
        # join() puts the separator only between items, so nothing has to be
        # trimmed from the end and no trailing space is left behind.
        return ' -> '.join(parts)

    def insert(self, node):
        """Insert a node to the end of a list.

        Parameters
        ----------
        node : Node
            Node to be inserted.

        """

        # The new node goes between the current last node and the termination node.
        node.prev = self._nil.prev
        node.next = self._nil
        self._nil.prev.next = node
        self._nil.prev = node

## 3. Neural Network Implementation

### 3.1. Layer Class

In [372]:
class Layer(Node):
    """
    Base class of all layers; concrete layers inherit from it.

    Every method defined here is a no-op placeholder that subclasses override
    where they have real work to do.

    Attributes
    ----------
    _name : str
        Layer type name (the name of the concrete class).
    shape: tuple
        A shape of the layer (None until it is inferred).

    Methods
    -------
        build(*args, **kwargs):
            Infers layer's weights shape.

        forward(*args, **kwargs)
            Does forward propagation through the layer.

        backward(*args, **kwargs)
            Does backward propagation through the layer.

        update_weights(*args, **kwargs)
            Updates all weights of the class

    """

    def __init__(self):
        super().__init__()
        self._name = type(self).__name__
        self.shape = None

    def __str__(self):
        return self._name

    def build(self, *args, **kwargs):
        """Placeholder: does nothing and returns None."""

    def forward(self, *args, **kwargs):
        """Placeholder: does nothing and returns None."""

    def backward(self, *args, **kwargs):
        """Placeholder: does nothing and returns None."""

    def update_weights(self, *args, **kwargs):
        """Placeholder: does nothing and returns None."""

### 3.2. Input Layer Class

In [376]:
class Input(Layer):
    """
    Input layer: holds the data that is fed into the network.

    Attributes
    ----------
        x : np.array
            A numpy array of training data (None until build is called).

    Methods
    -------
        build(x)
            Stores the data and takes the layer's shape from it.

        forward(x)
            Does forward propagation (just returns the stored data).

    """

    def __init__(self):
        super().__init__()

        self.x = None

    def build(self, x):
        """Store the input data and infer the layer's shape from it.

        Parameters
        ----------
            x : np.array
                An array of input data.

        """
        self.x = x
        self.shape = x.shape

    def forward(self, *args, **kwargs):
        """Does forward propagation to the next layer.

        The input layer has no weights or biases, so every argument is ignored
        and the stored data is handed on as it is.

        Returns
        -------
        np.array
            Input data.
        """

        stored = self.x
        return stored

    @property
    def A(self):
        """Stored data, exposed under the name other layers use for activations."""

        return self.x

    @property
    def weights(self):
        """Stored data, exposed so that the layer offers the same attributes as others."""

        return self.x

In [378]:
class Dense(Layer):
    """
    This class defines basic dense layer.

    A dense layer takes the dot product of its input with the weights and adds
    the bias. After that it applies the activation function and propagates
    this value to the next layer.

    Layer structure:
        If we have:
            X - input.
            W - weights.
            b - biases.
            f(x) - activation function
        Then:
            Z = dot(X, W) + b
            A = f(Z)

    Attributes
    ----------
    weights : np.array
        The matrix of weights, shape (n_inputs, units).
    bias : np.array
        The row vector of biases, shape (1, units).
    Z : np.array
        Cached value of linear transformation.
    A : np.array
        Cached value after activation.
    dW : np.array
        Cached value of loss' derivative w.r.t. weights.
    db : np.array
        Cached value of loss' derivative w.r.t. bias, shape (1, units).

    Methods
    -------
    init(units, activation=None)
        Initialize layer with given number of units and activation function.
    build():
        Infers layer's weights shape.

    forward(*args, **kwargs)
        Does forward propagation through the layer.

    backward(*args, **kwargs)
        Does backward propagation through the layer.

    update_weights(*args, **kwargs)
        Updates all weights of the class

    """

    def __init__(self, units, activation=None):
        """Initialization method

        Initializes number of units and activation function in the layer.

        Activation functions available for string parameter (case-insensitive):
        - 'softmax';
        - 'relu';
        - 'sigmoid';
        - 'identity'.

        Parameters
        ----------
        units : int
            The number of units inside the layer.
        activation : class, object, str or None
            The activation function. Either one of the names above, a class
            (or any zero-argument factory) producing an object that implements
            the function itself and its derivative ``prime``, or such an
            object directly. None selects the identity activation.

        Raises
        ------
        ValueError
            If activation is a string that is not one of the known names.

        """
        super().__init__()

        self._units = units
        self._activation = self._resolve_activation(activation)

        self.weights = None
        self.bias = None
        self.Z = None
        self.A = None
        self.dW = None
        self.db = None

    @staticmethod
    def _resolve_activation(activation):
        """Turn the ``activation`` constructor argument into an activation object."""

        if activation is None:
            # The documented default: a layer without activation is linear.
            return Identity()

        if isinstance(activation, str):
            known = {
                'softmax': Softmax,
                'relu': ReLU,
                'sigmoid': Sigmoid,
                'identity': Identity,
            }
            name = activation.lower()
            if name not in known:
                raise ValueError(
                    'Unknown activation %r; expected one of %s'
                    % (activation, sorted(known))
                )
            return known[name]()

        if not isinstance(activation, type) and hasattr(activation, 'prime'):
            # Already an activation object: use it as it is.
            return activation

        # A class or another zero-argument factory: instantiate it.
        return activation()

    def __str__(self):
        return super().__str__() + '(' + str(self._units) + ')'

    def build(self):
        """Infers layer's weights and bias shapes

        At first it infers the shape of the layer from the previous layer.
        Then weights and bias are initialized randomly with normal distribution.

        """

        self.shape = self.prev.shape[1], self._units
        self.weights = np.random.randn(*self.shape)
        # The bias is created as a (1, units) row so that it has the same shape
        # as db (computed with keepdims=True) before and after the first update.
        self.bias = np.random.randn(1, self._units)

    def forward(self, x):
        """Propagates activated data to the next layer

        Computes output of the layer:
        X - input data.
        W - weights.
        b - bias.
        f(x) - activation function

        Z = dot(X, W) + b
        A = f(Z)

        Parameters
        ----------
        x : np.array
            Input data.

        Returns
        -------
        np.array
            Propagated through layer data.

        """

        self.Z = np.dot(x, self.weights) + self.bias
        self.A = self._activation(self.Z)

        return self.A

    def backward(self, dPred):
        """Calculates gradients of the layer and continue backward propagation further.

        Computes gradients using backprop:
        We have:
            dA(l) - gradient w.r.t. activation of the current layer. It is defined as input because of neural network design.
            Z(l) - cached data after linear transformation of the current layer.
            A(l-1) - cached data after activation of the previous layer.
            W(l) - weights of the current layer.
            f(x) - activation function.
        Then:
            dZ(l) = dA(l) * f'(Z(l)) - gradient w.r.t. linear transformation of the current layer.
            dW(l) = dot(A(l-1).T, dZ(l)) - gradient w.r.t. weights of the current layer.
            db(l) = sum(dZ(l)_i) - gradient w.r.t. bias of the current layer.
            dA(l-1) = dot(dZ, W(l).T) - gradient w.r.t. activation unit of the previous layer.

        Parameters
        ----------
        dPred : np.array
            Input gradient. In this implementation it is gradient w.r.t. activation unit of the current layer.

        Returns
        -------
        np.array
            Output gradient. In this implementation it is gradient w.r.t. activation unit of the previous layer.

        """
        dZ = np.multiply(dPred, self._activation.prime(self.Z))
        self.dW = np.dot(self.prev.A.T, dZ)
        self.db = np.sum(dZ, axis=0, keepdims=True)
        dA = np.dot(dZ, self.weights.T)

        return dA

    def update_weights(self, alpha):
        """ Does weight updating.

        It updates weights using gradient descent rule:
        W = W - alpha * dW
        b = b - alpha * db

        Parameters:
            alpha : float
                Step-size parameter.

        """
        self.weights = self.weights - alpha * self.dW
        self.bias = self.bias - alpha * self.db

In [416]:
class Sequential(LinkedList):
    """
    Sequential Neural Network model implementation.

    Layers are kept as nodes of the inherited linked list: the first node is
    the input layer and the following nodes are the trainable layers. Every
    epoch runs one forward pass, one backward pass and one weight update over
    the whole data set. Targets are always one-hot encoded.

    Attributes
    ----------
    _is_built : bool
        Flag that indicates whether model has already been built.
    _loss : Obj
        Loss function with its derivative.
    alpha : float
        Step-size parameter.
    y : np.array
        One-hot encoded real target values of the training data.
    metric : func
        Metric which is calculated after training.

    Methods
    -------
    add(layer)
        Adds a layer to the end of the model.
    compile(loss, alpha=1e-4, metric=accuracy)
        Compile a model with specific loss, step-size and metric.
    evaluate(x, y)
        Evaluate loss function and metric values with given data.
    fit(x, y, epochs=1, shuffle=True)
        Build and train neural network.
    predict(x)
        Makes prediction for the input.
    show_shape()
        Display shapes of layers.

    """

    def __init__(self):
        super().__init__()

        self._is_built = False
        self._loss = None
        self.alpha = None
        self.y = None
        self.metric = None

    def add(self, layer):
        """Add layer to the end of the model.

        Parameters
        ----------
        layer : Layer
            A layer to be added.

        """

        self.insert(layer)

    def compile(self, loss, alpha=1e-4, metric=accuracy):
        """Compile model with given loss function, step-size and metric function.

        Provided loss functions (names are case-insensitive):
        - 'mse' for Mean Squared Error (regression);
        - 'crossentropy' for Cross Entropy loss (classification).

        Parameters
        ----------
        loss : Obj, class or str
            Loss function object with function and its derivative evaluation
            implemented, a class producing such an object, or one of the names
            above.
        alpha : float
            Step-size parameter.
        metric : func
            Metric function.

        Raises
        ------
        ValueError
            If loss is a string that is not one of the provided names.

        """

        self.alpha = alpha
        self.metric = metric

        if isinstance(loss, str):
            name = loss.lower()
            if name == 'mse':
                self._loss = MSE()
            elif name == 'crossentropy':
                self._loss = CrossEntropy()
            else:
                # Fail here instead of later with "'str' object is not callable".
                raise ValueError(
                    "Unknown loss %r; expected 'mse' or 'crossentropy'" % (loss,)
                )
        elif isinstance(loss, type):
            # A loss class is accepted the same way Dense accepts an activation class.
            self._loss = loss()
        else:
            self._loss = loss

        # TODO: several metrics handling

    def evaluate(self, x, y):
        """Evaluates loss function and metric values.

        Parameters
        ----------
        x : np.array
            Input data x values.
        y : np.array
            Real target values (class labels, not one-hot encoded).

        Returns
        -------
        tuple
            Tuple contains loss function value and metric value

        """
        y_pred = self._propagate(x)
        # one_hot infers its width from the largest label present, which can be
        # smaller than the number of outputs when y lacks the highest class.
        y = self._pad_classes(one_hot(y), y_pred.shape[1])
        return self._loss(y, y_pred), self.metric(y, y_pred)

    def fit(self, x, y, epochs=1, shuffle=True):
        """Builds and trains model.

        The first call builds the layers (initializes the weights). Later calls
        keep the learned weights and continue training on the data passed to
        that call.

        After training, the loss of the last epoch (computed before that
        epoch's weight update) and the metric are printed. With epochs < 1
        nothing is trained and nothing is printed.

        Parameters
        ----------
        x : np.array
            Input data array.
        y : np.array
            Real target values (class labels).
        epochs : int
            Number of epochs to train.
        shuffle : bool
            Whether to shuffle input data.

        """

        # TODO: validation split implementation
        # TODO: validation data implementation
        # TODO: visualization of training process

        # Shuffle data if needed
        if shuffle:
            x, y = unison_shuffled_copies(x, y)

        if self._is_built:
            # Continue training: keep the weights, but use the data given now
            # (encoded with as many classes as the model was built with).
            self._set_data(x, y, num_classes=self.y.shape[1])
        else:
            # Build model if it hasn't been built already
            self._build(x, y)

        loss_value = None
        output = None

        # Train model
        for _ in range(epochs):
            # Forward pass
            output = self._forward()
            # Calculate loss value and its derivative
            loss_value = self._loss(self.y, output)
            derivative = self._loss.prime(self.y, output)
            # Backward pass
            self._backward(derivative)
            # Weight updating
            self._update_weights()

        if output is None:
            # No epoch ran, so there is nothing to report.
            return

        print('Loss: ', loss_value)
        print('Accuracy: ', self.metric(self.y, output))

    def predict(self, x):
        """Predicts output with given input.

        Parameters
        ----------
        x : np.array
            Input data: either a batch of shape (n_samples, n_features) or a
            single sample of shape (n_features,).

        Returns
        -------
        np.array or integer
            Array with one predicted class index per sample for a batch;
            a single class index for a single sample.

        """

        x = np.asarray(x)
        is_single_sample = x.ndim == 1
        output = self._propagate(np.atleast_2d(x))
        # argmax per row; a plain argmax would return one index into the
        # flattened output of the whole batch.
        labels = np.argmax(output, axis=1)
        return labels[0] if is_single_sample else labels

    def _build(self, x, y):
        """Build layers and do one-hot encoding of input target variable.

        Parameters
        ----------
        x : np.array
            Input data.
        y : np.array
            Input target variable values.

        """

        self._set_data(x, y)

        layer = self._nil.next.next
        while layer.key:
            layer.build()
            layer = layer.next

        self._is_built = True

    def _set_data(self, x, y, num_classes=None):
        """Store one-hot encoded targets and hand the inputs to the input layer.

        Parameters
        ----------
        x : np.array
            Input data.
        y : np.array
            Input target variable values.
        num_classes : int, optional
            Minimal number of columns of the encoded targets.

        """

        targets = one_hot(y)
        if num_classes is not None:
            targets = self._pad_classes(targets, num_classes)
        self.y = targets

        self._nil.next.build(x)

    @staticmethod
    def _pad_classes(targets, num_classes):
        """Append zero columns so that targets has at least num_classes columns."""

        missing = num_classes - targets.shape[1]
        if missing > 0:
            targets = np.pad(targets, ((0, 0), (0, missing)), mode='constant')
        return targets

    def _propagate(self, x):
        """Push x through every layer after the input layer and return the result."""

        layer = self._nil.next.next
        while layer.key:
            x = layer.forward(x)
            layer = layer.next
        return x

    def _forward(self):
        """Does a forward pass layer by layer

        Starts at the input layer, which ignores its argument and returns the
        stored training data.

        Returns
        -------
        np.array
            Returns output of activation of the last layer.

        """

        x = None
        layer = self._nil.next
        while layer.key:
            x = layer.forward(x)
            layer = layer.next
        return x

    def _backward(self, x):
        """Does a backward pass layer by layer

        Walks from the last layer towards the front and stops before the first
        (input) layer, which has nothing to update.

        Parameters
        ----------
        x : np.array
            Derivative of the loss function w.r.t. last activation unit.

        """
        layer = self._nil.prev
        while layer.prev.key:
            x = layer.backward(x)
            layer = layer.prev

    def _update_weights(self):
        """Updates weights and bias."""
        layer = self._nil.next
        while layer.key:
            layer.update_weights(self.alpha)
            layer = layer.next

    def show_shape(self):
        """Show model structure."""
        shapes = []
        layer = self._nil.next
        while layer.key:
            shapes.append(str(layer.shape))
            layer = layer.next
        # join() leaves no dangling separator or trailing space.
        print(' -> '.join(shapes))

In [417]:
# Load data
# The digits dataset shipped with scikit-learn (load_digits is imported at the top).
data = load_digits()

# x is used below as the network input and y as the class labels.
x, y = data['data'], data['target']

In [418]:
# Split data
# NOTE(review): train_test_split draws its permutation from np.random and no seed
# is set anywhere in this notebook, so the split and every number reported below
# change from run to run.
x_train, y_train, x_test, y_test = train_test_split(x, y)

In [429]:
# Build model
model = Sequential()

# Input layer, one hidden layer of 2048 sigmoid units, 10-unit softmax output.
model.add(Input())
model.add(Dense(2048, 'sigmoid'))
model.add(Dense(10, 'softmax'))

# Compile and train
# fit() prints the loss and accuracy measured on the training data.
model.compile(loss='crossentropy', alpha=0.0001)
model.fit(x_train, y_train, epochs=100)

Loss:  0.0006639189131890422
Accuracy:  1.0


In [430]:
# Evaluate loss and accuracy on test data
# NOTE(review): this rebinds the global name `accuracy` from the metric function
# defined earlier to a number; re-running an earlier cell that looks up `accuracy`
# after this one will pick up the number. Consider distinct names for the results.
loss, accuracy = model.evaluate(x_test, y_test)

In [431]:
# Report the values returned by model.evaluate for the test split.
print('Loss:', loss)
print('Accuracy:', accuracy)

Loss: 0.28188711331426497
Accuracy: 0.6827458256029685


In [436]:
# Show random prediction
# NOTE(review): the index is drawn from the full dataset x, so the sample may be
# one the model was trained on rather than a held-out test sample.
idx = np.random.randint(len(x))
print('Real value:', y[idx])
print('Predicted value:', model.predict(x[idx]))

Real value: 7
Predicted value: 7


7