### Tanh activation and MSE loss

In [1]:
import numpy as np

def tanh(x):
    """Hyperbolic-tangent activation, applied element-wise through ``np.tanh``.

    Args:
        x: Scalar or array of pre-activation values.

    Returns:
        ``np.tanh(x)``, with the same shape as ``x``.
    """
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of the tanh activation evaluated at ``x``.

    Uses the identity d/dx tanh(x) = 1 - tanh(x)^2.

    Args:
        x: Scalar or array of pre-activation values.

    Returns:
        ``1 - np.tanh(x) ** 2``, with the same shape as ``x``.
    """
    squashed = np.tanh(x)
    return 1 - squashed ** 2

def mse_tanh(y_true, y_pred):
    """Mean squared error between targets and predictions.

    Both arguments are coerced with ``np.asarray`` so plain Python sequences
    are accepted as well as ndarrays; ndarray inputs keep their dtype.

    Args:
        y_true: Target values (array-like).
        y_pred: Predicted values (array-like), broadcastable against ``y_true``.

    Returns:
        The mean of the element-wise squared differences (a NumPy scalar).
    """
    diff = np.asarray(y_true) - np.asarray(y_pred)
    return np.mean(np.power(diff, 2))

def mse_prime_tanh(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``.

    Both arguments are coerced with ``np.asarray`` so plain Python sequences
    are accepted. The divisor is the number of elements in the (broadcast)
    difference, i.e. the same count the mean in the loss averages over; for
    equally-shaped inputs this equals ``y_true.size``.

    Args:
        y_true: Target values (array-like).
        y_pred: Predicted values (array-like), broadcastable against ``y_true``.

    Returns:
        Array ``2 * (y_pred - y_true) / N`` where ``N`` is the element count
        of the difference.
    """
    diff = np.asarray(y_pred) - np.asarray(y_true)
    return 2 * diff / diff.size

### ReLU activation and MSE loss

In [2]:
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Args:
        x: Scalar or array of pre-activation values.

    Returns:
        ``np.maximum(0, x)``.
    """
    rectified = np.maximum(0, x)
    return rectified

def relu_prime(x):
    """Derivative of ReLU: 1 where ``x`` is strictly positive, 0 elsewhere.

    Args:
        x: Array of pre-activation values.

    Returns:
        Integer array of ones and zeros shaped like ``x``.
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def mse_relu(y_true, y_pred):
    """Mean squared error between targets and predictions.

    Both arguments are coerced with ``np.asarray`` so plain Python sequences
    are accepted as well as ndarrays; ndarray inputs keep their dtype.

    Args:
        y_true: Target values (array-like).
        y_pred: Predicted values (array-like), broadcastable against ``y_true``.

    Returns:
        The mean of the element-wise squared differences (a NumPy scalar).
    """
    diff = np.asarray(y_true) - np.asarray(y_pred)
    return np.mean(np.power(diff, 2))

def mse_prime_relu(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``.

    Both arguments are coerced with ``np.asarray`` so plain Python sequences
    are accepted. The divisor is the number of elements in the (broadcast)
    difference, i.e. the same count the mean in the loss averages over; for
    equally-shaped inputs this equals ``y_true.size``.

    Args:
        y_true: Target values (array-like).
        y_pred: Predicted values (array-like), broadcastable against ``y_true``.

    Returns:
        Array ``2 * (y_pred - y_true) / N`` where ``N`` is the element count
        of the difference.
    """
    diff = np.asarray(y_pred) - np.asarray(y_true)
    return 2 * diff / diff.size

### Abstract class Layer

In [3]:
class Layer:
    """Abstract base for every layer in the network.

    Subclasses must override both propagation methods; the versions here
    only raise ``NotImplementedError``.
    """

    def __init__(self):
        # Placeholders for the most recent forward-pass input and output.
        self.input = self.output = None

    def forward_propagation(self, input):
        """Compute this layer's output for ``input`` (must be overridden)."""
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Propagate ``output_error`` backwards (must be overridden)."""
        raise NotImplementedError()

### Fully-connected layer

In [4]:
class FCLayer(Layer):
    """Fully-connected (dense) layer computing ``input @ weights + bias``.

    Weights have shape ``(input_size, output_size)`` and the bias has shape
    ``(1, output_size)``; both are drawn from ``np.random.rand(...) - 0.5``,
    i.e. uniformly from [-0.5, 0.5).
    """

    def __init__(self, input_size, output_size):
        """Create the layer and randomly initialise its parameters.

        Args:
            input_size: Number of input features.
            output_size: Number of output units.
        """
        # Initialise the base-class ``input`` / ``output`` placeholders.
        super().__init__()
        # Weights are drawn before the bias, so seeded runs consume the
        # global NumPy RNG in a fixed order.
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Return ``input_data @ weights + bias`` and cache the input.

        Args:
            input_data: Array whose last dimension equals ``input_size``.

        Returns:
            The layer output (also stored in ``self.output``).
        """
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            output_error: Gradient of the loss w.r.t. this layer's output,
                one row per sample.
            learning_rate: Step size for the parameter update.

        Returns:
            Gradient of the loss w.r.t. this layer's input.
        """
        # Computed with the weights as they were during the forward pass,
        # i.e. before the update below.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # The bias is shared by every row, so its gradient is the sum of the
        # per-row errors. For a single-row error this equals the error
        # itself; for several rows it avoids an in-place broadcast failure
        # against the (1, output_size) bias.
        bias_error = np.sum(np.atleast_2d(output_error), axis=0, keepdims=True)
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

### Activation Layer

In [5]:
class ActivationLayer(Layer):
    """Layer that applies an element-wise activation function.

    It has no trainable parameters, so the backward pass only rescales the
    incoming error by the activation's derivative.
    """

    def __init__(self, activation, activation_prime):
        """Store the activation and its derivative.

        Args:
            activation: Callable applied to the input in the forward pass.
            activation_prime: Callable returning the derivative of
                ``activation`` at a given input.
        """
        # Initialise the base-class ``input`` / ``output`` placeholders so
        # they exist even before the first forward pass.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        """Return ``activation(input_data)`` and cache the input."""
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Return the gradient w.r.t. the input (chain rule).

        ``learning_rate`` is accepted for interface compatibility but is
        unused, since this layer has nothing to update.
        """
        return self.activation_prime(self.input) * output_error

### Dropout Layer

In [6]:
class DropoutLayer(Layer):
    """Inverted-dropout layer.

    While ``training`` is true, each element is zeroed with probability
    ``p`` and the survivors are scaled by ``1 / (1 - p)``. A fresh mask is
    sampled on every forward pass, so set ``layer.training = False`` before
    running inference to make the layer pass its input through unchanged.
    """

    def __init__(self, p, training=True):
        """Create the layer.

        Args:
            p: Dropout probability; must satisfy ``0 <= p < 1``.
            training: Whether dropout is active. Defaults to ``True``, which
                matches the behaviour of always applying a random mask.

        Raises:
            ValueError: If ``p`` is outside ``[0, 1)``. ``p == 1`` would
                divide by zero when rescaling the mask.
        """
        # Initialise the base-class ``input`` / ``output`` placeholders.
        super().__init__()
        if not 0 <= p < 1:
            raise ValueError("dropout probability p must be in [0, 1), got %r" % (p,))
        self.p = p  # dropout probability
        self.training = training
        self.mask = None  # mask used by the most recent forward pass

    def forward_propagation(self, input_data):
        """Apply the dropout mask to ``input_data`` and return the result."""
        self.input = input_data
        if self.training:
            keep_prob = 1 - self.p
            # Bernoulli keep-mask, rescaled so survivors are divided by the
            # keep probability.
            self.mask = np.random.binomial(1, keep_prob, size=input_data.shape) / keep_prob
        else:
            # Inference: identity mask, nothing is dropped or rescaled.
            self.mask = np.ones(input_data.shape)
        self.output = input_data * self.mask
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Route the error through the mask used in the last forward pass.

        ``learning_rate`` is unused; the layer has no parameters.
        """
        return output_error * self.mask

### Network class

In [7]:
class Network:
    """Sequential stack of layers trained one sample at a time.

    Layers are any objects providing ``forward_propagation(input)`` and
    ``backward_propagation(output_error, learning_rate)``.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative used by ``fit``.

        Both are called as ``f(y_true, y_pred)``.
        """
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Run one sample through every layer in order and return the output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for each sample.

        Args:
            input_data: Indexable collection of samples supporting ``len``.
        """
        return [self._forward(input_data[i]) for i in range(len(input_data))]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train with per-sample gradient descent, printing the mean loss per epoch.

        Args:
            x_train: Indexable collection of input samples.
            y_train: Indexable collection of targets, one per input sample.
            epochs: Number of passes over the training data.
            learning_rate: Step size handed to each layer's backward pass.

        Raises:
            RuntimeError: If ``use`` has not been called to set the loss.
            ValueError: If ``x_train`` is empty or its length differs from
                ``y_train``.
        """
        # Fail early with a readable message instead of
        # "'NoneType' object is not callable" in the middle of training.
        if self.loss is None or self.loss_prime is None:
            raise RuntimeError("no loss configured: call use(loss, loss_prime) before fit()")
        samples = len(x_train)
        if samples == 0:
            # Would otherwise hit a ZeroDivisionError when averaging the loss.
            raise ValueError("x_train is empty: nothing to train on")
        if len(y_train) != samples:
            raise ValueError(
                "x_train and y_train must have the same length, got %d and %d"
                % (samples, len(y_train))
            )
        for i in range(epochs):
            err = 0
            for j in range(samples):
                output = self._forward(x_train[j])
                # Accumulate the loss only for reporting.
                err += self.loss(y_train[j], output)
                # Back-propagate from the last layer to the first; each layer
                # updates itself and returns the error for the one before it.
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)
            err /= samples
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

### Solve the XOR problem without Dropout Layer

In [8]:
# Seed the global NumPy RNG before any layer is built so the random weight
# initialisation is repeatable.
np.random.seed(10)

# XOR truth table: four samples shaped (1, 2) with targets shaped (1, 1).
x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 1, 2)
y_train = np.array([0, 1, 1, 0]).reshape(4, 1, 1)

# 2 -> 10 -> 1 network with tanh after each fully-connected layer; this
# variant has no dropout layer.
net = Network()
for layer in (
    FCLayer(2, 10),
    ActivationLayer(tanh, tanh_prime),
    FCLayer(10, 1),
    ActivationLayer(tanh, tanh_prime),
):
    net.add(layer)
net.use(mse_tanh, mse_prime_tanh)

# Train, then print the predictions for the four training inputs.
net.fit(x_train, y_train, epochs=500, learning_rate=0.1)
out = net.predict(x_train)
print(out)

epoch 1/500   error=0.698956
epoch 2/500   error=0.474182
epoch 3/500   error=0.348177
epoch 4/500   error=0.323970
epoch 5/500   error=0.317388
epoch 6/500   error=0.314884
epoch 7/500   error=0.313556
epoch 8/500   error=0.312545
epoch 9/500   error=0.311595
epoch 10/500   error=0.310635
epoch 11/500   error=0.309654
epoch 12/500   error=0.308653
epoch 13/500   error=0.307637
epoch 14/500   error=0.306609
epoch 15/500   error=0.305570
epoch 16/500   error=0.304522
epoch 17/500   error=0.303462
epoch 18/500   error=0.302391
epoch 19/500   error=0.301304
epoch 20/500   error=0.300200
epoch 21/500   error=0.299075
epoch 22/500   error=0.297927
epoch 23/500   error=0.296751
epoch 24/500   error=0.295544
epoch 25/500   error=0.294302
epoch 26/500   error=0.293021
epoch 27/500   error=0.291699
epoch 28/500   error=0.290329
epoch 29/500   error=0.288910
epoch 30/500   error=0.287437
epoch 31/500   error=0.285906
epoch 32/500   error=0.284314
epoch 33/500   error=0.282658
epoch 34/500   erro

### Solve the XOR problem with Dropout Layer

In [9]:
# Seed the global NumPy RNG before any layer is built so the random weight
# initialisation and the dropout masks are repeatable.
np.random.seed(10)

# XOR truth table: four samples shaped (1, 2) with targets shaped (1, 1).
x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 1, 2)
y_train = np.array([0, 1, 1, 0]).reshape(4, 1, 1)

# Same 2 -> 10 -> 1 tanh network, with a dropout layer (p=0.1) appended
# after the final activation.
net = Network()
for layer in (
    FCLayer(2, 10),
    ActivationLayer(tanh, tanh_prime),
    FCLayer(10, 1),
    ActivationLayer(tanh, tanh_prime),
    DropoutLayer(p=0.1),
):
    net.add(layer)
net.use(mse_tanh, mse_prime_tanh)

# Train, then print the predictions for the four training inputs.
net.fit(x_train, y_train, epochs=500, learning_rate=0.1)
out = net.predict(x_train)
print(out)

epoch 1/500   error=0.753250
epoch 2/500   error=0.490580
epoch 3/500   error=0.364831
epoch 4/500   error=0.410985
epoch 5/500   error=0.343759
epoch 6/500   error=0.233284
epoch 7/500   error=0.341379
epoch 8/500   error=0.342953
epoch 9/500   error=0.342048
epoch 10/500   error=0.340437
epoch 11/500   error=0.338715
epoch 12/500   error=0.337018
epoch 13/500   error=0.335376
epoch 14/500   error=0.080790
epoch 15/500   error=0.331657
epoch 16/500   error=0.326005
epoch 17/500   error=0.328046
epoch 18/500   error=0.328260
epoch 19/500   error=0.327333
epoch 20/500   error=0.326065
epoch 21/500   error=0.324707
epoch 22/500   error=0.323324
epoch 23/500   error=0.321930
epoch 24/500   error=0.611926
epoch 25/500   error=0.439681
epoch 26/500   error=0.406685
epoch 27/500   error=0.331428
epoch 28/500   error=0.321935
epoch 29/500   error=0.317927
epoch 30/500   error=0.315346
epoch 31/500   error=0.313266
epoch 32/500   error=0.311373
epoch 33/500   error=0.309540
epoch 34/500   erro

### Solve the MNIST problem with TANH

In [None]:
from keras.datasets import mnist
from keras.utils import to_categorical
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a (1, 784) float32 row and divide by 255.
x_train = x_train.reshape(x_train.shape[0], 1, 28 * 28).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], 1, 28 * 28).astype('float32') / 255

# Convert the class labels with Keras' to_categorical.
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Seed before building layers so the weight initialisation is repeatable.
np.random.seed(10)

# 784 -> 100 -> 50 -> 10 network with tanh after every fully-connected layer.
net = Network()
for layer in (
    FCLayer(28 * 28, 100),
    ActivationLayer(tanh, tanh_prime),
    FCLayer(100, 50),
    ActivationLayer(tanh, tanh_prime),
    FCLayer(50, 10),
    ActivationLayer(tanh, tanh_prime),
):
    net.add(layer)
net.use(mse_tanh, mse_prime_tanh)

net.fit(x_train, y_train, epochs=20, learning_rate=0.1)

# Compare predictions with the targets for the first ten test samples.
out = net.predict(x_test[:10])
print("\n")
print("predicted values : ")
print(out)
print("true values : ")
print(y_test[:10])

### Solve the MNIST problem with RELU

In [None]:
from keras.datasets import mnist
from keras.utils import to_categorical
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a (1, 784) float32 row and divide by 255.
x_train = x_train.reshape(x_train.shape[0], 1, 28 * 28).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], 1, 28 * 28).astype('float32') / 255

# Convert the class labels with Keras' to_categorical.
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Seed before building layers so the weight initialisation is repeatable.
np.random.seed(10)

# 784 -> 100 -> 50 -> 10 network with ReLU after every fully-connected layer.
net = Network()
for layer in (
    FCLayer(28 * 28, 100),
    ActivationLayer(relu, relu_prime),
    FCLayer(100, 50),
    ActivationLayer(relu, relu_prime),
    FCLayer(50, 10),
    ActivationLayer(relu, relu_prime),
):
    net.add(layer)
net.use(mse_relu, mse_prime_relu)

net.fit(x_train, y_train, epochs=20, learning_rate=0.1)

# Compare predictions with the targets for the first ten test samples.
out = net.predict(x_test[:10])
print("\n")
print("predicted values : ")
print(out)
print("true values : ")
print(y_test[:10])