In [None]:
import numpy as np

In [None]:
class Regularizations:
    """Namespace grouping weight-penalty terms together with their gradients."""

    class L2:
        """Mean-of-squares penalty."""

        @staticmethod
        def reg(a):
            """Return the mean of the squared entries of ``a``."""
            squared = a * a
            return np.mean(squared)

        @staticmethod
        def reg_prime(a):
            """Return the element-wise gradient of :meth:`reg` with respect to ``a``."""
            doubled = a * 2
            return doubled / a.size

In [None]:
class Losses:
    """Namespace grouping loss functions together with their gradients."""

    class MSE:
        """Mean squared error, averaged over every element of the target."""

        @staticmethod
        def loss(y_true, y_pred):
            """Return the mean of the squared differences between target and prediction."""
            residual = y_true - y_pred
            return np.mean(residual ** 2)

        @staticmethod
        def loss_prime(y_true, y_pred):
            """Return the gradient of :meth:`loss` with respect to ``y_pred``."""
            residual = y_pred - y_true
            return 2 * residual / y_true.size

In [None]:
class Activations: # alphabet order
    """Namespace of activation functions.

    Each activation class exposes two static methods:

    * ``activation(a)`` -- the forward value for input ``a``;
    * ``activation_prime(a)`` -- the element-wise derivative.

    ``Layers.Activation.backward_pass`` calls ``activation_prime`` with the
    layer's *pre-activation* input and multiplies the result element-wise with
    the incoming error, so derivatives must be expressed in terms of that input.
    """

    class activation:
        """Identity activation; also the base class of every other activation."""

        @staticmethod
        def activation(a):
            """Return ``a`` unchanged."""
            return a

        @staticmethod
        def activation_prime(a):
            """Return the constant derivative 1."""
            return 1


    class ReLU(activation):
        """Rectified linear unit."""

        @staticmethod
        def activation(a):
            """Return ``a`` with negative entries replaced by 0."""
            return np.maximum(a, 0)

        @staticmethod
        def activation_prime(a):
            """Return 1 where ``a > 0`` and 0 elsewhere."""
            ret = np.array(1 * (a > 0))
            return ret


    class sigmoid(activation):
        """Logistic sigmoid."""

        @staticmethod
        def activation(a):
            """Return ``1 / (1 + exp(-a))`` element-wise."""
            return 1 / (1 + np.exp(-a))

        @staticmethod
        def activation_prime(a):
            """Return the sigmoid derivative evaluated at the pre-activation ``a``.

            The derivative is ``s * (1 - s)`` with ``s = sigmoid(a)``; the sigmoid
            has to be applied first because ``a`` is the layer input, not its output.
            """
            s = 1 / (1 + np.exp(-a))
            return s * (1 - s)


    class softmax(activation):
        """Softmax normalised along axis 0 with a small constant in the denominator."""

        @staticmethod
        def activation(a):
            """Return ``exp(a)`` divided by ``0.0001 + sum(exp(a), axis=0)``."""
            exp = np.exp(a)
            # NOTE(review): the 0.0001 term means the outputs do not sum exactly to 1, and
            # axis=0 differs from stable_softmax (axis=1) -- confirm which is intended.
            return exp / (0.0001 + np.sum(exp, axis=0))

        @staticmethod
        def activation_prime(a):
            """Return ``t*a*(1-a) - (1-t)*a*a`` with ``t`` an identity-like mask shaped like ``a``.

            NOTE(review): the expression resembles softmax-Jacobian entries written in
            terms of the softmax *output*, whereas callers pass the pre-activation input
            and multiply element-wise -- verify before relying on this gradient.
            """
            t = np.eye(N=a.shape[0], M=a.shape[1])
            return t * a * (1 - a) - (1 - t) * a * a


    class stable_softmax(activation):
        """Row-wise softmax that subtracts each row's maximum before exponentiating."""

        @staticmethod
        def activation(a):
            """Return the softmax of each row of the 2-D array ``a``.

            Subtracting the per-row maximum leaves the result unchanged but keeps
            ``exp`` from overflowing for large inputs.
            """
            # keepdims=True keeps a trailing axis of length 1 so both the shift and the
            # normalisation broadcast across the columns of each row.
            shifted = a - np.max(a, axis=1, keepdims=True)
            exp = np.exp(shifted)
            return exp / np.sum(exp, axis=1, keepdims=True)

        # Derivative not implemented: activation_prime is inherited from the identity
        # base class and therefore returns 1.


    class tanh(activation):
        """Hyperbolic tangent."""

        @staticmethod
        def activation(a):
            """Return ``tanh(a)`` element-wise."""
            return np.tanh(a)

        @staticmethod
        def activation_prime(a):
            """Return ``1 - tanh(a)**2``, the derivative at the pre-activation ``a``."""
            return 1 - np.tanh(a)**2

In [None]:
class Layers:
    """Namespace of network layers."""

    class DummyLayer:
        """Base class defining the layer interface (forward and backward pass)."""

        def __init__(self):
            self.input_shape = None
            self.output_shape = None
            # NeuralNetwork reads `learnable` on every layer, so subclasses that never
            # set it are treated as parameter-free instead of raising AttributeError.
            self.learnable = False

        def forward_pass(self, input):
            """Compute the layer output for ``input``; must be overridden."""
            raise NotImplementedError

        def backward_pass(self, output):
            """Propagate the error ``output`` back through the layer; must be overridden."""
            raise NotImplementedError



    class Dense(DummyLayer):
        """Fully connected layer computing ``input @ features_weights + bias_weights``."""

        def __init__(self, input_shape=None, output_shape=None, learning_rate=None, reg_const=None, reg_type=None):
            """Create the layer and draw its weights uniformly from [-0.5, 0.5).

            Args:
                input_shape: number of input features (rows of the weight matrix).
                output_shape: number of output features (columns of the weight matrix).
                learning_rate: step size used in ``backward_pass``; may be left as
                    ``None`` and filled in later (``NeuralNetwork`` does this).
                reg_const: multiplier of the regularisation gradient, or ``None``.
                reg_type: object exposing ``reg`` and ``reg_prime``, or ``None`` for
                    no regularisation.
            """
            super().__init__()
            self.input_shape = input_shape
            self.output_shape = output_shape

            # Cached by forward_pass; backward_pass needs the input to form the gradient.
            self.input = None
            self.output = None

            self.learning_rate = learning_rate
            self.reg_const = reg_const

            self.reg_type = reg_type

            if self.reg_type is None:
                self.reg_function = None
                self.reg_prime = None
            else:
                self.reg_function = reg_type.reg
                self.reg_prime = reg_type.reg_prime

            self.features_weights = np.random.rand(input_shape, output_shape) - 0.5
            self.bias_weights = np.random.rand(1, output_shape) - 0.5

            self.learnable = True

        def forward_pass(self, input):
            """Store ``input`` and return ``input @ features_weights + bias_weights``."""
            self.input = input
            self.output = input @ self.features_weights + self.bias_weights
            return self.output

        def backward_pass(self, output_error):
            """Apply one gradient step and return the error with respect to the input.

            Args:
                output_error: gradient of the loss with respect to this layer's output.

            Returns:
                Gradient of the loss with respect to the input cached by the last
                ``forward_pass``, computed with the weights as they were before
                this step's update.
            """
            # Must be computed before the weights are modified below.
            input_error = output_error @ self.features_weights.T
            weights_error = self.input.T @ output_error
            # Regularisation is optional: a layer used on its own may have no penalty
            # (or no constant) configured, in which case the term is simply omitted.
            if self.reg_prime is not None and self.reg_const is not None:
                weights_error = weights_error + self.reg_const * self.reg_prime(self.features_weights)
            bias_error = np.sum(output_error, axis=0)

            self.features_weights -= self.learning_rate * weights_error
            self.bias_weights -= self.learning_rate * bias_error

            return input_error



    class Activation(DummyLayer):
        """Parameter-free layer applying an activation function element-wise."""

        def __init__(self, activation_type=Activations.tanh):
            """Bind ``activation_type.activation`` and ``activation_type.activation_prime``."""
            super().__init__()
            self.activation = activation_type.activation
            self.activation_prime = activation_type.activation_prime

            # Cached by forward_pass; the derivative is evaluated at the stored input.
            self.input = None
            self.output = None

            self.learnable = False

        def forward_pass(self, input):
            """Store ``input`` and return the activation applied to it."""
            self.input = input
            self.output = self.activation(self.input)
            return self.output

        def backward_pass(self, output):
            """Return ``activation_prime(input) * output`` for the cached input."""
            return self.activation_prime(self.input) * output

In [None]:
class NeuralNetwork:
    """Sequential stack of layers trained by gradient descent on the whole data set."""

    def __init__(self, layers, default_learning_rate=0.01, default_reg_const=0.01, reg_type=Regularizations.L2, loss_class=Losses.MSE):
        """Collect the layers and fill in hyper-parameters they left unset.

        The layer objects are modified in place: for every learnable layer a
        ``None`` learning rate, regularisation constant or regularisation type is
        replaced by the corresponding default.

        Args:
            layers: iterable of layer objects, in forward order.
            default_learning_rate: learning rate for layers that have none.
            default_reg_const: regularisation constant for layers that have none.
            reg_type: object exposing ``reg`` and ``reg_prime``, used for layers
                whose ``reg_type`` is ``None``.
            loss_class: object exposing ``loss`` and ``loss_prime``.
        """
        self.layers = []
        for layer in layers:
            # Layers that do not declare `learnable` are treated as parameter-free
            # rather than failing with AttributeError.
            if getattr(layer, 'learnable', False):
                if layer.learning_rate is None:
                    layer.learning_rate = default_learning_rate

                if layer.reg_const is None:
                    layer.reg_const = default_reg_const

                if layer.reg_type is None:
                    layer.reg_function = reg_type.reg
                    layer.reg_prime = reg_type.reg_prime
            self.layers.append(layer)

        self.loss = loss_class.loss
        self.loss_prime = loss_class.loss_prime

    def fit(self, X, y, cnt_epochs=10, cnt_it=10000): # TODO: add optimizer
        """Train on the full ``(X, y)`` set and print the loss after every epoch.

        Args:
            X: network input, passed unchanged to the first layer.
            y: targets compared against the last layer's output.
            cnt_epochs: number of progress reports; a value <= 0 trains nothing.
            cnt_it: total number of gradient steps, split across epochs with
                integer division (a remainder is dropped).
        """
        if cnt_epochs <= 0:
            # Nothing to do; also avoids dividing by zero below.
            return

        it_for_epoch = cnt_it // cnt_epochs
        for i in range(cnt_epochs):
            for j in range(it_for_epoch):
                # Forward pass; each layer caches what it needs for the backward pass.
                output = self.predict(X)

                error = self.loss_prime(y, output)

                # Backward pass: every layer updates itself and hands the error upstream.
                for layer in reversed(self.layers):
                    error = layer.backward_pass(error)

            print('epoch %d/%d   error=%f' % (i+1, cnt_epochs, self.loss(y, self.predict(X))))

    def predict(self, X):
        """Run ``X`` through every layer's ``forward_pass`` and return the final output."""
        output = X
        for layer in self.layers:
            output = layer.forward_pass(output)

        return output

In [None]:
# XOR truth table: four samples with two binary features each.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Targets as a (4, 1) column so they line up with the network output.
y = np.array([[0], [1], [1], [0]])

print(X.shape, y.shape)

# 2 -> 2 -> 1 network with tanh after each dense layer; the regularisation
# constant is set to 0 for this toy problem.
nn = NeuralNetwork(
    [
        Layers.Dense(input_shape=2, output_shape=2),
        Layers.Activation(Activations.tanh),
        Layers.Dense(input_shape=2, output_shape=1),
        Layers.Activation(Activations.tanh),
    ],
    default_learning_rate=0.01,
    default_reg_const=0,
)

nn.fit(X, y, cnt_epochs=10, cnt_it=30000)

nn.predict(X)

(4, 2) (4, 1)
epoch 1/10   error=0.193711
epoch 2/10   error=0.139242
epoch 3/10   error=0.013316
epoch 4/10   error=0.003122
epoch 5/10   error=0.001546
epoch 6/10   error=0.000989
epoch 7/10   error=0.000715
epoch 8/10   error=0.000555
epoch 9/10   error=0.000451
epoch 10/10   error=0.000378


array([[0.00125322],
       [0.9725487 ],
       [0.97255891],
       [0.00214815]])

In [None]:
# Colab only: download the helper modules util.py and mnist.py from the
# yandexdataschool/Practical_DL repository (week01_backprop, pinned to commit
# 35c067adcc1ab364c8803830cdb34d0d50eea37e) into the working directory so that
# `import mnist` in the following cells can find them.
!wget https://raw.githubusercontent.com/yandexdataschool/Practical_DL/35c067adcc1ab364c8803830cdb34d0d50eea37e/week01_backprop/util.py -O util.py
!wget https://raw.githubusercontent.com/yandexdataschool/Practical_DL/35c067adcc1ab364c8803830cdb34d0d50eea37e/week01_backprop/mnist.py -O mnist.py

--2021-10-19 20:26:26--  https://raw.githubusercontent.com/yandexdataschool/Practical_DL/35c067adcc1ab364c8803830cdb34d0d50eea37e/week01_backprop/util.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3782 (3.7K) [text/plain]
Saving to: ‘util.py’


2021-10-19 20:26:26 (40.8 MB/s) - ‘util.py’ saved [3782/3782]

--2021-10-19 20:26:26--  https://raw.githubusercontent.com/yandexdataschool/Practical_DL/35c067adcc1ab364c8803830cdb34d0d50eea37e/week01_backprop/mnist.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2697 (2.6K) [text/pl

In [None]:
import mnist

In [None]:
from mnist import load_dataset
# Load the train / validation / test splits via the downloaded mnist helper.
# NOTE(review): flatten=True presumably yields one flat 28*28 vector per image
# (the first Dense layer below expects 28*28 inputs) -- confirm against mnist.py.
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset(flatten=True)

In [None]:
# One-hot encode the integer training labels: row i of `b` gets a single 1 in
# column a[i]; the number of columns is the largest label plus one.
a = y_train
b = np.zeros(shape=(a.size, a.max() + 1))
row_index = np.arange(a.size)
b[row_index, a] = 1
b.shape

(50000, 10)

In [None]:
# Give the one-hot label matrix built above a descriptive name; this is the
# target passed to NeuralNetwork.fit below.
y_train_prepr = b

In [None]:
# MNIST classifier: 784 -> 100 -> 200 -> 10, with ReLU after the first two dense
# layers and a linear output, trained against the one-hot targets with the
# network's default loss and regularisation settings.
nn = NeuralNetwork(
    [
        Layers.Dense(input_shape=28 * 28, output_shape=100),
        Layers.Activation(Activations.ReLU),
        Layers.Dense(input_shape=100, output_shape=200),
        Layers.Activation(Activations.ReLU),
        Layers.Dense(input_shape=200, output_shape=10),
    ],
    default_learning_rate=0.01,
)

nn.fit(X_train, y_train_prepr, cnt_epochs=100, cnt_it=1000)

epoch 1/100   error=4.739836


KeyboardInterrupt: ignored

In [None]:
# Predicted class per training sample = index of the largest of the ten outputs.
train_scores = nn.predict(X_train)
val = np.argmax(train_scores, axis=1)

In [None]:
# Spot check: true label next to the predicted label for the first training sample.
print(y_train[0], val[0])

5 8


In [None]:
# Training accuracy: share of samples whose predicted class equals the true label.
n_samples = val.shape[0]
cnt = sum(1 for i in range(n_samples) if val[i] == y_train[i])

print(cnt / n_samples)

0.22102
