In [1]:
import sys
import numpy as np

from typing import Optional
from keras.datasets import mnist

2024-12-03 13:36:10.916101: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-03 13:36:10.992703: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-03 13:36:11.052016: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733222171.115853    5642 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733222171.142975    5642 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-03 13:36:11.302373: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

✅Write down more dificult architecture

In [2]:
class Neuron:
    activation_functions = {
        'softmax': lambda x: np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True),
        'tanh': np.tanh
    }
    
    def __init__(self, input_size: tuple[int, int], activation: str='tanh', alpha_const: float=0.02, c_const: float=0.01,
                 dropout: bool=False):
        self.weights = alpha_const * np.random.random(input_size) - c_const
        self.activation = self.activation_functions[activation]
        self.derivative = lambda x: 1 - np.tanh(x) ** 2 if activation == 'tanh' else None
        self.dropout = dropout
        self.output = None

    def activate(self, inputs: np.array):
        z = np.dot(inputs, self.weights)
        self.output = self.activation(z)
        if self.dropout:
            dropout_mask = np.random.randint(2, size=self.output.shape)
            self.output *= dropout_mask * 2
        return self.output

In [3]:
class Network:
    def __init__(self, neurons: list, batch_size: int=100, learning_rate: float=0.1):
        self.neurons = neurons
        self.batch_size = batch_size
        self.learning_rate = learning_rate

    def forward(self, x: np.array) -> np.array:
        for neuron in self.neurons:
            x = neuron.activate(x)
        return x

    def backward(self, output: np.array, y: np.array, batch_start: int, batch_end: int):
        delta = None
        
        for i in range(len(self.neurons) - 1, 0, -1):
            neuron = self.neurons[i]
            if i == len(self.neurons) - 1:
                delta = (y[batch_start:batch_end] - output) / (self.batch_size * output.shape[0])
            else:
                delta = delta.dot(neuron.weights) * neuron.derivative(neuron.output)
            neuron.weights += self.learning_rate * self.neurons[i - 1].output.T.dot(delta)
    
    def train(self, x_train: np.array, y_train: np.array, x_test: np.array, y_test: np.array, epochs: int=10, verbose: Optional[int]=10):
        for epoch in range(epochs):
            error, correct_cnt = (0.0,0)
            for i in range(int(len(x_train) / batch_size)):
                batch_start, batch_end = ((i * batch_size), ((i + 1) * self.batch_size))
                input = x_train[batch_start:batch_end]
                output = self.forward(input)

                for k in range(batch_size):
                    error += np.sum((y_train[k:k+1] - output) ** 2)
                    correct_cnt += int(np.argmax(output[k:k+1]) == np.argmax(y_train[batch_start+k:batch_end+k+1]))

                self.backward(output, y_train, batch_start, batch_end)

            test_correct_cnt = 0
            test_error = 0.0
            
            for i in range(len(x_test)):
                input = x_test[i:i+1]
                output = self.forward(input)

                test_correct_cnt += int(np.argmax(output) == np.argmax(y_test[i:i+1]))
                test_error += np.sum((y_test[i:i+1] - output) ** 2)
            
            if verbose and epoch % verbose == 0:
                sys.stdout.write("\n" + \
                        "I:" + str(epoch) + \
                        " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
                        " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
                        " Train-Err:" + str(error/ float(len(images)))[0:5] +\
                        " Train-Acc:" + str(correct_cnt/ float(len(images))))

✅Load and scale our data

In [4]:
def one_hot_encoder(y_data: np.array) -> np.array:
    y_coded = np.zeros((len(y_data), 10))
    for i, l in enumerate(y_data):
        y_coded[i][l] = 1
    return y_coded

In [5]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
images, labels = x_train[:1000].reshape(1000, 28 * 28) / 255, y_train[:1000]
test_images = x_test.reshape(len(x_test), 28 * 28) / 255

In [6]:
labels = one_hot_encoder(labels)
test_labels = one_hot_encoder(y_test)

✅ Design and train our Network

In [7]:
alpha, epochs, hidden_size = (2, 500, 100)
pixels_per_image, num_labels = (784, 10)
batch_size = 100

In [8]:
neuron1 = Neuron((pixels_per_image, hidden_size), dropout=True)
neuron2 = Neuron((hidden_size, num_labels), activation='softmax', alpha_const=0.2, c_const=0.1)

network = Network([neuron1, neuron2], batch_size=batch_size, learning_rate=alpha)

In [9]:
%%time

network.train(images, labels, test_images, test_labels, epochs=epochs, verbose=10)


I:0 Test-Err:0.900 Test-Acc:0.0827 Train-Err:90.00 Train-Acc:0.067
I:10 Test-Err:0.899 Test-Acc:0.1142 Train-Err:90.00 Train-Acc:0.109
I:20 Test-Err:0.897 Test-Acc:0.1521 Train-Err:90.00 Train-Acc:0.146
I:30 Test-Err:0.896 Test-Acc:0.1935 Train-Err:90.00 Train-Acc:0.197
I:40 Test-Err:0.894 Test-Acc:0.2426 Train-Err:90.00 Train-Acc:0.263
I:50 Test-Err:0.893 Test-Acc:0.2791 Train-Err:90.00 Train-Acc:0.298
I:60 Test-Err:0.892 Test-Acc:0.3202 Train-Err:90.00 Train-Acc:0.349
I:70 Test-Err:0.890 Test-Acc:0.3584 Train-Err:90.00 Train-Acc:0.377
I:80 Test-Err:0.889 Test-Acc:0.3865 Train-Err:90.00 Train-Acc:0.425
I:90 Test-Err:0.887 Test-Acc:0.4057 Train-Err:90.01 Train-Acc:0.424
I:100 Test-Err:0.886 Test-Acc:0.4251 Train-Err:90.01 Train-Acc:0.473
I:110 Test-Err:0.884 Test-Acc:0.4517 Train-Err:90.02 Train-Acc:0.486
I:120 Test-Err:0.883 Test-Acc:0.4592 Train-Err:90.02 Train-Acc:0.502
I:130 Test-Err:0.882 Test-Acc:0.4765 Train-Err:90.02 Train-Acc:0.507
I:140 Test-Err:0.880 Test-Acc:0.489 Train-Er