In [None]:
# Smoke-test cell: emits a fixed greeting to confirm the kernel runs.
print("hello world")

: 

In [2]:
import numpy as np

import tensorflow as tf

class Network():
    """Fully connected layer computing ``inputs @ weight + bias``.

    Besides the parameters, the layer keeps the state needed for the
    ``'adam'`` optimizer: running averages of the gradients (``vdw``/``vdb``),
    running averages of the squared gradients (``sdw``/``sdb``) and a step
    counter ``t`` used for bias correction.

    Args:
        nnodes: Number of output units (columns of ``weight``).
        ninputs: Number of input features (rows of ``weight``).
        lamda: L2 penalty strength. ``0`` (the default) disables the penalty.
    """

    # Adam hyper-parameters shared by calculate() and update().
    BETA1 = 0.9
    BETA2 = 0.999
    EPSILON = 1e-8

    def __init__(self, nnodes, ninputs, lamda=0):
        self.lamda = lamda
        # Normal draws scaled by sqrt(2 / ninputs) (He-style scaling).
        self.weight = np.random.randn(ninputs, nnodes) * np.sqrt(2. / ninputs)
        self.bias = np.random.rand(nnodes) * 0.01
        # Squared-gradient running averages (Adam second moment).
        self.sdw = np.zeros((ninputs, nnodes))
        self.sdb = np.zeros(nnodes)
        # Gradient running averages (Adam first moment).
        self.vdw = np.zeros((ninputs, nnodes))
        self.vdb = np.zeros(nnodes)
        # Number of Adam steps taken so far.
        self.t = 0

    def forward(self, inputs):
        """Return ``inputs @ weight + bias`` and cache ``inputs`` for backward()."""
        self.input = inputs
        self.output = np.dot(inputs, self.weight) + self.bias
        return self.output

    def backward(self, gradient):
        """Compute parameter gradients and return the gradient w.r.t. the input.

        Args:
            gradient: Gradient of the loss w.r.t. this layer's output, with one
                row per sample of the batch last passed to forward().

        Returns:
            Gradient of the loss w.r.t. the layer input.
        """
        self.gradient_weight = np.dot(self.input.T, gradient)
        if self.lamda:
            # Derivative of the L2 penalty 0.5 * lamda * sum(weight**2) / batch
            # so that the penalty actually influences the weight update.
            self.gradient_weight = (
                self.gradient_weight + self.lamda * self.weight / len(self.input)
            )
        self.gradient_bias = np.sum(gradient, axis=0)
        self.gradient_input = np.dot(gradient, self.weight.T)
        return self.gradient_input

    def calculate(self, optimizer):
        """Refresh the Adam moment estimates from the latest gradients.

        Does nothing unless ``optimizer == 'adam'``. Must be called after
        backward() and before ``update(..., 'adam')``.
        """
        if optimizer != 'adam':
            return

        self.t += 1
        beta1, beta2 = self.BETA1, self.BETA2

        self.sdw = beta2 * self.sdw + (1 - beta2) * (self.gradient_weight ** 2)
        self.sdb = beta2 * self.sdb + (1 - beta2) * (self.gradient_bias ** 2)

        self.vdw = beta1 * self.vdw + (1 - beta1) * self.gradient_weight
        self.vdb = beta1 * self.vdb + (1 - beta1) * self.gradient_bias

        # Bias correction: the averages start at zero, so early estimates
        # are rescaled by 1 / (1 - beta ** t).
        second_scale = 1 - beta2 ** self.t
        first_scale = 1 - beta1 ** self.t
        self.sdw_corrected = self.sdw / second_scale
        self.sdb_corrected = self.sdb / second_scale
        self.vdw_corrected = self.vdw / first_scale
        self.vdb_corrected = self.vdb / first_scale

    def update(self, learning_rate, optimizer):
        """Apply one in-place parameter update.

        With ``optimizer == 'adam'`` the bias-corrected moments stored by
        calculate() are used; any other value performs plain gradient descent.
        """
        if optimizer == 'adam':
            self.weight -= learning_rate * self.vdw_corrected / (np.sqrt(self.sdw_corrected) + self.EPSILON)
            self.bias -= learning_rate * self.vdb_corrected / (np.sqrt(self.sdb_corrected) + self.EPSILON)
        else:
            self.weight -= learning_rate * self.gradient_weight
            self.bias -= learning_rate * self.gradient_bias

    def l2(self):
        """Return the sum of squared weights (bias excluded)."""
        return np.sum(self.weight ** 2)

class Relu():
    """Rectified linear activation: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Clamp negative entries to zero, remembering ``inputs`` for backward()."""
        self.input = inputs
        rectified = np.maximum(0, inputs)
        self.output = rectified
        return rectified

    def backward(self, gradients):
        """Pass gradients through only where the cached input was positive."""
        active = self.input > 0
        self.gradient = np.multiply(gradients, active)
        return self.gradient

class Sigmoid():
    """Logistic activation ``1 / (1 + exp(-x))`` applied element-wise."""

    def forward(self, inputs):
        """Return the sigmoid of ``inputs`` and cache it for backward().

        Uses ``exp(-|x|)`` so the exponential never overflows: for ``x >= 0``
        the result is ``1 / (1 + e)`` and for ``x < 0`` it is ``e / (1 + e)``,
        which are both algebraically equal to ``1 / (1 + exp(-x))``.
        """
        self.input = inputs
        decay = np.exp(-np.abs(inputs))  # always in (0, 1]
        self.output = np.where(
            np.asarray(inputs) >= 0,
            1 / (1 + decay),
            decay / (1 + decay),
        )
        return self.output

    def backward(self, dvalues):
        """Scale ``dvalues`` by the sigmoid derivative ``s * (1 - s)``."""
        self.dinputs = dvalues * (1 - self.output) * self.output
        return self.dinputs

class Softmax():
    """Row-wise softmax producing one probability vector per sample."""

    def forward(self, inputs):
        """Return softmax over axis 1, caching both input and output."""
        self.input = inputs
        # Subtract each row's maximum before exponentiating so exp() stays bounded.
        row_peak = np.max(inputs, axis=1, keepdims=True)
        unnormalised = np.exp(inputs - row_peak)
        row_total = np.sum(unnormalised, axis=1, keepdims=True)
        self.output = unnormalised / row_total
        return self.output

    def backward(self, gradient):
        """Return ``gradient`` unchanged (no derivative is applied here)."""
        return gradient

class CategoricalCrossEntropyLoss():
    """Cross-entropy between predicted probabilities and one-hot style targets."""

    def forward(self, probs, true_outputs, layers):
        """Return the batch-averaged cross-entropy plus the L2 penalty.

        Args:
            probs: Predicted probabilities, one row per sample.
            true_outputs: Target array with the same shape as ``probs``.
            layers: Objects exposing ``lamda`` and ``l2()``; each contributes
                ``lamda * l2()`` to the penalty.
        """
        n_samples = len(true_outputs)

        # Keep probabilities away from 0 and 1 so log() stays finite.
        safe_probs = np.clip(probs, 1e-7, 1 - 1e-7)
        data_term = -np.sum(true_outputs * np.log(safe_probs)) / n_samples

        penalties = []
        for layer in layers:
            penalties.append(layer.lamda * np.sum(layer.l2()))
        penalty_term = 0.5 * np.sum(penalties) / n_samples

        return data_term + penalty_term

    def backward(self, probs, true_outputs):
        """Return ``(probs - true_outputs) / batch_size`` and store it in ``dinputs``."""
        batch = len(true_outputs)
        residual = probs - true_outputs
        self.dinputs = residual / batch
        return self.dinputs

class BinaryCrossEntropyLoss():
    """Binary cross-entropy on probabilities (e.g. the output of ``Sigmoid``)."""

    def forward(self, y_pred, y_true, layers):
        """Return the mean binary cross-entropy plus the L2 penalty.

        Args:
            y_pred: Predicted probabilities.
            y_true: Targets with the same shape as ``y_pred``.
            layers: Objects exposing ``lamda`` and ``l2()``; each contributes
                ``lamda * l2()`` to the penalty, mirroring
                ``CategoricalCrossEntropyLoss.forward``. ``None`` or an empty
                sequence means no penalty.
        """
        # Keep probabilities away from 0 and 1 so both log() calls stay finite.
        y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
        loss_data = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

        if layers is None:
            layers = ()
        l2_terms = [layer.lamda * np.sum(layer.l2()) for layer in layers]
        loss_weight = 0.5 * np.sum(l2_terms) / len(y_true)
        return loss_data + loss_weight

    def backward(self, dvalues, y_true):
        """Return the gradient of the data loss w.r.t. the predicted probabilities.

        The derivative of ``-(y*log(p) + (1-y)*log(1-p))`` with respect to ``p``
        is ``(p - y) / (p * (1 - p))``. The activation's own backward pass
        (``Sigmoid.backward`` multiplies by ``p * (1 - p)``) then yields the
        gradient w.r.t. the pre-activation; returning ``(p - y) / n`` here
        would apply the sigmoid derivative twice.

        Args:
            dvalues: Predicted probabilities (the value passed to forward()).
            y_true: Targets with the same shape as ``dvalues``.
        """
        dvalues = np.clip(dvalues, 1e-7, 1 - 1e-7)
        # forward() averages over every element, hence the division by size.
        self.dinputs = (dvalues - y_true) / (dvalues * (1 - dvalues)) / np.size(y_true)
        return self.dinputs

class Fit():
    """Mini-batch training loop over a stack of layers.

    Args:
        layers: Trainable layers; each must provide ``calculate(optimizer)``
            and ``update(learning_rate, optimizer)``. Also handed to the loss.
        loss_function: Object with ``forward(outputs, targets, layers)`` and
            ``backward(outputs, targets)``.
        layers_for_fit: Every layer (trainable or not) in forward order; each
            must provide ``forward(x)`` and ``backward(gradient)``.
        learning_rate: Step size passed to each layer's ``update``.
        optimizer: Optimizer name forwarded to the layers
            (default ``'gradient'``).
    """

    def __init__(self, layers, loss_function, layers_for_fit, learning_rate, optimizer='gradient'):
        self.layers_for_fit = layers_for_fit
        self.layers = layers
        self.loss_function = loss_function
        self.learning_rate = learning_rate
        self.optimizer = optimizer

    def fit(self, epochs, X_train, y_train, batch_size):
        """Train for ``epochs`` passes, printing the mean batch loss per epoch.

        Batches are consecutive slices of ``batch_size`` samples taken in the
        given order; the last batch may be smaller.

        Args:
            epochs: Number of passes over the data.
            X_train: Training inputs, sliceable along the first axis.
            y_train: Training targets aligned with ``X_train``.
            batch_size: Number of samples per batch.
        """
        n_samples = len(X_train)
        for epoch in range(epochs):
            epoch_loss = 0
            n_batches = 0
            for start in range(0, n_samples, batch_size):
                batch_inputs = X_train[start:start + batch_size]
                batch_true_outputs = y_train[start:start + batch_size]

                # Forward pass through every layer.
                x = batch_inputs
                for layer in self.layers_for_fit:
                    x = layer.forward(x)

                epoch_loss += self.loss_function.forward(x, batch_true_outputs, self.layers)
                n_batches += 1

                # Backward pass in reverse layer order.
                gradient = self.loss_function.backward(x, batch_true_outputs)
                for layer in reversed(self.layers_for_fit):
                    gradient = layer.backward(gradient)

                # Refresh optimizer state on all layers before any weights move.
                for layer in self.layers:
                    layer.calculate(self.optimizer)

                for layer in self.layers:
                    layer.update(self.learning_rate, self.optimizer)

            # Divide by the real batch count so a partial final batch does not
            # skew the average; report NaN when there was nothing to train on.
            mean_loss = epoch_loss / n_batches if n_batches else float("nan")
            print(f"Epoch {epoch + 1}, Loss: {mean_loss}")



In [None]:

# Load the MNIST train/test splits through the Keras dataset helper.
data = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = data.load_data()

# Flatten every image to a 784-long row and rescale by 1/255.
X_train = X_train.reshape(-1, 784) / 255.0
X_test = X_test.reshape(-1, 784) / 255.0

# Turn the training labels into rows of a 10x10 identity matrix (one-hot).
y_train = np.eye(10)[y_train]

# Network: 784 -> 128 -> 64 -> 10; the two hidden layers carry an L2 penalty.
layer1 = Network(nnodes=128, ninputs=784, lamda=0.01)
layer2 = Network(nnodes=64, ninputs=128, lamda=0.01)
layer3 = Network(nnodes=10, ninputs=64, lamda=0)
relu1, relu2 = Relu(), Relu()
softmax = Softmax()
loss_function = CategoricalCrossEntropyLoss()

# Full forward order (activations included) vs. the trainable layers only.
layers_for_fit = [layer1, relu1, layer2, relu2, layer3, softmax]
layers = [layer1, layer2, layer3]

# Training configuration and run.
batch_size = 32
learning_rate = 0.0001
optimizer = 'adam'
model = Fit(
    layers=layers,
    loss_function=loss_function,
    layers_for_fit=layers_for_fit,
    learning_rate=learning_rate,
    optimizer=optimizer,
)
model.fit(epochs=5, X_train=X_train, y_train=y_train, batch_size=batch_size)
