In [217]:
from abc import ABC, abstractmethod
import warnings
import random
import math
from copy import deepcopy

from typing import Literal

import json

from numpy.f2py.auxfuncs import throw_error

# Activation Functions:

In [2]:
class ActivationFunction(ABC):
    """Interface implemented by every activation function in this notebook.

    A concrete activation is callable on a list of values and exposes ``ddx``
    for its derivative evaluated on a list of values.
    """

    @abstractmethod
    def __call__(self, x: list):
        """Apply the activation to the values in ``x``."""
        message = "Not implemented call method in activation function."
        raise Exception(message)

    @abstractmethod
    def ddx(self, x: list):
        """Return the derivative of the activation evaluated at ``x``."""
        message = "Not implemented d/dx method in activation function."
        raise Exception(message)


class ReLu(ActivationFunction):
    """Rectified linear unit applied element-wise to a list of values."""

    def __call__(self, x: list):
        """Return ``x`` with every non-positive entry replaced by 0."""
        return [value if value > 0 else 0 for value in x]

    def ddx(self, x: list):
        """Return the element-wise derivative: 1 where the entry is >= 0, else 0."""
        slopes = []
        for value in x:
            slopes.append(1 if value >= 0 else 0)
        return slopes


class SoftMax(ActivationFunction):
    """Softmax over a flat list of scores."""

    def __call__(self, x: list[float]):
        """Return the softmax probabilities of the scores in ``x``.

        The largest score is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``math.exp`` from raising
        ``OverflowError`` on large scores.

        :param x: Flat list of scores.
        :return: List of the same length whose entries sum to 1
                 (an empty list for empty input).
        """
        if len(x) == 0:
            return []

        shift = max(x)
        ex = [math.exp(x_i - shift) for x_i in x]
        total = sum(ex)  # computed once instead of once per output element
        return [ex_i / total for ex_i in ex]

    def ddx(self, x: list):
        """Return the Jacobian of softmax evaluated at ``x``.

        Entry ``[i][j]`` is ``s_i * (1 - s_i)`` when ``i == j`` and
        ``-s_i * s_j`` otherwise, where ``s = softmax(x)``.

        :param x: Flat list of scores.
        :return: ``len(x)`` by ``len(x)`` nested list.
        """
        ex = self(x)
        size = len(x)
        ddx = []
        for i in range(size):
            ddx.append(
                [ex[i] * (1 - ex[i]) if i == j else -1 * ex[i] * ex[j] for j in range(size)])

        return ddx


class I(ActivationFunction):  # Identity function I(x) = x
    """Identity activation: the output is the input itself."""

    def __call__(self, x: list[float]):
        """Return ``x`` unchanged (the same object, not a copy)."""
        return x

    def ddx(self, x: list[float]):
        """Return a list of ones with the same length as ``x``."""
        return [1] * len(x)




# Loss Functions:

In [3]:
class LossFunction(ABC):
    """Interface implemented by every loss function in this notebook."""

    @abstractmethod
    def __call__(self, *args, **kwargs):
        """Compute the loss value; concrete losses define the arguments."""
        return None

    @abstractmethod
    def sample_loss_grad(self, y_true_sample: list[float], y_pred_sample: list[float]):
        """Return the gradient of the loss for a single sample."""
        return None


class CrossEntropy(LossFunction):
    """Cross-entropy between target distributions and predicted probabilities."""

    def __init__(self, batch_reduction: Literal["mean", "sum"] = "mean"):
        """
        :param batch_reduction: "mean" divides the summed loss by the number of
            samples in the batch; any other value leaves the sum as is.
        """
        self.batch_reduction = batch_reduction

    def __call__(self, y_pred: list[list], y_true: list[list]):
        """Return the cross-entropy of a batch.

        :param y_pred: One list of predicted probabilities per sample.
        :param y_true: One list of target values per sample.
        :return: Summed (or averaged, see ``batch_reduction``) loss.
        :raises Exception: If the batch sizes or any per-sample sizes differ.
        """
        if len(y_pred) != len(y_true):
            raise Exception(f"y_pred len is {len(y_pred)} not y_true len {len(y_true)}.")
        batch_loss = 0
        for y_pred_sample, y_true_sample in zip(y_pred, y_true):
            if len(y_pred_sample) != len(y_true_sample):
                raise Exception(
                    f"In some sample y_pred_sample len is {len(y_pred_sample)} not y_true_sample len {len(y_true_sample)}.")
            sample_loss = 0
            for y_pred_sample_value, y_true_sample_value in zip(y_pred_sample, y_true_sample):
                # Terms with a zero target contribute nothing (0 * log(p) := 0).
                # Skipping them keeps log(0) from raising when a class the
                # target does not select was predicted with probability 0.
                if y_true_sample_value == 0:
                    continue
                sample_loss -= y_true_sample_value * math.log(y_pred_sample_value)

            batch_loss += sample_loss
        if self.batch_reduction == "mean":
            batch_loss /= len(y_pred)
        return batch_loss

    def sample_loss_grad(self, y_true_sample: list[float], y_pred_sample: list[float]):
        """Return d(loss)/d(y_pred) for one sample.

        Each entry is ``-y_true / (y_pred + 1e-5)``; the small constant keeps
        the division defined when a predicted probability is 0.
        """
        return [-y_true_sample_value / (y_pred_sample_value + 1e-5) for y_true_sample_value, y_pred_sample_value in
                zip(y_true_sample, y_pred_sample)]


class SoftmaxCrossEntropy(LossFunction):
    """Cross-entropy computed from raw scores (softmax is applied internally)."""

    def __init__(self, batch_reduction: Literal["mean", "sum"] = "mean"):
        """
        :param batch_reduction: "mean" divides the summed loss by the number of
            samples in the batch; any other value leaves the sum as is.
        """
        self.batch_reduction = batch_reduction
        self.softmax = SoftMax()

    @staticmethod
    def _log_softmax(scores: list[float]) -> list[float]:
        """Return log(softmax(scores)) computed with the log-sum-exp shift."""
        if len(scores) == 0:
            return []
        shift = max(scores)
        log_norm = math.log(sum(math.exp(value - shift) for value in scores))
        return [value - shift - log_norm for value in scores]

    def __call__(self, y_pred: list[list], y_true: list[list]):
        """Return the softmax cross-entropy of a batch.

        Softmax is applied to each sample separately. Applying it to the whole
        batch at once would hand nested lists to ``math.exp`` and fail.

        :param y_pred: One list of raw scores per sample.
        :param y_true: One list of target values per sample.
        :return: Summed (or averaged, see ``batch_reduction``) loss.
        :raises Exception: If the batch sizes or any per-sample sizes differ.
        """
        if len(y_pred) != len(y_true):
            raise Exception(f"y_pred len is {len(y_pred)} not y_true len {len(y_true)}.")

        batch_loss = 0
        for y_pred_sample, y_true_sample in zip(y_pred, y_true):
            if len(y_pred_sample) != len(y_true_sample):
                raise Exception(
                    f"In some sample y_pred_sample len is {len(y_pred_sample)} not y_true_sample len {len(y_true_sample)}.")
            sample_loss = 0
            # Working in log space avoids log(0) when a probability underflows.
            log_probabilities = self._log_softmax(y_pred_sample)
            for log_probability, y_true_sample_value in zip(log_probabilities, y_true_sample):
                sample_loss -= y_true_sample_value * log_probability

            batch_loss += sample_loss
        if self.batch_reduction == "mean":
            batch_loss /= len(y_pred)
        return batch_loss

    def sample_loss_grad(self, y_true_sample: list[float], y_pred_sample: list[float]):
        """Return d(loss)/d(scores) for one sample: ``softmax(scores) - y_true``.

        :param y_true_sample: Target values of the sample.
        :param y_pred_sample: Raw scores of the sample (softmax is applied here).
        """
        y_pred_sample = self.softmax(y_pred_sample)
        return [y_pred_sample_value - y_true_sample_value for y_true_sample_value, y_pred_sample_value in
                zip(y_true_sample, y_pred_sample)]


class MSE(LossFunction):
    """Squared-error loss with configurable per-sample and per-batch reduction."""

    def __init__(self, sample_reduntion: Literal["mean", "sum"] = "mean",
                 batch_reduction: Literal["mean", "sum"] = "mean"):
        """
        :param sample_reduntion: "mean" divides each sample's squared error by
            the number of values in the sample; any other value keeps the sum.
            (The misspelled name is kept so keyword callers keep working.)
        :param batch_reduction: "mean" divides the summed loss by the number of
            samples in the batch; any other value keeps the sum.
        """
        self.sample_reduction = sample_reduntion
        self.batch_reduction = batch_reduction

    def __call__(self, y_pred: list[list], y_true: list[list]):
        """Return the squared-error loss of a batch.

        :param y_pred: One list of predictions per sample.
        :param y_true: One list of targets per sample.
        :raises Exception: If the batch sizes or any per-sample sizes differ.
        """
        if len(y_pred) != len(y_true):
            raise Exception(f"y_pred len is {len(y_pred)} not y_true len {len(y_true)}.")

        batch_loss = 0
        for y_pred_sample, y_true_sample in zip(y_pred, y_true):

            if len(y_pred_sample) != len(y_true_sample):
                raise Exception(
                    f"In some sample y_pred_sample len is {len(y_pred_sample)} not y_true_sample len {len(y_true_sample)}.")

            sample_loss = 0
            for y_pred_sample_value, y_true_sample_value in zip(y_pred_sample, y_true_sample):
                sample_loss += (y_pred_sample_value - y_true_sample_value) ** 2
            if self.sample_reduction == "mean":
                # Divide by the number of values in the sample. Taking len() of
                # the last scalar value would raise TypeError.
                sample_loss /= len(y_true_sample)
            batch_loss += sample_loss
        if self.batch_reduction == "mean":
            batch_loss /= len(y_pred)
        return batch_loss

    def sample_loss_grad(self, y_true_sample: list[float], y_pred_sample: list[float]):
        """Return d(loss)/d(y_pred) for one sample.

        Each entry is ``2 * (y_pred - y_true)``, divided by the sample length
        when the sample reduction is "mean".

        :raises Exception: If the two lists differ in length.
        """
        if len(y_pred_sample) != len(y_true_sample):
            raise Exception(f"y_pred len is {len(y_pred_sample)}, not y_true len {len(y_true_sample)}")
        loss_grad = [2 * (y_pred_sample[i] - y_true_sample[i]) for i in range(len(y_pred_sample))]

        if self.sample_reduction == "mean":
            loss_grad = [loss_grad[i] / len(y_pred_sample) for i in range(len(loss_grad))]

        return loss_grad





# Perceptron

In [None]:
class Perceptron():

    def __init__(self, input_size, initialization_type: Literal["he", "glorot" , "xavier", "random"] = "he"):
        """
        :param input_size: Size of input sample
        :param initialization_type: Type of initialization weights and biases.
        """


        self.bias = None
        self.weight = None

        # Grad of value in backward sample
        self.grad_weight: list[float] = [0 for i in range(len(self.weight))]
        self.grad_bias: float = 0
        self.backward_grad: list[float] = [0 for i in range(len(self.weight))]  # gradient for following perceptron

        # Summed Batch Grad
        self.batch_grad_weight: list[float] = [0 for i in range(len(self.weight))]
        self.batch_grad_bias: float = 0

        # Forward values of sample
        self.forward_value: float = None
        self.forward_x: list[float] = None

        # Initialization of weights and bias:
        match initialization_type.lower():
            case "he":
                self.weight: list[float] = [random.gauss(mu=0, sigma=math.sqrt(2 / input_size)) for i in
                                            range(input_size)]
                self.bias: float = random.gauss(mu=0, sigma=math.sqrt(2 / input_size))
            case "glorot" | "xavier":
                self.weight: list[float] = [random.gauss(mu=0, sigma=math.sqrt(1 / input_size)) for i in
                                            range(input_size)]
                self.bias: float = random.gauss(mu=0, sigma=math.sqrt(1 / input_size))

            case "random":
                self.weight: list[float] = [random.gauss(mu=0, sigma=math.sqrt(1)) for i in
                                            range(input_size)]
                self.bias: float = random.gauss(mu=0, sigma=math.sqrt(1))
            case _:
                print(f"{initialization_type} is not right initialization_type")


    def __call__(self, x):
        """
        :param x:
        :return:
        """

        if len(x) != len(self.weight):
            raise Exception("Size od weights not match input size.")
        output = [self.weight[i] * x[i] for i in range(len(x))]
        output.append(self.bias)
        return sum(output)

    def forward(self, x):
        self.forward_value = self(x)
        self.forward_x = x
        return self.forward_value

    def zero_batch_grad(self):
        self.batch_grad_weight = [0 for i in range(len(self.weight))]
        self.batch_grad_bias = 0

    def zero_grad(self):
        self.grad_weight = [0 for i in range(len(self.weight))]
        self.grad_bias = 0
        self.backward_grad = [0 for i in range(len(self.weight))]  # grad z poprzedniego

    def ddw_i(self, i):
        return self.forward_x[i]

    def ddx_i(self, i):
        return self.weight[i]

    def ddw(self, previous_grad):
        for i in range(len(self.grad_weight)):
            self.grad_weight[i] += self.ddw_i(i) * previous_grad

    def ddb(self, previous_grad):
        self.grad_bias += previous_grad

    def calculate_backward_grad(self, previous_grad):
        # self.backward_grad = ([self.backward_grad[i] + self.ddx_i(i) * previous_grad for i in range(len(self.grad_weight))])
        for i in range(len(self.backward_grad)):
            self.backward_grad[i] += self.ddx_i(i) * previous_grad

    def add_grad_to_batch_grad(self):
        self.batch_grad_bias += self.grad_bias
        for i in range(len(self.grad_weight)):
            self.batch_grad_weight[i] += self.grad_weight[i]

    def normalize_batch_grad(self, batch_size: int):
        self.batch_grad_bias /= batch_size
        for i in range(len(self.grad_weight)):
            self.batch_grad_weight[i] /= batch_size



# Layers

In [4]:
class Layer(ABC):
    """Base class for a group of perceptrons that share one activation function.

    Subclasses provide ``__call__`` and ``backward_from_loss_function``; the
    helpers here fan each bookkeeping operation out to every perceptron.
    """

    def __init__(self):
        self.layer: list[Perceptron] = []
        self.activation_function: ActivationFunction = None

    @abstractmethod
    def __call__(self, x):
        """Compute the layer output for ``x``."""
        return None

    @abstractmethod
    def backward_from_loss_function(self, losses_grads: list[float]):
        """Start the backward pass from the gradient of the loss."""
        return None

    def forward(self, inputs):
        """Run ``forward`` on every perceptron, then apply the activation."""
        pre_activations = []
        for unit in self.layer:
            pre_activations.append(unit.forward(inputs))
        return self.activation_function(pre_activations)

    def get_forward(self):
        """Return the ``forward_value`` stored on each perceptron."""
        values = []
        for unit in self.layer:
            values.append(unit.forward_value)
        return values

    def print_forward(self):
        """Print the stored forward values of the layer."""
        stored_values = self.get_forward()
        print(stored_values)

    def print_grad(self):
        """Print ``[grad_weight, grad_bias]`` for every perceptron."""
        rows = [[unit.grad_weight, unit.grad_bias] for unit in self.layer]
        print(rows)

    def print_backward_grad(self):
        """Print the ``backward_grad`` of every perceptron."""
        rows = [unit.backward_grad for unit in self.layer]
        print(rows)

    def print_batch_grad(self):
        """Print ``[batch_grad_weight, batch_grad_bias]`` for every perceptron."""
        rows = [[unit.batch_grad_weight, unit.batch_grad_bias] for unit in self.layer]
        print(rows)

    def print_all(self):
        """Print backward grads, grads, batch grads and forward values, in that order."""
        for printer in (self.print_backward_grad, self.print_grad,
                        self.print_batch_grad, self.print_forward):
            printer()

    def zero_batch_grad(self):
        """Call ``zero_batch_grad`` on every perceptron."""
        for unit in self.layer:
            unit.zero_batch_grad()

    def zero_grad(self):
        """Call ``zero_grad`` on every perceptron."""
        for unit in self.layer:
            unit.zero_grad()

    def add_grad_to_batch_grad(self):
        """Call ``add_grad_to_batch_grad`` on every perceptron."""
        for unit in self.layer:
            unit.add_grad_to_batch_grad()

    def normalize_batch_grad(self, batch_size: int):
        """Call ``normalize_batch_grad(batch_size)`` on every perceptron."""
        for unit in self.layer:
            unit.normalize_batch_grad(batch_size)


#-------------------------------------------------------------------------------------

class Softmax_layer(Layer):
    """Layer of perceptrons whose outputs are passed through softmax."""

    def __init__(self, input_size: int, output_size: int):
        """
        :param input_size: Size of input sample.
        :param output_size: Number of Perceptrons in layer.
        """
        self.layer = [Perceptron(input_size=input_size) for _ in range(output_size)]
        self.activation_function = SoftMax()

    def __call__(self, inputs: list):
        """Return the softmax of the perceptron outputs without caching anything."""
        raw_scores = [unit(inputs) for unit in self.layer]
        return self.activation_function(raw_scores)

    def backward_from_loss_function(self, losses_grads: list[float]):
        """Push the loss gradient through the softmax Jacobian into each perceptron.

        For every loss-gradient entry ``i`` and every perceptron ``j``, the
        product of that entry with Jacobian element ``[i][j]`` is accumulated
        into the perceptron's bias, weight and input gradients.
        """
        jacobian = self.activation_function.ddx(self.get_forward())

        for i, upstream in enumerate(losses_grads):
            for j, unit in enumerate(self.layer):
                contribution = upstream * jacobian[i][j]
                unit.ddb(contribution)
                unit.ddw(contribution)
                unit.calculate_backward_grad(contribution)


#-------------------------------------------------------------------------------------


class Linear(Layer):

    def __init__(self, input_size: int, output_size: int,
                 activation_function: Literal["ReLu", "Sigmoid", "Tanh", "LeakyRelu", "I"] = "ReLu"):
        """
        :param input_size: Size of input sample.
        :param output_size: Number of Perceptrons in layer.
        :param activation_function: Activation function of Perceptrons.
        """

        self.activation_function: ActivationFunction = None
        self.layer: list[Perceptron] = []

        match activation_function:
            case "ReLu":
                self.activation_function = ReLu()
                self.layer = [Perceptron(input_size=input_size, initialization_type="he") for i in range(output_size)]
            case "I":
                self.activation_function = I()
                self.layer = [Perceptron(input_size=input_size) for i in range(output_size)]
            # TODO:
            # case "Sigmoid":
            #     self.activation_function = ReLu()
            #     self.layers = [Perceptron() for i in range(output_size)]
            # case "Tanh":
            #     self.activation_function = ReLu()
            #     self.layers = [Perceptron() for i in range(output_size)]
            # case "LeakyReLu":
            #     self.activation_function = ReLu()
            #     self.layers = [Perceptron() for i in range(output_size)]

            # case "SoftMax":
            #     self.activation_function = SoftMax()
            #     self.layer = [Perceptron(input_size=input_size) for i in range(output_size)]

            case _:
                warnings.warn(f"{activation_function} is not a activation function.")
                self.activation_function = lambda x: x
                self.layer = [Perceptron(input_size=input_size) for i in range(output_size)]

    def __call__(self, inputs: list):
        outputs = [perceptron(inputs) for perceptron in self.layer]
        return self.activation_function(outputs)

    def backward_from_loss_function(self, losses_grads: list[float]):
        for perceptron, previous_grad, d_activation_dx in zip(self.layer, losses_grads,
                                                              self.activation_function.ddx(self.get_forward())):
            previous_grad = previous_grad * d_activation_dx
            perceptron.ddb(previous_grad)
            perceptron.ddw(previous_grad)
            perceptron.calculate_backward_grad(previous_grad)

    def backward_from_previous_layer(self, previous_layer: "Linear"):
        activation_function_grad = self.activation_function.ddx(self.get_forward())

        for previous_perceptron in previous_layer.layer:
            self.backward_from_previous_perceptron(previous_perceptron, activation_function_grad)

    def backward_from_previous_perceptron(self, previous_perceptron: Perceptron, activation_function_grad: list[float]):
        for perceptron, previous_grad, d_activation_dx in zip(self.layer, previous_perceptron.backward_grad,
                                                              activation_function_grad):
            previous_grad = previous_grad * d_activation_dx
            perceptron.ddb(previous_grad)
            perceptron.ddw(previous_grad)
            perceptron.calculate_backward_grad(previous_grad)


# Model

In [243]:
class Model(ABC):
    @abstractmethod
    def __init__(self):
        self.layers: list[Layer] = [Linear(2, 2, "ReLu"),
                                    Linear(2, 2, "ReLu"),
                                    Linear(2, 2, "I")]

    def __call__(self, x_original: list):
        x = deepcopy(x_original)
        for layer in self.layers:
            x = layer(x)
        return x

    def forward(self, x_original):
        x = deepcopy(x_original)
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def zero_batch_grad(self):
        for layer in self.layers:
            layer.zero_batch_grad()

    def zero_grad(self):
        for layer in self.layers:
            layer.zero_grad()

    def add_grad_to_batch_grad(self):
        for layer in self.layers:
            layer.add_grad_to_batch_grad()

    def normalize_batch_grad(self, batch_size: int):
        for layer in self.layers:
            layer.normalize_batch_grad(batch_size)

    def backward(self, loss_grad: list[float] = None):
        self.zero_grad()

        linear = self.layers[-1]

        if loss_grad is None:
            loss_grad = [1 for i in range(len(linear.layer))]

        self.layers.reverse()
        try:
            linear = self.layers[0]

            linear.backward_from_loss_function(loss_grad)

            for i in range(1, len(self.layers)):
                linear = self.layers[i]
                previous_linear = self.layers[i - 1]

                linear.backward_from_previous_layer(previous_linear)

        except Exception as e:
            self.layers.reverse()
            raise e

        self.layers.reverse()

    def print_values(self, type: str):
        self.layers.reverse()
        try:
            match type:
                case "backward_grad":
                    print("Backward_grad:")
                    for layer in self.layers:
                        layer.print_backward_grad()
                case "grad":
                    print("Weight_Bias_grad:")
                    for layer in self.layers:
                        layer.print_grad()
                case "forward":
                    print("Forward_X:")
                    for layer in self.layers:
                        layer.print_forward()
                case "batch":
                    print("Batch_Weight_Bias_grad")
                    for layer in self.layers:
                        layer.print_batch_grad()
                case "all":
                    print("Backward_grad:")
                    for layer in self.layers:
                        layer.print_backward_grad()
                    print("Weight_Bias_grad:")
                    for layer in self.layers:
                        layer.print_grad()
                    print("Batch_Weight_Bias_grad")
                    for layer in self.layers:
                        layer.print_batch_grad()
                    print("Forward_X:")
                    for layer in self.layers:
                        layer.print_forward()
                case _:
                    print(f"{type} is not correct type of print.")


        except Exception as e:
            self.layers.reverse()
            raise e

        self.layers.reverse()


class Optimizer(ABC):
    """Interface for objects that update a model's parameters."""

    @abstractmethod
    def __init__(self):
        """Concrete optimizers define their own construction arguments."""
        return None

    @abstractmethod
    def step(self):
        """Apply one parameter update."""
        return None


class SGD(Optimizer):
    """Plain gradient descent on the batch gradients stored in each perceptron."""

    def __init__(self, model: Model, learning_rate: float = 1e-3):
        """
        :param model: Model whose perceptrons are updated by ``step``.
        :param learning_rate: Factor applied to every batch gradient.
        """
        self.learning_rate = learning_rate
        self.model = model

    def step(self):
        """Subtract ``learning_rate * batch gradient`` from every weight and bias.

        Each perceptron receives a newly built weight list; the old list is
        not modified.
        """
        for layer in self.model.layers:
            for unit in layer.layer:
                updated_weights = []
                for current, gradient in zip(unit.weight, unit.batch_grad_weight):
                    updated_weights.append(current - gradient * self.learning_rate)
                unit.weight = updated_weights
                unit.bias -= unit.batch_grad_bias * self.learning_rate



# Testing on MNIST
## Fetching and preparing the data:

In [50]:
from sklearn.datasets import fetch_openml


def one_hot_encode(sample: float, num_classes: int = 10):
    """Return the one-hot encoding of a class label.

    :param sample: Class label; anything ``int()`` accepts (for example
        ``3``, ``3.0`` or ``"3"``).
    :param num_classes: Length of the returned vector (10 by default).
    :return: List of ``num_classes`` zeros with a 1 at the label's position.
    :raises IndexError: If the label is outside ``[0, num_classes)``. Negative
        labels are rejected too, instead of silently counting from the end.
    """
    label = int(sample)
    if not 0 <= label < num_classes:
        raise IndexError(f"label {label} is outside the range [0, {num_classes})")
    one_hot_sample = [0] * num_classes
    one_hot_sample[label] = 1
    return one_hot_sample


def normalize_in_sample(sample: list[float]):
    """Return a new list with every value of ``sample`` divided by 255.0."""
    return [value / 255.0 for value in sample]


# Download the MNIST dataset
mnist = fetch_openml('mnist_784', version=1)

# Features are scaled by normalize_in_sample; targets are one-hot encoded.
X = [list(normalize_in_sample(pixels)) for pixels in mnist["data"].to_numpy()]
y = [one_hot_encode(label) for label in mnist["target"].to_numpy()]


In [52]:
# The first 60,000 samples are used for training, the remainder for testing.
X_train = X[:60000]
X_test = X[60000:]
y_train = y[:60000]
y_test = y[60000:]


In [231]:
# Convert each one-hot test target back to its class index.
y_true = [one_hot.index(max(one_hot)) for one_hot in y_test]

## Building and training model

In [132]:
class MyModel(Model):
    """Stack of ReLu ``Linear`` layers with an identity output layer."""

    def __init__(self, input_size=len(X_train[0]), n_neurons_in_layer=100, n_layers=5, output_size=len(y_train[0])):
        """
        :param input_size: Number of values in one input sample.
        :param n_neurons_in_layer: Width of every hidden layer.
        :param n_layers: Total number of layers; one input layer, one output
            layer and ``n_layers - 2`` hidden layers are created.
        :param output_size: Number of values the model outputs.
        """
        self.layers: list[Layer] = [Linear(input_size, n_neurons_in_layer, "ReLu")]

        self.layers += [Linear(n_neurons_in_layer, n_neurons_in_layer, "ReLu") for i in range(n_layers - 2)]

        self.layers.append(Linear(n_neurons_in_layer, output_size, "I"))

    def train(self, X, y, epochs=1, batch_size=64, learning_rate=0.01, verbose=True):
        """Train with mini-batch SGD on the softmax cross-entropy loss.

        :param X: Sequence of input samples.
        :param y: Sequence of targets, one per sample.
        :param epochs: Number of passes over the data.
        :param batch_size: Number of samples per parameter update.
        :param learning_rate: Step size handed to ``SGD``.
        :param verbose: When true, print the running sample counter at the
            start of every batch and after every sample.
        :raises ValueError: If ``X`` and ``y`` differ in length or
            ``batch_size`` is not positive.
        """
        if len(X) != len(y):
            raise ValueError(f"X has {len(X)} samples but y has {len(y)}.")
        if batch_size < 1:
            raise ValueError(f"batch_size must be positive, got {batch_size}.")

        criterion = SoftmaxCrossEntropy()
        optimizer = SGD(self, learning_rate)

        # Slicing past the end is safe, so the last batch is simply shorter.
        batch_starts = range(0, len(X), batch_size)
        X_batches = [X[start: start + batch_size] for start in batch_starts]
        y_batches = [y[start: start + batch_size] for start in batch_starts]

        for _ in range(epochs):
            j = 0
            for X_batch, y_batch in zip(X_batches, y_batches):
                if verbose:
                    print(j)

                self.zero_batch_grad()
                for X_sample, y_sample in zip(X_batch, y_batch):
                    y_pred_sample = self.forward(X_sample)
                    loss_grad = criterion.sample_loss_grad(y_sample, y_pred_sample)
                    self.backward(loss_grad)
                    self.add_grad_to_batch_grad()
                    if verbose:
                        print(j)
                    j += 1
                # Average the accumulated gradients over the batch, then update.
                self.normalize_batch_grad(len(X_batch))
                optimizer.step()


# Build the network with its default arguments. `softmax` is used in later
# cells to turn the model's raw outputs into probabilities.
model = MyModel()
softmax = SoftMax()

In [238]:
# Train for one epoch on a 1,000-sample slice of the training set.
model.train(X_train[16000:17000], y_train[16000:17000], epochs=1, batch_size=64)

0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
2

In [239]:
# Predicted class index (position of the largest probability) per test sample.
y_pred = []
for test_sample in X_test:
    probabilities = softmax(model(test_sample))
    y_pred.append(probabilities.index(max(probabilities)))

In [240]:
# 1 for every correct prediction, 0 otherwise; the mean is the accuracy.
score = [int(predicted == expected) for predicted, expected in zip(y_pred, y_true)]
sum(score) / len(score)

0.8079

In [241]:
# Snapshot of all parameters: one [weight, bias] pair per perceptron, grouped
# by layer and deep-copied so later training does not change the snapshot.
state_list = deepcopy([
    [[perceptron.weight, perceptron.bias] for perceptron in layer.layer]
    for layer in model.layers
])


In [242]:
# with open("state_list_1___k", "w") as f:
#     json.dump(state_list, f)

In [223]:
# with open("state_list_1_17k", "r") as f:
#    state_list_ = json.load(f)


In [191]:
# idx = 54
# y_pred_idx = softmax(model(X_train[idx]))
# y_pred_idx, y_train[idx]
#
# y_pred_idx.index(max(y_pred_idx))

0.784

In [229]:
# Print each prediction next to its true label.
for predicted, expected in zip(y_pred, y_true):
    print(predicted, expected)

7 7
2 2
1 1
0 0
4 4
1 1
4 4
9 9
6 5
9 9
0 0
0 6
9 9
0 0
1 1
5 5
9 9
7 7
8 3
4 4
7 9
6 6
6 6
5 5
7 4
0 0
7 7
4 4
0 0
1 1
3 3
1 1
3 3
6 4
7 7
2 2
7 7
1 1
3 2
1 1
1 1
7 7
4 4
1 2
3 3
3 5
3 1
2 2
4 4
4 4
6 6
3 3
5 5
5 5
2 6
8 0
4 4
1 1
9 9
7 5
7 7
6 8
9 9
2 3
7 7
9 4
2 6
4 4
3 3
0 0
7 7
0 0
2 2
7 9
1 1
7 7
3 3
7 2
7 9
7 7
9 7
6 6
2 2
7 7
8 8
4 4
7 7
3 3
6 6
1 1
3 3
6 6
7 9
3 3
1 1
4 4
1 1
1 7
6 6
4 9
6 6
0 0
5 5
4 4
4 9
9 9
2 2
1 1
9 9
4 4
3 8
1 7
3 3
9 9
7 7
4 4
9 4
4 4
9 9
7 2
5 5
4 4
7 7
6 6
4 7
4 9
0 0
5 5
8 8
5 5
6 6
6 6
5 5
7 7
8 8
1 1
0 0
1 1
6 6
4 4
6 6
7 7
3 3
1 1
7 7
1 1
8 8
2 2
0 0
2 2
9 9
8 9
5 5
5 5
1 1
5 5
6 6
6 0
3 3
4 4
4 4
6 6
5 5
4 4
6 6
5 5
4 4
2 5
1 1
9 4
4 4
7 7
3 2
3 3
2 2
1 7
1 1
8 8
1 1
8 8
1 1
8 8
8 5
0 0
3 8
4 9
2 2
5 5
0 0
1 1
1 1
3 1
0 0
8 9
0 0
3 3
1 1
6 6
9 4
2 2
3 3
6 6
1 1
1 1
1 1
3 3
9 9
5 5
2 2
9 9
4 4
7 5
9 9
3 3
9 9
5 0
3 3
8 6
7 5
3 5
7 7
2 2
6 2
7 7
1 1
2 2
8 8
7 4
1 1
7 7
3 3
3 3
8 8
7 8
7 7
9 9
2 2
2 2
4 4
1 1
5 5
8 9
7 8
9 7
1 2
0 3
0 0
2 4
4 4
1 2


In [1003]:
# Single-sample experiment: compare the predicted distribution before and
# after one training call on the same sample.
# NOTE(review): MyModel.train slices X into batches and then iterates each
# batch sample by sample, so the flat lists passed below would be treated as
# two scalar samples. This cell probably predates that version of train and
# needs a model with two inputs and two outputs. Confirm before re-running.
softmax = SoftMax()
X_sample = [1, 2]
y_true_sample = [1, 0]
y_pred_sample = softmax(model(X_sample))
print(y_pred_sample)
model.train(X_sample, y_true_sample)
y_pred_sample = softmax(model(X_sample))
print(y_pred_sample)


[0.1513408678029315, 0.8486591321970686]
[0.9988768091391338, 0.001123190860866108]


In [677]:
# Print every diagnostic group (backward grads, weight/bias grads, batch
# grads, forward values) for each layer, last layer first.
model.print_values("all")

Backward_grad:
[[-0.0704169225295599, -0.20019613722631815, -0.10421513966430072, -0.018232503849194278], [0.03812876083519064, -0.05338942538639418, 0.1526189953351175, -0.03379038140792716]]
[[0.028485830153055627, -0.026679090226682106, -0.03190396275626911, 0.00895377536049324], [0.052156511464427034, -0.36980508869310574, -0.07164481543081117, 0.18113445333869466], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]
Weight_Bias_grad:
[[[-0.24679333634383924, -0.016455746593804672, 0.0, 0.0], -0.17990025328573633], [[0.2467933363438393, 0.016455746593804672, 0.0, 0.0], 0.17990025328573636]]
[[[0.0, 0.0, 0.0, 0.0], -0.03228816169436926], [[0.0, 0.0, 0.0, 0.0], -0.2535855626127123], [[0.0, 0.0, 0.0, 0.0], 0.0], [[0.0, 0.0, 0.0, 0.0], 0.0]]
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
Batch_Weight_Bias_grad
[[[-0.24679333634383924, -0.016455746593804672, 0.0, 0.0], -0.17990025328573633], [[0.2467933363438393, 0.0

In [116]:
# Calling the model runs inference only; forward() additionally stores the
# values backward() needs. backward() without an argument seeds the output
# gradient with ones.
print(model([1, 2]))
print(model.forward([1, 2]))
model.backward()

[138, 138]
[138, 138]


In [88]:
# Accumulate the current per-sample gradients into the batch gradients three
# times, then divide the accumulated batch gradients by 3.
model.add_grad_to_batch_grad()
model.add_grad_to_batch_grad()
model.add_grad_to_batch_grad()
model.normalize_batch_grad(3)

In [89]:
# Print all diagnostic groups again to inspect the batch gradients computed
# in the previous cell.
model.print_values("all")

Backward_grad
[[2, 2], [2, 2]]
[[8, 8], [8, 8]]
[[32, 32], [32, 32]]
Grads:
[[[34, 34], 1], [[34, 34], 1]]
[[[32, 32], 4], [[32, 32], 4]]
[[[16, 32], 16], [[16, 32], 16]]
Batch_grad:
[[[45.333333333333336, 45.333333333333336], 1.3333333333333333], [[45.333333333333336, 45.333333333333336], 1.3333333333333333]]
[[[42.666666666666664, 42.666666666666664], 5.333333333333333], [[42.666666666666664, 42.666666666666664], 5.333333333333333]]
[[[21.333333333333332, 42.666666666666664], 21.333333333333332], [[21.333333333333332, 42.666666666666664], 21.333333333333332]]
Forward
[138, 138]
[34, 34]
[8, 8]


In [517]:
# Sanity check: an attribute assigned through list indexing is stored on the
# Perceptron object held by the list.
lis = [Perceptron(2), Perceptron(1)]

for i in range(len(lis)):
    lis[i].backward_grad = 100

# Displayed as the cell output.
lis[0].backward_grad

100

In [None]:
# model_layerr = deepcopy(model.layers)

# for layer, layer_old in zip(model.layers, model_layerr ):
#     for perceptron, perceptron_old in zip(layer.layer, layer_old.layer):
#         perceptron.weight = deepcopy(perceptron_old.weight)
#         perceptron.bias = deepcopy(perceptron_old.bias)
#
#

# model.layers = deepcopy(model_layerr)
# model.layers[2].activation_function = SoftMax()