In [438]:
from abc import ABC, abstractmethod
import warnings
import random
import math
from copy import deepcopy

from typing import Literal

from numpy.f2py.auxfuncs import throw_error

In [542]:
class ActivationFunction(ABC):

    @abstractmethod
    def __call__(self, x: list):
        raise Exception("Not implemented call method in activation function.")

    @abstractmethod
    def ddx(self, x:list):
        raise Exception("Not implemented d/dx method in activation function.")


class ReLu(ActivationFunction):
    def __call__(self, x: list):
        return [max(0, x_i) for x_i in x]

    def ddx(self, x : list):

        return [1 if x_i >= 0 else 0 for x_i in x]


class SoftMax(ActivationFunction):

    # def __init__(self, output_size: int):

    def __call__(self, x:list):
        # return x
        ex = [math.exp(x_i) for x_i in x]
        return [ex_i / sum(ex) for ex_i in ex]

    def ddx(self, x:list):
        # return [[1,0],[0,1]]
        ex = [math.exp(x_i) for x_i in x]
        ddx=[]
        for i in range(len(x)):
            ddx.append([ ex[i]*(1-ex[i])/sum(ex)**2 if i == j else ex[i]*ex[j]/sum(ex)**2 for j in range(len(x)) ])
        print(ddx)
        return ddx



In [543]:
class Optimizer(ABC):
    pass

class SGD(Optimizer):


SyntaxError: incomplete input (1547674395.py, line 5)

In [544]:
class LossFunction(ABC):
    @abstractmethod
    def __call__(self, *args, **kwargs):
        pass

    def ddx(self):
        pass


class MSE(LossFunction):

    def __init__(self, sample_reduntion : Literal["mean", "sum"] = "mean", batch_reduction : Literal["mean", "sum"] = "mean"):
        self.sample_reduction = sample_reduntion
        self.batch_reduction = batch_reduction

    def __call__(self, y_pred: list[list], y_true: list[list]):
        if len(y_pred)!=len(y_true):
            raise Exception(f"y_pred len is {len(y_pred)} not y_true len {len(y_true)}.")


        batch_loss = 0
        for y_pred_sample, y_true_sample in zip(y_pred, y_true):

            if len(y_pred_sample)!=len(y_true_sample):
                raise Exception(f"In some sample y_pred_sample len is {len(y_pred_sample)} not y_true_sample len {len(y_true_sample)}.")

            sample_loss = 0
            for y_pred_sample_value, y_true_sample_value in zip(y_pred_sample, y_true_sample):
                sample_loss += (y_pred_sample_value - y_true_sample_value)**2
            if self.sample_reduction == "mean":
                sample_loss/=len(y_true_sample_value)
            batch_loss += sample_loss
        if self.batch_reduction == "mean":
            batch_loss /=len(y_pred)
        return batch_loss

    def batch_forward_backward( self, model, y_true : list[list], X: list[list]):

        if len(y_true)!=len(X):
            raise Exception(f"y_pred len is {len(y_true)} not y_true len {len(X)}.")

        model.zero_batch_grad()

        for y_true_sample, X_sample in zip(y_true, X):
            self.sample_forward_backward(model, y_true_sample, X_sample)
            model.add_grad_to_batch_grad()

        if self.batch_reduction == "mean":
            model.normalize_batch_grad(len(y_true))


    def sample_forward_backward(self, model, y_true_sample, X_sample):
        y_pred_sample = model(X_sample)


        if len(y_pred_sample) != len(y_true_sample):
            raise Exception(f"y_pred len is {len(y_pred_sample)}, not y_true len {len(y_true_sample)}")

        model.forward(X_sample)
        loss_grad = [2*(y_pred_sample[i] - y_true_sample[i]) for i in range(len(y_pred_sample))]


        if self.sample_reduction == "mean":
            loss_grad = [loss_grad[i] / len(y_pred_sample) for i in range (len(loss_grad))]

        # print(loss_grad)
        model.backward(loss_grad)





In [549]:
class Layer(ABC):

    def __init__(self):
        self.activation_function : ActivationFunction =None
        self.layer :list[Perceptron] = []

    @abstractmethod
    def __call__(self, x):
        pass


    def forward(self, inputs):
        outputs = [perceptron.forward(inputs) for perceptron in self.layer]
        return self.activation_function(outputs)

    def get_forward(self):
        return [perceptron.forward_value for perceptron in self.layer]
    def print_forward(self):
        print(self.get_forward())

    def print_grad(self):
        print([[perceptron.grad_weight, perceptron.grad_bias] for perceptron in self.layer])

    def print_backward_grad(self):
        print([perceptron.backward_grad for perceptron in self.layer])

    def print_batch_grad(self):
        print([[perceptron.batch_grad_weight, perceptron.batch_grad_bias] for perceptron in self.layer])


    def zero_batch_grad(self):
        for perceptron in self.layer:
            perceptron.zero_batch_grad()

    def zero_grad(self):
        for perceptron in self.layer:
            perceptron.zero_grad()

    def add_grad_to_batch_grad(self):
        for perceptron in self.layer:
            perceptron.add_grad_to_batch_grad()

    def normalize_batch_grad(self, batch_size: int):
        for perceptron in self.layer:
            perceptron.normalize_batch_grad(batch_size)

#-------------------------------------------------------------------------------------


class Perceptron():

    def __init__(self, input_size, initialization_type: str = "he"):

        self.bias = None
        self.weight = None

        match initialization_type:
            case "he":
                self.weight: list[float] = [random.gauss(mu=0, sigma=math.sqrt(2 / input_size)) for i in range(input_size)]
                self.bias: float = random.gauss(mu=0, sigma=math.sqrt(2 / input_size))
            case "glorot" | "xavier":
                self.weight: list[float] = [random.gauss(mu=0, sigma=math.sqrt(1 / input_size)) for i in range(input_size)]
                self.bias: float = random.gauss(mu=0, sigma=math.sqrt(1 / input_size))
            case _:
                print(f"{initialization_type} is not right initialization_type")


        self.grad_weight: list[float] = [0 for i in range(len(self.weight))]
        self.grad_bias: float = 0
        self.backward_grad: list[float] = [0 for i in range(len(self.weight))]  # gradient for following perceptron

        self.batch_grad_weight: list[float] = [0 for i in range(len(self.weight))]
        self.batch_grad_bias: float = 0

        self.forward_value: float = None
        self.forward_x: list[float] = None

        # self.weight = [2 for i in range(input_size)]
        # self.bias = 2
        # self.activate_function = ReLu()

    def __call__(self, x):

        if len(x) != len(self.weight):
            raise Exception("Size od weights not match input size.")
        output = [self.weight[i] * x[i] for i in range(len(x))]
        output.append(self.bias)
        return sum(output)

    def forward(self, x):
        self.forward_value = self(x)
        self.forward_x = x
        return self.forward_value

    def zero_batch_grad(self):
        self.batch_grad_weight = [0 for i in range(len(self.weight))]
        self.batch_grad_bias = 0

    def zero_grad(self):
        self.grad_weight = [0 for i in range(len(self.weight))]
        self.grad_bias = 0
        self.backward_grad = [0 for i in range(len(self.weight))]  # grad z poprzedniego

    def ddw_i(self, i):
        return self.forward_x[i]

    def ddx_i(self, i):
        return self.weight[i]

    def ddw(self, previous_grad):
        for i in range(len(self.grad_weight)):
            self.grad_weight[i] += self.ddw_i(i) * previous_grad

    def ddb(self, previous_grad):
        self.grad_bias += previous_grad

    def calculate_backward_grad(self, previous_grad):

        # self.backward_grad = ([self.backward_grad[i] + self.ddx_i(i) * previous_grad for i in range(len(self.grad_weight))])

        for i in range(len(self.backward_grad)):
            self.backward_grad[i] += self.ddx_i(i) * previous_grad

    def add_grad_to_batch_grad(self):
        self.batch_grad_bias += self.grad_bias
        for i in range(len(self.grad_weight)):
            self.batch_grad_weight[i] += self.grad_weight[i]

    def normalize_batch_grad(self, batch_size: int):
        self.batch_grad_bias /= batch_size
        for i in range(len(self.grad_weight)):
            self.batch_grad_weight[i] /= batch_size




#-------------------------------------------------------------------------------------

class Softmax_layer(Layer):
    def __init__(self, input_size: int, output_size: int):
        self.activation_function = SoftMax()
        self.layer = [Perceptron(input_size= input_size) for i in range(output_size)]

    def __call__(self, inputs:list ):
        outputs = [perceptron(inputs) for perceptron in self.layer]
        return self.activation_function(outputs)

    def backward_from_loss_function(self, losses_grads : list[float]):
        # print(self.get_forward())
        activation_function_grad = self.activation_function.ddx(self.get_forward())
        # print("xd",activation_function_grad)
        for i in range(len(losses_grads)):
            losses_grad = losses_grads[i]
            # print(losses_grad)
            for j in range(len(self.layer)):
                perceptron = self.layer[j]
                perceptron.ddb(losses_grad * activation_function_grad[i][j])
                perceptron.ddw(losses_grad * activation_function_grad[i][j])
                perceptron.calculate_backward_grad(losses_grad * activation_function_grad[i][j])
                print(perceptron.backward_grad, perceptron.grad_weight, perceptron.grad_bias)

#-------------------------------------------------------------------------------------


class Linear(Layer):

    def __init__(self, input_size: int, output_size: int, activation_function: str):

        self.activation_function: ActivationFunction = None
        self.layer: list[Perceptron] = []

        match activation_function:
            case "ReLu":
                self.activation_function = ReLu()
                self.layer = [Perceptron(input_size=input_size, initialization_type="he") for i in range(output_size)]
            # case "Sigmoid":
            #     self.activation_function = ReLu()
            #     self.layers = [Perceptron() for i in range(output_size)]
            # case "Tanh":
            #     self.activation_function = ReLu()
            #     self.layers = [Perceptron() for i in range(output_size)]
            # case "LeakyReLu":
            #     self.activation_function = ReLu()
            #     self.layers = [Perceptron() for i in range(output_size)]

            # case "SoftMax":
            #     self.activation_function = SoftMax()
            #     self.layer = [Perceptron(input_size=input_size) for i in range(output_size)]

            case _:
                warnings.warn(f"{activation_function} is not a activation function.")
                self.activation_function = lambda x: x
                self.layer = [Perceptron(input_size=input_size) for i in range(output_size)]

    def __call__(self, inputs: list):
        outputs = [perceptron(inputs) for perceptron in self.layer]
        return self.activation_function(outputs)
        # return [self.activation_function(output) for output in outputs]


    # def forward(self, inputs):
    #     outputs = [perceptron.forward(inputs) for perceptron in self.layer]
    #     return self.activation_function(outputs)

    def backward_from_loss_function(self, losses_grads : list[float]):
        for perceptron, previous_grad, d_activation_dx in zip(self.layer,losses_grads, self.activation_function.ddx(self.get_forward())):
            perceptron.ddb(previous_grad)
            perceptron.ddw(previous_grad)
            perceptron.calculate_backward_grad(previous_grad)


    def backward_from_previous_layer(self, previous_layer: "Linear"):
        activation_function_grad = self.activation_function.ddx(self.get_forward())

        for previous_perceptron in previous_layer.layer:
            self.backward_from_previous_perceptron(previous_perceptron, activation_function_grad)

    def backward_from_previous_perceptron(self, previous_perceptron: Perceptron, activation_function_grad : list[float]):
        for perceptron, previous_grad, d_activation_dx in zip( self.layer, previous_perceptron.backward_grad, activation_function_grad):
            previous_grad = previous_grad * d_activation_dx
            perceptron.ddb(previous_grad )
            perceptron.ddw(previous_grad )
            perceptron.calculate_backward_grad(previous_grad)

    # def zero_batch_grad(self):
    #     for perceptron in self.layer:
    #         perceptron.zero_batch_grad()
    #
    # def zero_grad(self):
    #     for perceptron in self.layer:
    #         perceptron.zero_grad()
    #
    # def add_grad_to_batch_grad(self):
    #     for perceptron in self.layer:
    #         perceptron.add_grad_to_batch_grad()
    #
    # def normalize_batch_grad(self, batch_size: int):
    #     for perceptron in self.layer:
    #         perceptron.normalize_batch_grad(batch_size)



    # def get_forward(self):
    #     return [perceptron.forward_value for perceptron in self.layer]
    # def print_forward(self):
    #     print(self.get_forward())
    #
    # def print_grad(self):
    #     print([[perceptron.grad_weight, perceptron.grad_bias] for perceptron in self.layer])
    #
    # def print_backward_grad(self):
    #     print([perceptron.backward_grad for perceptron in self.layer])
    #
    # def print_batch_grad(self):
    #     print([[perceptron.batch_grad_weight, perceptron.batch_grad_bias] for perceptron in self.layer])



In [550]:
class Model:
    def __init__(self):
        self.layers: list[Layer] = [Linear(2, 2, "ReLu"),
                                    Linear(2, 2, "ReLu"),
                                    Softmax_layer(2,2)]

    def __call__(self, x_original: list):
        x = deepcopy(x_original)
        for layer in self.layers:
            x = layer(x)
        return x

    def forward(self, x_original):
        x = deepcopy(x_original)
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def zero_batch_grad(self):
        for layer in self.layers:
            layer.zero_batch_grad()

    def zero_grad(self):
        for layer in self.layers:
            layer.zero_grad()

    def add_grad_to_batch_grad(self):
        for layer in self.layers:
            layer.add_grad_to_batch_grad()

    def normalize_batch_grad(self, batch_size: int):
        for layer in self.layers:
            layer.normalize_batch_grad(batch_size)

    # def backward(self):
    #     self.zero_grad()
    #
    #     for perceptron in self.layers[len(self.layers) - 1].layer:
    #         x = perceptron.forward_value
    #         previous_grad = self.layers[len(self.layers) - 1].activation_function.ddx(x)
    #         perceptron.ddb(previous_grad)
    #         perceptron.ddw(previous_grad)
    #         perceptron.calculate_backward_grad(previous_grad)
    #
    #     self.layers.reverse()
    #     try:
    #         for i in range(1, len(self.layers)):
    #             linear = self.layers[i]
    #             previous_linear = self.layers[i - 1]
    #             for j in range(len(linear.layer)):
    #                 perceptron = linear.layer[j]
    #                 for k in range(len(previous_linear.layer)):
    #                     previous_perceptron = previous_linear.layer[k]
    #                     previous_grad = previous_perceptron.backward_grad[j]* linear.activation_function.ddx(perceptron.forward_value)
    #                     # previous_grad *= d_a
    #                     # print(previous_grad)
    #                     # 1/0
    #
    #                     perceptron.ddb(previous_grad)
    #                     perceptron.ddw(previous_grad)  # TODO: domnóż to przez pochodną f aktywacji
    #                     perceptron.calculate_backward_grad(previous_grad)
    #
    #     except Exception as e:
    #         self.layers.reverse()
    #         raise e
    #
    #     self.layers.reverse()

    def backward(self, loss_grad:list[float] = None):
        self.zero_grad()

        linear = self.layers[-1]

        if loss_grad is None:
            loss_grad = [1 for i in range(len(linear.layer))]


        # activation_function_grad = linear.activation_function.ddx(linear.get_forward())
        # for perceptron, loss_grad, d_activation_dx  in zip( linear.layer, loss_grad, activation_function_grad):
        #
        #     previous_grad = d_activation_dx *loss_grad
        #     perceptron.ddb(previous_grad)
        #     perceptron.ddw(previous_grad)
        #     perceptron.calculate_backward_grad(previous_grad)

        self.layers.reverse()
        try:
            linear = self.layers[0]

            linear.backward_from_loss_function(loss_grad)

            for i in range(1, len(self.layers)):
                linear = self.layers[i]
                previous_linear = self.layers[i - 1]

                linear.backward_from_previous_layer(previous_linear)

                # for j in range(len(linear.layer)):
                #     perceptron = linear.layer[j]
                #     for k in range(len(previous_linear.layer)):
                #         previous_perceptron = previous_linear.layer[k]
                #         previous_grad = previous_perceptron.backward_grad[j]* linear.activation_function.ddx(perceptron.forward_value)
                #         # previous_grad *= d_a
                #         # print(previous_grad)
                #         # 1/0
                #
                #         perceptron.ddb(previous_grad)
                #         perceptron.ddw(previous_grad)  # TODO: domnóż to przez pochodną f aktywacji
                #         perceptron.calculate_backward_grad(previous_grad)

        except Exception as e:
            self.layers.reverse()
            raise e

        self.layers.reverse()


    # def backward_from_loss(self, loss_grad:list[float] = None):
    #     self.zero_grad()
    #
    #     if loss_grad is None:
    #         loss_grad = [1 for i in range(len(self.layers[-1].layer))]
    #
    #
    #     for perceptron, loss_grad in zip( self.layers[len(self.layers) - 1].layer, loss_grad):
    #         x = perceptron.forward_value
    #         previous_grad = self.layers[len(self.layers) - 1].activation_function.ddx(x)*loss_grad
    #         perceptron.ddb(previous_grad)
    #         perceptron.ddw(previous_grad)
    #         perceptron.calculate_backward_grad(previous_grad)
    #
    #     self.layers.reverse()
    #     try:
    #         for i in range(1, len(self.layers)):
    #             linear = self.layers[i]
    #             previous_linear = self.layers[i - 1]
    #             for j in range(len(linear.layer)):
    #                 perceptron = linear.layer[j]
    #                 for k in range(len(previous_linear.layer)):
    #                     previous_perceptron = previous_linear.layer[k]
    #                     previous_grad = previous_perceptron.backward_grad[j]* linear.activation_function.ddx(perceptron.forward_value)
    #                     # previous_grad *= d_a
    #                     # print(previous_grad)
    #                     # 1/0
    #
    #                     perceptron.ddb(previous_grad)
    #                     perceptron.ddw(previous_grad)  # TODO: domnóż to przez pochodną f aktywacji
    #                     perceptron.calculate_backward_grad(previous_grad)
    #
    #     except Exception as e:
    #         self.layers.reverse()
    #         raise e
    #
    #     self.layers.reverse()

    def print_values(self, type: str):
        self.layers.reverse()
        try:
            match type:
                case "backward_grad":
                    for layer in self.layers:
                        print("Backward_grad")
                        layer.print_backward_grad()

                case "grad":
                    for layer in self.layers:
                        print("Grads:")
                        layer.print_grad()

                case "forward":
                    for layer in self.layers:
                        print("Forward")
                        layer.print_forward()

                case "batch":
                    for layer in self.layers:
                        print("Batch_rads:")
                        layer.print_batch_grad()
                case "all":

                    print("Backward_grad")
                    for layer in self.layers:
                        layer.print_backward_grad()
                    print("Grads:")
                    for layer in self.layers:
                        layer.print_grad()

                    print("Batch_grad:")
                    for layer in self.layers:
                        layer.print_batch_grad()

                    print("Forward")
                    for layer in self.layers:
                        layer.print_forward()
                case _:
                    print(f"{type} is not correct type of print.")


        except Exception as e:
            self.layers.reverse()
            raise e

        self.layers.reverse()


model = Model()
criterion = MSE()

In [551]:
X_sample = [1,2]
y_true_sample = [137, 137]
y_pred_sample = model(X_sample)
y_pred_sample

[0.004314354318419335, 0.9956856456815807]

In [552]:
criterion.batch_forward_backward(model, [y_true_sample], [X_sample])

[[0.002302932891856047, 0.004295740665234472], [0.004295740665234472, -0.45561319311990994]]
[-0.04184717820465668, 0.5407900663257422] [-0.15388457102309394, -0.9248821283873824] -0.31549187051581146
[-0.5550566197662258, -0.24298478818241737] [-0.28704623214760533, -1.7252147396532291] -0.5884979377898328
[-0.11934129729947271, 1.5422446829354013] [-0.4388535888956373, -2.6376123262372126] -0.8997311343349894
[57.88912988095613, 25.34187947909323] [29.937228064463692, 179.92971632045146] 61.37686200326518


In [553]:
model.print_values("all")

Backward_grad
[[-0.11934129729947271, 1.5422446829354013], [57.88912988095613, 25.34187947909323]]
[[19.496456824878184, -37.831724934770726], [20.551617122377387, -9.86862872174609]]
[[31.911667724248193, 112.52818793596916], [-78.51578431975193, 13.42318528315888]]
Grads:
[[[-0.4388535888956373, -2.6376123262372126], -0.8997311343349894], [[29.937228064463692, 179.92971632045146], 61.37686200326518]]
[[[246.64669918050464, 50.084538010163094], 57.76978858365666], [[114.78111046435643, 23.307666021890064], 26.88412416202863]]
[[[40.048073947255574, 80.09614789451115], 40.048073947255574], [[-47.70035365651682, -95.40070731303364], -47.70035365651682]]
Batch_grad:
[[[-0.4388535888956373, -2.6376123262372126], -0.8997311343349894], [[29.937228064463692, 179.92971632045146], 61.37686200326518]]
[[[246.64669918050464, 50.084538010163094], 57.76978858365666], [[114.78111046435643, 23.307666021890064], 26.88412416202863]]
[[[40.048073947255574, 80.09614789451115], 40.048073947255574], [[-47

In [502]:
model.print_values("all")

Backward_grad
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
Grads:
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
Batch_grad:
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
Forward
[138, 138]
[34, 34]
[8, 8]


In [503]:
X_sample_new = [1,1]
y_pred_sample_new = model(X_sample)
y_true_sample_new = [105,106]

criterion.batch_forward_backward(model, [y_true_sample, y_true_sample_new], [X_sample, X_sample_new])
model.print_values("all")

-136.5
-136.5
-104.5
-105.5
Backward_grad
[[-2.3521237068610334e+92, -2.3521237068610334e+92], [2.3521237068610334e+92, 2.3521237068610334e+92]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
Grads:
[[[-3.0577608189193546e+93, -3.0577608189193546e+93], -1.1760618534305167e+92], [[3.0577608189193546e+93, 3.0577608189193546e+93], 1.1760618534305167e+92]]
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
Batch_grad:
[[[-1.5288804094596773e+93, -1.5288804094596773e+93], -5.8803092671525835e+91], [[1.5288804094596773e+93, 1.5288804094596773e+93], 5.8803092671525835e+91]]
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
[[[0.0, 0.0], 0.0], [[0.0, 0.0], 0.0]]
Forward
[106, 106]
[26, 26]
[6, 6]


In [193]:
criterion.batch_forward_backward(model, [y_true_sample_new], [X_sample_new])
model.print_values("all")

Backward_grad
[[2.0, 2.0], [0.0, 0.0]]
[[4.0, 4.0], [4.0, 4.0]]
[[16.0, 16.0], [16.0, 16.0]]
Grads:
[[[26.0, 26.0], 1.0], [[0.0, 0.0], 0.0]]
[[[12.0, 12.0], 2.0], [[12.0, 12.0], 2.0]]
[[[8.0, 8.0], 8.0], [[8.0, 8.0], 8.0]]
Batch_grad:
[[[26.0, 26.0], 1.0], [[0.0, 0.0], 0.0]]
[[[12.0, 12.0], 2.0], [[12.0, 12.0], 2.0]]
[[[8.0, 8.0], 8.0], [[8.0, 8.0], 8.0]]
Forward
[106, 106]
[26, 26]
[6, 6]


In [116]:
print(model([1, 2]))
print(model.forward([1, 2]))
model.backward()

[138, 138]
[138, 138]


In [88]:
model.add_grad_to_batch_grad()
model.add_grad_to_batch_grad()
model.add_grad_to_batch_grad()
model.normalize_batch_grad(3)

In [89]:
model.print_values("all")

Backward_grad
[[2, 2], [2, 2]]
[[8, 8], [8, 8]]
[[32, 32], [32, 32]]
Grads:
[[[34, 34], 1], [[34, 34], 1]]
[[[32, 32], 4], [[32, 32], 4]]
[[[16, 32], 16], [[16, 32], 16]]
Batch_grad:
[[[45.333333333333336, 45.333333333333336], 1.3333333333333333], [[45.333333333333336, 45.333333333333336], 1.3333333333333333]]
[[[42.666666666666664, 42.666666666666664], 5.333333333333333], [[42.666666666666664, 42.666666666666664], 5.333333333333333]]
[[[21.333333333333332, 42.666666666666664], 21.333333333333332], [[21.333333333333332, 42.666666666666664], 21.333333333333332]]
Forward
[138, 138]
[34, 34]
[8, 8]


In [543]:
for i in range(len(model.layers)):
    number_layer = i
    print(i, model.layers[number_layer].layer[0].backward_grad, model.layers[number_layer].layer[0].grad_weight,
          model.layers[number_layer].layer[0].grad_bias)

0 [8, 8] [8, 16] 8
1 [4, 4] [16, 16] 2
2 [2, 2] [34, 34] 1


In [104]:
a = Perceptron(2)
a([1, 2])

-0.188139308930177

In [105]:
# a.forward([1,2])
# print(a.value)

-0.188139308930177


In [36]:
a.bias

-0.08249214176724436

In [39]:
a = None
warnings.warn(f"its {a}")



In [205]:
print([1, 2].reverse())

None


In [568]:
a = [1,2,3,]
b = [10, 100, 1000]
for i,j in zip(a,b):
    j *= i
    print(j)
print(b)

10
200
3000
[10, 100, 1000]


<zip at 0x10bfa3440>

In [232]:
a = "e"
match a:
    case "xd":
        print("jdslfn")
    case "x"| "d":
        print("kkkk")

In [229]:
a

('x', 'd')

In [517]:
lis = [Perceptron(2), Perceptron(1)]

for i in range(len(lis)):
    lis[i].backward_grad= 100

lis[0].backward_grad

100