# **Verificación con Micrograd**

In [21]:
import math

class Value:
    """Scalar value that records how it was computed so gradients can be backpropagated.

    Every arithmetic operation returns a new ``Value`` whose ``_prev`` holds the
    operands and whose ``_backward`` closure adds this node's contribution to
    the operands' ``grad``. Calling :meth:`backward` on a result walks that
    graph and fills in ``grad`` on every node that fed into it.

    Attributes:
        data: the wrapped number.
        grad: gradient accumulated by :meth:`backward`; starts at 0 and is
            only ever added to, so reset it before a new backward pass.
    """

    def __init__(self, data, _children=(), _op=''):
        self.data = data
        self.grad = 0
        # internal variables used for autograd graph construction
        self._backward = lambda: None  # leaves have nothing to propagate
        self._prev = set(_children)
        self._op = _op  # the op that produced this node, for graphviz / debugging / etc

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            # d(a * b)/da = b and d(a * b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent."""
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Value(self.data**other, (self,), f'**{other}')

        def _backward():
            # power rule: d(x^n)/dx = n * x^(n-1)
            self.grad += (other * self.data**(other-1)) * out.grad
        out._backward = _backward

        return out

    def relu(self):
        """Return ``max(0, self)`` as a new ``Value``."""
        out = Value(0 if self.data < 0 else self.data, (self,), 'ReLU')

        def _backward():
            # gradient passes through only where the output is positive
            self.grad += (out.data > 0) * out.grad
        out._backward = _backward

        return out

    def tanh(self):
        """Return the hyperbolic tangent of this value as a new ``Value``.

        Uses ``math.tanh`` rather than the explicit
        ``(exp(2x) - 1) / (exp(2x) + 1)`` formula, because ``math.exp(2*x)``
        raises ``OverflowError`` for large positive ``x`` while ``math.tanh``
        saturates to 1.0.
        """
        t = math.tanh(self.data)
        out = Value(t, (self,), 'tanh')

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)^2
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward

        return out

    def backward(self):
        """Backpropagate from this node, accumulating ``grad`` on every ancestor.

        Sets ``self.grad`` to 1 and then runs each node's ``_backward`` in
        reverse topological order. Gradients are accumulated with ``+=``, so
        nodes reused across passes must have ``grad`` reset by the caller.
        """
        # Topological order of all nodes reachable from self (children before
        # parents). Done with an explicit stack instead of recursion so that
        # deep graphs do not hit the interpreter's recursion limit; the
        # resulting post-order is the same as the recursive depth-first walk.
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]
        while stack:
            node, children = stack[-1]
            for child in children:
                if child not in visited:
                    visited.add(child)
                    # descend into the child first; the parent's iterator
                    # keeps its position and is resumed afterwards
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # every child has been emitted, so the node itself can be
                topo.append(node)
                stack.pop()

        # go one variable at a time and apply the chain rule to get its gradient
        self.grad = 1
        for v in reversed(topo):
            v._backward()

    def __neg__(self): # -self
        return self * -1

    def __radd__(self, other): # other + self
        return self + other

    def __sub__(self, other): # self - other
        return self + (-other)

    def __rsub__(self, other): # other - self
        return other + (-self)

    def __rmul__(self, other): # other * self
        return self * other

    def __truediv__(self, other): # self / other
        return self * other**-1

    def __rtruediv__(self, other): # other / self
        return other * self**-1

    def __repr__(self):
        return f"Value(data={self.data}, grad={self.grad})"


class Neuron:
    """One neuron: a weighted sum of its inputs plus a bias, then an activation.

    Args:
        num_inputs: number of inputs; only used to size the weight vector
            when ``weights`` is not given.
        weights: iterable of initial weights. When ``None``, ``num_inputs``
            weights are drawn uniformly from [-1, 1].
        bias: initial bias. When ``None``, the bias starts at 0.
        activation: ``'relu'`` or ``'tanh'``; any other value leaves the
            weighted sum unchanged (linear output).
    """

    def __init__(self, num_inputs, weights=None, bias=None, activation='relu'):
        if weights is None:
            # The signature advertises None as a default, so make it usable:
            # fall back to random weights sized by num_inputs.
            import random  # local import: only needed for this fallback
            weights = [random.uniform(-1, 1) for _ in range(num_inputs)]
        # Wrap weights and bias as Values so they take part in autograd
        self.weights = [Value(w) for w in weights]
        self.bias = Value(0.0 if bias is None else bias)
        self.activation = activation

    def __call__(self, inputs):
        """Return the neuron's output for ``inputs``.

        Weights and inputs are paired with ``zip``, so surplus entries on
        either side are ignored. The bias is the start value of the sum.
        """
        act = sum((w * x for w, x in zip(self.weights, inputs)), self.bias)
        # Apply the configured activation function
        if self.activation == 'relu':
            return act.relu()
        elif self.activation == 'tanh':
            return act.tanh()
        else:
            return act

    def parameters(self):
        """Return the trainable values: all weights followed by the bias."""
        return self.weights + [self.bias]

class Layer:
    """A set of neurons that all receive the same inputs.

    Args:
        num_inputs: number of inputs, forwarded to every neuron.
        num_neurons: how many neurons to create.
        weights_list: indexable collection; entry ``i`` is the weight vector
            of neuron ``i``. Although it defaults to ``None``, it is indexed
            whenever ``num_neurons > 0``, so it must be provided then.
        biases: indexable collection; entry ``i`` is the bias of neuron ``i``.
            Same caveat about the ``None`` default as ``weights_list``.
        activation: activation name forwarded to every neuron.
    """

    def __init__(self, num_inputs, num_neurons, weights_list=None, biases=None, activation='relu'):
        # Neuron idx is built from the idx-th weight vector and the idx-th bias
        self.neurons = [
            Neuron(
                num_inputs,
                weights=weights_list[idx],
                bias=biases[idx],
                activation=activation,
            )
            for idx in range(num_neurons)
        ]

    def __call__(self, inputs):
        """Return a list with each neuron's output for ``inputs``, in neuron order."""
        outputs = []
        for unit in self.neurons:
            outputs.append(unit(inputs))
        return outputs

    def parameters(self):
        """Return the parameters of all neurons as one flat list, neuron by neuron."""
        collected = []
        for unit in self.neurons:
            collected.extend(unit.parameters())
        return collected

class MLP:
    """Multi-layer perceptron with a single output neuron.

    Hidden layers use ReLU and the final one-neuron layer uses tanh.

    Args:
        num_inputs: size of the input vector.
        hidden_layer_sizes: list with the number of neurons of each hidden layer.
        weights: indexable collection; entry ``i`` holds the weight vectors of
            layer ``i``. Despite the ``None`` default it is always indexed, so
            it must be provided.
        biases: indexable collection; entry ``i`` holds the biases of layer
            ``i``. Same caveat about the ``None`` default as ``weights``.
    """

    def __init__(self, num_inputs, hidden_layer_sizes, weights=None, biases=None):
        # Layer widths from input to output; the output layer always has 1 neuron
        sizes = [num_inputs] + hidden_layer_sizes + [1]
        output_index = len(sizes) - 2  # index of the last (output) layer
        self.layers = []

        # Build each layer from consecutive (fan_in, fan_out) pairs of sizes
        for idx, (fan_in, fan_out) in enumerate(zip(sizes[:-1], sizes[1:])):
            # tanh on the output layer, relu on every hidden layer
            if idx == output_index:
                activation = 'tanh'
            else:
                activation = 'relu'
            built = Layer(
                fan_in,
                fan_out,
                weights_list=weights[idx],
                biases=biases[idx],
                activation=activation,
            )
            self.layers.append(built)

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer and return the single output value."""
        activations = inputs
        for layer in self.layers:
            activations = layer(activations)
        # the last layer has one neuron, so unwrap its one-element list
        return activations[0]

    def parameters(self):
        """Return the parameters of every layer as one flat list, layer by layer."""
        collected = []
        for layer in self.layers:
            collected.extend(layer.parameters())
        return collected


In [23]:
def main():
    """Train the micrograd MLP from fixed initial weights and print its progress.

    Builds a 3 -> 4 -> 4 -> 1 network from hard-coded weights and biases,
    prints the outputs of a first forward pass, runs full-batch gradient
    descent on a mean-squared-error loss for a fixed number of epochs, and
    finally prints the predictions for the four training samples.
    """
    # Initial weights, one entry per layer
    initial_weights = [
        # First layer (3 inputs -> 4 neurons)
        [
            [0.1, 0.5, -0.2],  # weights of the first neuron
            [-0.3, 0.4, 0.1],  # weights of the second neuron
            [0.2, -0.1, -0.5],  # weights of the third neuron
            [0.05, 0.3, 0.2]   # weights of the fourth neuron
        ],
        # Second layer (4 inputs -> 4 neurons)
        [
            [0.1, 0.4, -0.1, 0.2],
            [-0.2, 0.3, 0.2, 0.4],
            [0.3, -0.1, -0.4, -0.1],
            [0.45, 0.2, 0.35, 0.3]
        ],
        # Output layer (4 inputs -> 1 neuron)
        [
            [0.5, -0.4, 0.4, -0.2]
        ]
    ]

    # Initial biases, one entry per layer
    initial_biases = [
        [0.05, -0.07, 0.02, -0.1],  # biases of the first layer
        [0.05, 0.1, 0.3, -0.1],  # biases of the second layer
        [0.1]                   # bias of the output layer
    ]

    model = MLP(3, [4, 4], weights=initial_weights, biases=initial_biases)

    # Test data: four samples with three features each, and their targets
    X_s = [
        [2.5, 3.5, -0.5],
        [4.0, -1.0, 0.5],
        [0.5, 1.5, 1.0],
        [3.0, 2.0, -1.5]
    ]
    y_s = [1.0, -1.0, -1.0, 1.0]

    # Training parameters
    learning_rate = 0.01
    epochs = 10
    log_every = 1  # print the loss every `log_every` epochs

    # Outputs of the first forward pass, before any training. The model's
    # output layer applies tanh, so these values are post-tanh; the label
    # below says so (it previously claimed they were taken before tanh).
    y_pred_1 = [model([Value(x) for x in x_s]) for x_s in X_s]
    print("Valores de primer Forward despues de aplicar tanh:")
    for y_value in y_pred_1:
        print(y_value.data)

    # Training
    for epoch in range(epochs):
        # Forward pass over the whole data set
        y_pred = [model([Value(x) for x in x_s]) for x_s in X_s]

        # Mean squared error over the samples
        loss = sum((y_pred_i - Value(y_s_i))**2 for y_pred_i, y_s_i in zip(y_pred, y_s)) / len(y_s)

        # Backward pass: gradients accumulate, so reset them first
        for param in model.parameters():
            param.grad = 0.0
        loss.backward()

        # Gradient-descent parameter update
        for param in model.parameters():
            param.data -= learning_rate * param.grad

        if (epoch + 1) % log_every == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.data:.4f}')

    # Final predictions
    print("\nPrediciones finales (despues de tanh):")
    for x_s in X_s:
        pred = model([Value(x) for x in x_s])
        print(f"Prediccion: {pred.data:.4f}")

if __name__ == "__main__":
    main()

Valores de primer Forward antes de aplicar tanh:
0.25597836133221197
-0.007699847825942281
0.23266012946007794
0.011799452353168314
Epoch [1/10], Loss: 1.0086
Epoch [2/10], Loss: 0.9768
Epoch [3/10], Loss: 0.9490
Epoch [4/10], Loss: 0.9243
Epoch [5/10], Loss: 0.9020
Epoch [6/10], Loss: 0.8815
Epoch [7/10], Loss: 0.8624
Epoch [8/10], Loss: 0.8448
Epoch [9/10], Loss: 0.8301
Epoch [10/10], Loss: 0.8157

Prediciones finales (despues de tanh):
Prediccion: 0.4902
Prediccion: -0.0761
Prediccion: 0.2319
Prediccion: 0.2394


# **Verificación con PyTorch:**

In [27]:
import torch
import torch.nn as nn
import torch.optim as optim

# Test data: four samples with three features each (X_s) and one target
# per sample (y_s), both stored as float32 tensors.
X_s = torch.tensor(
    (
        (2.5, 3.5, -0.5),
        (4.0, -1.0, 0.5),
        (0.5, 1.5, 1.0),
        (3.0, 2.0, -1.5),
    ),
    dtype=torch.float32,
)

# Targets as a column, one row per sample
y_s = torch.tensor(
    [[target] for target in (1.0, -1.0, -1.0, 1.0)],
    dtype=torch.float32,
)


class MLP(nn.Module):
    """Three-layer perceptron (3 -> 4 -> 4 -> 1) with ReLU hidden layers and tanh output."""

    def __init__(self):
        super().__init__()
        # Linear layers, created in input-to-output order
        self.layer1 = nn.Linear(3, 4)
        self.layer2 = nn.Linear(4, 4)
        self.layer3 = nn.Linear(4, 1)

        # ReLU is shared by both hidden layers; tanh is applied to the output
        self.activation = nn.ReLU()
        self.output_activation = nn.Tanh()

    def forward(self, x):
        """Return the tanh-activated network output for the input batch ``x``."""
        hidden = x
        for linear in (self.layer1, self.layer2):
            hidden = self.activation(linear(hidden))
        return self.output_activation(self.layer3(hidden))

# Build the network. No weights are passed in, so this run does not reuse
# the fixed initial weights of the micrograd cell.
model = MLP()

# Loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error
optimizer = optim.SGD(model.parameters(), lr=0.01)  # Stochastic Gradient Descent with learning rate 0.01

# Forward propagation (initial predictions); no_grad because nothing is trained here
print("Predicciones iniciales:")
with torch.no_grad():
    predictions = model(X_s)
    print(predictions)

# Number of training epochs
epochs = 1000

# Training: every epoch runs all of X_s through the model in one batch
for epoch in range(epochs):
    # Forward
    y_pred = model(X_s)

    # Compute the loss
    loss = criterion(y_pred, y_s)

    # Backward propagation
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()  # Backpropagate the loss

    # Weight update
    optimizer.step()  # apply the SGD update using the fresh gradients

    # Report the loss every 100 epochs
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Final predictions after training
print("\nPredicciones finales despues de entrenamiento:")
with torch.no_grad():
    final_predictions = model(X_s)
    print(final_predictions)


Predicciones iniciales:
tensor([[-0.3817],
        [-0.6963],
        [-0.4377],
        [-0.3915]])
Epoch [100/1000], Loss: 0.3154
Epoch [200/1000], Loss: 0.1306
Epoch [300/1000], Loss: 0.0702
Epoch [400/1000], Loss: 0.0452
Epoch [500/1000], Loss: 0.0325
Epoch [600/1000], Loss: 0.0249
Epoch [700/1000], Loss: 0.0201
Epoch [800/1000], Loss: 0.0167
Epoch [900/1000], Loss: 0.0143
Epoch [1000/1000], Loss: 0.0124

Predicciones finales despues de entrenamiento:
tensor([[ 0.8525],
        [-0.9793],
        [-0.9249],
        [ 0.8525]])
