In [None]:
import torch
import torch.nn as nn
import numpy as np

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
W1_np = np.array([
    [-0.34108091502837, 0.4875444774924518, -0.5786687891714137],
    [-0.7406084700295616, -0.5419469493214886, -0.7157868250555495],
    [-0.4315505068114813, -0.3888294993216346, -0.7260616380268912],
    [0.8544519713956533, -0.24436625968891335, -0.29745436944045234]
])
b1_np = np.array([
    [0.42066411352123567],
    [-0.06842417572242421],
    [0.3917250492343052],
    [0.07861738679252617]
])

W2_np = np.array([
    [0.8240668394757076, -0.7925535106179675, 0.856718957818575, 0.38940887530348656],
    [-0.8401491947426882, 0.6005090770290116, -0.710761829952594, -0.5376371931922113],
    [-0.9617475006414558, -0.1484109682541901, 0.315131084013728, 0.843811249537862],
    [0.7589476929906058, -0.6751635852663345, -0.05422983414165872, 0.43000805343850246]
])
b2_np = np.array([
    [0.2807627460701372],
    [0.6705844066805486],
    [0.2750476238785533],
    [0.20918398742213418]
])

W3_np = np.array([
    [-0.3547048938594579, -0.3011280508308256, 0.5691627652457749, -0.9665772078201607]
])
b3_np = np.array([[0.9979425023631463]])

W1 = torch.tensor(W1_np, dtype=torch.float32)
b1 = torch.tensor(b1_np, dtype=torch.float32)
W2 = torch.tensor(W2_np, dtype=torch.float32)
b2 = torch.tensor(b2_np, dtype=torch.float32)
W3 = torch.tensor(W3_np, dtype=torch.float32)
b3 = torch.tensor(b3_np, dtype=torch.float32)

In [None]:
X = torch.tensor([[2.5, 3.5, -0.5],
                  [4.0, -1.0, 0.5],
                  [0.5, 1.5, 1.0],
                  [3.0, 2.0, -1.5]], dtype=torch.float32).to(device)
y = torch.tensor([[1.0], [-1.0], [-1.0], [1.0]], dtype=torch.float32).to(device)

In [None]:
class MLP(nn.Module):
    def __init__(self, W1, b1, W2, b2, W3, b3):
        super(MLP, self).__init__()
        self.layer1 = nn.Linear(3, 4)
        self.layer2 = nn.Linear(4, 4)
        self.output_layer = nn.Linear(4, 1)

        with torch.no_grad():
            self.layer1.weight = nn.Parameter(W1)
            self.layer1.bias = nn.Parameter(b1)
            self.layer2.weight = nn.Parameter(W2)
            self.layer2.bias = nn.Parameter(b2)
            self.output_layer.weight = nn.Parameter(W3)
            self.output_layer.bias = nn.Parameter(b3)

    def forward(self, x):
        x = torch.tanh(self.layer1(x))
        x = torch.tanh(self.layer2(x))
        x = torch.tanh(self.output_layer(x))
        return x

In [None]:
model = MLP(W1, b1, W2, b2, W3, b3).to(device)

In [None]:
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

In [None]:
def train_step(X, y):
    optimizer.zero_grad()
    y_pred = model(X)
    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()
    return loss.item()

In [None]:
print("Pesos y bias iniciales:")
for name, param in model.named_parameters():
    print(f"{name}: {param.data}")

Pesos y bias iniciales:
layer1.weight: tensor([[-0.3411,  0.4875, -0.5787],
        [-0.7406, -0.5419, -0.7158],
        [-0.4316, -0.3888, -0.7261],
        [ 0.8545, -0.2444, -0.2975]], device='cuda:0')
layer1.bias: tensor([[ 0.4207],
        [-0.0684],
        [ 0.3917],
        [ 0.0786]], device='cuda:0')
layer2.weight: tensor([[ 0.8241, -0.7926,  0.8567,  0.3894],
        [-0.8401,  0.6005, -0.7108, -0.5376],
        [-0.9617, -0.1484,  0.3151,  0.8438],
        [ 0.7589, -0.6752, -0.0542,  0.4300]], device='cuda:0')
layer2.bias: tensor([[0.2808],
        [0.6706],
        [0.2750],
        [0.2092]], device='cuda:0')
output_layer.weight: tensor([[-0.3547, -0.3011,  0.5692, -0.9666]], device='cuda:0')
output_layer.bias: tensor([[0.9979]], device='cuda:0')


In [None]:
loss = train_step(X, y)

print("\nPesos y bias después de la retropropagación:")
for name, param in model.named_parameters():
    print(f"{name}: {param.data}")

print(f"\nPérdida final: {loss}")


Pesos y bias después de la retropropagación:
layer1.weight: tensor([[-0.3652,  0.4924, -0.5307],
        [-0.7427, -0.5432, -0.7167],
        [-0.4137, -0.3822, -0.7352],
        [ 0.8516, -0.2657, -0.3195]], device='cuda:0')
layer1.bias: tensor([[ 0.4201],
        [-0.0663],
        [ 0.3961],
        [ 0.0718]], device='cuda:0')
layer2.weight: tensor([[ 0.8030, -0.8150,  0.8352,  0.4034],
        [-0.8521,  0.5926, -0.7187, -0.5403],
        [-0.9284, -0.1723,  0.2961,  0.8872],
        [ 0.7380, -0.7009, -0.0788,  0.4464]], device='cuda:0')
layer2.bias: tensor([[0.2956],
        [0.7166],
        [0.2862],
        [0.2188]], device='cuda:0')
output_layer.weight: tensor([[-0.3148, -0.4109,  0.5292, -0.9650]], device='cuda:0')
output_layer.bias: tensor([[0.9839]], device='cuda:0')

Pérdida final: 1.1816823482513428
