<a href="https://colab.research.google.com/github/Noodle96/Topicos_Inteligencia_Artificial/blob/main/introduccion_deep_learning_with_pytorch/05_using_derivates_to_update_the_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [68]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss

**Modelo y descripcion del modelo**

In [69]:
# Create the model and run a forward pass
model = nn.Sequential(
    nn.Linear(5, 3),
    nn.Linear(3, 4),
    nn.Linear(4, 2)
)

print(model)
for name, param in model.named_parameters():
    print(f'''name: {name}, param: {param}, shape: {param.shape}\n''')


Sequential(
  (0): Linear(in_features=5, out_features=3, bias=True)
  (1): Linear(in_features=3, out_features=4, bias=True)
  (2): Linear(in_features=4, out_features=2, bias=True)
)
name: 0.weight, param: Parameter containing:
tensor([[ 0.4423,  0.1999,  0.1287,  0.3831,  0.3197],
        [ 0.3443, -0.3235,  0.3015,  0.3319,  0.1389],
        [-0.3534, -0.3499, -0.1805, -0.1696,  0.3065]], requires_grad=True), shape: torch.Size([3, 5])

name: 0.bias, param: Parameter containing:
tensor([-0.0097,  0.1617,  0.4442], requires_grad=True), shape: torch.Size([3])

name: 1.weight, param: Parameter containing:
tensor([[-0.4676,  0.4952, -0.2614],
        [ 0.5773, -0.2147,  0.2956],
        [ 0.4218,  0.2232,  0.4242],
        [-0.3239,  0.3971, -0.0646]], requires_grad=True), shape: torch.Size([4, 3])

name: 1.bias, param: Parameter containing:
tensor([-0.5539, -0.1653,  0.5445, -0.2593], requires_grad=True), shape: torch.Size([4])

name: 2.weight, param: Parameter containing:
tensor([[-0.076

In [70]:
sample = torch.randn(5)
sample

tensor([-1.9496, -0.0261,  1.7572,  0.6105, -0.4779])

In [71]:
prediction = model(sample)
prediction

tensor([ 0.1728, -0.8254], grad_fn=<ViewBackward0>)

In [72]:
grad_before = model[0].weight.grad
print(grad_before) # None

None


In [73]:
# Calculate the loss and compute the gradients
criterion = CrossEntropyLoss()
target = torch.randn(size=(2,))
print(target)

tensor([-0.2479, -1.1055])


In [74]:
loss = criterion(prediction, target)
print(loss)
loss.backward()

tensor(-1.5281, grad_fn=<DivBackward1>)


In [75]:
grad_after = model[0].weight.grad
print(grad_after) # None

tensor([[-0.0607, -0.0008,  0.0547,  0.0190, -0.0149],
        [ 0.0805,  0.0011, -0.0725, -0.0252,  0.0197],
        [ 0.0532,  0.0007, -0.0480, -0.0167,  0.0130]])


**Accedemos a la gradiente de cada capa**

In [77]:
# Access each layer's gradients
model[0].weight.grad, model[0].bias.grad

(tensor([[-0.0607, -0.0008,  0.0547,  0.0190, -0.0149],
         [ 0.0805,  0.0011, -0.0725, -0.0252,  0.0197],
         [ 0.0532,  0.0007, -0.0480, -0.0167,  0.0130]]),
 tensor([ 0.0311, -0.0413, -0.0273]))

In [78]:
model[1].weight.grad, model[1].bias.grad

(tensor([[-0.0434,  0.0126,  0.0438],
         [-0.2726,  0.0789,  0.2751],
         [ 0.1794, -0.0519, -0.1810],
         [-0.1349,  0.0390,  0.1361]]),
 tensor([ 0.0762,  0.4783, -0.3147,  0.2366]))

In [79]:
model[2].weight.grad, model[2].bias.grad

(tensor([[ 0.2639,  0.2666, -0.4334,  0.0343],
         [-0.2639, -0.2666,  0.4334, -0.0343]]),
 tensor([-0.7410,  0.7410]))

**Updating model parameter**
*   Update the weights by subtracting local gradients scaled by the **learning rate**

In [87]:
# Learning rate is typically small
lr = 0.001
# Update the weights
weight = model[0].weight
print("weight before: ", weight)
weight_grad = model[0].weight.grad
weight = weight - lr * weight_grad
print("weight after: ", weight)

weight before:  Parameter containing:
tensor([[ 0.4423,  0.1999,  0.1287,  0.3831,  0.3197],
        [ 0.3443, -0.3235,  0.3015,  0.3319,  0.1389],
        [-0.3534, -0.3499, -0.1805, -0.1696,  0.3065]], requires_grad=True)
weight after:  tensor([[ 0.4423,  0.1999,  0.1286,  0.3830,  0.3197],
        [ 0.3442, -0.3235,  0.3016,  0.3319,  0.1388],
        [-0.3535, -0.3499, -0.1804, -0.1696,  0.3065]], grad_fn=<SubBackward0>)


In [88]:
# Update the biases
bias = model[0].bias
print("bias before: ", bias)
bias_grad = model[0].bias.grad
bias = bias - lr * bias_grad
print("bias after: ", bias)


bias before:  Parameter containing:
tensor([-0.0097,  0.1617,  0.4442], requires_grad=True)
bias after:  tensor([-0.0097,  0.1617,  0.4442], grad_fn=<SubBackward0>)
