In [2]:
import torch.nn.functional as F # For binary cross entropy
from torch.autograd import grad
import torch

# Toy logistic-regression example: one input feature, one weight and one bias.
# requires_grad=True asks autograd to track the operations on these tensors.
x1 = torch.tensor([1.1], requires_grad=True)
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)
y = torch.tensor([1.0])  # target label

# Forward pass: net input -> sigmoid activation -> binary cross-entropy loss.
z = x1 * w1 + b
a = z.sigmoid()
loss = F.binary_cross_entropy(input=a, target=y)

# grad() returns a tuple with one tensor per requested input, so each result
# below is a 1-tuple. retain_graph=True keeps the computation graph alive so
# the same loss can be differentiated again.
grad_l_w1, grad_l_x1, grad_l_b = (
    grad(loss, wrt, retain_graph=True) for wrt in (w1, x1, b)
)

print(grad_l_w1, grad_l_x1, grad_l_b)

(tensor([-0.0898]),) (tensor([-0.1797]),) (tensor([-0.0817]),)


In [4]:
class NeuralNetwork(torch.nn.Module):
  """Multilayer perceptron that maps feature vectors to raw class logits.

  Every hidden layer is a Linear layer followed by a ReLU. The output layer is
  a plain Linear layer, so forward() returns unnormalised logits.

  Args:
    num_inputs: Number of input features per example.
    num_outputs: Number of output units (one logit per class).
    hidden_sizes: Width of each hidden layer, in order. The default (30, 20)
      gives the two-hidden-layer architecture this class was written with;
      an empty sequence yields a single Linear layer.
  """

  def __init__(self, num_inputs, num_outputs, hidden_sizes=(30, 20)):
    super().__init__()

    # Create the layers in forward order: parameter initialisation draws from
    # the global random number generator, so the creation order determines the
    # weights obtained for a given seed.
    stack = []
    in_features = num_inputs
    for width in hidden_sizes:
      stack.append(torch.nn.Linear(in_features, width))
      stack.append(torch.nn.ReLU())
      in_features = width

    # Output layer: no activation, so the model emits logits.
    stack.append(torch.nn.Linear(in_features, num_outputs))

    self.layers = torch.nn.Sequential(*stack)

  def forward(self, x):
    """Return the logits for x, whose last dimension must equal num_inputs."""
    logits = self.layers(x)
    return logits

model = NeuralNetwork(50, 3)

In [5]:
# Printing the module lists the layers held in its Sequential container, in order.
print(model)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


In [10]:
# Count only the trainable parameters, i.e. tensors with requires_grad set.
# requires_grad is already a bool, so it is tested directly rather than
# compared against True.
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total number of parameters in the model are: {num_params}")

Total number of parameters in the model are: 2213


In [12]:
# Weight matrix of the first Linear layer (index 0 of the Sequential container).
print(model.layers[0].weight)

Parameter containing:
tensor([[ 0.0714,  0.0363, -0.0687,  ..., -0.0429, -0.0765,  0.0044],
        [ 0.1295,  0.0685, -0.0509,  ..., -0.0074,  0.1233,  0.0608],
        [-0.0617,  0.1156, -0.0155,  ..., -0.1365,  0.1253, -0.0089],
        ...,
        [ 0.0893,  0.1142,  0.0590,  ..., -0.0798,  0.0860,  0.0544],
        [-0.0216,  0.0721, -0.1077,  ..., -0.0061, -0.0851,  0.0069],
        [ 0.0023,  0.0969,  0.0873,  ..., -0.0815, -0.1350, -0.0048]],
       requires_grad=True)


In [16]:
# A Linear layer stores its weight as (out_features, in_features): the first
# layer maps 50 inputs to 30 outputs, hence the shape [30, 50].
# The same check works for every Linear layer in model.layers.
print(model.layers[0].weight.shape)

torch.Size([30, 50])


In [17]:
# Bias vector of the first Linear layer: one entry per output unit (30 values).
print(model.layers[0].bias)

Parameter containing:
tensor([-0.0010, -0.0828, -0.0295, -0.1003, -0.0781, -0.0969,  0.1121,  0.1172,
        -0.0983,  0.1357,  0.0099,  0.0482, -0.0004,  0.0936, -0.1108,  0.0384,
         0.1132,  0.0696,  0.0870,  0.0987, -0.0625, -0.0950,  0.1193, -0.0808,
         0.0698,  0.0223,  0.0603,  0.0678, -0.1373, -0.1292],
       requires_grad=True)


In [18]:
# The weights are initialised randomly with small values, so a freshly built model gets different numbers.
# Seeding PyTorch's random number generator with torch.manual_seed makes the initialisation reproducible.

torch.manual_seed(123)

model = NeuralNetwork(50, 3)

print(model.layers[0].weight)

Parameter containing:
tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],
        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],
        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],
        ...,
        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],
        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],
        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],
       requires_grad=True)


In [20]:
# Gradients are only needed for training. For inference, run the forward pass
# inside torch.no_grad() so autograd does not track the computation, which
# saves memory and work.

X = torch.rand(1, 50)
with torch.no_grad():
  output = model(X)

# Input first, then the raw logits, each on its own line.
print(X, output, sep="\n")

tensor([[0.5131, 0.6978, 0.4537, 0.9035, 0.5088, 0.7786, 0.9455, 0.6622, 0.5138,
         0.4649, 0.0926, 0.1094, 0.6668, 0.5465, 0.6295, 0.0483, 0.7799, 0.4483,
         0.6947, 0.2243, 0.6045, 0.7574, 0.1262, 0.5446, 0.3269, 0.9105, 0.3953,
         0.2075, 0.1796, 0.4544, 0.7271, 0.6692, 0.9545, 0.8872, 0.5824, 0.6379,
         0.2836, 0.6754, 0.8838, 0.4898, 0.5963, 0.0890, 0.7804, 0.9223, 0.9605,
         0.7099, 0.3075, 0.5226, 0.2881, 0.2615]])
tensor([[-0.1712,  0.0876, -0.1625]])


In [22]:
# Turn the logits into class probabilities; dim=1 normalises across the class
# axis, so each row of the result sums to 1.
with torch.no_grad():
  output = model(X).softmax(dim=1)

print(output)

tensor([[0.3026, 0.3921, 0.3053]])


In [26]:
# NOTE: this redefines NeuralNetwork and shadows the class of the same name
# defined in an earlier cell; from here on the name refers to this deeper
# variant.
class NeuralNetwork(torch.nn.Module):
  """Multilayer perceptron that maps feature vectors to raw class logits.

  Every hidden layer is a Linear layer followed by a ReLU. The output layer is
  a plain Linear layer, so forward() returns unnormalised logits.

  Args:
    input_size: Number of input features per example.
    output_size: Number of output units (one logit per class).
    hidden_sizes: Width of each hidden layer, in order. The default
      (30, 20, 10) gives the three-hidden-layer architecture this class was
      written with; an empty sequence yields a single Linear layer.
  """

  def __init__(self, input_size, output_size, hidden_sizes=(30, 20, 10)):
    super().__init__()

    # Create the layers in forward order: parameter initialisation draws from
    # the global random number generator, so the creation order determines the
    # weights obtained for a given seed.
    stack = []
    in_features = input_size
    for width in hidden_sizes:
      stack.append(torch.nn.Linear(in_features, width))
      stack.append(torch.nn.ReLU())
      in_features = width

    # Output layer: no activation, so the model emits logits.
    stack.append(torch.nn.Linear(in_features, output_size))

    self.layers = torch.nn.Sequential(*stack)

  def forward(self, x):
    """Return the logits for x, whose last dimension must equal input_size."""
    logits = self.layers(x)
    return logits


model = NeuralNetwork(50, 3)

print(model.layers)

Sequential(
  (0): Linear(in_features=50, out_features=30, bias=True)
  (1): ReLU()
  (2): Linear(in_features=30, out_features=20, bias=True)
  (3): ReLU()
  (4): Linear(in_features=20, out_features=10, bias=True)
  (5): ReLU()
  (6): Linear(in_features=10, out_features=3, bias=True)
)


In [31]:
# Inspect the second Linear layer (index 2 in model.layers): its bias vector
# first, then its weight matrix.
print(model.layers[2].bias, model.layers[2].weight, sep="\n")

Parameter containing:
tensor([ 0.1568, -0.0106, -0.1436, -0.0494,  0.0016, -0.1353,  0.0561, -0.0634,
         0.0907,  0.1818,  0.0049,  0.0597,  0.1344,  0.1318, -0.0168, -0.0886,
         0.1197,  0.1229,  0.0204,  0.0334], requires_grad=True)
Parameter containing:
tensor([[ 1.1081e-01,  7.2104e-02,  1.2603e-01,  6.0945e-02,  1.4608e-01,
          1.1479e-01, -6.7819e-02,  1.1978e-01, -1.7576e-01,  3.9393e-02,
          1.2595e-01,  1.4288e-01,  9.5834e-02,  1.1678e-01,  1.6803e-01,
         -1.1938e-01,  1.5281e-01, -6.3345e-02, -1.5574e-01,  4.1657e-02,
          3.7720e-02, -2.3873e-02,  4.5442e-02,  1.4656e-01,  6.4622e-02,
         -7.6131e-02, -1.6842e-01, -8.2066e-02,  3.8972e-02,  2.2720e-02],
        [-7.3953e-02,  1.5360e-01,  8.8083e-02,  5.6917e-02, -5.0214e-02,
          1.6878e-01,  2.5733e-02,  1.7308e-01,  4.9521e-02, -3.6096e-02,
         -1.5461e-01,  3.8970e-02,  2.9243e-02, -1.6099e-01,  6.7480e-02,
          9.0143e-02,  1.4435e-01, -1.0338e-01,  1.1309e-01, -1.

In [32]:
# Count only the trainable parameters, i.e. tensors with requires_grad set.
# requires_grad is already a bool, so it is tested directly rather than
# compared against True.
num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(num_parameters)

2393


In [33]:
# Class probabilities from the current model for the same input X; dim=1
# normalises across the class axis, so each row of the result sums to 1.
with torch.no_grad():
  output = model(X).softmax(dim=1)

print(output)

tensor([[0.4577, 0.2982, 0.2441]])
