In [1]:
import torch

### Optimizers

In [None]:
import torch.optim as optim

In [None]:
# SGD
# `optim.SGD` needs an iterable of parameters to optimize; calling it with no
# arguments raises a TypeError. A throwaway parameter is used here purely to
# demonstrate construction -- in practice pass `model.parameters()`.
demo_param = torch.nn.Parameter(torch.zeros(1))

# The learning rate is given explicitly so the step size is visible to the reader.
sgd = optim.SGD([demo_param], lr=0.01)

## Artificial Neural Networks

```
nn.Linear
nn.Module
nn.Sequential
```

### `nn.Linear` -> Fully Connected Layer

In [9]:
import torch
import torch.nn as nn

# A fully connected layer mapping 3 input features to 2 output features.
layer1 = nn.Linear(3, 2)

# One sample with three (float) features.
input1 = torch.tensor([5.0, 8.0, 78.0])

# Calling the layer applies its affine transformation to the sample.
output1 = layer1(input1)

output1

tensor([-0.4326,  8.3574], grad_fn=<ViewBackward0>)

- PyTorch initializes the weights of `nn.Linear` layers with **Kaiming Uniform** initialization (also known as **He initialization**).
- The code above does not involve any activation function: `nn.Linear` only computes the affine transformation $y = xW^T + b$.

In [10]:
# The weight matrix has shape (out_features, in_features), i.e. (2, 3) for this layer.
layer1.weight

Parameter containing:
tensor([[-0.2250, -0.4629,  0.0513],
        [ 0.1548, -0.5298,  0.1502]], requires_grad=True)

In [13]:
# `.data` shows the underlying values as a plain tensor, without the `Parameter` wrapper.
layer1.weight.data

tensor([[-0.2250, -0.4629,  0.0513],
        [ 0.1548, -0.5298,  0.1502]])

In [12]:
# Learnable tensors of a layer are wrapped in `nn.Parameter`.
type(layer1.weight)

torch.nn.parameter.Parameter

In [11]:
# The bias holds one value per output feature (2 for this layer).
layer1.bias

Parameter containing:
tensor([0.3948, 0.1081], requires_grad=True)

Every time we run this code, the `output`, `weights` and `biases` all change, because the layer's parameters are randomly initialized when it is created.

### We can also pass in our custom weights

In [17]:
new_weights = torch.tensor([[1., 5., 7], [9, 8, 12]])
new_bias = torch.tensor([34., 67])

# Copy the values into the existing parameters instead of rebinding `.data`.
# Rebinding `.data` makes the layer share storage with `new_weights` / `new_bias`
# (a later in-place edit of those tensors would silently change the layer) and
# accepts a wrongly shaped tensor without complaint. `copy_` keeps the parameters
# independent, and `no_grad` keeps this manual overwrite out of the autograd graph.
with torch.no_grad():
    layer1.weight.copy_(new_weights)
    layer1.bias.copy_(new_bias)

layer1(input1)

tensor([ 625., 1112.], grad_fn=<ViewBackward0>)

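Even though `new_weights` was created without `requires_grad=True`, the layer's weight still tracks gradients: only the values held by the existing `nn.Parameter` are replaced, and the parameter keeps its own `requires_grad=True` flag. The source tensor `new_weights` itself is left unchanged, as the next two cells show.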

In [18]:
# The tensor we created by hand was never asked to track gradients.
new_weights.requires_grad

False

In [19]:
# The layer's own parameter still tracks gradients after receiving the new values.
layer1.weight.requires_grad

True

#### We can also send in inputs as batches and get the output as batches 

In [20]:
# Four samples stacked row-wise; each row carries the 3 features `layer1` expects.
batch_input = torch.tensor(
    [
        [2.0, 5.0, 6.0],
        [4.0, 3.0, 12.0],
        [3.0, 5.0, 90.0],
        [45.0, 32.0, 12.0],
    ]
)

# The layer is applied to every row independently, giving one output row per sample.
batch_output = layer1(batch_input)

batch_output

tensor([[ 103.,  197.],
        [ 137.,  271.],
        [ 692., 1214.],
        [ 323.,  872.]], grad_fn=<AddmmBackward0>)

### `nn.Module`

To create a custom model, we define a class for it that inherits from `nn.Module`.

In [23]:
class LinearRegressionModel(nn.Module):
    """Single-feature linear regression, ``y = w * x + b``.

    The model wraps one fully connected layer that maps 1 input feature to
    1 output value.
    """

    def __init__(self):
        super().__init__()
        # The learnable weight and bias both live inside this layer.
        self.layer1 = nn.Linear(1, 1)

    def forward(self, x):
        """Return the layer's prediction for ``x`` (last dimension must be 1)."""
        prediction = self.layer1(x)
        return prediction

In the `__init__` method we define the layers (stored as attributes) that the network will use; the `forward` method then describes how an input flows through them.

In [25]:
model = LinearRegressionModel()

# 15 evenly spaced points in [0, 10], arranged as a single column so that each
# row is one sample with one feature (i.e. a batch of 15 samples).
X = torch.linspace(start=0, end=10, steps=15).unsqueeze(-1)

pred = model(X)

pred

tensor([[0.2982],
        [0.7075],
        [1.1167],
        [1.5259],
        [1.9352],
        [2.3444],
        [2.7536],
        [3.1628],
        [3.5721],
        [3.9813],
        [4.3905],
        [4.7998],
        [5.2090],
        [5.6182],
        [6.0275]], grad_fn=<AddmmBackward0>)

##### NOTE : There is no training happening here, the outputs are based on randomly initialized weights

We can access the weights and biases of each layer

In [26]:
# Weight of the single linear layer (shape (1, 1)): the slope of the fitted line.
model.layer1.weight

Parameter containing:
tensor([[0.5729]], requires_grad=True)

In [27]:
# Bias of the single linear layer: the intercept, equal to the prediction for x = 0.
model.layer1.bias

Parameter containing:
tensor([0.2982], requires_grad=True)

#### Let us create a little more complicated architecture

In [28]:
class SimpleModel(nn.Module):
    """Two-layer perceptron: Linear(10 -> 5) -> ReLU -> Linear(5 -> 1)."""

    def __init__(self):
        super().__init__()
        # The attributes can be passed as positional arguments:
        # (in_features, out_features).
        self.layer1 = nn.Linear(10, 5)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(5, 1)

    def forward(self, x):
        """Map inputs with 10 features to a single output value each."""
        hidden = self.relu(self.layer1(x))
        return self.layer2(hidden)

In [29]:
# Instantiating the model creates its layers with random initial weights;
# evaluating `net` on the last line displays the registered sub-modules.
net = SimpleModel()
net

SimpleModel(
  (layer1): Linear(in_features=10, out_features=5, bias=True)
  (relu): ReLU()
  (layer2): Linear(in_features=5, out_features=1, bias=True)
)

Let us test this architecture

In [30]:
# `torch.randn` draws samples from the standard normal distribution (mean 0, variance 1).
# Shape (1, 10): a batch of one sample with the 10 features `layer1` expects.
# NOTE(review): `input` shadows Python's built-in `input()` for the rest of the kernel session.
input = torch.randn(1, 10)

input

tensor([[-1.2133, -0.6530, -0.4642,  0.4322, -0.3934, -0.3582,  0.1788, -0.5381,
         -2.9398,  0.3000]])

In [31]:
# Calling the module runs `forward`; one input row gives one output value, shape (1, 1).
output = net(input)

output

tensor([[0.1228]], grad_fn=<AddmmBackward0>)

#### Accessing `weights` and `biases`

In [33]:
# Show every weight and bias of the network, layer by layer, one per line.
print(
    net.layer1.weight,
    net.layer1.bias,
    net.layer2.weight,
    net.layer2.bias,
    sep="\n",
)

Parameter containing:
tensor([[-0.0409, -0.2381, -0.1773, -0.0067,  0.2568, -0.1257, -0.1149,  0.0955,
         -0.1825, -0.1549],
        [ 0.1729,  0.1043, -0.1728, -0.1504,  0.2014,  0.2415,  0.1694,  0.1043,
          0.0391,  0.2934],
        [-0.0633,  0.2307,  0.1636, -0.0048, -0.1066, -0.0436,  0.1575, -0.2524,
          0.2767,  0.1501],
        [-0.1252, -0.2697,  0.1006,  0.0077,  0.2431,  0.2081, -0.2368,  0.1665,
          0.0122,  0.1779],
        [-0.1188,  0.2325,  0.0076,  0.0448, -0.2391,  0.3124, -0.0504,  0.2967,
         -0.0150, -0.0129]], requires_grad=True)
Parameter containing:
tensor([ 0.2813, -0.2151, -0.2072,  0.1852,  0.0900], requires_grad=True)
Parameter containing:
tensor([[ 0.1299, -0.2960,  0.4257, -0.2743, -0.0747]], requires_grad=True)
Parameter containing:
tensor([0.0531], requires_grad=True)


We can also access all the parameters of the architecture with a single method call

In [34]:
# `parameters()` does not return a list; it returns a lazy generator over the model's parameters.
net.parameters()

<generator object Module.parameters at 0x7f44538433e0>

This is a generator object; we can iterate over it to print each parameter

In [35]:
# Each item yielded by the generator is a single `nn.Parameter` (a weight matrix or a bias vector).
for params in net.parameters():
    print(params)

Parameter containing:
tensor([[-0.0409, -0.2381, -0.1773, -0.0067,  0.2568, -0.1257, -0.1149,  0.0955,
         -0.1825, -0.1549],
        [ 0.1729,  0.1043, -0.1728, -0.1504,  0.2014,  0.2415,  0.1694,  0.1043,
          0.0391,  0.2934],
        [-0.0633,  0.2307,  0.1636, -0.0048, -0.1066, -0.0436,  0.1575, -0.2524,
          0.2767,  0.1501],
        [-0.1252, -0.2697,  0.1006,  0.0077,  0.2431,  0.2081, -0.2368,  0.1665,
          0.0122,  0.1779],
        [-0.1188,  0.2325,  0.0076,  0.0448, -0.2391,  0.3124, -0.0504,  0.2967,
         -0.0150, -0.0129]], requires_grad=True)
Parameter containing:
tensor([ 0.2813, -0.2151, -0.2072,  0.1852,  0.0900], requires_grad=True)
Parameter containing:
tensor([[ 0.1299, -0.2960,  0.4257, -0.2743, -0.0747]], requires_grad=True)
Parameter containing:
tensor([0.0531], requires_grad=True)


This is the same as what we manually printed above: `parameters()` yields the parameters in the order in which the layers were registered in `__init__`, which here matches the order of the forward pass.

Let us cross check the shape of each of the parameters

In [36]:
# Expected shapes: weights are (out_features, in_features), biases are (out_features,).
for params in net.parameters():
    print(params.shape)

torch.Size([5, 10])
torch.Size([5])
torch.Size([1, 5])
torch.Size([1])


### `nn.Sequential`

We can create the same kind of neural network architecture using the `nn.Sequential` module too

In [37]:
# Stack the three layers without writing a class; `nn.Sequential` feeds each
# module's output into the next one, in the order the modules are listed.
model = nn.Sequential(
    nn.Linear(in_features=10, out_features=5),
    nn.ReLU(),
    nn.Linear(in_features=5, out_features=1),
)

print(model)

Sequential(
  (0): Linear(in_features=10, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=1, bias=True)
)


No class definition is involved here: we simply create the model and call it, similar to `TensorFlow`

In [39]:
# Fresh random batch of one sample with 10 features, passed through the Sequential model.
# NOTE(review): `input` shadows Python's built-in `input()`.
input = torch.randn(1, 10)
output = model(input)
output

tensor([[0.0470]], grad_fn=<AddmmBackward0>)

Here we haven't explicitly named the layers, so we can't access the individual layers by name, but we can access them by index

In [44]:
# Index 0 and 2 are the Linear layers (index 1 is the parameter-free ReLU);
# print their weights and biases one per line.
print(
    model[0].weight,
    model[0].bias,
    model[2].weight,
    model[2].bias,
    sep="\n",
)

Parameter containing:
tensor([[-0.0788,  0.1867, -0.2808, -0.0681,  0.2860,  0.1389, -0.1130,  0.2014,
         -0.1127, -0.0666],
        [ 0.1796, -0.0856,  0.2900, -0.2269, -0.2764, -0.0511, -0.0214, -0.0933,
         -0.1643,  0.1824],
        [-0.1410, -0.3136, -0.3014,  0.1758,  0.0503, -0.1323, -0.1685,  0.0956,
         -0.1987,  0.1828],
        [-0.3150, -0.1256,  0.2114, -0.2085,  0.2123,  0.0087, -0.0036,  0.0950,
         -0.0398,  0.0453],
        [-0.0447,  0.0477,  0.1345, -0.1228, -0.3094,  0.1953, -0.1709,  0.1493,
          0.1032, -0.0887]], requires_grad=True)
Parameter containing:
tensor([ 0.1045,  0.0719, -0.1628,  0.1493,  0.2043], requires_grad=True)
Parameter containing:
tensor([[-0.1367, -0.3759,  0.3436,  0.3287,  0.0239]], requires_grad=True)
Parameter containing:
tensor([0.0386], requires_grad=True)
