<a href="https://colab.research.google.com/github/Dmitri9149/TensorFlow-PyTorch-basics/blob/master/PyTorch_Blocks_and_Layers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [44]:
import torch
from torch import nn
from torch.nn import functional as F

Parts of the code are based on the d2l.ai book: http://d2l.ai/

In [45]:
### based on the d2l.ai book

In [46]:
# Reference model built from the stock `nn.Sequential` container:
# Linear(20 -> 256), ReLU, Linear(256 -> 10).
net = nn.Sequential(nn.Linear(20,256), nn.ReLU(), nn.Linear(256,10))

# A batch of 2 random samples with 20 features each. No seed is set in this
# notebook, so the displayed numbers change from run to run.
X=torch.rand(2,20)
net(X)

tensor([[-0.2804, -0.4991, -0.0462,  0.0339, -0.1904, -0.2431,  0.0452, -0.0949,
         -0.0366, -0.1548],
        [-0.1632, -0.3695, -0.0969, -0.0072, -0.1748, -0.1524, -0.0425, -0.0961,
         -0.0575, -0.2525]], grad_fn=<AddmmBackward>)

In [47]:
# Calling `forward` directly returns the same values as `net(X)` in the
# previous cell. `net(X)` remains the usual entry point because
# `Module.__call__` also runs any registered hooks.
net.forward(X)

tensor([[-0.2804, -0.4991, -0.0462,  0.0339, -0.1904, -0.2431,  0.0452, -0.0949,
         -0.0366, -0.1548],
        [-0.1632, -0.3695, -0.0969, -0.0072, -0.1748, -0.1524, -0.0425, -0.0961,
         -0.0575, -0.2525]], grad_fn=<AddmmBackward>)

In [48]:
class MLP(nn.Module):
    """Two-layer perceptron: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        # Assigning `nn.Module` instances as attributes registers them (and
        # their parameters) as children of this module.
        self.hidden = nn.Linear(in_features=20, out_features=256)
        self.out = nn.Linear(in_features=256, out_features=10)

    def forward(self, X):
        """Return the output layer's result for a batch `X` whose last dimension is 20."""
        activations = F.relu(self.hidden(X))
        return self.out(activations)


In [49]:
# Instantiate the custom MLP and run it on the same batch `X`.
net = MLP()
net(X)

tensor([[ 0.0216, -0.1294,  0.0151, -0.1098,  0.1248,  0.0929,  0.2009, -0.0610,
         -0.0437,  0.2804],
        [-0.0483, -0.1203, -0.1449, -0.2851,  0.0821, -0.0248,  0.2308, -0.0995,
         -0.0713,  0.2215]], grad_fn=<AddmmBackward>)

In [50]:
### Custom Sequential Block

In [51]:
class MySequential(nn.Module):
    """Minimal re-implementation of `nn.Sequential`: applies its blocks in order."""

    def __init__(self, *args):
        """Register every positional argument as a child module.

        Args:
            *args: `nn.Module` instances, applied in the order given.
        """
        super().__init__()
        for idx, block in enumerate(args):
            # Child modules must be registered under *string* names. Keying
            # `_modules` by the module object itself still lets `forward`
            # run, but breaks `state_dict()`, `named_parameters()`,
            # `parameters()` and `print(net)`, which all build dotted names
            # from the keys. The positional index mirrors the naming used by
            # `nn.Sequential`.
            self.add_module(str(idx), block)

    def forward(self, X):
        """Feed `X` through every registered block in turn and return the result."""
        # `_modules` is an ordered dict, so children are visited in the order
        # in which they were registered.
        for block in self._modules.values():
            X = block(X)
        return X



In [52]:
# Same layer stack as the `nn.Sequential` model at the top of the notebook,
# built with the custom container instead.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[-0.2161, -0.0770, -0.2236,  0.1901,  0.1085,  0.0229,  0.1257,  0.0318,
          0.0776,  0.0261],
        [-0.1836, -0.0067, -0.0548,  0.1798,  0.1161,  0.0805,  0.0320, -0.0351,
         -0.0265,  0.0089]], grad_fn=<AddmmBackward>)

MyParallel model: several sub-models receive the same input, are applied side by side (conceptually in parallel), and their outputs are concatenated along the last dimension into a single output.

In [53]:
class MyParallel(nn.Module):
    """Apply several blocks to the same input and concatenate their outputs."""

    def __init__(self, *args):
        """Register every positional argument as a child module.

        Args:
            *args: `nn.Module` instances; each one receives the same input.
        """
        super().__init__()
        for idx, block in enumerate(args):
            # Child modules must be registered under *string* names. Keying
            # `_modules` by the module object itself still lets `forward`
            # run, but breaks `state_dict()`, `named_parameters()`,
            # `parameters()` and `print(net)`, which all build dotted names
            # from the keys.
            self.add_module(str(idx), block)

    def forward(self, X):
        """Run every block on `X` and join the results along the last dimension.

        The blocks' outputs must agree in every dimension except the last
        for `torch.cat` to succeed.
        """
        # `_modules` is an ordered dict, so the concatenation order matches
        # the order in which the blocks were passed to the constructor.
        branch_outputs = [block(X) for block in self._modules.values()]
        return torch.cat(branch_outputs, dim=-1)



In [54]:
# Three independent 20 -> 20 linear layers; a later cell unpacks this list
# into `MyParallel`.
list_models = [nn.Linear(20,20),nn.Linear(20,20),nn.Linear(20,20)]

In [55]:
# Three 20 -> 20 branches applied to the same `X`; the concatenated output
# has 3 * 20 = 60 columns per sample.
net = MyParallel(nn.Linear(20,20),nn.Linear(20,20),nn.Linear(20,20))
#X = torch.tensor([1,10])
net(X)

tensor([[-0.1381, -0.9186,  0.1384, -0.3595,  0.1742, -0.1128,  0.0997, -0.1331,
         -0.1417, -0.0759,  0.0179, -0.0372, -0.1960,  0.1404, -0.2755, -0.7322,
         -0.1157,  0.5080,  0.2541,  0.0569,  0.0549, -0.5127,  0.4420,  0.2128,
          0.0402,  0.1178,  0.0161,  0.0668, -0.2954, -0.1269,  0.0177, -0.1807,
         -0.1120,  0.0484, -0.5433,  0.0505,  0.3008, -0.1256,  0.0512,  0.1543,
          0.1877,  0.4995, -0.0361,  0.0569, -0.2941,  0.5504, -0.1185,  0.1001,
         -0.2737,  0.3776, -0.4990,  0.7391, -0.7186, -0.2338,  0.4472, -0.6493,
         -0.1084, -0.0411, -0.4154,  0.1256],
        [-0.2359, -0.4456,  0.1814, -0.3587,  0.0990, -0.1007,  0.1236,  0.0292,
          0.0165, -0.0437,  0.1397,  0.0313, -0.0964,  0.2556,  0.0154, -0.6830,
         -0.2726,  0.0542, -0.1530,  0.2660,  0.4424, -0.5897,  0.3882,  0.1908,
         -0.2487,  0.2861, -0.1471,  0.2767, -0.3753, -0.2010,  0.0717, -0.3997,
         -0.0498,  0.0349, -0.1984,  0.2075,  0.0664,  0.2729, 

In [56]:
# Same construction, this time unpacking the pre-built `list_models` list
# into positional arguments.
net = MyParallel(*list_models)
net(X)

tensor([[-4.3033e-01, -3.1271e-01, -3.6080e-01,  6.1834e-01,  1.1506e-01,
          6.7665e-02, -1.4845e-01, -9.3302e-02, -8.9880e-01,  4.1215e-01,
          5.8891e-01,  4.6084e-01,  3.9009e-02, -1.8367e-01,  1.6854e-01,
          5.3684e-02,  6.4910e-02,  5.4176e-01,  1.8570e-01,  2.6101e-01,
         -4.9938e-01, -4.7961e-01, -4.2152e-01, -1.4696e-01,  3.5790e-01,
         -5.5765e-01,  5.1654e-02,  1.7400e-01, -1.9274e-02,  2.2596e-01,
          1.5299e-01, -1.1310e-01,  6.4203e-01, -5.5497e-01, -1.2677e-01,
         -4.3918e-01,  4.8439e-01,  1.2280e-01, -3.7679e-01, -3.2435e-01,
          1.8381e-01, -2.1720e-01,  2.6337e-01, -2.8383e-01,  5.6105e-01,
         -5.2851e-01, -1.9461e-01,  1.8524e-03, -2.5237e-01,  3.1425e-01,
         -3.7881e-01, -4.0967e-01, -8.4522e-01,  3.6736e-01, -2.4191e-01,
         -4.1960e-01, -3.9213e-01,  1.3075e-01,  2.7511e-01, -3.4025e-01],
        [ 3.1133e-01,  9.1769e-02, -1.9732e-01,  6.4948e-01,  7.5939e-02,
          4.9867e-02, -9.3674e-02,  1

In [57]:
##### Nesting Modules
class NestedBlocks(nn.Module):
    """A module that nests an `nn.Sequential` stack and follows it with one more linear layer."""

    def __init__(self, *args):
        # `*args` is accepted but not used by this constructor.
        super().__init__()
        inner_layers = [nn.Linear(20, 20), nn.Linear(20, 20)]
        self.net = nn.Sequential(*inner_layers)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Pass `X` through the nested stack, then through the final linear layer."""
        nested_out = self.net(X)
        return self.linear(nested_out)



In [58]:
# NOTE(review): `tensor` is created but not used in this cell; the forward
# pass below runs on `X`. Confirm which input was intended.
tensor = torch.ones([2,20])
net = NestedBlocks()
net(X)

tensor([[ 0.0506, -0.3565, -0.1180,  0.0240,  0.0812, -0.0535, -0.2745, -0.4324,
          0.1109, -0.1391,  0.1872, -0.1268, -0.0063,  0.0292, -0.0652, -0.1360,
         -0.1183, -0.0652, -0.3360, -0.1625],
        [ 0.0015, -0.3873, -0.0486,  0.0112, -0.0041, -0.1277, -0.2602, -0.4013,
          0.0392, -0.1294,  0.2155, -0.1048, -0.0680,  0.1119, -0.2208, -0.1639,
         -0.1498, -0.1305, -0.2400, -0.0779]], grad_fn=<AddmmBackward>)

Parameter access

In [59]:
# Small model used for the parameter-access examples:
# Linear(4 -> 8), ReLU, Linear(8 -> 1).
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
# `X` is rebound here to a batch of 2 samples with 4 features each.
X=torch.rand((2,4))
net(X)

tensor([[-0.3345],
        [-0.4019]], grad_fn=<AddmmBackward>)

In [60]:
# `net[2]` is the final Linear(8 -> 1) layer; its state dict holds the
# `weight` and `bias` tensors.
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.3482, -0.2223,  0.0215,  0.2871,  0.2900,  0.0554, -0.0178, -0.1432]])), ('bias', tensor([-0.3284]))])


In [61]:
# The bias is an `nn.Parameter`. Printing it shows `requires_grad=True`,
# while `.data` shows just the underlying values as a plain tensor.
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.3284], requires_grad=True)
tensor([-0.3284])


In [62]:
# `.grad` stays None until a backward pass has populated it, so test for that
# explicitly. Comparing against `True` would compare None with True now and
# become an elementwise tensor comparison once a gradient exists.
net[2].weight.grad is not None

False

In [63]:
# Parameter names and shapes: first for layer 0 alone, then for the whole
# network, where each name is prefixed with the index of its layer.
print(*[(name, param.shape) for name, param in net[0].named_parameters()])
print(*[(name, param.shape) for name, param in net.named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [64]:
# The state dict is keyed by the same dotted names, e.g. '0.weight' for the
# weight matrix of layer 0.
net.state_dict()['0.weight'].data

tensor([[-0.3192, -0.0769, -0.2671,  0.4910],
        [ 0.1247,  0.3919,  0.0449, -0.1196],
        [-0.2766,  0.2767,  0.4941, -0.4080],
        [ 0.4813, -0.1165,  0.1303, -0.4229],
        [ 0.3512, -0.3629,  0.3730,  0.4921],
        [ 0.4711,  0.3425, -0.4567, -0.3472],
        [-0.3913, -0.4363, -0.0530, -0.0809],
        [-0.2796, -0.2863,  0.1906, -0.4907]])

In [65]:
def block1():
    """Build a fresh 4 -> 8 -> 4 block with a ReLU after each linear layer."""
    layers = [
        nn.Linear(4, 8), nn.ReLU(),
        nn.Linear(8, 4), nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def block2():
    """Stack four separately built `block1()` blocks, named 'block 0' to 'block 3'."""
    stacked = nn.Sequential()
    for i in range(4):
        # Every iteration calls `block1()` again, so no layers are shared
        # between the four sub-blocks.
        stacked.add_module(f'block {i}', block1())
    return stacked

# Nest the four-block stack inside another Sequential and finish with a
# Linear(4 -> 1) head.
rgnet = nn.Sequential(block2(), nn.Linear(4,1))
rgnet(X)

tensor([[0.2429],
        [0.2429]], grad_fn=<AddmmBackward>)

In [66]:
# Printing a module shows its nested structure together with the name of
# each child.
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [67]:
# Disabled experiment: the code below is wrapped in a string literal, so it
# is not executed and the cell only displays the string itself.
"""
net_prl = MyParallel(nn.Linear(4,4),nn.Linear(4,4),nn.Linear(4,4))
X=torch.rand(1,4)
net_prl(X)
print(net_prl)
"""

'\nnet_prl = MyParallel(nn.Linear(4,4),nn.Linear(4,4),nn.Linear(4,4))\nX=torch.rand(1,4)\nnet_prl(X)\nprint(net_prl)\n'

In [68]:
# Nested containers are indexed level by level: `rgnet[0]` is the block
# stack, `[1]` its second block ('block 1'), and `[0]` that block's first
# Linear(4 -> 8) layer.
rgnet[0][1][0].bias.data

tensor([ 0.1703, -0.3748,  0.1369,  0.0237,  0.4212,  0.1085,  0.3136,  0.2844])

Parameter Initialization

In [69]:
def init_normal(m):
    """Initialise an `nn.Linear` layer: weights drawn from N(0, 0.01^2), bias zeroed.

    Written to be passed to `Module.apply`; a module of any other type is
    left untouched.

    Args:
        m: the module to (possibly) initialise.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # `nn.Linear(..., bias=False)` stores `bias` as None, which
    # `nn.init.zeros_` cannot handle, so only zero a bias that exists.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# `apply` calls `init_normal` on `net` and on each of its submodules; then
# show the first layer's weights and bias after initialisation.
net.apply(init_normal)
net[0].weight.data , net[0].bias.data

(tensor([[ 0.0068, -0.0011,  0.0145,  0.0170],
         [-0.0081,  0.0079,  0.0101, -0.0042],
         [-0.0034, -0.0111, -0.0122,  0.0064],
         [ 0.0046,  0.0180, -0.0065,  0.0004],
         [-0.0114,  0.0196, -0.0120, -0.0264],
         [ 0.0157, -0.0083,  0.0102,  0.0100],
         [ 0.0161,  0.0110, -0.0098,  0.0022],
         [-0.0102,  0.0005,  0.0042, -0.0065]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [70]:
def xavier(m):
    """Xavier-uniform initialise the weight of an `nn.Linear`; ignore other modules."""
    if type(m) != nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
def init_42(m):
    """Set every weight of an `nn.Linear` to the constant 42; ignore other modules."""
    if type(m) != nn.Linear:
        return
    nn.init.constant_(m.weight, 42)

# `apply` can also be called on a single layer, so each layer gets its own
# initialiser: Xavier for layer 0, the constant 42 for layer 2.
net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data)
print(net[2].weight.data)

tensor([[-0.5383,  0.6290,  0.4848,  0.0479],
        [ 0.0982,  0.3911,  0.3968,  0.1660],
        [-0.2841,  0.4731,  0.4776, -0.0694],
        [-0.5045,  0.2483, -0.3705, -0.1476],
        [-0.4987,  0.0047, -0.2438, -0.2830],
        [-0.4927, -0.2710, -0.4000,  0.0590],
        [-0.6501, -0.2696, -0.1490,  0.2906],
        [ 0.4186, -0.4583, -0.2008, -0.3266]])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


In [71]:
def my_init(m):
    """Custom initialiser for `nn.Linear` layers; other modules are ignored.

    Prints the name and shape of the layer's first parameter, draws the
    weights uniformly from [-10, 10], then zeroes every weight whose
    absolute value is below 5.
    """
    if type(m) != nn.Linear:
        return
    first_name, first_param = list(m.named_parameters())[0]
    print("Init", first_name, first_param.shape)
    nn.init.uniform_(m.weight, -10, 10)
    # Multiplying by the boolean mask keeps the large weights and turns the
    # small ones into zeros.
    keep_mask = m.weight.data.abs() >= 5
    m.weight.data *= keep_mask

# Each Linear layer is announced by the print inside `my_init`; afterwards
# show the re-initialised weights of the first layer.
net.apply(my_init)
net[0].weight

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


Parameter containing:
tensor([[ 0.0000, -0.0000,  5.7957,  0.0000],
        [-0.0000, -7.0012,  0.0000,  8.0639],
        [-0.0000,  0.0000, -0.0000,  0.0000],
        [ 9.7996, -0.0000, -7.3908, -9.0815],
        [ 7.9830,  9.9064, -9.3752, -0.0000],
        [ 0.0000, -0.0000, -6.8125, -0.0000],
        [-9.8202, -0.0000, -0.0000,  5.0924],
        [-0.0000,  5.4560,  0.0000, -0.0000]], requires_grad=True)

In [72]:
### parameters can also be modified directly through `.data`
### NOTE: the `+= 1` is applied again every time this cell is re-run
net[0].weight.data[:] += 1
net[0].weight.data[0, 0] = 42
net[0].weight.data[0]

tensor([42.0000,  1.0000,  6.7957,  1.0000])

In [73]:
### shared layers
### keep the layer in a variable so the same instance can be placed at two
### positions (indices 2 and 4) of the network
shared = nn.Linear(8,8)
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(),
                    shared, nn.ReLU(),
                    shared, nn.ReLU(),
                    nn.Linear(8, 1))
### both positions report the same values
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])


In [74]:
### they are actually the same object: a write through net[2] is also
### visible through net[4]
net[2].weight.data[0, 0] = 100
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])
