<a href="https://colab.research.google.com/github/Dmitri9149/TensorFlow-PyTorch-basics/blob/master/PyTorch_Blocks_and_Layers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [65]:
import torch
from torch import nn
from torch.nn import functional as F

Parts of the code are based on the d2l.ai book: http://d2l.ai/

In [66]:
### Based on the d2l.ai book

In [67]:
# Two-layer perceptron assembled from stock layers: 20 -> 256 -> ReLU -> 10.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

# A random mini-batch of 2 samples with 20 features each.
X = torch.rand(2, 20)
net(X)

tensor([[ 0.1508, -0.1937,  0.0299, -0.1798,  0.1195,  0.0633, -0.0769,  0.0463,
          0.1490,  0.0809],
        [ 0.1002, -0.1470, -0.1017, -0.0778,  0.0913,  0.0930, -0.0092,  0.1837,
          0.0524,  0.1845]], grad_fn=<AddmmBackward>)

In [68]:
# Invoke `forward` explicitly; the tensor shown below is identical to the
# result of `net(X)` in the previous cell.
net.forward(X)

tensor([[ 0.1508, -0.1937,  0.0299, -0.1798,  0.1195,  0.0633, -0.0769,  0.0463,
          0.1490,  0.0809],
        [ 0.1002, -0.1470, -0.1017, -0.0778,  0.0913,  0.0930, -0.0092,  0.1837,
          0.0524,  0.1845]], grad_fn=<AddmmBackward>)

In [69]:
class MLP(nn.Module):
    """Multilayer perceptron: 20 inputs -> 256 hidden units -> ReLU -> 10 outputs."""

    def __init__(self):
        super().__init__()
        # Layers are created in the order they are applied in `forward`.
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return the output layer applied to the ReLU of the hidden layer of `X`."""
        hidden_activations = F.relu(self.hidden(X))
        return self.out(hidden_activations)


In [70]:
net = MLP()
net(X)

tensor([[ 0.0216, -0.2836,  0.1034, -0.0380, -0.1550, -0.1335, -0.0258,  0.3418,
         -0.4213, -0.3076],
        [ 0.0489, -0.1923,  0.0660, -0.0470, -0.1569, -0.2809, -0.0202,  0.2519,
         -0.3093, -0.2634]], grad_fn=<AddmmBackward>)

In [71]:
### Custom Sequential Block

In [72]:
class MySequential(nn.Module):
    """Minimal re-implementation of `nn.Sequential`: apply blocks one after another.

    Args:
        *args: blocks (instances of `nn.Module` subclasses), applied in the
            order in which they are passed.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Register each block in `_modules` under its position as a
            # *string* key. `nn.Module` builds parameter names, `state_dict`
            # keys and its `repr` from these keys, so they must be strings.
            # Keying by position (rather than by the block object itself) also
            # keeps a block that is passed twice as two separate steps.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed `X` through every registered block, in insertion order."""
        for block in self._modules.values():
            X = block(X)
        return X



In [73]:
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[ 0.1372, -0.1042, -0.0640,  0.1020,  0.1535,  0.0882, -0.1206, -0.0757,
          0.3468, -0.0514],
        [ 0.1039,  0.0037, -0.0246,  0.1561,  0.1167,  0.1520, -0.1072, -0.0512,
          0.1491, -0.0059]], grad_fn=<AddmmBackward>)

MyParallel model: several models receive the same input and are applied side by side (conceptually in parallel), and their results are concatenated into a single output.

In [74]:
class MyParallel(nn.Module):
    """Apply several blocks to the same input and concatenate their outputs.

    Args:
        *args: blocks (instances of `nn.Module` subclasses). Every block
            receives the same input in `forward`.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Register each block in `_modules` under its position as a
            # *string* key. `nn.Module` builds parameter names, `state_dict`
            # keys and its `repr` from these keys, so they must be strings.
            # Keying by position also keeps a block that is passed twice as
            # two separate branches.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Return the outputs of all blocks for `X`, joined along the last dimension.

        The blocks are evaluated one after another in insertion order; their
        outputs must agree in every dimension except the last.
        """
        branch_outputs = [block(X) for block in self._modules.values()]
        return torch.cat(branch_outputs, dim=-1)



In [75]:
list_models = [nn.Linear(20,20),nn.Linear(20,20),nn.Linear(20,20)]

In [76]:
net = MyParallel(nn.Linear(20,20),nn.Linear(20,20),nn.Linear(20,20))
#X = torch.tensor([1,10])
net(X)

tensor([[ 0.4196, -0.1460, -0.3654,  0.3122, -0.1946,  0.2441,  0.0153, -0.4368,
          0.4916,  0.3368,  0.0068, -0.6394,  0.4057, -0.7122,  0.4469,  0.8249,
         -0.1506, -0.1320, -0.5006, -0.1041, -0.1681, -0.6907, -0.5793,  0.2690,
         -0.2322,  0.0701,  0.2385,  0.0569,  0.2528,  0.4342, -0.0402,  0.4694,
          0.1835, -0.1803, -0.5139, -0.3105, -0.6059, -0.3592,  0.2462,  0.3861,
          0.0352,  0.4877, -0.2610,  0.0430,  0.1298, -0.1540, -0.2390, -0.5215,
         -0.3234, -0.3593,  0.3615, -0.2322, -0.6519,  0.2396,  0.1803, -0.2625,
          0.0419,  0.9004,  0.0754,  0.0334],
        [ 0.5085, -0.0698, -0.0334,  0.3177, -0.1846,  0.2379, -0.0227, -0.4316,
          0.6455,  0.1079,  0.0677, -0.7253,  0.2702, -0.4876,  0.2787,  0.6003,
         -0.0291, -0.0032, -0.4148, -0.2535, -0.2153, -0.6866, -0.5551,  0.3684,
         -0.0129, -0.4252, -0.0010,  0.1776,  0.1193,  0.3327,  0.1844,  0.6265,
          0.5831, -0.3386, -0.6703, -0.5673, -0.2399, -0.2471, 

In [77]:
net = MyParallel(*list_models)
net(X)

tensor([[-0.1432,  0.1187,  0.0855,  0.0294,  0.1137, -0.3956,  0.1789, -0.4555,
         -0.8808, -0.4197,  0.4581, -0.4608,  0.1817,  0.3451, -0.6367, -0.2924,
         -0.3933, -0.2873, -0.1519, -1.0336, -0.0215,  0.1770, -0.4531, -0.0420,
          0.3889,  0.0556, -0.3836,  0.0686, -0.0863, -0.2474, -0.4251, -0.1388,
         -0.3776, -0.2114,  0.2643,  0.0231, -0.4615,  0.2453,  0.4637, -0.1754,
         -0.2522,  0.2329, -0.0707,  0.0289, -0.6230,  0.2822,  0.1822,  0.5659,
          0.1065, -0.0446,  0.0519, -0.6477,  0.7046,  0.0430,  0.1737, -0.2738,
          0.1528,  0.3511,  0.0858, -0.4835],
        [-0.3168,  0.3287, -0.2461, -0.0563, -0.0793, -0.1951,  0.1665, -0.4017,
         -0.7182, -0.3564,  0.3560, -0.2456,  0.0190,  0.3669, -0.2081, -0.3090,
         -0.1568, -0.3878, -0.1500, -0.9001, -0.1237,  0.1743, -0.3717,  0.2347,
          0.4466,  0.0402, -0.1585, -0.2358, -0.6844,  0.0512, -0.3142, -0.2065,
         -0.2740, -0.0160, -0.2677, -0.2430, -0.0750,  0.1336, 

In [78]:
##### Nesting Modules
class NestedBlocks(nn.Module):
  """Module that nests an `nn.Sequential` of two 20->20 linear layers, followed by one more 20->20 linear layer."""

  def __init__(self,*args):
    # `*args` is accepted but not used.
    super().__init__()
    inner_layers = [nn.Linear(20,20), nn.Linear(20,20)]
    self.net = nn.Sequential(*inner_layers)
    self.linear = nn.Linear(20,20)

  def forward(self, X):
    """Run `X` through the nested sequential block, then through the final linear layer."""
    nested_out = self.net(X)
    return self.linear(nested_out)



In [79]:
# NOTE(review): `tensor` is created here but never used in this cell; the
# network is fed `X` (defined in an earlier cell) instead. Either pass `tensor`
# to the net or drop the assignment - confirm which was intended.
tensor = torch.ones([2,20])
net = NestedBlocks()
net(X)

tensor([[-0.0162, -0.0089, -0.2248, -0.4150, -0.0856, -0.1649,  0.1004,  0.0882,
         -0.0928, -0.0898,  0.0828, -0.0410,  0.2575, -0.1062, -0.0780, -0.0113,
          0.2728,  0.0405, -0.0624, -0.1912],
        [ 0.0706, -0.0495, -0.1354, -0.3141, -0.1832, -0.1608, -0.0408,  0.1699,
         -0.0833, -0.1034,  0.0884, -0.1836,  0.1931,  0.0606,  0.1496,  0.0304,
          0.2806, -0.0571,  0.1744, -0.0839]], grad_fn=<AddmmBackward>)

Parameter access

In [80]:
# Small MLP used for the parameter-access examples: 4 -> 8 -> ReLU -> 1.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
# Random mini-batch: 2 samples, 4 features.
X = torch.rand(2, 4)
net(X)

tensor([[-0.4814],
        [-0.4140]], grad_fn=<AddmmBackward>)

In [81]:
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.0595,  0.0090, -0.3076, -0.2955, -0.1566, -0.2365, -0.2095, -0.2976]])), ('bias', tensor([-0.3114]))])


In [82]:
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.3114], requires_grad=True)
tensor([-0.3114])


In [83]:
# NOTE(review): no backward pass has been run on this net in the cells above,
# so `.grad` is presumably still None and this comparison is simply False.
# `net[2].weight.grad is None` would state the intended check directly - confirm.
net[2].weight.grad == True

False

In [84]:
print(*[(name, param.shape) for name, param in net[0].named_parameters()])
print(*[(name, param.shape) for name, param in net.named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [85]:
net.state_dict()['0.weight'].data

tensor([[ 0.0069,  0.0741, -0.2788, -0.3613],
        [-0.3495, -0.3864,  0.3356,  0.4863],
        [ 0.1468,  0.3448, -0.2169,  0.3896],
        [-0.3947, -0.3415, -0.4206,  0.2676],
        [-0.2347, -0.3795, -0.1329, -0.0062],
        [ 0.4792,  0.0993,  0.0911, -0.4554],
        [ 0.4072, -0.1371,  0.1791, -0.3076],
        [ 0.4700, -0.2410,  0.1867,  0.2960]])

In [86]:
def block1():
  """Build a fresh 4 -> 8 -> 4 stack: Linear, ReLU, Linear, ReLU."""
  layers = [nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU()]
  return nn.Sequential(*layers)

def block2():
  """Chain four independent `block1()` sub-networks, registered as 'block 0' ... 'block 3'."""
  stacked = nn.Sequential()
  block_names = [f'block {idx}' for idx in range(4)]
  for block_name in block_names:
    # Each call to block1() creates new layers, so no parameters are shared.
    stacked.add_module(block_name, block1())
  return stacked

rgnet = nn.Sequential(block2(), nn.Linear(4,1))
rgnet(X)

tensor([[0.6265],
        [0.6265]], grad_fn=<AddmmBackward>)

In [87]:
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [88]:
"""
net_prl = MyParallel(nn.Linear(4,4),nn.Linear(4,4),nn.Linear(4,4))
X=torch.rand(1,4)
net_prl(X)
print(net_prl)
"""

'\nnet_prl = MyParallel(nn.Linear(4,4),nn.Linear(4,4),nn.Linear(4,4))\nX=torch.rand(1,4)\nnet_prl(X)\nprint(net_prl)\n'

In [89]:
rgnet[0][1][0].bias.data

tensor([ 0.3958,  0.3020,  0.4832, -0.3507, -0.4260, -0.4477, -0.0165, -0.3638])

Parameter Initialization

In [90]:
def init_normal(m):
  """Initialise an `nn.Linear`: weights ~ N(0, 0.01^2), bias set to zero.

  Written for use with `Module.apply`, which calls it on every submodule;
  modules whose type is not exactly `nn.Linear` are left untouched.

  Args:
    m: the module to (possibly) initialise, modified in place.
  """
  if type(m) is nn.Linear:
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer built with `bias=False` has `bias` set to None; there is
    # nothing to zero in that case and `zeros_` would raise on None.
    if m.bias is not None:
      nn.init.zeros_(m.bias)

net.apply(init_normal)
net[0].weight.data , net[0].bias.data

(tensor([[-0.0027,  0.0014,  0.0028,  0.0141],
         [ 0.0047, -0.0120, -0.0162,  0.0010],
         [ 0.0093, -0.0016,  0.0021, -0.0106],
         [-0.0006,  0.0085,  0.0194,  0.0231],
         [ 0.0054, -0.0146,  0.0278, -0.0006],
         [-0.0056, -0.0019,  0.0048, -0.0099],
         [-0.0082, -0.0219, -0.0035,  0.0219],
         [-0.0071, -0.0150, -0.0040, -0.0086]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [91]:
def xavier(m):
    """Re-draw the weight of an `nn.Linear` with Xavier uniform initialisation; other modules are skipped."""
    if type(m) != nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
def init_42(m):
    """Fill the weight of an `nn.Linear` with the constant 42; other modules are skipped."""
    if type(m) != nn.Linear:
        return
    nn.init.constant_(m.weight, 42)

net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data)
print(net[2].weight.data)

tensor([[ 0.0229, -0.5890, -0.4278, -0.1478],
        [ 0.4840,  0.5249, -0.3299,  0.0500],
        [ 0.5816,  0.2276, -0.4317, -0.5499],
        [ 0.1319,  0.6717,  0.0510,  0.2633],
        [-0.3169, -0.4108,  0.5720,  0.4838],
        [ 0.4331,  0.2760,  0.5173, -0.5761],
        [ 0.2639, -0.5605, -0.3684, -0.3023],
        [ 0.2237,  0.5641, -0.3692,  0.2924]])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


In [92]:
def my_init(m):
    """Custom initialiser for `nn.Linear` layers; other modules are skipped.

    Prints the name and shape of the layer's first parameter, draws the weight
    uniformly from [-10, 10], then zeroes every entry whose magnitude is below 5.
    """
    if type(m) != nn.Linear:
        return
    first_name, first_param = list(m.named_parameters())[0]
    print("Init", first_name, first_param.shape)
    nn.init.uniform_(m.weight, -10, 10)
    # Multiplying by the boolean mask keeps large entries and zeroes the rest.
    keep_mask = m.weight.data.abs() >= 5
    m.weight.data *= keep_mask

net.apply(my_init)
net[0].weight

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


Parameter containing:
tensor([[ 5.1403, -7.9823,  9.1056,  0.0000],
        [ 0.0000, -8.7479,  6.4611, -0.0000],
        [ 0.0000,  6.2188,  0.0000, -0.0000],
        [-0.0000,  0.0000, -0.0000, -6.9828],
        [ 0.0000, -9.6252, -6.7095,  8.6722],
        [-8.4292,  0.0000, -0.0000,  6.9706],
        [ 0.0000, -5.5829, -9.1312,  8.1601],
        [ 0.0000, -0.0000, -0.0000, -0.0000]], requires_grad=True)

In [93]:
### we can set parameters
net[0].weight.data[:] += 1
net[0].weight.data[0, 0] = 42
net[0].weight.data[0]

tensor([42.0000, -6.9823, 10.1056,  1.0000])

In [94]:
### Shared layers
### Bind the layer to a name so the very same object can be inserted twice.
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.Linear(8, 1),
)
### Positions 2 and 4 hold the same values
print(torch.eq(net[2].weight.data[0], net[4].weight.data[0]))

tensor([True, True, True, True, True, True, True, True])


In [95]:
### actually same objects
net[2].weight.data[0, 0] = 100
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])


In [96]:
### without parameters
class CenteredLayer(nn.Module):
  """Parameter-free layer that subtracts the mean over all elements of the input from the input."""

  def __init__(self):
    super().__init__()

  def forward(self,X):
    """Return `X` shifted by its overall mean."""
    overall_mean = X.mean()
    return X - overall_mean


In [97]:
layer = CenteredLayer()
layer(torch.FloatTensor([1,2,3,4,5,6]))

tensor([-2.5000, -1.5000, -0.5000,  0.5000,  1.5000,  2.5000])

In [98]:
net = nn.Sequential(nn.Linear(5,5), CenteredLayer())
Y = net(torch.rand(2,5))
Y.mean()

tensor(3.2783e-08, grad_fn=<MeanBackward0>)

In [99]:
### Linear from scratch
class MyLinear(nn.Module):
  """Fully connected layer written from scratch, with a built-in ReLU.

  Args:
    in_units: size of the last dimension of the input.
    out_units: size of the last dimension of the output.
  """

  def __init__(self, in_units, out_units):
    super().__init__()
    # Both parameters start from standard-normal samples.
    self.weight = nn.Parameter(torch.randn(in_units, out_units))
    self.bias = nn.Parameter(torch.randn(out_units))

  def forward(self,X):
    """Return `relu(X @ weight + bias)`."""
    # Use the Parameters themselves rather than their `.data`: `.data`
    # bypasses autograd, so no gradient would ever reach weight or bias.
    linear = torch.matmul(X, self.weight) + self.bias
    return F.relu(linear)


In [100]:
dense = MyLinear(5,3)
dense(torch.randn(2,5))

tensor([[1.6766e-03, 0.0000e+00, 1.9397e+00],
        [0.0000e+00, 0.0000e+00, 3.6342e+00]])

In [101]:
net = nn.Sequential(MyLinear(10,5), MyLinear(5,3))
net(torch.randn(2,10))

tensor([[ 9.5071, 13.4427,  4.8041],
        [ 6.5471, 20.8079, 10.2730]])

In [102]:
#### saving results into  file 
x = torch.arange(5)
torch.save(x, 'x_file')

In [103]:
x2 = torch.load('x_file')
x2

tensor([0, 1, 2, 3, 4])

In [104]:
y = torch.zeros(4)
torch.save([x,y], 'x_file')

In [105]:
l,m = torch.load('x_file')
(l,m)

(tensor([0, 1, 2, 3, 4]), tensor([0., 0., 0., 0.]))

In [106]:
### read write from dict
myDict = {'x':x, 'y':y}
torch.save(myDict, 'mydict')
mydict2 = torch.load('mydict')
mydict2


{'x': tensor([0, 1, 2, 3, 4]), 'y': tensor([0., 0., 0., 0.])}

In [107]:
### save model parameters
class MLP(nn.Module):
    """Multilayer perceptron (20 -> 256 -> ReLU -> 10) used for the save/load example.

    Replaces the `MLP` class defined earlier in this notebook; here the final
    layer is stored under the attribute name `output`.
    """

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Return the output layer applied to the ReLU of the hidden layer of `x`."""
        activated = F.relu(self.hidden(x))
        return self.output(activated)

net = MLP()
X = torch.randn(size=(2, 20))
Y = net(X)

In [108]:
torch.save(net.state_dict(), 'mlp.params')

In [109]:
### clone model and load params to the clone
clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))

<All keys matched successfully>

In [110]:
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)

In [111]:
Y_clone = clone(X)
Y_clone == Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])