<a href="https://colab.research.google.com/github/Dmitri9149/TensorFlow-PyTorch-basics/blob/master/PyTorch_Blocks_and_Layers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [111]:
import torch
from torch import nn
from torch.nn import functional as F

Some part of the code is based on d2l.ai book: http://d2l.ai/

In [112]:
### basing on the d2l.ai book 

In [113]:
net = nn.Sequential(nn.Linear(20,256), nn.ReLU(), nn.Linear(256,10))

X=torch.rand(2,20)
net(X)

tensor([[ 0.1106, -0.2005, -0.2262, -0.0749,  0.0932, -0.0685, -0.2020,  0.1098,
         -0.0752,  0.0607],
        [ 0.2265, -0.1281, -0.2485, -0.0545,  0.2051,  0.0194, -0.1619,  0.2062,
         -0.1254,  0.0996]], grad_fn=<AddmmBackward>)

In [114]:
net.forward(X)

tensor([[ 0.1106, -0.2005, -0.2262, -0.0749,  0.0932, -0.0685, -0.2020,  0.1098,
         -0.0752,  0.0607],
        [ 0.2265, -0.1281, -0.2485, -0.0545,  0.2051,  0.0194, -0.1619,  0.2062,
         -0.1254,  0.0996]], grad_fn=<AddmmBackward>)

In [115]:
class MLP(nn.Module):

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # Hidden layer
        self.out = nn.Linear(256, 10)  # Output layer

    def forward(self, X):
        return self.out(F.relu(self.hidden(X)))


In [116]:
net = MLP()
net(X)

tensor([[ 0.1888,  0.0744,  0.0015, -0.3400, -0.1124,  0.3220, -0.1478, -0.0683,
         -0.4799, -0.1417],
        [ 0.3230, -0.0487, -0.1456, -0.2915,  0.0356,  0.2518, -0.1758, -0.1725,
         -0.3961, -0.0694]], grad_fn=<AddmmBackward>)

In [117]:
### Custom Sequential Block

In [118]:
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for block in args:
            # Here, `block` is an instance of a `Module` subclass. We save it
            # in the member variable `_modules` of the `Module` class, and its
            # type is OrderedDict
            self._modules[block] = block

    def forward(self, X):
        # OrderedDict guarantees that members will be traversed in the order
        # they were added
        for block in self._modules.values():
            X = block(X)
        return X



In [119]:
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[ 0.1074, -0.0912, -0.0485, -0.1502, -0.0150, -0.0216, -0.0244,  0.0261,
          0.1816,  0.0262],
        [ 0.0684, -0.0322, -0.0415, -0.3788,  0.0702,  0.0496, -0.0231, -0.0662,
          0.2251,  0.0856]], grad_fn=<AddmmBackward>)

MyParallel model: several models use the same input, executed in paralle and the results of the models are concatenated in one output.

In [120]:
class MyParallel(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for block in args:
            # Here, `block` is an instance of a `Module` subclass. We save it
            # in the member variable `_modules` of the `Module` class, and its
            # type is OrderedDict
            self._modules[block] = block

    def forward(self, X):
        # OrderedDict guarantees that members will be traversed in the order
        # they were added
        list_res=[]
        for block in self._modules.values():
          list_res.append(block(X))
        concat_fin=torch.cat(list_res, dim = -1)
        return concat_fin



In [121]:
list_models = [nn.Linear(20,20),nn.Linear(20,20),nn.Linear(20,20)]

In [122]:
net = MyParallel(nn.Linear(20,20),nn.Linear(20,20),nn.Linear(20,20))
#X = torch.tensor([1,10])
net(X)

tensor([[ 0.1923,  0.2258,  0.1898,  0.2642,  0.2004, -0.0909, -0.2042, -0.1412,
          0.1317, -0.2715,  0.2821,  0.4190,  0.6813, -0.4344,  0.0928, -0.3547,
          0.0122,  0.0991,  0.1898,  0.1494, -0.3297,  0.2229, -0.5338,  0.3309,
         -0.2210, -0.0264, -0.2882,  0.1691, -0.3489, -0.2488,  0.4319,  0.1940,
         -0.1779, -0.2231,  0.0658, -0.0994, -0.6673, -0.2304, -0.4316, -0.1863,
         -0.1186,  0.3782, -0.0417, -0.1541, -0.1510, -0.3058, -0.4940,  0.2463,
         -0.3656, -0.2574, -0.0481,  0.1883, -0.3006, -0.4774, -0.5792,  0.0095,
          0.4843, -0.4632, -0.0968,  0.0272],
        [ 0.4254,  0.3969,  0.3384,  0.3011, -0.3866, -0.0576,  0.4332, -0.2993,
          0.1407, -0.0052,  0.0026,  0.3481,  0.6104,  0.2126,  0.4411, -0.1735,
         -0.2814,  0.2015,  0.2099, -0.3457, -0.3032,  0.3520, -0.2476,  0.3115,
         -0.0617, -0.3468, -0.4630,  0.3803, -0.1313, -0.2815,  0.6259,  0.3856,
          0.5596, -0.2938, -0.4867, -0.0782, -0.4699, -0.3038, 

In [123]:
net = MyParallel(*list_models)
net(X)

tensor([[-0.1204, -0.2920, -0.0623, -0.4878, -0.3302,  0.3158, -0.0724, -0.1158,
         -0.2227,  0.0530, -0.2092, -0.1539, -0.2671,  0.1320,  0.3627, -0.1159,
         -0.0620, -0.0320,  0.1773, -0.3923, -0.4581, -0.1511,  0.2418,  0.2011,
          0.3295,  0.0913, -0.4311, -0.0856,  0.3938,  0.0336,  0.0906, -0.1785,
          0.0094, -0.2646,  0.2886,  0.1088,  0.2273,  0.1946, -0.1451,  0.6433,
         -0.1227,  0.1299, -0.1769, -0.3649,  0.3594, -0.3428,  0.1507, -0.2257,
          0.0494, -0.0955,  0.0323,  0.0241,  0.3599,  0.2588,  0.4581,  0.0395,
          0.1803, -0.2136,  0.0401,  0.1895],
        [-0.3141, -0.0586, -0.0623, -0.4458, -0.5648,  0.1986, -0.1087,  0.1572,
         -0.2329, -0.2689, -0.2439, -0.1724, -0.4542, -0.2692,  0.7372, -0.2114,
          0.1729, -0.0716,  0.4312, -0.4330, -0.3447, -0.2541,  0.2147,  0.1597,
         -0.0640,  0.1332, -0.1593, -0.1152,  0.4951, -0.1745, -0.1297,  0.3757,
          0.0518, -0.4908,  0.0824,  0.1834,  0.1530,  0.5988, 

In [124]:
##### Nesting Modules
class NestedBlocks(nn.Module):
  def __init__(self,*args):
    super().__init__()
    self.net = nn.Sequential(nn.Linear(20,20),nn.Linear(20,20))
    self.linear = nn.Linear(20,20)

  def forward(self, X):
    return self.linear(self.net(X))



In [125]:
tensor = torch.ones([2,20])
net = NestedBlocks()
net(X)

tensor([[ 0.0610, -0.2520,  0.1329,  0.0371,  0.3479, -0.1735,  0.0097, -0.2523,
         -0.1039,  0.0919, -0.1871, -0.0748,  0.1762,  0.2347, -0.0625, -0.0698,
         -0.0890, -0.0582,  0.2888, -0.0354],
        [-0.0218, -0.3319,  0.1095,  0.0659,  0.3214, -0.1340, -0.0255, -0.1419,
         -0.1811, -0.0065, -0.1440, -0.0008,  0.1270,  0.1667,  0.0367,  0.0794,
         -0.0456,  0.0038,  0.1505, -0.0351]], grad_fn=<AddmmBackward>)

Parameters access

In [126]:
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
X=torch.rand((2,4))
net(X)

tensor([[0.3906],
        [0.3706]], grad_fn=<AddmmBackward>)

In [127]:
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.2218,  0.2367, -0.0600,  0.2221,  0.0228,  0.3415,  0.2662,  0.0893]])), ('bias', tensor([0.2046]))])


In [128]:
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.2046], requires_grad=True)
tensor([0.2046])


In [129]:
net[2].weight.grad == True

False

In [130]:
print(*[(name, param.shape) for name, param in net[0].named_parameters()])
print(*[(name, param.shape) for name, param in net.named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [131]:
net.state_dict()['0.weight'].data

tensor([[ 0.3187, -0.3985, -0.2992, -0.0261],
        [ 0.2790,  0.2470, -0.0362, -0.3362],
        [ 0.2414,  0.0762, -0.4562, -0.3007],
        [-0.2526, -0.3404,  0.4320,  0.2884],
        [-0.0526,  0.3814,  0.1113, -0.2992],
        [-0.3442, -0.3710,  0.1772, -0.3697],
        [ 0.2905,  0.4822,  0.1832, -0.3166],
        [-0.0612, -0.3098, -0.1377, -0.2571]])

In [132]:
def block1():
  return nn.Sequential(nn.Linear(4, 8), nn.ReLU(),
                        nn.Linear(8, 4), nn.ReLU())

def block2():
  net = nn.Sequential()
  for i in range(4):
    net.add_module(f'block {i}', block1())
  return net

rgnet = nn.Sequential(block2(), nn.Linear(4,1))
rgnet(X)

tensor([[-0.3675],
        [-0.3675]], grad_fn=<AddmmBackward>)

In [133]:
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [134]:
"""
net_prl = MyParallel(nn.Linear(4,4),nn.Linear(4,4),nn.Linear(4,4))
X=torch.rand(1,4)
net_prl(X)
print(net_prl)
"""

'\nnet_prl = MyParallel(nn.Linear(4,4),nn.Linear(4,4),nn.Linear(4,4))\nX=torch.rand(1,4)\nnet_prl(X)\nprint(net_prl)\n'

In [135]:
rgnet[0][1][0].bias.data

tensor([ 0.3724,  0.2752,  0.1944, -0.1234, -0.1653, -0.0567, -0.1002,  0.4584])

Parameter Initialization

In [136]:
def init_normal(m):
  if type(m)==nn.Linear:
    nn.init.normal_(m.weight, mean=0, std=0.01)
    nn.init.zeros_(m.bias)

net.apply(init_normal)
net[0].weight.data , net[0].bias.data

(tensor([[ 0.0157,  0.0146,  0.0066,  0.0244],
         [-0.0174,  0.0065, -0.0129, -0.0065],
         [-0.0080,  0.0068,  0.0051, -0.0046],
         [ 0.0124,  0.0016,  0.0095,  0.0040],
         [ 0.0073,  0.0034,  0.0104, -0.0138],
         [ 0.0168, -0.0027, -0.0075, -0.0061],
         [ 0.0042, -0.0087,  0.0013, -0.0102],
         [-0.0068,  0.0079, -0.0198, -0.0149]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [137]:
def xavier(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)
def init_42(m):
    if type(m) == nn.Linear:
        torch.nn.init.constant_(m.weight, 42)

net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data)
print(net[2].weight.data)

tensor([[ 0.5803,  0.4351, -0.6788, -0.3046],
        [ 0.4326, -0.4754,  0.0225,  0.6407],
        [ 0.1709,  0.1537, -0.1345,  0.0276],
        [ 0.6532,  0.6273,  0.0218,  0.6854],
        [-0.5859,  0.6882, -0.0395, -0.6106],
        [-0.6374, -0.3182,  0.3199,  0.5407],
        [-0.4909,  0.1160, -0.3721,  0.4885],
        [-0.0929,  0.6909, -0.4167, -0.4498]])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


In [138]:
def my_init(m):
    if type(m) == nn.Linear:
        print("Init", *[(name, param.shape)
                        for name, param in m.named_parameters()][0])
        nn.init.uniform_(m.weight, -10, 10)
        m.weight.data *= m.weight.data.abs() >= 5

net.apply(my_init)
net[0].weight

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000,  9.5537],
        [ 0.0000, -8.2329,  9.2167,  6.4254],
        [ 0.0000, -8.9256, -0.0000, -5.2472],
        [-0.0000,  5.0122, -0.0000,  7.2561],
        [-5.9313, -0.0000,  0.0000,  0.0000],
        [-0.0000,  9.9074,  0.0000,  9.9065],
        [ 0.0000,  7.3966,  7.3591, -0.0000],
        [ 7.0495,  6.8960,  0.0000, -6.8345]], requires_grad=True)

In [139]:
### we can set parameters
net[0].weight.data[:] += 1
net[0].weight.data[0, 0] = 42
net[0].weight.data[0]

tensor([42.0000,  1.0000,  1.0000, 10.5537])

In [140]:
### shared layers 
### give a name
shared = nn.Linear(8,8)
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(),
                    shared, nn.ReLU(),
                    shared, nn.ReLU(),
                    nn.Linear(8, 1))
### has same value
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])


In [141]:
### actually same objects
net[2].weight.data[0, 0] = 100
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])


In [142]:
### without parameters
class CenteredLayer(nn.Module):
  def __init__(self):
    super().__init__()

  def forward(self,X):
    return (X-X.mean())


In [143]:
layer = CenteredLayer()
layer(torch.FloatTensor([1,2,3,4,5,6]))

tensor([-2.5000, -1.5000, -0.5000,  0.5000,  1.5000,  2.5000])

In [144]:
net = nn.Sequential(nn.Linear(5,5), CenteredLayer())
Y = net(torch.rand(2,5))
Y.mean()

tensor(1.1921e-08, grad_fn=<MeanBackward0>)

In [145]:
### Linear from scratch
class MyLinear(nn.Module):
  def __init__(self, in_units, out_units):
    super().__init__()
    self.weight=nn.Parameter(torch.randn(in_units, out_units))
    self.bias = nn.Parameter(torch.randn(out_units))

  def forward(self,X):
    linear = torch.matmul(X, self.weight.data)+ self.bias.data
    return F.relu(linear)


In [146]:
dense = MyLinear(5,3)
dense(torch.randn(2,5))

tensor([[1.8346, 0.1467, 3.9852],
        [5.8476, 0.0000, 3.2564]])

In [147]:
net = nn.Sequential(MyLinear(10,5), MyLinear(5,3))
net(torch.randn(2,10))

tensor([[0.0000, 1.6558, 0.0000],
        [0.0000, 0.0000, 1.1756]])

In [148]:
#### saving results into  file 
x = torch.arange(5)
torch.save(x, 'x_file')

In [149]:
x2 = torch.load('x_file')
x2

tensor([0, 1, 2, 3, 4])

In [150]:
y = torch.zeros(4)
torch.save([x,y], 'x_file')

In [151]:
l,m = torch.load('x_file')
(l,m)

(tensor([0, 1, 2, 3, 4]), tensor([0., 0., 0., 0.]))

In [152]:
### read write from dict
myDict = {'x':x, 'y':y}
torch.save(myDict, 'mydict')
mydict2 = torch.load('mydict')
mydict2


{'x': tensor([0, 1, 2, 3, 4]), 'y': tensor([0., 0., 0., 0.])}

In [153]:
### save model parameters
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        return self.output(F.relu(self.hidden(x)))

net = MLP()
X = torch.randn(size=(2, 20))
Y = net(X)

In [154]:
torch.save(net.state_dict(), 'mlp.params')

In [155]:
### clone model and load params to the clone
clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))

<All keys matched successfully>

In [156]:
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)

In [157]:
Y_clone = clone(X)
Y_clone == Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])

In [158]:
!nvidia-smi

Wed Dec  2 15:33:44 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.38       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   48C    P0    60W / 149W |    395MiB / 11441MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [159]:
torch.device('cpu'), torch.cuda.device('cuda'), torch.cuda.device('cuda:1')

(device(type='cpu'),
 <torch.cuda.device at 0x7fb50cdc15c0>,
 <torch.cuda.device at 0x7fb50cdc1390>)

In [160]:
torch.cuda.device_count()

1

In [161]:
def try_gpu(i=0):
  if torch.cuda.device_count() >= i+1:
    return torch.device(f'cuda:{i}')
  return torch.device('cpu')

def try_all_gpus():
  devices = [torch.device(f'cuda:{i}') 
            for i  in range (torch.cuda.device_count())]
  return devices if devices else [torch.device('cpu')]

try_gpu(), try_gpu(2), try_all_gpus()

(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])

In [162]:
x= torch.tensor([1,2,3])
x.device

device(type='cpu')

In [163]:
X = torch.ones(2,3, device = try_gpu())
X

tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [164]:
Y = torch.rand(2, 3, device=try_gpu(0))
Y

tensor([[0.9233, 0.4022, 0.5094],
        [0.6596, 0.2509, 0.3599]], device='cuda:0')

In [165]:
Z = X.cuda(0)
print(X)
print(Z)

tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')
tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')


In [166]:
Y + Z

tensor([[1.9233, 1.4022, 1.5094],
        [1.6596, 1.2509, 1.3599]], device='cuda:0')