### Imports

In [1]:
import torch
import torch.nn as nn

### Последовательная модель

#### Первый способ

In [3]:
# Baseline fully connected network: 784 inputs -> 128 hidden units (ReLU) -> 10 outputs.
model = nn.Sequential(*[
    nn.Linear(in_features=784, out_features=128),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=10),
])

In [4]:
# inspect the model: the bare expression displays its layer structure
model

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=10, bias=True)
)

In [5]:
# Sanity-check the model on a random 16x784 tensor;
# the leading 16 plays the role of the batch size.
input = torch.rand(16, 784, dtype=torch.float32)

out = model(input)
out.shape    # expected: (16, 10)

torch.Size([16, 10])

#### Рассмотрим некоторые методы модели, которыми будем пользоваться в дальнейшем

In [7]:
# state_dict() returns a reference to the model's state:
# all of the data is stored as a dictionary (an OrderedDict)
# that holds every parameter of the model, layer by layer
model.state_dict()

OrderedDict([('0.weight',
              tensor([[ 1.9227e-02, -1.7696e-02,  8.0754e-03,  ..., -1.7146e-02,
                       -2.1296e-05, -3.2841e-02],
                      [ 1.5663e-02, -8.0799e-03,  1.5257e-02,  ...,  1.1218e-03,
                       -3.2033e-02,  2.9945e-03],
                      [-5.8664e-03, -2.6101e-03,  2.9895e-02,  ..., -3.2090e-02,
                        2.6595e-03, -1.7147e-02],
                      ...,
                      [ 5.5547e-03, -2.8772e-02,  2.7064e-02,  ..., -2.3405e-02,
                       -3.4416e-02, -1.3128e-03],
                      [-2.0327e-02,  1.6203e-02, -2.6216e-02,  ...,  2.4770e-02,
                       -3.2999e-02,  3.9255e-03],
                      [ 2.6167e-02, -2.9423e-02, -9.0841e-03,  ...,  2.9651e-02,
                        2.8608e-02, -1.1144e-04]])),
             ('0.bias',
              tensor([ 0.0142, -0.0348,  0.0108, -0.0056, -0.0129, -0.0316,  0.0201, -0.0350,
                       0.0256,  0.0046, 

In [10]:
# .state_dict() is a dictionary, so individual values can be looked up by key

# weights of the first linear layer
model.state_dict()['0.weight']

tensor([[ 1.9227e-02, -1.7696e-02,  8.0754e-03,  ..., -1.7146e-02,
         -2.1296e-05, -3.2841e-02],
        [ 1.5663e-02, -8.0799e-03,  1.5257e-02,  ...,  1.1218e-03,
         -3.2033e-02,  2.9945e-03],
        [-5.8664e-03, -2.6101e-03,  2.9895e-02,  ..., -3.2090e-02,
          2.6595e-03, -1.7147e-02],
        ...,
        [ 5.5547e-03, -2.8772e-02,  2.7064e-02,  ..., -2.3405e-02,
         -3.4416e-02, -1.3128e-03],
        [-2.0327e-02,  1.6203e-02, -2.6216e-02,  ...,  2.4770e-02,
         -3.2999e-02,  3.9255e-03],
        [ 2.6167e-02, -2.9423e-02, -9.0841e-03,  ...,  2.9651e-02,
          2.8608e-02, -1.1144e-04]])

In [11]:
# bias vector of the first linear layer
model.state_dict()['0.bias']

tensor([ 0.0142, -0.0348,  0.0108, -0.0056, -0.0129, -0.0316,  0.0201, -0.0350,
         0.0256,  0.0046, -0.0133,  0.0133, -0.0009,  0.0262, -0.0015,  0.0059,
         0.0129,  0.0100, -0.0086,  0.0291, -0.0314,  0.0137,  0.0162, -0.0297,
         0.0163, -0.0122, -0.0213,  0.0346,  0.0299,  0.0023, -0.0246,  0.0353,
         0.0162,  0.0172,  0.0147, -0.0109,  0.0003,  0.0004, -0.0073,  0.0016,
         0.0278,  0.0300, -0.0263, -0.0098,  0.0114,  0.0168,  0.0205,  0.0164,
        -0.0077, -0.0261,  0.0263, -0.0352, -0.0089, -0.0123, -0.0175, -0.0262,
         0.0023, -0.0119, -0.0198,  0.0091,  0.0041,  0.0060, -0.0279,  0.0149,
        -0.0140,  0.0111, -0.0022,  0.0308,  0.0214, -0.0345,  0.0246, -0.0161,
        -0.0297, -0.0136,  0.0244, -0.0309, -0.0050, -0.0120,  0.0198, -0.0075,
         0.0091, -0.0335,  0.0047,  0.0205,  0.0024, -0.0217, -0.0332, -0.0303,
         0.0293, -0.0094, -0.0290,  0.0140, -0.0152, -0.0174, -0.0335, -0.0346,
        -0.0312, -0.0278,  0.0210, -0.01

In [12]:
# model.parameters() is a generator that yields the model's learnable parameters

for parameter in model.parameters():
    # one call prints the tensor, then its shape, then an empty separator line
    print(parameter, parameter.shape, sep='\n', end='\n\n')

Parameter containing:
tensor([[ 1.9227e-02, -1.7696e-02,  8.0754e-03,  ..., -1.7146e-02,
         -2.1296e-05, -3.2841e-02],
        [ 1.5663e-02, -8.0799e-03,  1.5257e-02,  ...,  1.1218e-03,
         -3.2033e-02,  2.9945e-03],
        [-5.8664e-03, -2.6101e-03,  2.9895e-02,  ..., -3.2090e-02,
          2.6595e-03, -1.7147e-02],
        ...,
        [ 5.5547e-03, -2.8772e-02,  2.7064e-02,  ..., -2.3405e-02,
         -3.4416e-02, -1.3128e-03],
        [-2.0327e-02,  1.6203e-02, -2.6216e-02,  ...,  2.4770e-02,
         -3.2999e-02,  3.9255e-03],
        [ 2.6167e-02, -2.9423e-02, -9.0841e-03,  ...,  2.9651e-02,
          2.8608e-02, -1.1144e-04]], requires_grad=True)
torch.Size([128, 784])

Parameter containing:
tensor([ 0.0142, -0.0348,  0.0108, -0.0056, -0.0129, -0.0316,  0.0201, -0.0350,
         0.0256,  0.0046, -0.0133,  0.0133, -0.0009,  0.0262, -0.0015,  0.0059,
         0.0129,  0.0100, -0.0086,  0.0291, -0.0314,  0.0137,  0.0162, -0.0297,
         0.0163, -0.0122, -0.0213,  0.03

Если не вызывать model.train() и model.eval(), ошибки не будет. Но если модель содержит такие слои, как dropout и batchnorm, они работают по-разному при тренировке и валидации, поэтому без переключения режима модель будет вести себя некорректно.

Поэтому необходимо взять за правило всегда использовать данные методы при тренировке модели

In [None]:
# model.train() - call before starting to train the model
# model.eval() - call before validation and testing

#### Второй способ

В данном способе слои добавляются последовательно, их наименования задаются явно и отображаются в описании модели при ее выводе

In [14]:
# Start from an empty container and register every layer under an explicit name.
model = nn.Sequential()
for _name, _layer in (
    ('layer_1', nn.Linear(784, 128)),
    ('relu', nn.ReLU()),
    ('layer_2', nn.Linear(128, 10)),
):
    model.add_module(_name, _layer)

In [15]:
# display the model; layers registered via add_module are listed under their explicit names
model

Sequential(
  (layer_1): Linear(in_features=784, out_features=128, bias=True)
  (relu): ReLU()
  (layer_2): Linear(in_features=128, out_features=10, bias=True)
)

In [19]:
# Named layers are reachable as attributes of the container; print one per line.
print(model.layer_1, model.relu, model.layer_2, sep='\n')

Linear(in_features=784, out_features=128, bias=True)
ReLU()
Linear(in_features=128, out_features=10, bias=True)


In [20]:
# Sanity-check the model on a random 16x784 tensor;
# the leading 16 plays the role of the batch size.
input = torch.rand(16, 784, dtype=torch.float32)

out = model(input)
out.shape    # expected: (16, 10)

torch.Size([16, 10])

In [21]:
# the state-dict keys now carry the explicit layer names (e.g. 'layer_1.weight')
model.state_dict()

OrderedDict([('layer_1.weight',
              tensor([[-0.0263,  0.0282, -0.0332,  ...,  0.0097,  0.0285,  0.0002],
                      [ 0.0023,  0.0016, -0.0149,  ..., -0.0279, -0.0161, -0.0231],
                      [ 0.0051, -0.0108,  0.0027,  ..., -0.0141,  0.0121, -0.0126],
                      ...,
                      [ 0.0262,  0.0256,  0.0121,  ...,  0.0259,  0.0067,  0.0304],
                      [-0.0023, -0.0006,  0.0086,  ..., -0.0352,  0.0232,  0.0250],
                      [ 0.0282,  0.0141,  0.0202,  ..., -0.0204, -0.0300, -0.0130]])),
             ('layer_1.bias',
              tensor([ 0.0351,  0.0253, -0.0237,  0.0280, -0.0147,  0.0351,  0.0292,  0.0066,
                      -0.0143, -0.0285,  0.0228,  0.0264, -0.0008, -0.0181,  0.0279, -0.0058,
                      -0.0306, -0.0068, -0.0196, -0.0230,  0.0117,  0.0053,  0.0024,  0.0024,
                      -0.0101,  0.0325, -0.0164, -0.0172, -0.0323, -0.0241, -0.0262,  0.0194,
                       0.025

### Создание класса для модели нейронной сети

#### Модель с одним входом и одним выходом

In [29]:
class MyModel(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear with a 128-unit hidden layer."""

    def __init__(self, input, output):
        """Create the layers.

        Args:
            input: number of input features.
            output: number of output features.
        """
        super().__init__()
        self.layer_1 = nn.Linear(in_features=input, out_features=128)
        self.layer_2 = nn.Linear(in_features=128, out_features=output)
        self.act = nn.ReLU()

    def forward(self, x):
        """Pass ``x`` through layer_1, the activation and layer_2; return the result."""
        hidden = self.act(self.layer_1(x))
        return self.layer_2(hidden)


model = MyModel(784, 10)

In [30]:
# Verify that the model is wired correctly by pushing a random batch through it
input = torch.rand(16, 784, dtype=torch.float32)

out = model(input)
out.shape    # expected: (16, 10)

torch.Size([16, 10])

#### Модель с двумя входами и двумя выходами

In [35]:
class MyModel(nn.Module):
    """Network with two inputs and two outputs.

    The second input is added to the output of the first linear layer before
    the activation; the activated hidden tensor is returned next to the final
    output.
    """

    def __init__(self, input, output):
        """Create the layers.

        Args:
            input: number of features of the first input.
            output: number of output features.
        """
        super().__init__()
        self.layer_1 = nn.Linear(in_features=input, out_features=128)
        self.layer_2 = nn.Linear(in_features=128, out_features=output)
        self.act = nn.ReLU()

    def forward(self, x, y):
        """Return ``(out, hidden)``.

        ``hidden`` is ``act(layer_1(x) + y)`` and ``out`` is ``layer_2(hidden)``.
        """
        hidden = self.act(self.layer_1(x) + y)
        return self.layer_2(hidden), hidden


model = MyModel(784, 10)

In [36]:
# Random tensors standing in for the two inputs of the model
x = torch.rand(16, 784, dtype=torch.float32)
y = torch.rand(16, 128, dtype=torch.float32)
# Feed both tensors to the model
out = model(x, y)

In [39]:
# Number of returned outputs, then the shape of each one
print(len(out))
print(f'out_shape_1 = {out[0].shape}', f'out_shape_2 = {out[1].shape}', sep='\n')

2
out_shape_1 = torch.Size([16, 10])
out_shape_2 = torch.Size([16, 128])


#### Модули ModuleList и ModuleDict

##### ModuleList

In [40]:
class MyModel(nn.Module):
    """Deep MLP kept in an ``nn.ModuleList`` that also returns intermediate outputs.

    Ten ``Linear -> ReLU`` pairs are stacked: the first Linear maps ``input``
    to ``hidden_size`` features and every following one halves the width.
    A final Linear maps the last hidden width to ``output`` features.

    Args:
        input: number of input features.
        output: number of features produced by the last layer.
        hidden_size: width of the first hidden layer.
    """

    def __init__(self, input, output, hidden_size=2048):
        super().__init__()
        self.layers = nn.ModuleList()
        for _ in range(10):
            # append() keeps the list contract (integer indexing, len) intact;
            # add_module() with custom names would make self.layers[0] fail.
            self.layers.append(nn.Linear(input, hidden_size))
            self.layers.append(nn.ReLU())
            input = hidden_size
            hidden_size //= 2  # integer halving, no float round-trip
        # `input` now holds the width of the last hidden layer
        self.layers.append(nn.Linear(input, output))

    def forward(self, x):
        """Run ``x`` through every layer in order.

        Returns:
            list: outputs of the layers at positions 2, 6, 10, 14, 18 (Linear
            layers, taken before their ReLU), followed by the output of the
            last layer.
        """
        output = []
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # same selection as "i != 0 and i % 2 == 0 and i % 4 != 0"
            if i % 4 == 2:
                output.append(x)
        output.append(x)
        return output

In [41]:
# build the ModuleList-based model (784 input features, 2 outputs) and display its structure
model = MyModel(784, 2)
model

MyModel(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=2048, bias=True)
    (1): ReLU()
    (2): Linear(in_features=2048, out_features=1024, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1024, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=256, bias=True)
    (7): ReLU()
    (8): Linear(in_features=256, out_features=128, bias=True)
    (9): ReLU()
    (10): Linear(in_features=128, out_features=64, bias=True)
    (11): ReLU()
    (12): Linear(in_features=64, out_features=32, bias=True)
    (13): ReLU()
    (14): Linear(in_features=32, out_features=16, bias=True)
    (15): ReLU()
    (16): Linear(in_features=16, out_features=8, bias=True)
    (17): ReLU()
    (18): Linear(in_features=8, out_features=4, bias=True)
    (19): ReLU()
    (20): Linear(in_features=4, out_features=2, bias=True)
  )
)

In [43]:
# Verify that the model is wired correctly by pushing a random batch through it
input = torch.rand(16, 784, dtype=torch.float32)

out = model(input)

In [47]:
# Inspect the outputs returned by the model: their count, then the shape of the first six
print(len(out))
for _pos in range(6):
    print(f'out_shape_{_pos + 1} = {out[_pos].shape}')

6
out_shape_1 = torch.Size([16, 1024])
out_shape_2 = torch.Size([16, 256])
out_shape_3 = torch.Size([16, 64])
out_shape_4 = torch.Size([16, 16])
out_shape_5 = torch.Size([16, 4])
out_shape_6 = torch.Size([16, 2])


##### ModuleDict

На примере модели из предыдущего шага рассмотрим ModuleDict

In [50]:
class MyModel(nn.Module):
    """Deep MLP whose activation function is picked from an ``nn.ModuleDict``.

    Ten ``Linear -> activation`` pairs are stacked: the first Linear maps
    ``input`` to ``hidden_size`` features and every following one halves the
    width. A final Linear maps the last hidden width to ``output`` features.

    Args:
        input: number of input features.
        output: number of features produced by the last layer.
        hidden_size: width of the first hidden layer.
        choice: key of the activation to use, ``'relu'`` or ``'lrelu'``;
            an unknown key raises ``KeyError``.
    """

    def __init__(self, input, output, hidden_size=2048, choice='relu'):
        super().__init__()
        self.activations = nn.ModuleDict({
            'lrelu': nn.LeakyReLU(),
            'relu': nn.ReLU(),
        })
        self.layers = nn.ModuleList()
        for _ in range(10):
            # append() keeps the list contract (integer indexing, len) intact;
            # add_module() with custom names would make self.layers[0] fail.
            self.layers.append(nn.Linear(input, hidden_size))
            # the same activation instance is reused at every position
            self.layers.append(self.activations[choice])
            input = hidden_size
            hidden_size //= 2  # integer halving, no float round-trip
        # `input` now holds the width of the last hidden layer
        self.layers.append(nn.Linear(input, output))

    def forward(self, x):
        """Run ``x`` through every layer in order.

        Returns:
            list: outputs of the layers at positions 2, 6, 10, 14, 18 (Linear
            layers, taken before their activation), followed by the output of
            the last layer.
        """
        output = []
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # same selection as "i != 0 and i % 2 == 0 and i % 4 != 0"
            if i % 4 == 2:
                output.append(x)
        output.append(x)
        return output

In [52]:
# same architecture, with LeakyReLU selected through the `choice` argument; display the result
model = MyModel(784, 2, choice='lrelu')
model

MyModel(
  (activations): ModuleDict(
    (lrelu): LeakyReLU(negative_slope=0.01)
    (relu): ReLU()
  )
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=2048, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=2048, out_features=1024, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=1024, out_features=512, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=512, out_features=256, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Linear(in_features=256, out_features=128, bias=True)
    (9): LeakyReLU(negative_slope=0.01)
    (10): Linear(in_features=128, out_features=64, bias=True)
    (11): LeakyReLU(negative_slope=0.01)
    (12): Linear(in_features=64, out_features=32, bias=True)
    (13): LeakyReLU(negative_slope=0.01)
    (14): Linear(in_features=32, out_features=16, bias=True)
    (15): LeakyReLU(negative_slope=0.01)
    (16): Linear(in_features=16, out_features=8, b

### Создание модели для классификации MNIST

In [58]:
class MyModel(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear with a 128-unit hidden layer."""

    def __init__(self, input, output):
        """Create the layers.

        Args:
            input: number of input features.
            output: number of output features.
        """
        super().__init__()
        self.layer_1 = nn.Linear(in_features=input, out_features=128)
        self.layer_2 = nn.Linear(in_features=128, out_features=output)
        self.act = nn.ReLU()

    def forward(self, x):
        """Pass ``x`` through layer_1, the activation and layer_2; return the result."""
        return self.layer_2(self.act(self.layer_1(x)))

In [59]:
# classifier instance: 784 input features, 10 outputs
# (presumably flattened 28x28 MNIST images and 10 digit classes - confirm against the data loader)
model_classification = MyModel(784, 10)

In [60]:
# Loss function and gradient-descent optimizer for the classifier
loss_classification = nn.CrossEntropyLoss()
opt_classification = torch.optim.Adam(
    params=model_classification.parameters(),
    lr=1e-3,
)

In [61]:
# Verify that the model is wired correctly by pushing a random batch through it
input = torch.rand(16, 784, dtype=torch.float32)

out = model_classification(input)
out.shape    # expected: (16, 10)

torch.Size([16, 10])

### Модель для задачи Регрессии

In [63]:
# regression instance: 64*64 input features, 2 outputs
model_regression = MyModel(64*64, 2)

In [64]:
# Mean-squared-error loss and Adam optimizer for the regression model
loss_regression = nn.MSELoss()
opt_regression = torch.optim.Adam(
    params=model_regression.parameters(),
    lr=1e-3,
)

In [65]:
# Sanity-check the regression model on a random batch of 16 samples
input = torch.rand(16, 64*64, dtype=torch.float32)

out = model_regression(input)
out.shape    # expected: (16, 2)

torch.Size([16, 2])