In [5]:
from typing import List

In [6]:
import torch as t
import torch.nn.functional as F
from tqdm import tqdm
from models.Autoencoder import Autoencoder
from torchvision import datasets, transforms

In [7]:
bs = 512
train_ds = datasets.MNIST(root='./data/', train=True, transform=transforms.ToTensor(), download=True)
train_dl = t.utils.data.DataLoader(dataset=train_ds, batch_size=bs, shuffle=True, drop_last=True)

In [13]:
train_ds

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data/
    Split: Train
    StandardTransform
Transform: ToTensor()

In [20]:
train_ds[0][0].shapeb

torch.Size([1, 28, 28])

In [21]:
train_ds[0][0][None].shape

torch.Size([1, 1, 28, 28])

In [8]:
device = t.device('cuda') if t.cuda.is_available() else 'cpu'
model = Autoencoder(train_ds[0][0][None], in_c=1, enc_out_c=[32, 64, 64, 64],
                    enc_ks=[3, 3, 3, 3], enc_pads=[1, 1, 0, 1], enc_strides=[1, 2, 2, 1],
                    dec_out_c=[64, 64, 32, 1], dec_ks=[3, 3, 3, 3], dec_strides=[1, 2, 2, 1],
                    dec_pads=[1, 0, 1, 1], dec_op_pads=[0, 1, 1, 0], z_dim=2)
model.cuda(device)


Autoencoder(
  (enc_conv_layers): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): LeakyReLU(negative_slope=0.01)
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Dropout(p=0.25, inplace=False)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): LeakyReLU(negative_slope=0.01)
      (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Dropout(p=0.25, inplace=False)
    )
    (2): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2))
      (1): LeakyReLU(negative_slope=0.01)
      (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Dropout(p=0.25, inplace=False)
    )
    (3): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): LeakyReLU(negative_slope=

In [12]:
from torchsummary import summary
summary(model=model, input_size=(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
         LeakyReLU-2           [-1, 32, 28, 28]               0
       BatchNorm2d-3           [-1, 32, 28, 28]              64
           Dropout-4           [-1, 32, 28, 28]               0
            Conv2d-5           [-1, 64, 14, 14]          18,496
         LeakyReLU-6           [-1, 64, 14, 14]               0
       BatchNorm2d-7           [-1, 64, 14, 14]             128
           Dropout-8           [-1, 64, 14, 14]               0
            Conv2d-9             [-1, 64, 6, 6]          36,928
        LeakyReLU-10             [-1, 64, 6, 6]               0
      BatchNorm2d-11             [-1, 64, 6, 6]             128
          Dropout-12             [-1, 64, 6, 6]               0
           Conv2d-13             [-1, 64, 6, 6]          36,928
        LeakyReLU-14             [-1, 6

In [9]:
optimizer = t.optim.Adam(model.parameters(), lr=5e-4, betas=(.9, .99), weight_decay=1e-2)
model.train()

for epoch in tqdm(range(20)):
    if epoch == 10:
        optimizer = t.optim.Adam(model.parameters(), lr=2e-4, betas=(.9, .99), weight_decay=1e-2)
    for i, (data, _) in enumerate(train_dl):
        data = data.to(device)
        optimizer.zero_grad()
        pred = model(data)
        loss = F.mse_loss(pred, data)
        loss.backward()
        optimizer.step()
        if i % 33 == 0:
            print(loss)


  0%|                                                                                           | 0/20 [00:00<?, ?it/s]

tensor(0.4010, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.1185, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0887, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0781, device='cuda:0', grad_fn=<MseLossBackward>)


  5%|████▏                                                                              | 1/20 [00:14<04:26, 14.00s/it]

tensor(0.0761, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0660, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0601, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0572, device='cuda:0', grad_fn=<MseLossBackward>)


 10%|████████▎                                                                          | 2/20 [00:28<04:12, 14.03s/it]

tensor(0.0579, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0556, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0547, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0548, device='cuda:0', grad_fn=<MseLossBackward>)


 15%|████████████▍                                                                      | 3/20 [00:39<03:40, 12.97s/it]

tensor(0.0542, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0518, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0538, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0509, device='cuda:0', grad_fn=<MseLossBackward>)


 20%|████████████████▌                                                                  | 4/20 [00:51<03:19, 12.49s/it]

tensor(0.0529, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0512, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0498, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0513, device='cuda:0', grad_fn=<MseLossBackward>)


 25%|████████████████████▊                                                              | 5/20 [01:03<03:03, 12.25s/it]

tensor(0.0488, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0504, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0514, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0491, device='cuda:0', grad_fn=<MseLossBackward>)


 30%|████████████████████████▉                                                          | 6/20 [01:15<02:49, 12.08s/it]

tensor(0.0488, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0518, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0497, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0507, device='cuda:0', grad_fn=<MseLossBackward>)


 35%|█████████████████████████████                                                      | 7/20 [01:26<02:35, 12.00s/it]

tensor(0.0479, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0496, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0502, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0486, device='cuda:0', grad_fn=<MseLossBackward>)


 40%|█████████████████████████████████▏                                                 | 8/20 [01:40<02:28, 12.40s/it]

tensor(0.0472, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0475, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0487, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0478, device='cuda:0', grad_fn=<MseLossBackward>)


 45%|█████████████████████████████████████▎                                             | 9/20 [01:52<02:14, 12.26s/it]

tensor(0.0482, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0484, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0484, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0489, device='cuda:0', grad_fn=<MseLossBackward>)


 50%|█████████████████████████████████████████                                         | 10/20 [02:04<02:01, 12.17s/it]

tensor(0.0483, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0472, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0473, device='cuda:0', grad_fn=<MseLossBackward>)


 55%|█████████████████████████████████████████████                                     | 11/20 [02:15<01:48, 12.09s/it]

tensor(0.0473, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0471, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward>)


 60%|█████████████████████████████████████████████████▏                                | 12/20 [02:29<01:38, 12.37s/it]

tensor(0.0479, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0461, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0453, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0461, device='cuda:0', grad_fn=<MseLossBackward>)


 65%|█████████████████████████████████████████████████████▎                            | 13/20 [02:43<01:31, 13.05s/it]

tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0462, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0455, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0453, device='cuda:0', grad_fn=<MseLossBackward>)


 70%|█████████████████████████████████████████████████████████▍                        | 14/20 [02:56<01:17, 12.98s/it]

tensor(0.0464, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0458, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0472, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward>)


 75%|█████████████████████████████████████████████████████████████▌                    | 15/20 [03:08<01:03, 12.64s/it]

tensor(0.0470, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0470, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0463, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0476, device='cuda:0', grad_fn=<MseLossBackward>)


 80%|█████████████████████████████████████████████████████████████████▌                | 16/20 [03:22<00:51, 12.99s/it]

tensor(0.0467, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0470, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0455, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0474, device='cuda:0', grad_fn=<MseLossBackward>)


 85%|█████████████████████████████████████████████████████████████████████▋            | 17/20 [03:34<00:38, 12.71s/it]

tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0463, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0460, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0463, device='cuda:0', grad_fn=<MseLossBackward>)


 90%|█████████████████████████████████████████████████████████████████████████▊        | 18/20 [03:46<00:25, 12.55s/it]

tensor(0.0448, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0474, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0477, device='cuda:0', grad_fn=<MseLossBackward>)


 95%|█████████████████████████████████████████████████████████████████████████████▉    | 19/20 [03:58<00:12, 12.47s/it]

tensor(0.0464, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0463, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(0.0463, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [04:10<00:00, 12.55s/it]


In [10]:
print(loss)
t.save(model.state_dict(), 'models/state_dicts/03_01.pth')

tensor(0.0472, device='cuda:0', grad_fn=<MseLossBackward>)


## def f(*arg, **args) 以及 f(*arg)

In [36]:
enc_out_c=[1, 32, 64, 64, 64]; enc_ks=[3,3,3,3]; enc_strides=[1,2,2,1]; enc_pads=[1,1,0,1]

In [40]:
list(zip(enc_out_c[0:], enc_out_c[1:], enc_ks, enc_strides, enc_pads))

[(1, 32, 3, 1, 1), (32, 64, 3, 2, 1), (64, 64, 3, 2, 0), (64, 64, 3, 1, 1)]

In [41]:
import torch.nn as nn

In [42]:
for in_c_, out_c, ks, stride, pad in zip(enc_out_c[0:], enc_out_c[1:], enc_ks, enc_strides, enc_pads):
    enc_conv_layer = []
    enc_conv_layer.append(nn.Conv2d(in_c_, out_c, ks, stride, padding=pad))
    print('a', enc_conv_layer)
    enc_conv_layer.extend([nn.LeakyReLU(), nn.BatchNorm2d(out_c), nn.Dropout(.25)])
    print('e', enc_conv_layer)
#     enc_conv_layers.append(nn.Sequential(*enc_conv_layer))


a [Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))]
e [Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), LeakyReLU(negative_slope=0.01), BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), Dropout(p=0.25, inplace=False)]
a [Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))]
e [Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)), LeakyReLU(negative_slope=0.01), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), Dropout(p=0.25, inplace=False)]
a [Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2))]
e [Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2)), LeakyReLU(negative_slope=0.01), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), Dropout(p=0.25, inplace=False)]
a [Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))]
e [Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), LeakyReLU(negative_slope=0.01), BatchNo

In [254]:
def f(*args):
    print(args)
    print('len =', len(args))
#     print(args[0])
#     print(isinstance(args[0], OrderedDict))
#     print(type(args))
    for k, v in enumerate(args):
        print(k, v)

In [255]:
from collections import OrderedDict
A = OrderedDict([           #orderdict按照建造时候的顺序进行存储
          ('conv1', nn.Conv2d(1,20,5)),
          ('relu1', nn.ReLU()),
          ('conv2', nn.Conv2d(20,64,5)),
          ('relu2', nn.ReLU())
    ])


In [256]:
A

OrderedDict([('conv1', Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))),
             ('relu1', ReLU()),
             ('conv2', Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))),
             ('relu2', ReLU())])

In [257]:
len(A)

4

In [258]:
f(A)  # 形参中(*OrderedDict)将OrderedDict组合成一个元组

(OrderedDict([('conv1', Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))), ('relu1', ReLU()), ('conv2', Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))), ('relu2', ReLU())]),)
len = 1
0 OrderedDict([('conv1', Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))), ('relu1', ReLU()), ('conv2', Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))), ('relu2', ReLU())])


In [259]:
B = {'a':1, 'b':2, 'c':3, 'd':4}

In [260]:
len(B)

4

In [261]:
f(B)

({'a': 1, 'b': 2, 'c': 3, 'd': 4},)
len = 1
0 {'a': 1, 'b': 2, 'c': 3, 'd': 4}


In [263]:
B

{'a': 1, 'b': 2, 'c': 3, 'd': 4}

In [283]:
C = ['a', 'b', 'c', 'd']  # C = ('a', 'b', 'c', 'd')

In [284]:
C

['a', 'b', 'c', 'd']

In [285]:
len(C)

4

In [281]:
f(C)

(('a', 'b', 'c', 'd'),)
len = 1
0 ('a', 'b', 'c', 'd')


In [286]:
f(*C)  # 实参*C将列表C解包,然后f函数*arg形参又将解包元素打包成元组

('a', 'b', 'c', 'd')
len = 4
0 a
1 b
2 c
3 d


In [287]:
enc_conv_layer

[Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 LeakyReLU(negative_slope=0.01),
 BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 Dropout(p=0.25, inplace=False)]

In [288]:
len(enc_conv_layer)

4

In [289]:
print(*enc_conv_layer)

Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) LeakyReLU(negative_slope=0.01) BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) Dropout(p=0.25, inplace=False)


In [290]:
f(*enc_conv_layer)

(Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), LeakyReLU(negative_slope=0.01), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), Dropout(p=0.25, inplace=False))
len = 4
0 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1 LeakyReLU(negative_slope=0.01)
2 BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
3 Dropout(p=0.25, inplace=False)
