In [2]:
import time
import torch
from torch import nn,optim
import torch.nn.functional as F
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
def conv_block(in_channels,out_channels):
    blk = nn.Sequential(nn.BatchNorm2d(in_channels),
                       nn.ReLU(),
                       nn.Conv2d(in_channels,out_channels,kernel_size=3,padding=1))
    return blk

In [5]:
class DenseBlock(nn.Module):
    def __init__(self,num_convs,in_channels,out_channels):
        super(DenseBlock,self).__init__()
        net = []
        for i in range(num_convs):
            in_c = in_channels + i*out_channels
            net.append(conv_block(in_c,out_channels))
        self.net = nn.ModuleList(net)
        self.out_channels = in_channels + num_convs * out_channels
    def forward(self,X):
        for blk in self.net:
            Y = blk(X)
            X = torch.cat((X,Y),dim=1)
        return X

In [6]:
blk = DenseBlock(2,3,10)
X = torch.rand(4,3,8,8)
Y = blk(X)
Y.shape

torch.Size([4, 23, 8, 8])

In [7]:
def transition_block(in_channels,out_channels):
    blk = nn.Sequential(nn.BatchNorm2d(in_channels),
                       nn.ReLU(),
                       nn.Conv2d(in_channels,out_channels,kernel_size=1),
                       nn.AvgPool2d(kernel_size=2,stride=2))
    return blk

In [8]:
blk = transition_block(23,10)
blk(Y).shape

torch.Size([4, 10, 4, 4])

In [9]:
net = nn.Sequential(nn.Conv2d(1,64,kernel_size=7,stride=2,padding=3),
                   nn.BatchNorm2d(64),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3,stride=2,padding=1))

In [10]:
num_channels,growth_rate = 64,32
num_convs_in_dense_blocks = [4,4,4,4]
for i,num_convs in enumerate(num_convs_in_dense_blocks):
    DB = DenseBlock(num_convs,num_channels,growth_rate)
    net.add_module("DenseBlock_%d" % i,DB)
    num_channels = DB.out_channels
    if i != len(num_convs_in_dense_blocks) -1:
        net.add_module("trainsition_block_%d" % i,
                      transition_block(num_channels,num_channels))
        num_channels = num_channels

In [11]:
X = torch.rand((1,1,96,96))
for name,layer in net.named_children():
    X = layer(X)
    print(name,'output shape:',X.shape)

0 output shape: torch.Size([1, 64, 48, 48])
1 output shape: torch.Size([1, 64, 48, 48])
2 output shape: torch.Size([1, 64, 48, 48])
3 output shape: torch.Size([1, 64, 24, 24])
DenseBlock_0 output shape: torch.Size([1, 192, 24, 24])
trainsition_block_0 output shape: torch.Size([1, 192, 12, 12])
DenseBlock_1 output shape: torch.Size([1, 320, 12, 12])
trainsition_block_1 output shape: torch.Size([1, 320, 6, 6])
DenseBlock_2 output shape: torch.Size([1, 448, 6, 6])
trainsition_block_2 output shape: torch.Size([1, 448, 3, 3])
DenseBlock_3 output shape: torch.Size([1, 576, 3, 3])


In [17]:
#能运行，但是电脑GPU不够
batch_size = 32
train_iter,test_iter = d2l.load_data_fashion_mnist(batch_size,resize=24)
lr,num_epochs = 0.001,5
optimizer = torch.optim.Adam(net.parameters(),lr=lr)
d2l.train_ch5(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs)

training on  cuda


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 2.00 GiB total capacity; 1.32 GiB already allocated; 15.55 MiB free; 1.32 GiB reserved in total by PyTorch)