参考连接：https://blog.csdn.net/qq_38253797/article/details/117078603

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader #只能这么调用
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Prepare Data

In [12]:
batch_size = 64
transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
train_data = datasets.CIFAR10(root = '../dataset/cifar',download=True,train=True,transform=transform)
train_loader = DataLoader(train_data,batch_size=batch_size,shuffle=True)
test_data = datasets.CIFAR10(root='../dataset/cifar',download=True,train=False,transform=transform)
test_loader = DataLoader(test_data,batch_size=batch_size,shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [13]:
data=iter(train_loader)
images,targets=data.next()
print(images.size())

torch.Size([64, 3, 32, 32])


# Design Model

<img src="https://markdown-yqguo.oss-cn-beijing.aliyuncs.com/markdown-yqguo/20210523171246219.png" alt="在这里插入图片描述" style="zoom:33%;" />

In [14]:
class _DenseLayer(nn.Module): #加了Bottleneck和compression（DenseNet-BC）
    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate=0):
        super(_DenseLayer, self).__init__()
        self.drop_rate = drop_rate
        self.dense_layer = nn.Sequential(
            nn.BatchNorm2d(num_input_features),
            nn.ReLU(inplace=True),
            # Bottleneck
            nn.Conv2d(in_channels=num_input_features, out_channels=bn_size * growth_rate, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(bn_size * growth_rate),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=bn_size * growth_rate, out_channels=growth_rate, kernel_size=3, stride=1, padding=1, bias=False)
        )
        self.dropout = nn.Dropout(p=self.drop_rate)

    def forward(self, x):
        y = self.dense_layer(x)
        if self.drop_rate > 0:
            y = self.dropout(y)

        return torch.cat([x, y], dim=1)


class _DenseBlock(nn.Module):
    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate=0):
        super(_DenseBlock, self).__init__()
        layers = []
        for i in range(num_layers):
            layers.append(_DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)


class _TransitionLayer(nn.Module):
    def __init__(self, num_input_features, num_output_features):
        super(_TransitionLayer, self).__init__()
        self.transition_layer = nn.Sequential(
            nn.BatchNorm2d(num_input_features),
            nn.ReLU(inplace=True),
            # compression
            nn.Conv2d(in_channels=num_input_features, out_channels=num_output_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.AvgPool2d(kernel_size=2, stride=2)
        )

    def forward(self, x):
        return self.transition_layer(x)


class DenseNet(nn.Module):
    def __init__(self, num_init_features=64, growth_rate=32, blocks=(6, 12, 24, 16), bn_size=4, drop_rate=0, num_classes=1000):
        super(DenseNet, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=num_init_features, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(num_init_features),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        num_features = num_init_features
        self.layer1 = _DenseBlock(num_layers=blocks[0], num_input_features=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate)
        num_features = num_features + blocks[0] * growth_rate
        self.transtion1 = _TransitionLayer(num_input_features=num_features, num_output_features=num_features // 2)

        num_features = num_features // 2
        self.layer2 = _DenseBlock(num_layers=blocks[1], num_input_features=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate)
        num_features = num_features + blocks[1] * growth_rate
        self.transtion2 = _TransitionLayer(num_input_features=num_features, num_output_features=num_features // 2)

        num_features = num_features // 2
        self.layer3 = _DenseBlock(num_layers=blocks[2], num_input_features=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate)
        num_features = num_features + blocks[2] * growth_rate
        self.transtion3 = _TransitionLayer(num_input_features=num_features, num_output_features=num_features // 2)

        num_features = num_features // 2
        self.layer4 = _DenseBlock(num_layers=blocks[3], num_input_features=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate)
        num_features = num_features + blocks[3] * growth_rate

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  #当数据集是CIFAR和SVHN时，使用1×1的池化，当数据集是ImageNet时使用7×7的池化
        self.fc = nn.Linear(num_features, num_classes)

    def forward(self, x):
        x = self.features(x)

        x = self.layer1(x)
        x = self.transtion1(x)
        x = self.layer2(x)
        x = self.transtion2(x)
        x = self.layer3(x)
        x = self.transtion3(x)
        x = self.layer4(x)
        #print(x.size())

        x = self.avgpool(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)

        return x


def DenseNet121(num_classes):
    return DenseNet(blocks=(6, 12, 24, 16), num_classes=num_classes)

def DenseNet169(num_classes):
    return DenseNet(blocks=(6, 12, 32, 32), num_classes=num_classes)

def DenseNet201(num_classes):
    return DenseNet(blocks=(6, 12, 48, 32), num_classes=num_classes)

def DenseNet264(num_classes):
    return DenseNet(blocks=(6, 12, 64, 48), num_classes=num_classes)

def read_densenet121():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = models.densenet121(pretrained=True)
    model.to(device)
    #print(model)


def get_densenet121(flag, num_classes):
    if flag:
        net = models.densenet121(pretrained=True)
        num_input = net.classifier.in_features
        net.classifier = nn.Linear(num_input, num_classes)
    else:
        net = DenseNet121(num_classes)

    return net

In [15]:
net = get_densenet121(False,10)

# Construct loss and optimizer

In [16]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Training cycle

In [17]:
def train(epoch,device):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data
        inputs,labels=inputs.to(device),labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 300 == 299:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 299))
            running_loss = 0.0

    print('Finished Training')

In [18]:
def test(device):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images,labels=images.to(device),labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on test set: %d %%' % (
        100 * correct / total))

In [19]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# print(device) #cuda:0
net.to(device)
for epoch in range(10):
    train(epoch,device)
    test(device)

[1,   300] loss: 1.891
[1,   600] loss: 1.492
Finished Training
Accuracy of the network on test set: 52 %
[2,   300] loss: 1.226
[2,   600] loss: 1.138
Finished Training
Accuracy of the network on test set: 61 %
[3,   300] loss: 0.987
[3,   600] loss: 0.958
Finished Training
Accuracy of the network on test set: 65 %
[4,   300] loss: 0.806
[4,   600] loss: 0.828
Finished Training
Accuracy of the network on test set: 68 %
[5,   300] loss: 0.699
[5,   600] loss: 0.705
Finished Training
Accuracy of the network on test set: 69 %
[6,   300] loss: 0.591
[6,   600] loss: 0.624
Finished Training
Accuracy of the network on test set: 69 %
[7,   300] loss: 0.489
[7,   600] loss: 0.543
Finished Training
Accuracy of the network on test set: 70 %
[8,   300] loss: 0.406
[8,   600] loss: 0.456
Finished Training
Accuracy of the network on test set: 70 %
[9,   300] loss: 0.350
[9,   600] loss: 0.397
Finished Training
Accuracy of the network on test set: 71 %
[10,   300] loss: 0.282
[10,   600] loss: 0.33