# VGG 16模型

#### 该模型在论文《VERY DEEP CONVOLUTIONAL NETWORKS FOR LARGE-SCALE IMAGE RECOGNITION》中实现
[VERY DEEP CONVOLUTIONAL NETWORKS FOR LARGE-SCALE IMAGE RECOGNITION](https://arxiv.org/abs/1409.1556)

提出了一种非常大的神经网络模型，并且卷积核都为`3*3`，同时pool使用`2*2`进行最大池化

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets,transforms
import numpy as np

In [90]:
train_dataset = datasets.CIFAR10('data/cifar10', download=True,train=True, 
                                 transform=transforms.Compose([
                                     transforms.RandomSizedCrop(32),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                 ]))
test_dataset = datasets.CIFAR10('data/cifar10', 
                                transform=transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                ]))

Files already downloaded and verified


In [91]:
train_loader = torch.utils.data.DataLoader(train_dataset,shuffle=True,batch_size=64)
test_loader = torch.utils.data.DataLoader(test_dataset,batch_size=64)

In [92]:
# 构建vgg16模型
Vgg16 = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
# 这是前面的卷积和池化层

In [98]:
class VGG16(nn.Module):
    def __init__(self):
        super(VGG16,self).__init__()
        self.features = self._make_layers()
        self.classifier = nn.Sequential(
            nn.Linear(3072, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 10),
        )   
    
    def forward(self,x):
        self.features(x)
        x = x.view(x.size(0),-1)
        x = self.classifier(x)
        return x
    
    def _make_layers(self):
        layers = []
        in_channel = 3
        for x in Vgg16:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2,stride=2)]
            else:
                layers += [nn.Conv2d(in_channel, x, kernel_size=3, padding=1),
                          nn.ReLU(inplace=True)]
                in_channel = x
        return nn.Sequential(*layers)

In [99]:
model = VGG16()

In [100]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

In [101]:
model

VGG16 (
  (features): Sequential (
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU (inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU (inplace)
    (4): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU (inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU (inplace)
    (9): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU (inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU (inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU (inplace)
    (16): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), 

In [103]:
for epoch in range(10):
    for t, (data, target) in enumerate(train_loader):        
        data,target = Variable(data),Variable(target)
        pred = model(data)
        loss = loss_fn(pred,target)
        print(epoch,t,loss.data[0])
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

0 0 2.308626413345337
0 1 2.322632312774658
0 2 2.30881404876709
0 3 2.3024017810821533
0 4 2.304530620574951
0 5 2.312730312347412
0 6 2.3132405281066895
0 7 2.3081960678100586
0 8 2.2934482097625732
0 9 2.307502508163452
0 10 2.2964401245117188
0 11 2.3000569343566895
0 12 2.293687105178833
0 13 2.3019235134124756
0 14 2.299318552017212
0 15 2.3069067001342773
0 16 2.2907907962799072
0 17 2.3125104904174805
0 18 2.2795302867889404
0 19 2.2819275856018066
0 20 2.3029062747955322
0 21 2.28049373626709
0 22 2.296945810317993
0 23 2.255983829498291
0 24 2.2826952934265137
0 25 2.2702181339263916
0 26 2.276179313659668
0 27 2.2933664321899414
0 28 2.289134979248047
0 29 2.2786049842834473
0 30 2.281862735748291
0 31 2.2685046195983887
0 32 2.2810394763946533
0 33 2.28056263923645
0 34 2.2981977462768555
0 35 2.271517038345337
0 36 2.268321990966797
0 37 2.297367572784424
0 38 2.2930243015289307
0 39 2.263733148574829
0 40 2.277125835418701
0 41 2.2685296535491943
0 42 2.2477917671203613
0

0 339 2.071108102798462
0 340 2.034736156463623
0 341 2.050726890563965
0 342 2.148341655731201
0 343 2.2341394424438477
0 344 2.2088513374328613
0 345 2.0736465454101562
0 346 2.1365978717803955
0 347 2.040461301803589
0 348 2.099363327026367
0 349 2.0992183685302734
0 350 2.1853909492492676
0 351 2.092517375946045
0 352 2.1149511337280273
0 353 2.0966577529907227
0 354 2.1241352558135986
0 355 2.0867302417755127
0 356 2.1338083744049072
0 357 2.043111562728882
0 358 2.179255247116089
0 359 2.145587205886841
0 360 2.148932695388794
0 361 2.0983290672302246
0 362 2.12691593170166
0 363 1.9912168979644775
0 364 2.0895566940307617
0 365 1.9415191411972046
0 366 2.1331725120544434
0 367 2.0540900230407715
0 368 2.066093921661377
0 369 2.0489680767059326
0 370 2.1017656326293945
0 371 2.0825273990631104
0 372 2.119252920150757
0 373 2.1830151081085205
0 374 2.1857683658599854
0 375 2.0684292316436768
0 376 2.0069668292999268
0 377 2.0501294136047363
0 378 2.07235050201416
0 379 2.029852867

KeyboardInterrupt: 