<a href="https://colab.research.google.com/github/LikeWind99/DeepLearning/blob/master/fcn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import torch as t
import torchvision
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as T
import cv2
import numpy as np
from torchvision.models.vgg import VGG

batch_size = 4
num_workers = 4
num_epochs = 100

cfg = {
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
}

ranges = {
    'vgg16': ((0, 5), (5, 10), (10, 17), (17, 24), (24, 31)),
}

transform = T.Compose([
    T.Resize(224),
    T.CenterCrop(224),
    T.ToTensor(), 
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])


In [2]:
# 采用VOCSegmentation作为分割对象
trainDataset = torchvision.datasets.VOCSegmentation(root='./newdata', year='2012', image_set='train', download=True)
testDataset = torchvision.datasets.VOCSegmentation(root='./newdata', year='2012', image_set='val', download=True)

trainDataLoader = DataLoader(dataset=trainDataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
testDataLoader = DataLoader(dataset=testDataset, batch_size=batch_size, num_workers=num_workers)


Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar to ./newdata/VOCtrainval_11-May-2012.tar


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Using downloaded and verified file: ./newdata/VOCtrainval_11-May-2012.tar


In [11]:
layers = []
batch_norm = False
for each in cfg['vgg16']:
  in_channels = 3
  if each == 'M':
    layers += [nn.MaxPool2d(kernel_size=2, stride=2)]

  else:
    conv2d = nn.Conv2d(in_channels=in_channels, out_channels=each, kernel_size=3, padding=1)
    if batch_norm:
      layers += [conv2d, nn.BatchNorm2d(num_features=each), nn.ReLU(inplace=True)]
    else:
      layers += [conv2d, nn.ReLU(inplace=True)]
  

    

In [18]:
layers = nn.Sequential(*layers)
layers

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(3, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(3, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(3, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(3, 512, kernel_size=(3, 3), s

In [15]:
class VGGNet(VGG):
  def __init__(self, layers=layers, pretrained=True, requires_grad=True, remove_fc=True, show_params=False):
    super(VGGNet, self).__init__(layers)
    if pretrained:
      exec(f"self.load_state_dict(models.{'vgg16'}(pretrained=True).state_dict())")
    if requires_grad == False:
      for param in super().parameters():
        param.requires_grad = False

    if remove_fc:
      del self.classifier

    if show_params:
      for name, param in self.named_parameters():
        print(name, param.size())

  def forward(self, x):
    output = {}
    for i, (begin, end) in enumerate(self.ranges):
      for layer in range(begin, end):
        x = self.features[layers](x)
      output[f'x{i+1}'] = x

    return output


In [16]:
class FCN8s(nn.Module):

    def __init__(self, pretrained_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.conv6 = nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0, dilation=1)
        self.conv7 = nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0, dilation=1)
        self.relu    = nn.ReLU(inplace=True)
        self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn1     = nn.BatchNorm2d(512)
        self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn2     = nn.BatchNorm2d(256)
        self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn3     = nn.BatchNorm2d(128)
        self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn4     = nn.BatchNorm2d(64)
        self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn5     = nn.BatchNorm2d(32)
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        output = self.pretrained_net(x)
        x5 = output['x5']    # maxpooling5的feature map (1/32)
        x4 = output['x4']    # maxpooling4的feature map (1/16)
        x3 = output['x3']    # maxpooling3的feature map (1/8)
    
        score = self.relu(self.conv6(x5))    # conv6  size不变 (1/32)
        score = self.relu(self.conv7(score)) # conv7  size不变 (1/32)
        score = self.relu(self.deconv1(x5))   # out_size = 2*in_size (1/16)       
        score = self.bn1(score + x4)                      
        score = self.relu(self.deconv2(score)) # out_size = 2*in_size (1/8)           
        score = self.bn2(score + x3)                      
        score = self.bn3(self.relu(self.deconv3(score)))  # out_size = 2*in_size (1/4)
        score = self.bn4(self.relu(self.deconv4(score)))  # out_size = 2*in_size (1/2)
        score = self.bn5(self.relu(self.deconv5(score)))  # out_size = 2*in_size (1)
        score = self.classifier(score)                    # size不变，使输出的channel等于类别数

        return score  