#### FCN_VGG16_32s  
#### Dataset: PASCAL VOC 2012 segmentation (21 classes)

Requirements:
1. BilinearUpSampling.py 
2. [vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5](https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5)

#### 1.建立模型

In [1]:
from keras.layers import *
from keras.regularizers import *
import os
from keras.models import Model
from BilinearUpSampling import *

def FCN_Vgg16_32s(input_shape=None, weight_decay=0., batch_momentum=0.9, batch_shape=None, classes=21,
                  weights_path='./vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'):
    """Build the FCN-32s segmentation model on top of a VGG16 convolutional backbone.

    Args:
        input_shape: Shape of one input sample, passed to ``Input(shape=...)``.
            Used only when ``batch_shape`` is falsy.
        weight_decay: L2 regularisation factor applied to every Conv2D kernel.
        batch_momentum: Unused; kept so existing callers keep working.
        batch_shape: Full input shape including the batch dimension. Takes
            precedence over ``input_shape`` when given.
        classes: Number of output channels of the classifying layer.
        weights_path: HDF5 weight file loaded with ``by_name=True`` (weights are
            matched to layers by layer name). Pass ``None`` to skip loading,
            e.g. when a trained checkpoint is loaded afterwards.

    Returns:
        An uncompiled Keras ``Model`` whose last layer is a softmax activation.
    """
    if batch_shape:
        img_input = Input(batch_shape=batch_shape)
    else:
        img_input = Input(shape=input_shape)

    # (filters, number of 3x3 convolutions) for VGG16 blocks 1-5. The generated
    # layer names (block1_conv1, ..., block5_pool) must stay exactly as they are
    # because the weights are loaded by layer name.
    vgg_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    x = img_input
    for block_idx, (filters, n_convs) in enumerate(vgg_blocks, start=1):
        for conv_idx in range(1, n_convs + 1):
            x = Conv2D(filters, (3, 3), activation='relu', padding='same',
                       name='block%d_conv%d' % (block_idx, conv_idx),
                       kernel_regularizer=l2(weight_decay))(x)
        x = MaxPooling2D((2, 2), strides=(2, 2), name='block%d_pool' % block_idx)(x)

    # Convolutional layers transferred from fully-connected layers
    x = Conv2D(512, (7, 7), activation='relu', padding='same', name='fc1', kernel_regularizer=l2(weight_decay))(x)
    x = Dropout(0.5)(x)
    x = Conv2D(512, (1, 1), activation='relu', padding='same', name='fc2', kernel_regularizer=l2(weight_decay))(x)
    x = Dropout(0.5)(x)

    # Classifying layer: one linear score map per class.
    x = Conv2D(classes, (1, 1), kernel_initializer='he_normal', activation='linear', padding='valid',
               strides=(1, 1), kernel_regularizer=l2(weight_decay))(x)

    # Five stride-2 poolings shrink each spatial dimension by 32; the x32
    # upsampling is presumably meant to restore the input resolution
    # (BilinearUpSampling2D is defined outside this file -- confirm there).
    x = BilinearUpSampling2D(size=(32, 32))(x)
    x = Activation('softmax')(x)
    model = Model(img_input, x)

    if weights_path:
        model.load_weights(weights_path, by_name=True)
    return model

Using TensorFlow backend.


In [2]:
from keras import optimizers

# Build the network for 320x320 RGB inputs and 21 output classes.
model = FCN_Vgg16_32s(input_shape=(320, 320, 3), classes=21)
optimizer = optimizers.RMSprop(lr=1e-5)
# A custom loss function and custom metrics can be plugged in here instead, e.g.:
# loss_fn=softmax_sparse_crossentropy_ignoring_last_label
# metrics=[sparse_accuracy_ignoring_last_label]
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['acc'],
)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 320, 320, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 320, 320, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 320, 320, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 160, 160, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 160, 160, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 160, 160, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 80, 80, 128)       0         
__________

In [3]:
# from keras.utils.vis_utils import plot_model
# plot_model(model,to_file='fcn32s.jpg',show_shapes=True)

#### 2.处理数据，将mask转化为one_hot形式的向量

In [4]:
# RGB colour used for each class in the segmentation masks. The position of a
# colour in this list is its class index (e.g. [0, 0, 0] -> 0, [128, 0, 0] -> 1).
colormap = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128],
            [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], [192, 0, 0],
            [64, 128, 0], [192, 128, 0], [64, 0, 128], [192, 0, 128],
            [64, 128, 128], [192, 128, 128], [0, 64, 0], [128, 64, 0],
            [0, 192, 0], [128, 192, 0], [0, 64, 128]]
# Lookup table from a packed 24-bit colour, (R * 256 + G) * 256 + B, to its class
# index. Each channel ranges over 0..255, hence 256 ** 3 entries. uint8 is enough
# for the class indices and keeps the table at 16 MiB instead of the 128 MiB a
# default float64 array would take. Colours not listed in `colormap` map to 0.
cm2lbl = np.zeros(256 ** 3, dtype='uint8')
for i, cm in enumerate(colormap):
    cm2lbl[(cm[0] * 256 + cm[1]) * 256 + cm[2]] = i  # register the colour -> index entry


def image2label(im):
    """Convert an RGB-coded mask into a 2-D array of class indices.

    The first three channels of every pixel are packed into a single integer,
    (R * 256 + G) * 256 + B, which is then looked up in the module-level
    ``cm2lbl`` table; e.g. [0, 0, 0] becomes 0 and [128, 0, 0] becomes 1.

    Args:
        im: Mask image indexable as ``[row, col, channel]`` with at least three
            channels, e.g. shape (320, 320, 3).

    Returns:
        An int64 array with the mask's first two dimensions, e.g. (320, 320).
    """
    # int32 so that the packed value (up to 256 ** 3 - 1) cannot overflow uint8 input.
    pixels = np.array(im, dtype='int32')
    red = pixels[:, :, 0]
    green = pixels[:, :, 1]
    blue = pixels[:, :, 2]
    packed = (red * 256 + green) * 256 + blue
    labels = cm2lbl[packed]
    return np.array(labels, dtype='int64')



#### 3. 模型训练

In [5]:
from PIL import Image
from keras.preprocessing import image
import numpy as np
base_path='/input0/pascal_2012_seg/'
# val_base_path='/input0/pascal_2012_seg/validation'

def pair_random_crop(x, y, target_size, sync_seed=1, **kwargs):
    """Zero-pad and crop an image and its mask to ``target_size`` with one shared window.

    Args:
        x: Image array indexed as (height, width, channels).
        y: Mask array indexed as (height, width, channels); it is padded and
            cropped with the amounts computed from ``x``, so it is expected to
            have the same height and width as ``x``.
        target_size: ``(height, width)`` of the returned crops.
        sync_seed: Seed for the crop offsets. A fixed seed (the default) always
            yields the same offsets for inputs of a given size; pass ``None``
            for a different random crop on every call.
        **kwargs: Ignored; accepted for call-site compatibility.

    Returns:
        Tuple ``(x_crop, y_crop)``, both cut with the same window.
    """
    # A private generator gives the same draws as seeding the global one, but
    # does not reset the process-wide np.random state on every call.
    rng = np.random.RandomState(sync_seed)
    target_h, target_w = target_size[0], target_size[1]

    # Pad: grow inputs that are smaller than the target, splitting the padding
    # between both sides (the extra pixel, if any, goes to the bottom/right).
    h, w = x.shape[0], x.shape[1]
    pad_h = max(target_h - h, 0)
    pad_w = max(target_w - w, 0)
    pad_width = ((pad_h // 2, pad_h - pad_h // 2), (pad_w // 2, pad_w - pad_w // 2), (0, 0))
    # np.pad is the documented public entry point (np.lib.pad was only an alias).
    x = np.pad(x, pad_width, 'constant', constant_values=0.)
    y = np.pad(y, pad_width, 'constant', constant_values=0.)

    # Crop: offsets are drawn from [0, range), where range is half of the
    # available slack -- kept as in the original so seeded results do not change.
    h, w = x.shape[0], x.shape[1]
    rangeh = (h - target_h) // 2
    rangew = (w - target_w) // 2
    offseth = 0 if rangeh == 0 else rng.randint(rangeh)
    offsetw = 0 if rangew == 0 else rng.randint(rangew)

    h_start, h_end = offseth, offseth + target_h
    w_start, w_end = offsetw, offsetw + target_w

    return x[h_start:h_end, w_start:w_end, :], y[h_start:h_end, w_start:w_end, :]

In [6]:
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard
import random,csv
from skimage import io
import numpy as np

# Create the output directories before the callbacks that write into them.
# exist_ok=True avoids the check-then-create race of os.path.exists() + os.mkdir().
os.makedirs('./models', exist_ok=True)
os.makedirs('./tf_dir', exist_ok=True)

# Save the model only when the monitored validation accuracy improves.
checkpoint = ModelCheckpoint('./models/lab_01_fcn32s.hdf5', monitor='val_acc', save_best_only=True, verbose=1)
tensorboard = TensorBoard(log_dir='./tf_dir', histogram_freq=0, batch_size=20)

def train_val_generator(root_dir, batch_size, num_classes, train=True, target_size=(320, 320), split_seed=0):
    """Yield ``(images, one_hot_masks)`` batches forever from a 90/10 train/validation split.

    ``<root_dir>/meta.csv`` is read once: its first row is skipped as a header,
    and in every following row column 0 is the mask path and column 1 the image
    path, both relative to ``root_dir``.

    Args:
        root_dir: Dataset directory containing ``meta.csv`` and the files it lists.
        batch_size: Number of samples per yielded batch.
        num_classes: Depth of the one-hot encoding; labels outside
            ``0..num_classes-1`` produce an all-zero vector.
        train: If true, serve the first 90% of the shuffled rows, otherwise the
            remaining rows.
        target_size: ``(height, width)`` every image/mask pair is cropped or padded to.
        split_seed: Seed of the shuffle that defines the split. The training and
            the validation generator must use the same value, otherwise their
            subsets overlap.

    Yields:
        ``(img_batch, mask_one_hot)``: the images as float32 scaled to [0, 1] by
        dividing by 255, and the masks as an int32 one-hot array of shape
        ``(batch_size, height, width, num_classes)``.

    Raises:
        ValueError: If the selected subset contains no samples.
    """
    with open('%s/meta.csv' % root_dir, newline='') as fr:
        f_csv = csv.reader(fr)
        next(f_csv)  # skip the header row
        all_list = list(f_csv)

    # Shuffle with a private, fixed-seed RNG: every generator instance then sees
    # the same permutation, so the train and validation subsets are disjoint.
    # (An unseeded shuffle per instance leaks validation images into training.)
    random.Random(split_seed).shuffle(all_list)
    train_num = int(len(all_list) * 0.9)
    # Message reports: total images, training-set size, validation-set size.
    print('总共有%d张图片，训练集为%d,验证集为%d'%(len(all_list),train_num,len(all_list)-train_num))
    samples = all_list[:train_num] if train else all_list[train_num:]
    if not samples:
        # Without this guard the endless loop below would spin without yielding.
        raise ValueError('no samples available for the requested subset')

    class_ids = np.arange(num_classes)
    img_batch, mask_batch = [], []
    while True:
        if train:
            # New sample order on every pass over the training subset.
            random.shuffle(samples)
        for row in samples:
            img = io.imread(os.path.join(root_dir, row[1])).astype('float32')
            img /= 255.
            mask = io.imread(os.path.join(root_dir, row[0]))
            # Training draws a fresh random crop each time; validation keeps a
            # fixed seed so every epoch is evaluated on identical crops.
            img, mask = pair_random_crop(img, mask, target_size, sync_seed=None if train else 1)
            img_batch.append(img)
            mask_batch.append(mask)
            if len(img_batch) < batch_size:
                continue

            # Convert the batch of masks: RGB -> class index -> one-hot.
            images = np.array(img_batch)
            mask_idx = np.stack([image2label(m) for m in mask_batch])
            mask_one_hot = (mask_idx[..., np.newaxis] == class_ids).astype('int32')
            # Samples left over at the end of a pass are carried into the next one.
            img_batch, mask_batch = [], []
            yield images, mask_one_hot
                
            
            
root_dir = '/input1/Pascal_Voc_2012_Segmentation'
# One generator per subset; both read the same meta.csv under root_dir.
train_batches = train_val_generator(root_dir, batch_size=20, num_classes=21)
val_batches = train_val_generator(root_dir, batch_size=20, num_classes=21, train=False)
# 131 training and 15 validation steps of 20 images each per epoch.
history = model.fit_generator(
    train_batches,
    steps_per_epoch=131,
    epochs=40,
    validation_data=val_batches,
    validation_steps=15,
    callbacks=[checkpoint, tensorboard],
)

Epoch 1/40
总共有2913张图片，训练集为2621,验证集为292
总共有2913张图片，训练集为2621,验证集为292

Epoch 00001: val_acc improved from -inf to 0.74736, saving model to ./models/lab_01_fcn32s.hdf5
Epoch 2/40

Epoch 00002: val_acc improved from 0.74736 to 0.79327, saving model to ./models/lab_01_fcn32s.hdf5
Epoch 3/40

Epoch 00003: val_acc improved from 0.79327 to 0.81469, saving model to ./models/lab_01_fcn32s.hdf5
Epoch 4/40

Epoch 00004: val_acc improved from 0.81469 to 0.84022, saving model to ./models/lab_01_fcn32s.hdf5
Epoch 5/40

Epoch 00005: val_acc improved from 0.84022 to 0.85740, saving model to ./models/lab_01_fcn32s.hdf5
Epoch 6/40

Epoch 00006: val_acc improved from 0.85740 to 0.86952, saving model to ./models/lab_01_fcn32s.hdf5
Epoch 7/40

Epoch 00007: val_acc improved from 0.86952 to 0.87757, saving model to ./models/lab_01_fcn32s.hdf5
Epoch 8/40

Epoch 00008: val_acc improved from 0.87757 to 0.88563, saving model to ./models/lab_01_fcn32s.hdf5
Epoch 9/40

Epoch 00009: val_acc improved from 0.88563 to 0