# Обучение различных модификаций FCRN с шорткатами из энкодера в декодер

# Загрузка датасета

In [1]:
import tensorflow
import keras
import h5py
import os
import numpy as np

Using TensorFlow backend.


In [2]:
# Close the handle left over from a previous execution of this cell, if any.
try:
    train_data_file.close()
except NameError:
    # First execution: no handle has been created yet.
    pass
# Open explicitly read-only so a wrong path fails instead of being created
# and the dataset file can never be modified by accident.
train_data_file = h5py.File('/home/kmouraviev/NYU_dataset_hdf5/train_data_fullsize_small.hdf5', 'r')
# np.array(...) copies the whole datasets into memory.
rgbs_train = np.array(train_data_file['data'])
depths_train = np.array(train_data_file['label'])

In [3]:
# Close the handle left over from a previous execution of this cell, if any.
try:
    val_data_file.close()
except NameError:
    # First execution: no handle has been created yet.
    pass
# Open explicitly read-only so a wrong path fails instead of being created
# and the dataset file can never be modified by accident.
val_data_file = h5py.File('/home/kmouraviev/NYU_dataset_hdf5/validation_data_different_scenes.hdf5', 'r')
# np.array(...) copies the whole datasets into memory.
rgbs_val = np.array(val_data_file['data'])
depths_val = np.array(val_data_file['label'])

In [4]:
# Sanity check: array shapes of both splits, then the value range of the
# first training image and of the first training depth map.
print(*(arr.shape for arr in (rgbs_train, depths_train)))
print(*(arr.shape for arr in (rgbs_val, depths_val)))
print(*(stat() for stat in (rgbs_train[0].min, rgbs_train[0].max)))
print(*(stat() for stat in (depths_train[0].min, depths_train[0].max)))

(41685, 384, 512, 3) (41685, 384, 512)
(14266, 384, 512, 3) (14266, 384, 512)
-122.68 150.061
1.74775976467378 4.2530741218842865


# ResNet с Deconvolution-декодером и шорткатами

## Создание модели

In [10]:
from keras.models import Model, load_model
from keras.layers import *
import keras.backend as K
import tensorflow as tf
from keras.applications.resnet50 import ResNet50

In [65]:
# Reset the Keras backend state before a new session is configured and the
# model is rebuilt in the following cells.
K.clear_session()

In [66]:
# Cap this process at 80% of GPU memory and make the resulting session the
# one Keras uses.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
)
K.set_session(tf.Session(config=config))

In [67]:
def apply_upprojection(nn, n_output_filters=None):
    """Apply one up-projection block (upsampling with a residual branch).

    The input is batch-normalized and upsampled, then sent through two
    parallel branches whose outputs are summed:

    * main branch: 3x3 conv -> ReLU -> 3x3 conv
    * projection branch: a single 3x3 conv

    The sum goes through ReLU and dropout (rate 0.5).

    Args:
        nn: Input Keras tensor; its last axis is read as the channel count.
        n_output_filters: Number of filters for every convolution in the
            block. Defaults to half of the input channel count, which is
            what the one-argument form of this function always used.

    Returns:
        The output Keras tensor of the block.
    """
    if n_output_filters is None:
        n_output_filters = int(nn.shape[-1]) // 2

    batchnorm = BatchNormalization()(nn)
    upsampling = UpSampling2D()(batchnorm)

    # Main branch.
    conv = Conv2D(n_output_filters, kernel_size=3, padding='same')(upsampling)
    relu = Activation('relu')(conv)
    conv_3 = Conv2D(n_output_filters, kernel_size=3, padding='same')(relu)

    # Projection (skip) branch, taken from the same upsampled tensor.
    conv_proj = Conv2D(n_output_filters, kernel_size=3, padding='same')(upsampling)

    add = Add()([conv_3, conv_proj])
    relu_2 = Activation('relu')(add)
    return Dropout(0.5)(relu_2)


def apply_upconvolution(nn, n_output_filters=None):
    """Apply one up-convolution block: BN -> upsample -> 3x3 conv -> ReLU -> dropout.

    Args:
        nn: Input Keras tensor; its last axis is read as the channel count.
        n_output_filters: Number of filters of the convolution. Defaults to
            half of the input channel count, which is what the one-argument
            form of this function always used.

    Returns:
        The output Keras tensor of the block (after dropout with rate 0.5).
    """
    if n_output_filters is None:
        n_output_filters = int(nn.shape[-1]) // 2

    batchnorm = BatchNormalization()(nn)
    upsampling = UpSampling2D()(batchnorm)
    conv = Conv2D(n_output_filters, kernel_size=3, padding='same')(upsampling)
    relu = Activation('relu')(conv)
    return Dropout(0.5)(relu)


def apply_deconvolution(nn, n_output_filters):
    """Apply one deconvolution block: stride-2 transposed 3x3 conv -> ReLU.

    Batch normalization and dropout are not applied in this variant; the
    transposed convolution is fed directly with ``nn`` and the ReLU output
    is returned as is.

    Args:
        nn: Input Keras tensor.
        n_output_filters: Number of filters of the transposed convolution.

    Returns:
        The output Keras tensor of the block.
    """
    deconv = Conv2DTranspose(n_output_filters,
                             kernel_size=(3, 3),
                             strides=(2, 2),
                             padding='same',
                             output_padding=1
                            )(nn)
    return Activation('relu')(deconv)


def create_fcrn_model(encoder, deconv_type='projection', h=224, w=224):
    """Build a depth-prediction model: frozen encoder + decoder with shortcuts.

    All encoder layers are marked non-trainable. The encoder output goes
    through a 1x1 convolution (1024 filters) and then five upsampling
    stages. Before stages 1-4 the decoder tensor is concatenated along the
    channel axis with an encoder activation (``add_13``, ``add_7``,
    ``add_3`` and ``conv1`` respectively). A final 3x3 convolution with one
    filter and no activation produces the depth map, which is reshaped to
    ``(h, w)``.

    Args:
        encoder: Keras model that must contain layers named ``conv1``,
            ``add_3``, ``add_7`` and ``add_13``.
            NOTE(review): the ``add_*`` names look auto-generated, so they
            presumably depend on the order in which models were built in
            the current Keras session - confirm.
        deconv_type: Upsampling block to use: ``'projection'``,
            ``'convolution'`` or ``'deconvolution'``.
        h: Height of the output depth map.
        w: Width of the output depth map. ``h * w`` must equal the number
            of elements the last convolution produces per sample.

    Returns:
        A Keras ``Model`` mapping ``encoder.input`` to the ``(h, w)`` depth
        output.

    Raises:
        ValueError: If ``deconv_type`` is not one of the supported values,
            or if the encoder lacks one of the required layers.
    """
    upsamplers = {
        'projection': apply_upprojection,
        'convolution': apply_upconvolution,
        'deconvolution': apply_deconvolution,
    }
    if deconv_type not in upsamplers:
        raise ValueError(
            'Unknown deconv_type %r; expected one of %s'
            % (deconv_type, sorted(upsamplers))
        )
    upsample = upsamplers[deconv_type]

    for layer in encoder.layers:
        layer.trainable = False

    # get_layer() fails with an error naming the missing layer instead of
    # leaving a local variable unbound.
    conv1 = encoder.get_layer('conv1').output
    add3 = encoder.get_layer('add_3').output
    add7 = encoder.get_layer('add_7').output
    add13 = encoder.get_layer('add_13').output
    print(add3.shape, add7.shape, add13.shape)

    # NOTE(review): compiling the encoder on its own looks unnecessary for
    # building the combined model; kept to preserve existing behavior.
    encoder.compile(loss='mean_squared_error', optimizer='adam')

    nn = Conv2D(1024, kernel_size=1, padding='same')(encoder.output)

    # One entry per upsampling stage: (shortcut concatenated before the
    # stage, divisor applied to the channel count to get the filter count).
    stages = (
        (None, 2),
        (add13, 3),
        (add7, 4),
        (add3, 4),
        (conv1, 3),
    )
    for shortcut, divisor in stages:
        if shortcut is not None:
            nn = concatenate([shortcut, nn], axis=-1)
        n_filters = int(nn.shape[-1]) // divisor
        nn = upsample(nn, n_filters)

    # No final activation: the raw convolution output is the prediction.
    depth_output = Conv2D(1, kernel_size=3, padding='same')(nn)
    depth_output = Reshape((h, w))(depth_output)
    return Model(inputs=encoder.input, outputs=depth_output)

In [68]:
# Build the encoder with the dataset's input size and no weights, then copy
# ImageNet weights into it from a second, default-shaped ResNet50.
# NOTE(review): create_fcrn_model looks layers up by the names 'add_3',
# 'add_7' and 'add_13'; these look auto-generated, so resnet_encoder
# presumably has to be the first model built after K.clear_session() -
# confirm before reordering these statements.
resnet_encoder = ResNet50(input_shape=(384, 512, 3), weights=None, include_top=False)
pretrained_encoder = ResNet50(weights='imagenet', include_top=False)
resnet_encoder.set_weights(pretrained_encoder.get_weights())

In [69]:
# Print the encoder's layer table (layer names, output shapes, connections).
resnet_encoder.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 384, 512, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 390, 518, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 192, 256, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 192, 256, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [70]:
# Assemble the full network on top of the encoder using the transposed
# convolution decoder, with the output sized like the 384x512 inputs, and
# print its layer table.
model = create_fcrn_model(
    encoder=resnet_encoder,
    deconv_type='deconvolution',
    h=384,
    w=512,
)
model.summary()

(?, 96, 128, 256) (?, 48, 64, 512) (?, 24, 32, 1024)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 384, 512, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 390, 518, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 192, 256, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 192, 256, 64) 256         conv1[0][0]                      
________________________________________________________

## Обучение на малом датасете

In [73]:
from experiment_utils.training import train_model
from experiment_utils.callbacks import LoggingCallback
from keras.optimizers import Adam

In [74]:
# Settings for one training stage; handed to train_model inside a list.
# NOTE(review): 'trainable_layers' holds one flag per model layer, all True -
# presumably train_model uses it to set each layer's trainable state; confirm.
params = dict(
    optimizer=Adam(lr=3e-5, decay=1e-6),
    epochs=20,
    trainable_layers=[True for _ in model.layers],
    batch_size=32,
    generator=None,
)

In [75]:
# Directory handed to both the logging callback and train_model.
save_dir = '/home/kmouraviev/FCRN_notebooks/train_resnet_with_shortcuts_small_dataset'
callback = LoggingCallback(model, save_dir).get_callback()
# Run training with a single stage of settings on the in-memory arrays.
train_model(
    model, save_dir, [params], callback,
    rgbs_train, depths_train,
    rgbs_val, depths_val,
)

start creating multi_gpu_model
created
Train on 41685 samples, validate on 14266 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20

KeyboardInterrupt: 