In [1]:
import tensorflow as tf
import keras
import keras.backend as K
import keras.layers as KL
import keras.initializers as KI
import keras.engine as KE
import keras.models as KM
import numpy as np
import os
import cv2
import skimage
import h5py
import sys
sys.path.append('..')
from cfg.config import Config
import utils.matterport_utils as matterport_utils
import utils.matterport_visualize as matterport_visualize
from models import data_generator, model_misc
from models.padnet_v1_0 import PADNet
from utils.misc import load_weights_from_hdf5_group_by_name
from dataIO.a2d_dataset import A2DDataset

from distutils.version import LooseVersion

# Fail fast when the installed TensorFlow / Keras are older than the minimum
# versions this notebook requires.
tf_installed = LooseVersion(tf.__version__)
keras_installed = LooseVersion(keras.__version__)
assert tf_installed >= LooseVersion("1.3")
assert keras_installed >= LooseVersion('2.0.8')

# Set CUDA_VISIBLE_DEVICES to devices 0-3 and echo the value that is now in effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
print(os.environ["CUDA_VISIBLE_DEVICES"])

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


0,1,2,3


In [2]:
class A2DConfig(Config):
    """Configuration for training on the A2D dataset.

    Derives from the base Config class and overrides the values that are
    specific to this dataset and to the multi-GPU setup used in this notebook.
    """
    # Give the configuration a recognizable name
    NAME = "a2d"

    # Train on 4 GPUs with 2 images per GPU, which gives a batch size of 8
    # (GPUs * images/GPU).
    GPU_COUNT = 4
    IMAGES_PER_GPU = 2

    # Number of classes (including background)
    NUM_ACTOR_CLASSES = 1 + 7  # background + 7 objects
    # NOTE(review): unclear from here whether this count includes a
    # background / "no action" class -- confirm against the dataset.
    NUM_ACTION_CLASSES = 9

    # Use small images for faster training. These set the limits of the small
    # side and the large side, and together they determine the image shape.
    IMAGE_MIN_DIM = 256
    IMAGE_MAX_DIM = 256

    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 64

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 1000

    # Use small validation steps since the epoch is small.
    # NOTE(review): "STPES" looks like a misspelling of "STEPS". Presumably it
    # has to match the attribute name the base Config / training code reads, so
    # verify that before renaming it.
    VALIDATION_STPES = 100

    # Leading dimension of RGB_CLIP_SHAPE / FLOW_CLIP_SHAPE below
    # (presumably the number of frames per clip -- TODO confirm).
    TIMESTEPS = 8

    # Total batch size: images per GPU times number of GPUs.
    BATCH_SIZE = IMAGES_PER_GPU * GPU_COUNT
    # Fixed height / width used to build the shape arrays below.
    IMAGE_H = 256
    IMAGE_W = 256
    # [height, width, 3]
    IMAGE_SHAPE = np.array([IMAGE_H, IMAGE_W, 3])
    # [timesteps, height, width, 3]
    RGB_CLIP_SHAPE = np.array([TIMESTEPS, IMAGE_H, IMAGE_W, 3])
    # [timesteps, height, width, 2] (presumably the two optical-flow components)
    FLOW_CLIP_SHAPE = np.array([TIMESTEPS, IMAGE_H, IMAGE_W, 2])
    
# Instantiate the configuration and print its values for the record.
config = A2DConfig()
config.display()


Configurations:
BACKBONE_SHAPES                [[64 64]
 [32 32]
 [16 16]
 [ 8  8]
 [ 4  4]]
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     8
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FLOW_CLIP_SHAPE                [  8 256 256   2]
GPU_COUNT                      4
IMAGES_PER_GPU                 2
IMAGE_H                        256
IMAGE_MAX_DIM                  256
IMAGE_MIN_DIM                  256
IMAGE_PADDING                  True
IMAGE_SHAPE                    [256 256   3]
IMAGE_W                        256
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.002
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTANCES               100
MEAN_PIXEL                     [123.7 116.8 103.9]
MINI_MASK_SHAPE                (56, 56)
NAME                           a2d
NUM_ACTION_CLASSES        

In [3]:
# Both splits are read from the same A2D release directory.
A2D_ROOT = '/vision/u/jingweij/Datasets/A2D/Release'

# Training split
dataset_train = A2DDataset('train', A2D_ROOT)
dataset_train.prepare()

# Validation split (uses the dataset's 'test' subset)
dataset_val = A2DDataset('test', A2D_ROOT)
dataset_val.prepare()

In [4]:
# Build PADNet in training mode using the A2D configuration defined above.
model = PADNet(
    mode='training',
    config=config,
    model_dir='../outputs/padnet_v1.0',
)

In [6]:
# HDF5 file holding the COCO-pretrained Mask R-CNN weights used for initialization.
mrcnn_coco_pretrained_filename = (
    '/vision/u/jingweij/VideoObjSeg/other_repos/Mask_RCNN'
    '/mask_rcnn_coco.h5'
)

In [7]:
# Map every stream-specific layer name ('rgb_*' / 'flow_*') to the same name
# with the stream prefix removed (presumably the name the layer has in the
# pretrained weights file -- the map is handed to the weight loader below).
layer_name_map = {}
all_layer_names = [x.name for x in model.keras_model.inner_model.layers]
for ln in all_layer_names:
    for prefix in ('rgb_', 'flow_'):
        if ln.startswith(prefix):
            # Slice the prefix off instead of calling ln.lstrip(prefix):
            # str.lstrip() treats its argument as a *set of characters*, so
            # 'rgb_rpn_class'.lstrip('rgb_') gives 'pn_class' and
            # 'rgb_resnet_3d'.lstrip('rgb_') gives 'esnet_3d'.
            layer_name_map[ln] = ln[len(prefix):]
            break

In [8]:
# Display the prefixed -> un-prefixed layer-name mapping built above for inspection.
layer_name_map

{'flow_ROI': 'ROI',
 'flow_magnitude': 'magnitude',
 'flow_mrcnn_class_bn1': 'mrcnn_class_bn1',
 'flow_mrcnn_class_conv1': 'mrcnn_class_conv1',
 'flow_mrcnn_mask': 'mrcnn_mask',
 'flow_mrcnn_mask_bn1': 'mrcnn_mask_bn1',
 'flow_mrcnn_mask_bn2': 'mrcnn_mask_bn2',
 'flow_mrcnn_mask_bn3': 'mrcnn_mask_bn3',
 'flow_mrcnn_mask_bn4': 'mrcnn_mask_bn4',
 'flow_mrcnn_mask_conv1': 'mrcnn_mask_conv1',
 'flow_mrcnn_mask_conv2': 'mrcnn_mask_conv2',
 'flow_mrcnn_mask_conv3': 'mrcnn_mask_conv3',
 'flow_mrcnn_mask_conv4': 'mrcnn_mask_conv4',
 'flow_mrcnn_mask_deconv': 'mrcnn_mask_deconv',
 'flow_proposal_targets': 'proposal_targets',
 'flow_resnet_3d': 'resnet_3d',
 'flow_roi_align_classifier': 'roi_align_classifier',
 'flow_roi_align_mask': 'roi_align_mask',
 'flow_rpn_bbox': 'rpn_bbox',
 'flow_rpn_class': 'rpn_class',
 'flow_rpn_class_logits': 'rpn_class_logits',
 'flow_rpn_model': 'rpn_model',
 'flow_top_down': 'top_down',
 'rgb_ROI': 'ROI',
 'rgb_mrcnn_class_bn1': 'mrcnn_class_bn1',
 'rgb_mrcnn_clas

In [9]:
# Load weights from the pretrained HDF5 file into each PADNet sub-model,
# matching layers by name and printing a banner before each group.
with h5py.File(mrcnn_coco_pretrained_filename, mode='r') as f:
    # Each entry: (banner, positional args after the file handle, keyword args),
    # listed in the order the groups are loaded.
    load_steps = [
        ('heads:',
         (model.keras_model.inner_model.layers, layer_name_map),
         {'skip_mismatch': True}),
        ('rgb resnet 3d spatial layers:', (model.rgb_resnet_3d_model.layers,), {}),
        ('flow resnet 3d spatial layers:', (model.flow_resnet_3d_model.layers,), {}),
        ('rgb top down:', (model.rgb_top_down_model.layers,), {}),
        ('flow top down:', (model.flow_top_down_model.layers,), {}),
        # The RPN sub-models are passed as single-element layer lists, each
        # with its own explicit name mapping.
        ('rgb rpn:', ([model.r_rpn], {'rgb_rpn_model': 'rpn_model'}), {}),
        ('flow rpn:', ([model.f_rpn], {'flow_rpn_model': 'rpn_model'}), {}),
    ]
    for banner, extra_args, extra_kwargs in load_steps:
        print(banner)
        load_weights_from_hdf5_group_by_name(f, *extra_args, **extra_kwargs)

heads:


  weight_values[i].shape))
  weight_values[i].shape))
  weight_values[i].shape))
  weight_values[i].shape))
  weight_values[i].shape))
  weight_values[i].shape))
  ' ({} vs {}).'.format(len(symbolic_weights), len(weight_values)))


Weight value tuples:
[(<tf.Variable 'rpn_conv_shared_1/kernel:0' shape=(3, 3, 256, 512) dtype=float32_ref>,
  <HDF5 dataset "kernel:0": shape (3, 3, 256, 512), type "<f4">),
 (<tf.Variable 'rpn_conv_shared_1/bias:0' shape=(512,) dtype=float32_ref>,
  <HDF5 dataset "bias:0": shape (512,), type "<f4">),
 (<tf.Variable 'rpn_class_raw_1/kernel:0' shape=(1, 1, 512, 6) dtype=float32_ref>,
  <HDF5 dataset "kernel:0": shape (1, 1, 512, 6), type "<f4">),
 (<tf.Variable 'rpn_class_raw_1/bias:0' shape=(6,) dtype=float32_ref>,
  <HDF5 dataset "bias:0": shape (6,), type "<f4">),
 (<tf.Variable 'rpn_bbox_pred_1/kernel:0' shape=(1, 1, 512, 12) dtype=float32_ref>,
  <HDF5 dataset "kernel:0": shape (1, 1, 512, 12), type "<f4">),
 (<tf.Variable 'rpn_bbox_pred_1/bias:0' shape=(12,) dtype=float32_ref>,
  <HDF5 dataset "bias:0": shape (12,), type "<f4">),
 (<tf.Variable 'rgb_mrcnn_mask_conv1/kernel:0' shape=(3, 3, 256, 256) dtype=float32_ref>,
  <HDF5 dataset "kernel:0": shape (3, 3, 256, 256), type "<f4">

rgb resnet 3d spatial layers:
flow resnet 3d spatial layers:
rgb top down:
flow top down:
rgb rpn:
flow rpn:
