In [1]:
%matplotlib inline
import tensorflow as tf
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.contrib import slim
from tensorflow.contrib.learn import ModeKeys
from tensorflow.contrib.learn import learn_runner

In [2]:
TRAIN_SIZE = 18500
EVAL_SIZE = 2500
MEAN = 93.6877751708 
STD = 76.9036483865
LR = 0.001

In [3]:
import argparse
import functools
import itertools
import os
batch_size = 16
epochs = 60
TRAIN_STEPS = int(TRAIN_SIZE/batch_size)*epochs
EVAL_STEPS = int(EVAL_SIZE/batch_size)
EPOCH_STEPS = int(TRAIN_SIZE/batch_size)
print ("TRAIN STEPS: ", TRAIN_STEPS)
print ("EVAL STEPS: ", EVAL_STEPS)
print ("EPOCH STEPS: ", EPOCH_STEPS)

TRAIN STEPS:  69360
EVAL STEPS:  156
EPOCH STEPS:  1156


In [4]:
def architecture(inputs, is_training, scope='MnistConvNet'):
    """Return the output operation following the network architecture.
    Args:
        inputs (Tensor): Input Tensor
        is_training (bool): True iff in training mode
        scope (str): Name of the scope of the architecture
    Returns:
         Logits output Op for the network.
    """
    print (inputs)
    with tf.variable_scope(scope):
        # Convolution Layer with 32 filters and a kernel size of 5
        conv1 = tf.layers.conv2d(inputs, 32, 5, activation=tf.nn.relu)
        conv1 = tf.layers.max_pooling2d(conv1, 4, 2)

        conv2 = tf.layers.conv2d(conv1, 64, 3, activation=tf.nn.relu)
        conv2 = tf.layers.max_pooling2d(conv2, 4, 2)

        conv3 = tf.layers.conv2d(conv1, 64, 3, activation=tf.nn.relu)
        conv3 = tf.layers.max_pooling2d(conv3, 4, 2)
        
        fc1 = tf.layers.flatten(conv3)

        fc1 = tf.layers.dense(fc1, 1024)
        fc1 = tf.layers.dropout(fc1, rate=0.5, training=is_training)

        # Output layer, class prediction
        out = tf.layers.dense(fc1, 12)

        return out
    
def nvidia_architecture(inputs, is_training, scope='Nvidia'):
    with tf.variable_scope(scope):
        net = tf.layers.conv2d(inputs, 24, 5, strides=(2, 2), activation=tf.nn.relu, name='conv1')
        net = tf.layers.conv2d(net, 36, 5, strides=(2, 2), activation=tf.nn.relu, name='conv2')
        net = tf.layers.conv2d(net, 48, 5, strides=(2, 2), activation=tf.nn.relu, name='conv3')
        net = tf.layers.conv2d(net, 64, 3, activation=tf.nn.relu, name='conv4')
        net = tf.layers.conv2d(net, 64, 3, activation=tf.nn.relu, name='conv5')
        
        net = tf.layers.flatten(net)
        net = tf.layers.dense(net, 1164, activation=tf.nn.relu, name='fc1')
        net = tf.layers.dropout(net, rate=0.7, training=is_training)
        
        net = tf.layers.dense(net, 100, activation=tf.nn.relu, name='fc2')
        net = tf.layers.dropout(net, rate=0.7, training=is_training)
        
        net = tf.layers.dense(net, 50, activation=tf.nn.relu, name='fc3')
        net = tf.layers.dropout(net, rate=0.7, training=is_training)
        
        net = tf.layers.dense(net, 20, activation=tf.nn.relu, name='fc4')
        
        # scaling based on sfmlearner
        predictions = 0.01 * tf.layers.dense(net, 12, activation=None, name='predictions')
        return predictions
    
def sfmlearner_architecture(inputs, is_training, scope='sfmlearner'):
    with tf.variable_scope(scope):
        regularizer = tf.contrib.layers.l2_regularizer(scale=0.05)
        net = tf.layers.conv2d(inputs, 16, 7, strides=(2, 2), activation=tf.nn.relu, name='conv1', kernel_regularizer=regularizer)
        net = tf.layers.conv2d(net, 32, 5, strides=(2, 2), activation=tf.nn.relu, name='conv2', kernel_regularizer=regularizer)
        net = tf.layers.conv2d(net, 64, 3, strides=(2, 2), activation=tf.nn.relu, name='conv3', kernel_regularizer=regularizer)
        net = tf.layers.conv2d(net, 128, 3, strides=(2, 2), activation=tf.nn.relu, name='conv4', kernel_regularizer=regularizer)
        net = tf.layers.conv2d(net, 256, 3, strides=(2, 2), activation=tf.nn.relu, name='conv5', kernel_regularizer=regularizer)
        net = tf.layers.conv2d(net, 256, 3, strides=(2, 2), activation=tf.nn.relu, name='conv6', kernel_regularizer=regularizer)
        net = tf.layers.conv2d(net, 256, 3, strides=(2, 2), activation=tf.nn.relu, name='conv7', kernel_regularizer=regularizer)
        predictions = tf.layers.dense(net, 12, activation=None, name='predictions')
        predictions = tf.reduce_mean(predictions, [1, 2])
        predictions = 0.01 * tf.reshape(predictions, [-1, ])
        
        return predictions
    
def sfmlearner_architecture2(inputs, is_training, scope='sfmlearner'):
    slim = tf.contrib.slim
    with tf.variable_scope('pose_exp_net') as sc:
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                            normalizer_fn=None,
                            weights_regularizer=slim.l2_regularizer(0.05),
                            activation_fn=tf.nn.relu):
            # cnv1 to cnv5b are shared between pose and explainability prediction
            cnv1  = slim.conv2d(inputs,16,  [7, 7], stride=2, scope='cnv1')
            cnv2  = slim.conv2d(cnv1, 32,  [5, 5], stride=2, scope='cnv2')
            cnv3  = slim.conv2d(cnv2, 64,  [3, 3], stride=2, scope='cnv3')
            cnv4  = slim.conv2d(cnv3, 128, [3, 3], stride=2, scope='cnv4')
            cnv5  = slim.conv2d(cnv4, 256, [3, 3], stride=2, scope='cnv5')
            # Pose specific layers
            with tf.variable_scope('pose'):
                cnv6  = slim.conv2d(cnv5, 256, [3, 3], stride=2, scope='cnv6')
                cnv7  = slim.conv2d(cnv6, 256, [3, 3], stride=2, scope='cnv7')
                pose_pred = slim.conv2d(cnv7, 6, [1, 1], scope='pred', 
                    stride=1, normalizer_fn=None, activation_fn=None)
                pose_avg = tf.reduce_mean(pose_pred, [1, 2])
                # Empirically we found that scaling by a small constant 
                # facilitates training.
                pose_final = tf.reshape(pose_avg, [-1, 6])
    return pose_final

def deepvo_architecture(inputs, is_training, scope='deepvo'):
    slim = tf.contrib.slim
    with tf.variable_scope('deepvo') as sc:
        with slim.arg_scope([slim.conv2d],
                           activation_fn=tf.nn.relu):
            cnv1 = slim.conv2d(inputs, 64, [7, 7], stride=2)
            cnv2 = slim.conv2d(cnv1, 128, [5, 5], stride=2)
            cnv3 = slim.conv2d(cnv2, 256, [5, 5], stride=2)
            cnv3_1 = slim.conv2d(cnv3, 256, [3, 3])
            cnv4 = slim.conv2d(cnv3_1, 512, [3, 3], stride=2)
            cnv4_1 = slim.conv2d(cnv4, 512, [3, 3])
            cnv5 = slim.conv2d(cnv4_1, 512, [3, 3], stride=2)
            cnv5_1 = slim.conv2d(cnv5, 512, [3, 3])
            cnv6 = slim.conv2d(cnv5_1, 1024, [3, 3], stride=2)
            pose_pred = slim.conv2d(cnv6, 6, [1, 1], scope='pred', normalizer_fn=None,
                                   activation_fn=None)
            pose_avg = tf.reduce_mean(pose_pred, [1, 2])
            pose_final = tf.reshape(pose_avg, [-1, 6])
    return pose_final
    
def get_train_op_fn(loss, params, lr):
    return tf.contrib.layers.optimize_loss(loss=loss, global_step=tf.contrib.framework.get_global_step(),
                                          optimizer=tf.train.AdamOptimizer, learning_rate=lr)
    
def model_fn(features, labels, mode, params):
    is_training = mode == ModeKeys.TRAIN
    
    #predictions = nvidia_architecture(features, is_training=is_training)
    predictions = sfmlearner_architecture2(features, is_training=is_training)
    #predictions = deepvo_architecture(features, is_training=is_training)
    
    loss = None
    train_op = None
    eval_metric_ops = {}
    #if mode != ModeKeys.INFER:
        #loss = tf.reduce_mean(tf.losses.absolute_difference(labels, predictions), name='loss')
        #loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions, scope='loss')
        #train_op = get_train_op_fn(loss, params)
        
    # deepvo loss
    if mode != ModeKeys.INFER:
        print (predictions, labels)
        trans_pred, rot_pred = tf.split(predictions, 2, axis=1)  
        trans_label, rot_label = tf.split(labels, 2, axis=1)  

        rot_loss = tf.losses.mean_squared_error(rot_label, rot_pred, scope='rot_loss')
        trans_loss = tf.losses.mean_squared_error(trans_label, trans_pred, scope='trans_loss')
        loss = tf.add(rot_loss, trans_loss)
        
        global_step = tf.train.get_global_step()
        lr = tf.train.exponential_decay(LR, global_step, int(TRAIN_STEPS/5), 0.5, staircase=True, name='lr')    
        train_op = get_train_op_fn(loss, params, lr)
        
#     image_curr, image_prev = tf.split(features, 2, axis=3)
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('rot_loss', rot_loss)
    tf.summary.scalar('trans_loss', trans_loss)
    tf.summary.scalar('lr', lr)

    
    for var in tf.trainable_variables():
        tf.summary.histogram(var.name, var)
#     tf.summary.image('prev_images', image_prev)
#     tf.summary.image('curr_images', image_curr)
#     tf.summary.image('diff_images', tf.abs(tf.subtract(image_prev, image_curr)))
        
    tensors_to_log = {'loss': loss, 'rot_loss': rot_loss, 'trans_loss': trans_loss}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=500)
    train_hooks = [logging_hook]
    
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, train_op=train_op, training_hooks=train_hooks)

In [5]:
HEIGHT = 128
WIDTH = 416
DEPTH = 3

class KittiDataset(object):
    def __init__(self, data_dir, subset='train', use_distortion=True):
        self.data_dir = data_dir
        self.subset = subset
        self.use_distortion = use_distortion
        
    def get_filenames(self):
        if self.subset in ['train', 'validation', 'eval']:
            return [os.path.join(self.data_dir, self.subset + '_eulcolor.tfrecord')]
            #return [os.path.join(self.data_dir, 'train/train00.odometry')]
        else:
            raise ValueError('Invalid data subset "%s"' % self.subset)
            
    def parser(self, record):
        keys_to_features = {
            "pose": tf.FixedLenFeature((), tf.string, default_value=""),
            "img_raw": tf.FixedLenFeature((), tf.string, default_value=""),
            "img_raw_prev": tf.FixedLenFeature((), tf.string, default_value=""),
        }
        parsed = tf.parse_single_example(record, keys_to_features)

        # Perform additional preprocessing on the parsed data.
        image = tf.image.decode_jpeg(parsed["img_raw"])
        image = tf.reshape(image, [HEIGHT, WIDTH, DEPTH])
        image = tf.cast(image, tf.float32)
        
        image_prev = tf.image.decode_jpeg(parsed["img_raw_prev"])
        image_prev = tf.reshape(image_prev, [HEIGHT, WIDTH, DEPTH])
        image_prev = tf.cast(image_prev, tf.float32)
        
        label = tf.decode_raw(parsed["pose"], tf.float64)
        label = tf.reshape(label, [6])
        
        return image, image_prev, label
    
    def make_batch(self, batch_size):
        filenames = self.get_filenames()
        
        dataset = tf.data.TFRecordDataset(filenames).repeat()
        
        # parse records
        dataset = dataset.map(self.parser, num_parallel_calls=8)
        dataset = dataset.shuffle(buffer_size=5000)
        dataset = dataset.batch(batch_size)
        iterator = dataset.make_one_shot_iterator()
        image_batch, image_prev_batch, label_batch = iterator.get_next()
        
        image_batch = self.preprocess(image_batch, image_prev_batch)
        
        return image_batch, label_batch
    
    def _image_augmentation(self, image, seed=42):
        #image = tf.image.random_brightness(image, 0.1, seed=seed)
        #image = tf.image.random_contrast(image, lower=0.9, upper=1.1, seed=seed)
        #image = tf.image.random_hue(image, 0.1, seed=seed)
        #image = tf.image.random_saturation(image, lower=0.9, upper=1.1, seed=seed)
        return image
    
    def preprocess(self, image, image_prev):
        image = self._image_augmentation(image, seed=42)
        image_prev = self._image_augmentation(image_prev, seed=42)
        
        image = tf.divide(image, 255)
        image_prev = tf.divide(image, 255)
        #image = tf.divide(tf.subtract(image, MEAN), STD)
        #image_prev = tf.divide(tf.subtract(image_prev, MEAN), STD)
        image = tf.concat([image, image_prev], axis=3)
        #image = tf.image.resize_image_with_crop_or_pad(image, 25, 100)
        return image
    

    

In [6]:
def input_fn(data_dir, subset, batch_size, use_distortion_for_training=True):
    with tf.device('/cpu:0'):
        use_distortion = subset == 'train' and use_distortion_for_training
        dataset = KittiDataset(data_dir, subset, use_distortion)
        image_batch, label_batch = dataset.make_batch(batch_size)
        return image_batch, label_batch

In [7]:
def get_experiment_fn(data_dir, use_distortion_for_training=True):
    def _experiment_fn(run_config, hparams):
        train_input_fn = functools.partial(input_fn, data_dir, subset='train', batch_size=batch_size, use_distortion_for_training=True)
        eval_input_fn = functools.partial(input_fn, data_dir, subset='eval', batch_size=batch_size)
        
        train_steps = TRAIN_STEPS
        eval_steps = EVAL_STEPS
        
        classifier = tf.estimator.Estimator(model_fn=model_fn, config=run_config, params=hparams)
        
        return tf.contrib.learn.Experiment(classifier, train_input_fn=train_input_fn,
                                          eval_input_fn=eval_input_fn, train_steps=train_steps,
                                          eval_steps=eval_steps)
    return _experiment_fn

In [8]:
def run_experiment(data_dir, use_distortion_for_training):
    """Run the training experiment."""
    # Define model parameters
    params = tf.contrib.training.HParams(
        learning_rate=LR,
        n_classes=6,
        train_steps=TRAIN_STEPS,
        min_eval_frequency=2000
    )

    # Set the run_config and the directory to save the model and stats
    run_config = tf.contrib.learn.RunConfig()
    run_config = run_config.replace(model_dir='./kitti_training_eulcolor')

    
    learn_runner.run(
        experiment_fn=get_experiment_fn(data_dir, use_distortion_for_training),  # First-class function
        run_config=run_config,  # RunConfig
        schedule="train_and_evaluate",  # What to run
        hparams=params  # HParams
    )

In [9]:
tf.app.run(main=run_experiment( '/home/paperspace/DeepOdometry/', False))

INFO:tensorflow:Using config: {'_is_chief': True, '_master': '', '_log_step_count_steps': 100, '_session_config': None, '_keep_checkpoint_every_n_hours': 10000, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_num_worker_replicas': 0, '_task_type': None, '_environment': 'local', '_model_dir': './kitti_training_eulcolorscale', '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_id': 0, '_evaluation_master': '', '_save_summary_steps': 100, '_tf_random_seed': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc430275908>, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600}
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
Tensor("pose_exp_net/pose/Reshape:0", shape=(?, 6), dtype=float32) Tensor("IteratorGetNext:2", shape=(?, 6), dtype=float64, device=/device:CPU:0)
Instructions for updating:
Please switch to tf.train.get_global_step
INFO:tensorflow:Summary name pose_exp_net/

INFO:tensorflow:Evaluation [66/156]
INFO:tensorflow:Evaluation [67/156]
INFO:tensorflow:Evaluation [68/156]
INFO:tensorflow:Evaluation [69/156]
INFO:tensorflow:Evaluation [70/156]
INFO:tensorflow:Evaluation [71/156]
INFO:tensorflow:Evaluation [72/156]
INFO:tensorflow:Evaluation [73/156]
INFO:tensorflow:Evaluation [74/156]
INFO:tensorflow:Evaluation [75/156]
INFO:tensorflow:Evaluation [76/156]
INFO:tensorflow:Evaluation [77/156]
INFO:tensorflow:Evaluation [78/156]
INFO:tensorflow:Evaluation [79/156]
INFO:tensorflow:Evaluation [80/156]
INFO:tensorflow:Evaluation [81/156]
INFO:tensorflow:Evaluation [82/156]
INFO:tensorflow:Evaluation [83/156]
INFO:tensorflow:Evaluation [84/156]
INFO:tensorflow:Evaluation [85/156]
INFO:tensorflow:Evaluation [86/156]
INFO:tensorflow:Evaluation [87/156]
INFO:tensorflow:Evaluation [88/156]
INFO:tensorflow:Evaluation [89/156]
INFO:tensorflow:Evaluation [90/156]
INFO:tensorflow:Evaluation [91/156]
INFO:tensorflow:Evaluation [92/156]
INFO:tensorflow:Evaluation [

INFO:tensorflow:trans_loss = 6517.267, loss = 13777.78, rot_loss = 7260.513 (42.895 sec)
INFO:tensorflow:global_step/sec: 11.5781
INFO:tensorflow:loss = 8773.369, step = 4101 (8.636 sec)
INFO:tensorflow:global_step/sec: 11.5445
INFO:tensorflow:loss = 8808.069, step = 4201 (8.665 sec)
INFO:tensorflow:global_step/sec: 11.6222
INFO:tensorflow:loss = 11257.837, step = 4301 (8.601 sec)
INFO:tensorflow:global_step/sec: 10.9042
INFO:tensorflow:loss = 5443.968, step = 4401 (9.174 sec)
INFO:tensorflow:global_step/sec: 11.2815
INFO:tensorflow:loss = 5015.2524, step = 4501 (8.862 sec)
INFO:tensorflow:trans_loss = 1976.4235, loss = 5015.2524, rot_loss = 3038.8289 (43.938 sec)
INFO:tensorflow:global_step/sec: 11.5184
INFO:tensorflow:loss = 3391.6506, step = 4601 (8.684 sec)
INFO:tensorflow:global_step/sec: 11.6501
INFO:tensorflow:loss = 3466.141, step = 4701 (8.581 sec)
INFO:tensorflow:global_step/sec: 11.5786
INFO:tensorflow:loss = 3429.701, step = 4801 (8.638 sec)
INFO:tensorflow:global_step/sec:

INFO:tensorflow:Evaluation [79/156]
INFO:tensorflow:Evaluation [80/156]
INFO:tensorflow:Evaluation [81/156]
INFO:tensorflow:Evaluation [82/156]
INFO:tensorflow:Evaluation [83/156]
INFO:tensorflow:Evaluation [84/156]
INFO:tensorflow:Evaluation [85/156]
INFO:tensorflow:Evaluation [86/156]
INFO:tensorflow:Evaluation [87/156]
INFO:tensorflow:Evaluation [88/156]
INFO:tensorflow:Evaluation [89/156]
INFO:tensorflow:Evaluation [90/156]
INFO:tensorflow:Evaluation [91/156]
INFO:tensorflow:Evaluation [92/156]
INFO:tensorflow:Evaluation [93/156]
INFO:tensorflow:Evaluation [94/156]
INFO:tensorflow:Evaluation [95/156]
INFO:tensorflow:Evaluation [96/156]
INFO:tensorflow:Evaluation [97/156]
INFO:tensorflow:Evaluation [98/156]
INFO:tensorflow:Evaluation [99/156]
INFO:tensorflow:Evaluation [100/156]
INFO:tensorflow:Evaluation [101/156]
INFO:tensorflow:Evaluation [102/156]
INFO:tensorflow:Evaluation [103/156]
INFO:tensorflow:Evaluation [104/156]
INFO:tensorflow:Evaluation [105/156]
INFO:tensorflow:Evalua

INFO:tensorflow:loss = 1830.4071, step = 11101 (9.138 sec)
INFO:tensorflow:global_step/sec: 10.8632
INFO:tensorflow:loss = 1780.4163, step = 11201 (9.204 sec)
INFO:tensorflow:global_step/sec: 10.4191
INFO:tensorflow:loss = 1794.0016, step = 11301 (9.598 sec)
INFO:tensorflow:global_step/sec: 10.3941
INFO:tensorflow:loss = 2700.4165, step = 11401 (9.621 sec)
INFO:tensorflow:global_step/sec: 10.7518
INFO:tensorflow:loss = 8368.854, step = 11501 (9.302 sec)
INFO:tensorflow:trans_loss = 427.83618, loss = 8368.854, rot_loss = 7941.0176 (46.863 sec)
INFO:tensorflow:global_step/sec: 10.9473
INFO:tensorflow:loss = 2393.9414, step = 11601 (9.133 sec)
INFO:tensorflow:global_step/sec: 10.4459
INFO:tensorflow:loss = 4090.1655, step = 11701 (9.573 sec)
INFO:tensorflow:global_step/sec: 11.0708
INFO:tensorflow:loss = 4388.449, step = 11801 (9.032 sec)
INFO:tensorflow:global_step/sec: 11.1574
INFO:tensorflow:loss = 3234.6655, step = 11901 (8.963 sec)
INFO:tensorflow:global_step/sec: 11.2235
INFO:tensor

INFO:tensorflow:Evaluation [98/156]
INFO:tensorflow:Evaluation [99/156]
INFO:tensorflow:Evaluation [100/156]
INFO:tensorflow:Evaluation [101/156]
INFO:tensorflow:Evaluation [102/156]
INFO:tensorflow:Evaluation [103/156]
INFO:tensorflow:Evaluation [104/156]
INFO:tensorflow:Evaluation [105/156]
INFO:tensorflow:Evaluation [106/156]
INFO:tensorflow:Evaluation [107/156]
INFO:tensorflow:Evaluation [108/156]
INFO:tensorflow:Evaluation [109/156]
INFO:tensorflow:Evaluation [110/156]
INFO:tensorflow:Evaluation [111/156]
INFO:tensorflow:Evaluation [112/156]
INFO:tensorflow:Evaluation [113/156]
INFO:tensorflow:Evaluation [114/156]
INFO:tensorflow:Evaluation [115/156]
INFO:tensorflow:Evaluation [116/156]
INFO:tensorflow:Evaluation [117/156]
INFO:tensorflow:Evaluation [118/156]
INFO:tensorflow:Evaluation [119/156]
INFO:tensorflow:Evaluation [120/156]
INFO:tensorflow:Evaluation [121/156]
INFO:tensorflow:Evaluation [122/156]
INFO:tensorflow:Evaluation [123/156]
INFO:tensorflow:Evaluation [124/156]
INF

KeyboardInterrupt: 