In [1]:
import tensorflow as tf
import numpy as np

from model import DispNet
from dataset import Scene_Flow_disparity

import os
import cv2
import time
import datetime
from random import shuffle
from random import randrange
from tqdm import tqdm_notebook

  from ._conv import register_converters as _register_converters


In [2]:
dataset = Scene_Flow_disparity()

>> already download flyingthings3d__frames_cleanpass.tar of Scene Flow Datasets
>> already extracted flyingthings3d__frames_cleanpass of Scene Flow Datasets
>> already download driving__frames_cleanpass.tar of Scene Flow Datasets
>> already extracted driving__frames_cleanpass of Scene Flow Datasets
>> already download monkaa__frames_cleanpass.tar of Scene Flow Datasets
>> already extracted monkaa__frames_cleanpass of Scene Flow Datasets
>> already download flyingthings3d__frames_finalpass.tar of Scene Flow Datasets
>> already extracted flyingthings3d__frames_finalpass of Scene Flow Datasets
>> already download driving__frames_finalpass.tar of Scene Flow Datasets
>> already extracted driving__frames_finalpass of Scene Flow Datasets
>> already download monkaa__frames_finalpass.tar of Scene Flow Datasets
>> already extracted monkaa__frames_finalpass of Scene Flow Datasets
>> already download flyingthings3d__disparity.tar.bz2 of Scene Flow Datasets
>> already extracted flyingthings3d__disp

In [3]:
directories = [i for i in dataset.data_paths if ('left' in i) and ('.png' in i)]

QUEUE_LENGTH = 20
NUM_OF_THREAD = 12

epoch = 30
rate_of_paths = 1.0
train_batch_size = 2
learning_rate = 0.1**4
train_paths = directories[0:int(len(directories)*rate_of_paths)]

In [4]:
def DataGenerator(data_paths, batch_size = 1):
    if shuffle == True:
        shuffle(data_paths)
    train_left_inputBatch = []
    train_right_inputBatch = []
    train_left_groundTruthBatch = []
    for dir_ in data_paths:
        l_img_nparray = cv2.imread(dir_.replace("Scene Flow Datasets", "resized_SFD"), cv2.IMREAD_COLOR)
        r_img_nparray = cv2.imread(dir_.replace("left", "right").replace("Scene Flow Datasets", "resized_SFD"), cv2.IMREAD_COLOR)     
        
        if 'driving__frames_cleanpass' in dir_:
            tmp = dir_.replace("driving__frames_cleanpass", "driving__disparity")
            dir_ = tmp.replace("frames_cleanpass", "disparity")
        elif 'driving__frames_finalpass' in dir_:
            tmp = dir_.replace("driving__frames_finalpass", "driving__disparity")
            dir_ = tmp.replace("frames_finalpass", "disparity")
        elif 'flyingthings3d__frames_cleanpass' in dir_:
            tmp = dir_.replace("flyingthings3d__frames_cleanpass", "flyingthings3d__disparity")
            dir_ = tmp.replace("frames_cleanpass", "disparity")
        elif 'flyingthings3d__frames_finalpass' in dir_:
            tmp = dir_.replace("flyingthings3d__frames_finalpass", "flyingthings3d__disparity")
            dir_ = tmp.replace("frames_finalpass", "disparity")
        elif 'monkaa__frames_cleanpass' in dir_:
            tmp = dir_.replace("monkaa__frames_cleanpass", "monkaa__disparity")
            dir_ = tmp.replace("frames_cleanpass", "disparity")
        elif 'monkaa__frames_finalpass' in dir_:
            tmp = dir_.replace("monkaa__frames_finalpass", "monkaa__disparity")
            dir_ = tmp.replace("frames_finalpass", "disparity")
        
        ground_truth = np.load(dir_.replace(".png", ".npy").replace("Scene Flow Datasets", "resized_SFD"))
        start_x = randrange(0, 224)
        start_y = randrange(0, 128)
        train_left_inputBatch += [l_img_nparray[2 * start_y:2 * start_y + 256,2 * start_x:2 * start_x + 448,:]]
        train_right_inputBatch += [r_img_nparray[2 * start_y:2 * start_y + 256,2 * start_x:2 * start_x + 448,:]]
        train_left_groundTruthBatch += [ground_truth[start_y:start_y + 128,start_x:start_x + 224,np.newaxis]]
        
        if len(train_left_groundTruthBatch) == batch_size:
            batch_input_x_l = np.array(train_left_inputBatch).astype(np.float32)
            batch_input_x_r = np.array(train_right_inputBatch).astype(np.float32)
            batch_input_y = np.array(train_left_groundTruthBatch)
            train_left_inputBatch = []
            train_right_inputBatch = []
            train_left_groundTruthBatch = []

            yield (batch_input_x_l, batch_input_x_r, batch_input_y)

In [None]:
train_generator = DataGenerator(data_paths = train_paths, batch_size = train_batch_size)
print('training step : ' + str(int(len(train_paths) / train_batch_size)))

queue = tf.FIFOQueue(QUEUE_LENGTH, dtypes=[tf.float32, tf.float32, tf.float32], shapes=[[train_batch_size, 256, 448, 3], 
                                                                                        [train_batch_size, 256, 448, 3], 
                                                                                        [train_batch_size, 128, 224, 1]])
with tf.device('/cpu:0'):
    enqueue = queue.enqueue(next(train_generator))
    qr = tf.train.QueueRunner(queue, [enqueue]*NUM_OF_THREAD)
    sess = tf.Session()
    coord = tf.train.Coordinator()
    threads = qr.create_threads(sess, coord=coord, start=True)
    model = DispNet(img_height = 256,
                    img_width = 448,
                    img_depth = 3,
                    learning_rate = learning_rate, 
                    batch_size = train_batch_size,
                    mode = 'correlation_')
    X_L_batch, X_R_batch, y_batch = queue.dequeue()
with sess.as_default():
    train_op, loss, prediction, tensorboard = model.inference(X_L_batch, X_R_batch, y_batch)
    
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    saver = tf.train.Saver()
    saver.save(sess, './checkpoints/tf-DispNet/model_graph')
    #'''
    tensorboard_writer = tf.summary.FileWriter(
        './TensorBoard/tf-DispNet/trained_in_SceneFlowDataset/batch_size:{0}-{1}epoch'.format(train_batch_size, epoch), 
        sess.graph)
    #'''
    print()
    for i in range(0, epoch):
        for step in tqdm_notebook(range(0, (int)(len(train_paths) / train_batch_size))):
            train_start = time.clock()
            _, summary, loss_val = sess.run([train_op, tensorboard, loss])
            train_end = time.clock()
            if step % 1 == 0:
                tensorboard_writer.add_summary(summary, step)
                print('loss_value = %.3f' % loss_val + '    batch_step_time = ' + str(train_end-train_start), end = '\r')
        saver.save(sess,'./checkpoints/tf-DispNet/trained_in_SceneFlowDataset/batch_size:{0}-{1}epoch'.format(train_batch_size, i+1))

training step : 39824
input image resized by (height = 256, width = 448)



HBox(children=(IntProgress(value=0, max=39824), HTML(value='')))

loss_value = 0.174    batch_step_time = 0.18343300000015006448


HBox(children=(IntProgress(value=0, max=39824), HTML(value='')))

loss_value = 0.151    batch_step_time = 0.1884899999986373675


HBox(children=(IntProgress(value=0, max=39824), HTML(value='')))

loss_value = 0.016    batch_step_time = 0.181731000000581846


HBox(children=(IntProgress(value=0, max=39824), HTML(value='')))

loss_value = 0.012    batch_step_time = 0.18303600000217557


HBox(children=(IntProgress(value=0, max=39824), HTML(value='')))

loss_value = 0.010    batch_step_time = 0.186199000003398475


HBox(children=(IntProgress(value=0, max=39824), HTML(value='')))

loss_value = 0.009    batch_step_time = 0.18971199999941746


HBox(children=(IntProgress(value=0, max=39824), HTML(value='')))

loss_value = 0.063    batch_step_time = 0.182638000005681538


HBox(children=(IntProgress(value=0, max=39824), HTML(value='')))

loss_value = 0.039    batch_step_time = 0.18314300000201913


HBox(children=(IntProgress(value=0, max=39824), HTML(value='')))

loss_value = 0.006    batch_step_time = 0.18362699999852346