In [1]:
import os
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [2]:
from __future__ import absolute_import, division, print_function
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Per-channel mean in (R, G, B) order, float32. matting_laplacians_impl adds the same
# values back to the first three input channels before rescaling to [0, 1].
# NOTE(review): presumably the ImageNet/VGG training mean — confirm against the data pipeline.
MEAN_RGB = np.array([123.68, 116.779, 103.939], dtype = np.float32)

In [4]:
# Pretrained VGG16 parameters: a pickled dict (stored as a 0-d object array) that maps
# layer name -> [weights, biases]; encoding='latin1' lets Python 3 read a Python 2 pickle.
# allow_pickle=True is required on NumPy >= 1.16.3, where np.load rejects pickled
# object arrays by default.
# SECURITY: unpickling can execute arbitrary code — only load a vgg16.npy from a trusted source.
data_dict = np.load('./PretrainedModel/vgg16.npy', allow_pickle=True, encoding='latin1').item()

In [5]:
# Alphabetically sorted layer names available in the pretrained weight dict
sorted(data_dict)

['conv1_1',
 'conv1_2',
 'conv2_1',
 'conv2_2',
 'conv3_1',
 'conv3_2',
 'conv3_3',
 'conv4_1',
 'conv4_2',
 'conv4_3',
 'conv5_1',
 'conv5_2',
 'conv5_3',
 'fc6',
 'fc7',
 'fc8']

In [6]:
# VGG16 layer names in network order: five conv stages with (2, 2, 3, 3, 3)
# convolutions each, followed by the three fully connected layers.
_convs_per_stage = (2, 2, 3, 3, 3)
layers = ['conv{0}_{1}'.format(stage, idx)
          for stage, count in enumerate(_convs_per_stage, start=1)
          for idx in range(1, count + 1)]
layers += ['fc6', 'fc7', 'fc8']

In [7]:
# Print the weight and bias shapes of every layer.
# Each data_dict entry is indexed as [0] = weights, [1] = biases.
for k in layers:
    w = data_dict[k][0]
    b = data_dict[k][1]
    print('Layer {0}, \t\t Weight shape: {1}, \t\tBias shape {2}'.format(k, w.shape, b.shape))

Layer conv1_1, 		 Weight shape: (3, 3, 3, 64), 		Bias shape (64,)
Layer conv1_2, 		 Weight shape: (3, 3, 64, 64), 		Bias shape (64,)
Layer conv2_1, 		 Weight shape: (3, 3, 64, 128), 		Bias shape (128,)
Layer conv2_2, 		 Weight shape: (3, 3, 128, 128), 		Bias shape (128,)
Layer conv3_1, 		 Weight shape: (3, 3, 128, 256), 		Bias shape (256,)
Layer conv3_2, 		 Weight shape: (3, 3, 256, 256), 		Bias shape (256,)
Layer conv3_3, 		 Weight shape: (3, 3, 256, 256), 		Bias shape (256,)
Layer conv4_1, 		 Weight shape: (3, 3, 256, 512), 		Bias shape (512,)
Layer conv4_2, 		 Weight shape: (3, 3, 512, 512), 		Bias shape (512,)
Layer conv4_3, 		 Weight shape: (3, 3, 512, 512), 		Bias shape (512,)
Layer conv5_1, 		 Weight shape: (3, 3, 512, 512), 		Bias shape (512,)
Layer conv5_2, 		 Weight shape: (3, 3, 512, 512), 		Bias shape (512,)
Layer conv5_3, 		 Weight shape: (3, 3, 512, 512), 		Bias shape (512,)
Layer fc6, 		 Weight shape: (25088, 4096), 		Bias shape (4096,)
Layer fc7, 		 Weight shape: (4096,

In [8]:
# Module-level hyper-parameters (a `global` statement is a no-op at module scope).
# num_classes: number of output score channels produced by the decoder / fc8.
# wd: weight-decay constant — not referenced by the cells visible in this notebook.
wd = 5e-4
num_classes = 3

In [9]:
# Scratch check: (4, 3) * 32 * 6 = (768, 576), the two spatial dimensions used for
# the `inputs` / `labels` placeholders (both are multiples of 32).
np.array([4,3])*32*6

array([768, 576])

In [10]:
# Target size for image resizing.
# NOTE(review): resize_scale is not referenced by any visible cell; the order is
# presumably (width, height), since the network input is (768, 576) = (height, width) — confirm.
resize_scale = (576, 768)

In [11]:
# Graph inputs (TF1 placeholders); the batch dimension is left open.
# inputs : float32, 4 channels; matting_laplacians_impl reads the first three channels as the image.
# labels : int32, 3 channels; used as the softmax cross-entropy target for fcn8s_output.
# keeprob: scalar keep probability for the dropout applied after fc6.
inputs = tf.placeholder(dtype=tf.float32, shape = (None, 768, 576, 4), name= 'inputs')
labels = tf.placeholder(dtype=tf.int32, shape = (None, 768, 576, 3), name = 'labels')
keeprob = tf.placeholder(dtype=tf.float32, shape = (), name = 'keeprob')

In [12]:
def get_conv_filter(name):
    """Create the "filter" variable of a conv layer from the pretrained weights.

    The kernel is read from data_dict[name][0] and has layout
    [filter_height, filter_width, in_channels, out_channels]. For 'conv1_1' one
    extra input channel is appended whose kernel is the mean of the existing
    input-channel kernels, so the layer accepts the 4-channel `inputs` tensor.

    Must be called inside the layer's variable scope, because the variable is
    always named "filter".
    """
    kernel = data_dict[name][0]
    if name == 'conv1_1':
        # Mean over the input-channel axis, kept as a 1-channel slab to append
        extra_channel = kernel.mean(axis=2, keepdims=True)
        kernel = np.concatenate([kernel, extra_channel], axis=2)
    return tf.get_variable(name="filter",
                           shape=kernel.shape,
                           initializer=tf.constant_initializer(value=kernel, dtype=tf.float32))

In [13]:
def get_bias(name):
    """Create the "biases" variable for layer `name`.

    For every layer except 'fc8' the bias is initialised from the pretrained
    values in data_dict[name][1]. For 'fc8' a fresh bias of length
    `num_classes` (module-level global) is created with a seeded Xavier
    initializer instead.

    Must be called inside the layer's variable scope, because the variable is
    always named "biases".
    """
    pretrained = data_dict[name][1]
    if name == 'fc8':
        bias_shape = [num_classes]
        bias_init = tf.contrib.layers.xavier_initializer(seed = 1)
    else:
        bias_shape = pretrained.shape
        bias_init = tf.constant_initializer(value=pretrained, dtype=tf.float32)
    return tf.get_variable(name="biases", shape=bias_shape, initializer=bias_init)

In [14]:
def conv_layer( x, name):
    """Apply a pretrained stride-1, SAME-padded convolution + bias + ReLU.

    The filter and bias variables are created under the variable scope `name`
    via get_conv_filter / get_bias.
    """
    with tf.variable_scope(name):
        kernel = get_conv_filter(name)
        biases = get_bias(name)

        conv = tf.nn.conv2d(x, kernel, [1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(tf.nn.bias_add(conv, biases))

In [15]:
def max_pool(x, name):
    """2x2 max pooling with stride 2 and SAME padding; `name` becomes the op name."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME', name=name)

In [16]:
def fc_layer( x, name, num_classes = num_classes, relu = True):
    """Apply one of VGG's fully connected layers as a convolution.

    The kernel is created fresh with a seeded Xavier initializer (it is NOT
    loaded from data_dict); the bias comes from get_bias(name).

    Args:
        x: input feature map.
        name: one of 'fc6', 'fc7', 'fc8'; also used as the variable scope.
        num_classes: output channels of 'fc8'. Defaults to the module-level
            value at definition time.
            NOTE(review): get_bias('fc8') sizes its bias from the global
            num_classes, so passing a different value here would mismatch.
        relu: apply a ReLU after the bias when True.

    Returns:
        The output feature map (stride 1, SAME padding).

    Raises:
        ValueError: if `name` is not a supported layer (previously this
            surfaced as an UnboundLocalError on `shape`).
    """
    kernel_shapes = {
        'fc6': [7, 7, 512, 4096],
        'fc7': [1, 1, 4096, 4096],
        'fc8': [1, 1, 4096, num_classes],
    }
    if name not in kernel_shapes:
        raise ValueError("fc_layer: unsupported layer name {0!r}; expected one of {1}".format(
            name, sorted(kernel_shapes)))

    with tf.variable_scope(name) as scope:
        init = tf.contrib.layers.xavier_initializer( seed = 1, dtype = tf.float32)
        filt = tf.get_variable( name = 'weights', initializer=init, shape = kernel_shapes[name] )
        bias = get_bias(name)

        x = tf.nn.conv2d( x, filt, [ 1, 1, 1, 1], padding='SAME' )
        x = tf.nn.bias_add( x, bias)

        if relu:
            x = tf.nn.relu(x)

        return x
        

In [17]:
# VGG16 encoder. Every pooling output is kept in its own variable; the decoder
# consumes pool3_out, pool4_out and fc7_out.
with tf.name_scope('encoder') as scope:
    
    #### conv1 (takes the 4-channel `inputs` placeholder)
    x = inputs
    x = conv_layer( x, 'conv1_1')
    x = conv_layer( x, 'conv1_2')
    pool1_out = max_pool(x, 'pool1')
    
    #### conv2
    x = conv_layer( pool1_out, 'conv2_1')
    x = conv_layer( x, 'conv2_2')
    pool2_out = max_pool(x, 'pool2')
    
    #### conv3
    x = conv_layer( pool2_out, 'conv3_1')
    x = conv_layer( x, 'conv3_2')
    x = conv_layer( x, 'conv3_3')
    pool3_out = max_pool( x, 'pool3')
    
    #### conv4
    x = conv_layer( pool3_out, 'conv4_1')
    x = conv_layer( x, 'conv4_2')
    x = conv_layer( x, 'conv4_3')
    pool4_out = max_pool( x, 'pool4')
    
    #### conv5
    x = conv_layer( pool4_out, 'conv5_1')
    x = conv_layer( x, 'conv5_2')
    x = conv_layer( x, 'conv5_3')
    pool5_out = max_pool( x, 'pool5')
    
    #### fc6 (as a 7x7 convolution), followed by dropout
    x = fc_layer( pool5_out, 'fc6')
    x = tf.nn.dropout( x, keep_prob=keeprob)
    
    #### fc7 (as a 1x1 convolution); no dropout is applied to its output
    fc7_out = fc_layer( x, 'fc7')
    #x = tf.nn.dropout( fc7_out, keep_prob=keeprob)
    # fc7_out shape is (None, 24, 18, 4096) for the (None, 768, 576, 4) input:
    # five stride-2 SAME poolings divide each spatial dimension by 32.
    
    #### fc8 is disabled; the decoder produces the class scores instead
    #x = fc_layer( x, 'fc8')
    #x = tf.argmax( x, axis = 3)
    
    
 

In [18]:
# Standard deviations of the truncated-normal kernel initializers used in the decoder:
# stddev_1x1 for the 1x1 score convolutions, stddev_conv2d_trans for the transposed convolutions.
stddev_1x1 = 0.001
stddev_conv2d_trans = 0.01

In [19]:
# Scalar L2 regularization rate, fed at run time and shared by every decoder layer's kernel regularizer.
l2_regularization_rate = tf.placeholder(dtype=tf.float32, shape=[], name='l2_regularization_rate')

In [20]:
def _score_1x1(features, layer_name):
    """1x1 convolution mapping `features` to `num_classes` score channels."""
    return tf.layers.conv2d(inputs = features,
                            filters = num_classes,
                            kernel_size = (1, 1),
                            strides = (1, 1),
                            padding = 'same',
                            kernel_initializer = tf.truncated_normal_initializer(stddev = stddev_1x1),
                            kernel_regularizer = tf.contrib.layers.l2_regularizer(l2_regularization_rate),
                            name = layer_name)


def _upsample(scores, factor, layer_name):
    """Transposed convolution upsampling `scores` by `factor` (kernel size is 2 * factor)."""
    return tf.layers.conv2d_transpose(inputs = scores,
                                      filters = num_classes,
                                      kernel_size = (2 * factor, 2 * factor),
                                      strides = (factor, factor),
                                      padding = 'same',
                                      kernel_initializer = tf.truncated_normal_initializer(stddev = stddev_conv2d_trans),
                                      kernel_regularizer = tf.contrib.layers.l2_regularizer(l2_regularization_rate),
                                      name = layer_name)


# FCN-8s style decoder: score fc7, upsample x2 and fuse with scored pool4,
# upsample x2 and fuse with scored pool3, then upsample x8 to input resolution.
with tf.name_scope('decoder') as scope:
    # The skip features are scaled down before scoring (pool3 by 1e-4, pool4 by 1e-2)
    pool3_out_scaled = tf.multiply( pool3_out, 0.0001, name = 'pool3_out_scaled')
    pool3_1x1 = _score_1x1(pool3_out_scaled, 'pool3_1x1')

    pool4_out_scaled = tf.multiply( pool4_out, 0.01, name = 'pool4_out_scaled')
    pool4_1x1 = _score_1x1(pool4_out_scaled, 'pool4_1x1')

    fc7_1x1 = _score_1x1(fc7_out, 'fc7_1x1')

    # 1/32 resolution -> 1/16, fused with the pool4 scores
    fc7_conv2d_trans = _upsample(fc7_1x1, 2, 'fc7_conv2d_trans')
    add_fc7_pool4 = tf.add(fc7_conv2d_trans, pool4_1x1, name='add_fc7_pool4')

    # 1/16 resolution -> 1/8, fused with the pool3 scores
    fc7_pool4_conv2d_trans = _upsample(add_fc7_pool4, 2, 'fc7_pool4_conv2d_trans')
    add_fc7_pool4_pool3 = tf.add(fc7_pool4_conv2d_trans, pool3_1x1, name='add_fc7_pool4_pool3')

    # 1/8 resolution -> full input resolution
    fc7_pool4_pool3_conv2d_trans = _upsample(add_fc7_pool4_pool3, 8, 'fc7_pool4_pool3_conv2d_trans')

    fcn8s_output = tf.identity(fc7_pool4_pool3_conv2d_trans, name='fcn8s_output')

In [21]:
# Sanity check: the decoder output has the input's spatial size and num_classes channels.
fcn8s_output.shape

TensorShape([Dimension(None), Dimension(768), Dimension(576), Dimension(3)])

In [22]:
from data_generator.batch_generator_v3 import BatchGenerator

In [23]:
# Dataset locations handed to BatchGenerator:
#   img_dir            -> img_dirs
#   priorInfo_dir_lsit -> prior_info_dir_list (name spelling kept; it is referenced as-is below)
#   trimap_dir         -> ground_truth_dir of the active generator
#   alpha_dir          -> ground_truth_dir of the disabled (string-literal) generator cell
img_dir = './dataset/training/images/output/filterImg/'
trimap_dir = './dataset/training/images/output/NewTrimap/'
priorInfo_dir_lsit = ['./dataset/training/images/output/shapeMask/']
alpha_dir = './dataset/training/images/output/filterAlpha/'

In [25]:
# Disabled cell: the BatchGenerator setup against alpha_dir is wrapped in a string
# literal, so executing the cell only echoes the text and creates nothing.
"""
bg = BatchGenerator(img_dirs = img_dir,
                    img_file_format = 'png',
                    prior_info_dir_list = priorInfo_dir_lsit,
                    ground_truth_dir = alpha_dir,
                    num_classes = 3)
"""

"\nbg = BatchGenerator(img_dirs = img_dir,\n                    img_file_format = 'png',\n                    prior_info_dir_list = priorInfo_dir_lsit,\n                    ground_truth_dir = alpha_dir,\n                    num_classes = 3)\n"

In [26]:
# Disabled cell: validation-set generation (valid_size=200) kept as a string literal; it is a no-op.
"""
valid_inp, valid_gt = bg.genValidata(valid_size=200, 
                                       convert_labels_to_one_hot=True)
"""

'\nvalid_inp, valid_gt = bg.genValidata(valid_size=200, \n                                       convert_labels_to_one_hot=True)\n'

In [27]:
#print('valid input shape:',valid_inp.shape)
#print('valid ground truth shape', valid_gt.shape)

In [28]:
# NOTE: batch_size is reassigned in later cells (16, then 8); the training loop uses
# whichever assignment was executed last.
batch_size = 4

In [29]:
from closed_form_matting import compute_laplacian
from scipy import sparse

In [30]:
import logging
from logging import handlers

class Logger(object):
    """Thin wrapper that configures a named `logging` logger.

    The logger writes to the console and to a time-rotated log file. The
    configured logger is exposed as `self.logger`.
    """

    # Mapping from short level names to `logging` level constants
    level_relations = {
        'debug':logging.DEBUG,
        'info':logging.INFO,
        'warning':logging.WARNING,
        'error':logging.ERROR,
        'crit':logging.CRITICAL
    }

    def __init__(self,filename,level='info',when='D',backCount=3,fmt='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'):
        """Configure the logger named after `filename`.

        Args:
            filename: path of the log file; also used as the logger name.
            level: one of the keys of `level_relations`.
            when: rotation interval unit for TimedRotatingFileHandler
                ('S' seconds, 'M' minutes, 'H' hours, 'D' days,
                'W0'-'W6' weekday, 'midnight').
            backCount: number of rotated files to keep; older ones are deleted.
            fmt: log record format string.

        Raises:
            ValueError: if `level` is not a known level name.
        """
        if level not in self.level_relations:
            raise ValueError('Unknown log level {0!r}; expected one of {1}'.format(
                level, sorted(self.level_relations)))

        # The file handler opens the file immediately, so its directory must exist.
        log_dir = os.path.dirname(filename)
        if log_dir and not os.path.isdir(log_dir):
            os.makedirs(log_dir)

        self.logger = logging.getLogger(filename)
        self.logger.setLevel(self.level_relations[level])

        # logging.getLogger returns the same object for the same name, so
        # re-running the notebook cell would otherwise stack duplicate handlers
        # and emit every record several times.
        if self.logger.handlers:
            return

        format_str = logging.Formatter(fmt)
        sh = logging.StreamHandler()  # console output
        sh.setFormatter(format_str)
        # File output, rotated automatically at the configured interval
        th = handlers.TimedRotatingFileHandler(filename=filename,when=when,backupCount=backCount,encoding='utf-8')
        th.setFormatter(format_str)
        self.logger.addHandler(sh)
        self.logger.addHandler(th)

In [31]:
# Module-wide logger at DEBUG level; forward_cpu writes its timing messages through log.logger.
log = Logger('./LOG/Time_Analysis.log',level='debug')

In [32]:
def matting_laplacians_impl(xs, prob_bs, prob_fs):
    """Compute one matting Laplacian per sample and return them in COO form.

    This is the NumPy side of `matting_laplacians`, which wraps it in
    tf.py_func and declares the three outputs as int32 / float64 / int64.

    Args:
        xs: batch of network inputs; only the first three channels of each
            sample are used as the image.
        prob_bs: batch of per-pixel background scores.
        prob_fs: batch of per-pixel foreground scores.

    Returns:
        Tuple (indices, values, dense_shapes), each stacked over the batch:
        indices[i] is an (nnz, 2) array of (row, col) pairs, values[i] the
        matching entries, dense_shapes[i] the matrix shape.

        NOTE(review): stacking with np.asarray assumes every sample yields the
        same number of non-zeros; samples with different masks may produce
        ragged lists — confirm with batch sizes > 1.
    """
    mean_rgb = np.array([123.68, 116.779, 103.939], dtype = np.float32)

    all_indices = []
    all_values = []
    all_shapes = []
    for x, prob_b, prob_f in zip(xs, prob_bs, prob_fs):
        # Add the channel mean back and rescale by 255
        # (assumes the inputs were mean-subtracted 0..255 RGB — TODO confirm).
        img = (x[:,:,:3] + mean_rgb) / 255.0

        # Pixels whose background or foreground score exceeds 0.9 are treated
        # as constant; the complement is what compute_laplacian receives.
        consts_map = (0.9 < prob_b) | (0.9 < prob_f)

        coo = compute_laplacian(img, ~consts_map).tocoo()
        # Plain (nnz, 2) ndarray of (row, col) pairs; np.matrix is discouraged by NumPy.
        all_indices.append(np.stack([coo.row, coo.col], axis=1))
        all_values.append(coo.data)
        all_shapes.append(coo.shape)

    return np.asarray(all_indices), np.asarray(all_values), np.asarray(all_shapes)

In [33]:
def _solve(A,b):
    solution = sparse.linalg.spsolve(A.astype(np.float64), b.astype(np.float64))
    return solution

In [34]:
def _diag(x):
    return sparse.diags(x.reshape(-1))

In [35]:
# Scratch check: splitting the last axis yields three single-channel maps.
tf.split(fcn8s_output, [1,1,1], 3)

[<tf.Tensor 'split:0' shape=(?, 768, 576, 1) dtype=float32>,
 <tf.Tensor 'split:1' shape=(?, 768, 576, 1) dtype=float32>,
 <tf.Tensor 'split:2' shape=(?, 768, 576, 1) dtype=float32>]

In [36]:
def matting_laplacians( inputs, prob_bs, prob_fs):
    """Graph-side wrapper that runs matting_laplacians_impl through tf.py_func.

    Returns the three py_func outputs (COO indices, values, dense shapes),
    declared as int32, float64 and int64 respectively.
    """
    out_types = [tf.int32, tf.float64, tf.int64]
    return tf.py_func(matting_laplacians_impl,
                      [inputs, prob_bs, prob_fs],
                      out_types,
                      stateful=True,
                      name = None)

In [37]:
# Scalar float32 variable (initial value 1) passed to the matting layer as `lambda_`.
# NOTE(review): the graph name is spelled 'lambdal_'; renaming it would change the
# variable's name in any saved checkpoint, so it is left untouched.
lambda_ = tf.get_variable(name = 'lambdal_', initializer=tf.constant(1, tf.float32))

In [38]:
def forward_cpu( prob_f, prob_b, lambda_, x_indices, x_values, x_shapes):
    """NumPy forward pass of the matting layer (executed through tf.py_func).

    For every sample it solves
        (lambda * diag(b + f) + L) * alpha = lambda * f
    where L is the sample's matting Laplacian rebuilt from its COO parts.

    Args:
        prob_f, prob_b: batches of foreground / background score maps.
        lambda_: scalar weight; converted with float().
        x_indices, x_values, x_shapes: per-sample COO indices ((nnz, 2)),
            values and dense shapes, as produced by matting_laplacians_impl.

    Returns:
        float32 array of alpha maps, each reshaped to the shape of one
        `prob_b` sample.

    Relies on the module-level `time` function and `log` logger for timing output.
    """
    start_time = time()

    img_shape = prob_b[0].shape
    lambda_f = float(lambda_)

    # Rebuild the sparse Laplacians. They are kept in a plain list: wrapping
    # scipy sparse matrices in an ndarray is fragile and buys nothing here.
    laplacians = [
        sparse.coo_matrix((values, (indices[:,0], indices[:,1])),
                          shape=tuple(int(dim) for dim in dense_shape))
        for indices, values, dense_shape in zip(x_indices, x_values, x_shapes)
    ]
    cost_time = time() - start_time
    log.logger.debug('recontruct laplacian cost: {0}'.format(cost_time))

    alphas = []
    for b, f, laplacian in zip(prob_b, prob_f, laplacians):
        BF_diag = _diag( b + f)
        F = f.reshape(-1)

        D = sparse.csc_matrix(BF_diag * lambda_f + laplacian)
        alpha = _solve( D, F*lambda_f)

        alphas.append(alpha.reshape(img_shape))
    cost_time = time() - start_time
    log.logger.debug('forward cpu cost: {0}'.format(cost_time))
    return np.array(alphas, dtype=np.float32)

In [39]:
def _backward_cpu_impl(prob_f, prob_b, lambda_, x_indices, x_values, x_shapes, alphas, gy):
    """NumPy gradient of the matting layer (executed through tf.py_func).

    With D = lambda * diag(b + f) + L and alpha = D^-1 (lambda * f), returns
    the gradients w.r.t. prob_f, prob_b and lambda_, in that order.
    """
    img_shape = prob_b[0].shape
    lambda_f = float(lambda_)

    grads_f, grads_b, grads_lambda = [], [], []
    for b, f, indices, values, dense_shape, alpha, gy0 in zip(
            prob_b, prob_f, x_indices, x_values, x_shapes, alphas, gy):
        lap = sparse.coo_matrix((values, (indices[:, 0], indices[:, 1])),
                                shape=tuple(int(dim) for dim in dense_shape))
        BF_diag = _diag(b + f)
        gY = gy0.reshape(-1)

        D = sparse.csc_matrix(BF_diag * lambda_f + lap)
        # The forward output already equals D^-1 (lambda * F); flatten it so the
        # vector arithmetic below does not broadcast against the image shape.
        D_inv_F_lambda = alpha.reshape(-1)

        # NOTE(review): for symmetric D the analytic gradient w.r.t. b is
        # -lambda * alpha (element-wise) (D^-1 gY); the expression below, kept
        # from the original, applies D^-1 after the element-wise product —
        # confirm against the reference implementation.
        gb = _solve(D, -lambda_f * (_diag(D_inv_F_lambda) * gY))
        grads_b.append(gb.reshape(img_shape))

        gf = gb + _solve(D, lambda_f * gY)
        grads_f.append(gf.reshape(img_shape))

        gl = _solve(D, -BF_diag * D_inv_F_lambda)
        gl = gl + D_inv_F_lambda / lambda_f
        grads_lambda.append(gl.dot(gY))

    return (np.asarray(grads_f, dtype=np.float32),
            np.asarray(grads_b, dtype=np.float32),
            np.asarray(np.sum(grads_lambda), dtype=np.float32))


def backward_cpu(op, gy):
    """Gradient function registered for the matting layer's py_func op.

    TensorFlow calls this with symbolic tensors, so the numeric work is
    delegated to `_backward_cpu_impl` through tf.py_func.

    Args:
        op: the forward op. Its inputs follow the order used in matting_func:
            [prob_f, prob_b, lambda_, x_indices, x_values, x_shapes].
        gy: gradient w.r.t. the forward output (alpha).

    Returns:
        One gradient per op input: tensors for prob_f, prob_b and lambda_,
        and None for the three Laplacian inputs (no gradient flows to them).
    """
    prob_f = op.inputs[0]
    prob_b = op.inputs[1]
    lambda_ = op.inputs[2]
    x_indices = op.inputs[3]
    x_values = op.inputs[4]
    x_shapes = op.inputs[5]
    alpha = op.outputs[0]

    grad_f, grad_b, grad_lambda = tf.py_func(
        _backward_cpu_impl,
        [prob_f, prob_b, lambda_, x_indices, x_values, x_shapes, alpha, gy],
        [tf.float32, tf.float32, tf.float32],
        stateful=True,
        name='matting_layer_grad')

    # py_func outputs carry no static shape; restore the shapes of the inputs
    # they are gradients of.
    grad_f.set_shape(prob_f.get_shape())
    grad_b.set_shape(prob_b.get_shape())
    grad_lambda = tf.reshape(grad_lambda, tf.shape(lambda_))

    return [grad_f, grad_b, grad_lambda, None, None, None]
    

In [40]:
def py_func(func, inp, Tout, graph, stateful = True, name = None, grad = None):
    """tf.py_func with an optional custom gradient.

    Args:
        func: Python function operating on NumPy arrays.
        inp: list of input tensors.
        Tout: output dtype(s) of `func`.
        graph: graph in which the op is created.
        stateful: forwarded to tf.py_func.
        name: op name.
        grad: gradient function `grad(op, *output_grads)`; when None the op is
            created without a custom gradient.

    Returns:
        The output(s) of tf.py_func.
    """
    if grad is None:
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

    # A gradient can only be registered once per name, so use a fresh one.
    rnd_name = 'PyFuncGrad' + str(np.random.randint(0, int(1E+8)))
    tf.RegisterGradient(rnd_name)(grad)
    # gradient_override_map is keyed by op TYPE, not by op or scope name.
    # tf.py_func creates ops of type "PyFunc" (stateful) or "PyFuncStateless".
    with graph.gradient_override_map({"PyFunc": rnd_name, "PyFuncStateless": rnd_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

In [41]:
# Split the 3-channel decoder output into single-channel maps.
# NOTE(review): the names imply channel order (foreground, background, unknown) —
# confirm against the label encoding produced by BatchGenerator.
prob_fs, prob_bs, prob_us = tf.split(fcn8s_output, [1,1,1], 3)

In [42]:
# Scratch check: static shape of the first split channel.
prob_fs.shape

TensorShape([Dimension(None), Dimension(768), Dimension(576), Dimension(1)])

In [43]:
# Scratch check: static shape of the second split channel.
prob_bs.shape

TensorShape([Dimension(None), Dimension(768), Dimension(576), Dimension(1)])

In [44]:
# Scratch check: static shape of the 4-channel input placeholder.
inputs.shape

TensorShape([Dimension(None), Dimension(768), Dimension(576), Dimension(4)])

In [45]:
from time import time

In [46]:
def matting_func( fcn8s_output, inputs, lambda_, name = None):
    """Build the matting layer on top of the decoder scores.

    The decoder output is split into three single-channel maps; the first two
    (named foreground and background here) drive both the Laplacian
    computation and the forward solve. The third map is not used. The layer
    runs forward_cpu through py_func with backward_cpu as its gradient.

    Returns whatever py_func returns for Tout=[tf.float32].
    """
    with tf.name_scope(name, "matting_layer", [fcn8s_output]) as name:
        prob_fs, prob_bs, prob_us = tf.split(fcn8s_output, [1,1,1], 3)
        lap_indices, lap_values, lap_shapes = matting_laplacians(inputs, prob_bs, prob_fs)
        layer_inputs = [prob_fs, prob_bs, lambda_, lap_indices, lap_values, lap_shapes]
        return py_func(forward_cpu,
                       layer_inputs,
                       [tf.float32],
                       graph = tf.get_default_graph(),
                       name = name,
                       grad = backward_cpu)

In [47]:
# Attach the matting layer to the decoder output.
# NOTE(review): matting_func passes Tout as a one-element list, so alpha_ is likely a
# one-element list rather than a bare tensor — confirm before using it as a tensor.
with tf.name_scope('matting') as scope:
    alpha_ = matting_func(fcn8s_output, inputs, lambda_)
    

In [None]:
# NOTE: this cell has no execution count, and batch_size is set to 8 again before the training loop.
batch_size = 16

In [61]:
def learning_rate_schedule(step):
    """Piecewise-constant learning rate for the given training step.

    steps <= 5000 -> 1e-4, <= 7000 -> 1e-5, <= 8000 -> 3e-6, afterwards 1e-6.
    """
    # (last step of the interval, learning rate), in increasing step order
    schedule = ((5000, 0.0001), (7000, 0.00001), (8000, 0.000003))
    for last_step, rate in schedule:
        if step <= last_step:
            return rate
    return 0.000001

In [62]:
# Trimap pre-training objective: softmax cross-entropy between the decoder
# scores and the one-hot trimap labels, plus the decoder's L2 regularization.
#
# Author's note — default hyper params:
#   learning_rate = 1e-4
#   momentum      = 0.99
#   weight_decay  = 0.005
with tf.name_scope('trimap_pretrain') as scope:
    global_step = tf.Variable(0, trainable=False, name='global_step')
    trimap_approximation_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=fcn8s_output),
        name='trimap_approximation_loss')
    # Fed per step from learning_rate_schedule
    trimap_learning_rate = tf.placeholder(dtype=tf.float32, shape=(), name='learning_rate')

    # get_collection returns a LIST of per-layer regularization tensors. Adding
    # the list directly to the scalar loss stacks it into a vector and broadcasts
    # the cross-entropy into every element, so the terms are summed into a
    # single scalar first.
    trimap_regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    if trimap_regularization_losses:
        trimap_regularization_loss = tf.add_n(trimap_regularization_losses,
                                              name='trimap_regularization_loss')
    else:
        trimap_regularization_loss = tf.constant(0.0, dtype=tf.float32,
                                                 name='trimap_regularization_loss')
    trimap_total_loss = tf.add(trimap_approximation_loss, trimap_regularization_loss,
                               name='trimap_total_loss')

    trimap_optimizer = tf.train.AdamOptimizer(learning_rate=trimap_learning_rate, name='adam_optimizer')
    trimap_train_op = trimap_optimizer.minimize(trimap_total_loss, global_step=global_step, name='trimap_train_op')

In [63]:
# Prediction head: per-pixel class = argmax over the softmax of the decoder scores;
# labels are reduced the same way so both can be compared element-wise.
# NOTE(review): tf.metrics.accuracy is a streaming metric; acc_value presumably only
# changes when acc_update_op is run, and the training loop fetches acc_value alone — confirm.
with tf.name_scope('trimap_predictor'):
    softmax_output = tf.nn.softmax(fcn8s_output, name='trimap_softmax_output')
    predictions_argmax = tf.argmax(softmax_output, axis=-1, name='predictions_argmax', output_type=tf.int64)
    labels_argmax = tf.argmax(labels, axis=-1, name='trimap_labels_argmax', output_type=tf.int64)
    acc_value, acc_update_op = tf.metrics.accuracy(labels=labels_argmax, predictions= predictions_argmax)

In [53]:
# Batch generator for trimap pre-training: images plus the prior-information
# directories as input, trimaps (trimap_dir) as ground truth, 3 classes.
bg = BatchGenerator(img_dirs = img_dir,
                    img_file_format = 'png',
                    prior_info_dir_list = priorInfo_dir_lsit,
                    ground_truth_dir = trimap_dir,
                    num_classes = 3)

In [54]:
# Fixed validation set of 100 samples with one-hot labels; the training loop
# evaluates it one sample at a time.
valid_inp, valid_gt = bg.genValidata(valid_size=100, 
                                       convert_labels_to_one_hot=True)

100%|██████████| 100/100 [00:04<00:00, 23.85it/s]


In [55]:
# Batch size actually used by the training loop below (overrides the earlier assignments).
batch_size = 8

In [None]:
# Pre-train the trimap predictor. Training stops once the best validation
# pixel-match reaches 0.97 or after max_iter training steps, whichever is first.
saver = tf.train.Saver()
model_path = './TrimapPretrainedModel/'
model_name = 'fcn8smatting_pretrained.cpkt'  # NOTE(review): unused; checkpoints use the pattern below
max_iter = 10000
pixel_match = -1      # best validation pixel-match seen so far
pm = pixel_match      # pixel-match of the most recent evaluation
t_losses = []

# Saver.save is not expected to create missing parent directories, so make sure it exists.
if not os.path.isdir(model_path):
    os.makedirs(model_path)

with tf.Session() as sess:
    batchG = bg.generate(batch_size = batch_size,
                       convert_labels_to_one_hot = True)
    init = tf.group([tf.global_variables_initializer(), tf.local_variables_initializer()])
    sess.run(init)
    cnt = 0
    # Bound the loop with max_iter so it terminates even if 0.97 is never reached.
    while pixel_match < 0.97 and cnt < max_iter:
        c_inp, c_gt = next(batchG)
        step = sess.run(global_step)
        lr = learning_rate_schedule(step)
        # Skip batches that do not match the placeholder shapes
        if not (( c_inp.shape == ( batch_size, 768, 576, 4) ) and (c_gt.shape == ( batch_size, 768, 576, 3)) ):
            print('Shape incorrect, c_inp shape:{0}  c_gt shape:{1}'.format(c_inp.shape, c_gt.shape))
            continue
        sess.run(trimap_train_op, feed_dict={inputs: c_inp, labels: c_gt, trimap_learning_rate:lr, keeprob: 0.5, l2_regularization_rate:0.0})

        improved = False
        if cnt%20 == 0:
            # Evaluate on the validation set one sample at a time, with dropout disabled
            t_losses = []
            t_pm = []
            for i in range(valid_inp.shape[0]):
                slice_vinp = np.expand_dims(valid_inp[i,:,:,:],0)
                slice_vgt = np.expand_dims( valid_gt[i,:,:,:],0)
                loss, pd_argmax, lb_argmax = sess.run([trimap_total_loss, predictions_argmax, labels_argmax], feed_dict={inputs: slice_vinp, labels: slice_vgt, keeprob: 1.0, l2_regularization_rate:0.0})
                t_losses.append(np.mean(loss))
                # Fraction of pixels whose predicted class equals the label class
                t_pm.append(np.sum(pd_argmax == lb_argmax)/pd_argmax.size)
            pm = np.mean(t_pm)
            if pm > pixel_match:
                pixel_match = pm
                improved = True
            print('Step {0:00005} \t Learning_rate {1} \t Loss {2} \t pixel_match {3}'.format(step, lr,np.mean(t_losses), pm))
        cnt += 1
        # Checkpoint every 1000 steps, and whenever an evaluation sets a new best
        # above 0.9. The previous `pm > 0.9` test reused the stale pm and wrote a
        # checkpoint on every single step.
        if (cnt%1000 == 0) or (improved and pm > 0.9):
            saver.save(sess, model_path + 'fcn8smatting_pretrained_{0}_{1}.ckpt'.format(cnt, pm))

Step 00000 	 Learning_rate 0.0001 	 Loss 1.0818300247192383 	 pixel_match 0.5301993815104167
Step 00020 	 Learning_rate 0.0001 	 Loss 1.038285493850708 	 pixel_match 0.4139742702907986
Step 00040 	 Learning_rate 0.0001 	 Loss 0.7472904324531555 	 pixel_match 0.6259330692997684
Step 00060 	 Learning_rate 0.0001 	 Loss 0.5512394905090332 	 pixel_match 0.7563595015914353
Step 00080 	 Learning_rate 0.0001 	 Loss 0.6423148512840271 	 pixel_match 0.7563888662832753
Step 00100 	 Learning_rate 0.0001 	 Loss 0.4626580774784088 	 pixel_match 0.8313560881438078
Step 00120 	 Learning_rate 0.0001 	 Loss 0.41962334513664246 	 pixel_match 0.8507206443504053
Step 00140 	 Learning_rate 0.0001 	 Loss 0.3935807943344116 	 pixel_match 0.8524341046368634
Step 00160 	 Learning_rate 0.0001 	 Loss 0.3900660276412964 	 pixel_match 0.8581215639467592
Step 00180 	 Learning_rate 0.0001 	 Loss 0.4077347218990326 	 pixel_match 0.8585902913411458
Step 00200 	 Learning_rate 0.0001 	 Loss 0.3611046075820923 	 pixel_ma

In [None]:
# Summary of the run: uses cnt, t_losses and pm left behind by the training loop cell.
print('{0} loops with batch_size {1} and loss is {2}, we obtained pixel_match: {3}'.format(cnt, batch_size, np.mean(t_losses), pm))