In [1]:
import cv2, glob, itertools, json
import numpy as np
from os.path import join
import os, tqdm
from pathlib import Path

import tensorflow as tf
from feature_extractor import MobileNet, Resnet, Vgg16
from modules import atrous_spatial_pyramid_pooling
from datetime import datetime


  data = yaml.load(f.read()) or {}


In [2]:
class UVExtractor(object):
    """Backbone + ASPP network that regresses a 2-channel map from 60x60x3 inputs.

    Creating an instance resets the default TensorFlow graph, builds the whole
    graph (backbone, ASPP head, smooth-L1 loss, Adam train op), opens a session
    and runs the global variable initializer.  Call `close()` when finished.

    Placeholders to supply through `feed_dict`:
        inputs         float32, shape [None, 60, 60, 3]
        labels         float32, shape [None, 60, 60, 2]
        is_training    bool, forwarded to the ResNet / MobileNet backbones
        target_height  int32, height of `outputs_resized`
        target_width   int32, width of `outputs_resized`
        weight_decay   float32, scale of the L2 regularizer
        learning_rate  float32, learning rate of the Adam optimizer
    """

    def __init__(self, base_architecture, training=True, ignore_label=0, batch_norm_momentum=0.9997, pre_trained_model=None, log_dir='./'):
        """Build the graph, open a session and initialise every variable.

        Args:
            base_architecture: backbone name: 'vgg16', 'resnet_<n_layers>' or
                'mobilenet_<depth_multiplier>[_<extra>]'.
            training: when True, a time-stamped sub-directory of `log_dir` is
                used for a `tf.summary.FileWriter` and loss summaries are built.
            ignore_label: stored on the instance; the current loss does not use it.
            batch_norm_momentum: forwarded to the backbone constructor.
            pre_trained_model: optional checkpoint path used to initialise the
                variables living under the 'backbone/' scope.
            log_dir: parent directory of the summary logs.

        Raises:
            ValueError: if `base_architecture` is not recognised.
        """
        # Start from an empty graph so the class can be instantiated repeatedly
        # inside the same kernel without variable-name clashes.
        tf.reset_default_graph()

        self.is_training = tf.placeholder(tf.bool, None, name='is_training')
        self.ignore_label = ignore_label
        self.inputs_shape = [None, 60, 60, 3]
        self.labels_shape = [None, 60, 60, 2]
        self.training = training
        self.inputs = tf.placeholder(tf.float32, shape=self.inputs_shape, name='inputs')
        self.labels = tf.placeholder(tf.float32, shape=self.labels_shape, name='labels')

        self.target_height = tf.placeholder(tf.int32, None, name='target_image_height')
        self.target_width = tf.placeholder(tf.int32, None, name='target_image_width')

        self.weight_decay = tf.placeholder(tf.float32, None, name='weight_decay')
        self.regularizer = tf.contrib.layers.l2_regularizer(scale=self.weight_decay)
        self.batch_norm_momentum = batch_norm_momentum

        self.feature_map = self.backbone_initializer(base_architecture)
        # Must happen before the head is built and before the global
        # initializer runs: only backbone variables exist at this point.
        if pre_trained_model:
            self.initialize_backbone_from_pretrained_weights(pre_trained_model)
        self.outputs, self.outputs_resized = self.model_initializer()

        self.learning_rate = tf.placeholder(tf.float32, None, name='learning_rate')
        self.loss = self.loss_initializer()
        self.optimizer = self.optimizer_initializer()

        # Initialize tensorflow session
        self.saver = tf.train.Saver()
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        if self.training:
            self.train_step = 0
            now = datetime.now()
            self.log_dir = os.path.join(log_dir, now.strftime('%Y%m%d-%H%M%S'))
            self.writer = tf.summary.FileWriter(self.log_dir, tf.get_default_graph())
            self.train_summaries, self.valid_summaries = self.summary()

    @staticmethod
    def _parse_mobilenet_depth_multiplier(base_architecture):
        """Return the first numeric '_'-separated token after the family name.

        'mobilenet_1.0' and 'mobilenet_1.0_224' both give 1.0.  The trailing
        token is presumably an input resolution (slim-style naming) and must
        not be taken as the multiplier.

        Raises:
            ValueError: if no token after the family name parses as a float.
        """
        for token in base_architecture.split('_')[1:]:
            try:
                return float(token)
            except ValueError:
                continue
        raise ValueError('Unknown backbone architecture!')

    def backbone_initializer(self, base_architecture):
        """Instantiate the requested feature extractor under the 'backbone' scope.

        Returns:
            Whatever the selected constructor (Vgg16 / Resnet / MobileNet)
            returns; it is used as the feature map fed to the ASPP head.

        Raises:
            ValueError: if `base_architecture` matches no supported family.
        """
        with tf.variable_scope('backbone'):
            if base_architecture == 'vgg16':
                features = Vgg16(self.inputs, self.weight_decay, self.batch_norm_momentum)
            elif base_architecture.startswith('resnet'):
                n_layers = int(base_architecture.split('_')[-1])
                features = Resnet(n_layers, self.inputs, self.weight_decay, self.batch_norm_momentum, self.is_training)
            elif base_architecture.startswith('mobilenet'):
                depth_multiplier = self._parse_mobilenet_depth_multiplier(base_architecture)
                features = MobileNet(depth_multiplier, self.inputs, self.weight_decay, self.batch_norm_momentum, self.is_training)
            else:
                raise ValueError('Unknown backbone architecture!')

        return features

    def model_initializer(self):
        """Build the ASPP head and the 2-channel 1x1 convolution on top of it.

        Returns:
            (outputs, outputs_resized): the raw logits at feature-map
            resolution, and the same logits bilinearly resized to
            (target_height, target_width).
        """
        pools = atrous_spatial_pyramid_pooling(inputs=self.feature_map, filters=256, regularizer=self.regularizer)
        logits = tf.layers.conv2d(inputs=pools, filters=2, kernel_size=(1, 1), name='logits')
        outputs = logits
        outputs_resized = tf.image.resize_bilinear(images=logits, size=(self.target_height, self.target_width), name='resized_outputs')
        return outputs, outputs_resized

    def loss_initializer(self):
        """Return the smooth-L1 loss between `labels` and `outputs_resized`.

        This replaces a softmax cross-entropy segmentation loss; `ignore_label`
        is a leftover of that loss and plays no role here.
        """
        # NOTE(review): the L2 regularizer is handed to the ASPP head, but any
        # regularization losses it may register are not added to this loss, so
        # `weight_decay` has no visible effect on it. Confirm this is intended.
        loss = self.Smooth_l1_loss(self.labels, self.outputs_resized)

        return loss

    def optimizer_initializer(self):
        """Return the Adam train op that minimises `self.loss`."""
        # Run everything in UPDATE_OPS (e.g. batch-norm moving averages, if the
        # backbone registers them there) before each optimisation step.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss)

        return optimizer

    def initialize_backbone_from_pretrained_weights(self, path_to_pretrained_weights):
        """Initialise the variables under 'backbone/' from a checkpoint.

        Each graph variable 'backbone/<name>:0' is mapped to the checkpoint
        entry '<name>' (scope prefix and ':0' suffix stripped).
        """
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=['global_step'])
        valid_prefix = 'backbone/'
        assignment_map = {
            v.name[len(valid_prefix):].split(':')[0]: v
            for v in variables_to_restore
            if v.name.startswith(valid_prefix)
        }
        tf.train.init_from_checkpoint(path_to_pretrained_weights, assignment_map)

    def summary(self):
        """Create the scalar loss summaries used when `training` is True.

        Returns:
            (train_summary, valid_summary): two scalar summaries of
            `self.loss`, tagged 'train' and 'valid' under the 'loss' name scope.
        """
        with tf.name_scope('loss'):
            train_summary = tf.summary.scalar('train', self.loss)
            valid_summary = tf.summary.scalar('valid', self.loss)

        return train_summary, valid_summary

    def Smooth_l1_loss(self, labels,predictions,scope=tf.GraphKeys.LOSSES):
        """Mean smooth-L1 (Huber, delta=1) loss over all elements.

        Per element, with d = |labels - predictions|:
            0.5 * d**2   if d < 1
            d - 0.5      otherwise
        """
        with tf.variable_scope(scope):
            diff = tf.abs(labels - predictions)
            # 1.0 where the quadratic branch applies, 0.0 elsewhere (bool -> float32).
            less_than_one = tf.cast(tf.less(diff, 1.0), tf.float32)
            # Blend the quadratic and linear branches with the mask.
            smooth_l1_loss = (less_than_one * 0.5 * diff ** 2) + (1.0 - less_than_one) * (diff - 0.5)
            return tf.reduce_mean(smooth_l1_loss)

    def close(self):
        """Close the summary writer (training mode only) and the session."""
        if self.training:
            self.writer.close()
        self.sess.close()

In [3]:
# Folder holding the per-marker 'ImgMarker_XXX.npy' / 'UVMarker_XXX.npy' files.
# The original location is machine specific, so it can be overridden with the
# UV_DATA_FOLDER environment variable; without it the original path is used.
dataFolder = os.environ.get('UV_DATA_FOLDER', r'F:\WorkingCopy2\2021_RandomlyDeformedGridMesh\Data')
# Number of markers to load; marker indices run from 0 to numMarkers - 1.
numMarkers = 50



In [4]:
# Read one image array and one UV array per marker; list index == marker index.
imgsPerMaker, uvsPerMaker = [], []

for markerIndex in range(numMarkers):
    # Files are numbered with three zero-padded digits: 000, 001, ...
    markerTag = '{:03d}'.format(markerIndex)

    markerImgs = np.load(join(dataFolder, 'ImgMarker_' + markerTag + '.npy'))
    markerUVs = np.load(join(dataFolder, 'UVMarker_' + markerTag + '.npy'))

    imgsPerMaker.append(markerImgs)
    uvsPerMaker.append(markerUVs)

# Visual sanity check (disabled): show a random crop with cv2.imshow together
# with its two label channels, uv[..., 0] as 'u' and uv[..., 1] as 'v'.

In [5]:
# Checkpoint used to initialise the backbone weights.  The original location is
# machine specific, so it can be overridden with the UV_PRETRAINED_CKPT
# environment variable; without it the original path is used.
pretrainedCkpt = os.environ.get(
    'UV_PRETRAINED_CKPT',
    r'C:\Code\MyRepo\00_DeepLearning\DeepLab-V3\data\models\pretrained\resnet_101\resnet_v2_101.ckpt')

# training=False skips the time-stamped log directory / summary writer setup.
# Alternative backbones kept from the original cell:
# uvExtractor = UVExtractor('resnet_50', training=False,)
# uvExtractor = UVExtractor('vgg16', training=False,)
# uvExtractor = UVExtractor('mobilenet_1.0_224', training=False,)
uvExtractor = UVExtractor('resnet_101', training=False, pre_trained_model=pretrainedCkpt)

In [7]:
# Shape of the image array loaded for marker 0 (first entry of imgsPerMaker).
imgsPerMaker[0].shape

(1000, 60, 60, 3)

In [8]:
# Evaluation-mode feed built from the first ten samples of marker 0.
fd = dict()
fd[uvExtractor.inputs] = imgsPerMaker[0][:10]
fd[uvExtractor.is_training] = False
fd[uvExtractor.target_width] = 60
fd[uvExtractor.target_height] = 60
fd[uvExtractor.labels] = uvsPerMaker[0][:10]

# Fetch the raw (un-resized) logits and report their shape.
output = uvExtractor.sess.run(uvExtractor.outputs, feed_dict=fd)
print(output.shape)

(10, 4, 4, 2)


In [9]:
# Fetch the bilinearly resized prediction for the current `fd`; its spatial
# size is set by the target_height / target_width entries of the feed.
output_resized = uvExtractor.sess.run(uvExtractor.outputs_resized, feed_dict=fd)
print(output_resized.shape)

(10, 60, 60, 2)


In [30]:
# Display the resized two-channel prediction of the first sample in the batch.
output_resized[0,...]

array([[[-547.1964  ,  137.62387 ],
        [-538.4445  ,  134.9598  ],
        [-529.6926  ,  132.29572 ],
        ...,
        [-500.0861  ,   94.04289 ],
        [-500.0861  ,   94.04289 ],
        [-500.0861  ,   94.04289 ]],

       [[-555.53754 ,  128.42982 ],
        [-546.90063 ,  125.628235],
        [-538.26373 ,  122.82664 ],
        ...,
        [-501.40018 ,   91.9568  ],
        [-501.40018 ,   91.9568  ],
        [-501.40018 ,   91.9568  ]],

       [[-563.87866 ,  119.23578 ],
        [-555.35675 ,  116.29667 ],
        [-546.8348  ,  113.35756 ],
        ...,
        [-502.71426 ,   89.870705],
        [-502.71426 ,   89.870705],
        [-502.71426 ,   89.870705]],

       ...,

       [[-412.7161  ,  -81.54241 ],
        [-411.09613 ,  -83.90189 ],
        [-409.47617 ,  -86.26137 ],
        ...,
        [-441.0507  ,   20.698822],
        [-441.0507  ,   20.698822],
        [-441.0507  ,   20.698822]],

       [[-412.7161  ,  -81.54241 ],
        [-411.09613 ,  -83.

In [25]:
# Fetch the un-resized logits (`outputs`) for the current `fd`.
# NOTE(review): `fd` is reassigned in another cell of this notebook, so the
# result depends on which of those cells was executed last.
featureMap = uvExtractor.sess.run(uvExtractor.outputs, feed_dict=fd)

In [26]:
# Shape of the raw logits, i.e. before bilinear resizing to the target size.
print(featureMap.shape)

(10, 4, 4, 2)


In [29]:
# Evaluate the smooth-L1 loss between the resized prediction and the labels
# contained in the current `fd`.
print(uvExtractor.sess.run(uvExtractor.loss, feed_dict=fd))

284.89804


In [11]:
# Training-mode feed: same ten samples of marker 0, plus the optimisation
# hyper-parameters expected by the weight_decay / learning_rate placeholders.
fd = dict()
fd[uvExtractor.inputs] = imgsPerMaker[0][:10]
fd[uvExtractor.is_training] = True
fd[uvExtractor.target_width] = 60
fd[uvExtractor.target_height] = 60
fd[uvExtractor.labels] = uvsPerMaker[0][:10]
fd[uvExtractor.weight_decay] = 5e-4
fd[uvExtractor.learning_rate] = 1e-5

# One optimizer step; the raw logits and the loss come back from the same run.
trainFetches = [uvExtractor.optimizer, uvExtractor.outputs, uvExtractor.loss]
_, outputs, train_loss = uvExtractor.sess.run(trainFetches, feed_dict=fd)

In [12]:
# Loss value fetched together with the optimizer step.
train_loss

0.4221286