diff --git a/README_files/loss.png b/README_files/loss.png
new file mode 100644
index 0000000..f89420a
Binary files /dev/null and b/README_files/loss.png differ
diff --git a/README_files/model.png b/README_files/model.png
new file mode 100644
index 0000000..706f1ff
Binary files /dev/null and b/README_files/model.png differ
diff --git a/extract_feature.py b/extract_feature.py
new file mode 100644
index 0000000..df2c3d3
--- /dev/null
+++ b/extract_feature.py
@@ -0,0 +1,128 @@
+import os
+import sys
+import argparse
+import numpy as np
+import vgg19
+import tensorflow as tf
+import skimage.io
+import skimage.transform
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='convert data to feature')
+    parser.add_argument('--source_path', dest='source_path', help='path of the data',
+                        default='.', type=str)
+    parser.add_argument('--target_path', dest='target_path', help='path to store feature map',
+                        default='./feature_fc', type=str)
+    parser.add_argument('--batch_size', dest='batch_size', help='batch_size',
+                        default=32, type=int)
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+
+class Data:
+    def __init__(self):
+        self.frame_path = os.path.join(args.source_path, 'frames')
+        self.label_path = os.path.join(args.source_path, 'labels')
+        self.target_path = args.target_path
+        self.batch_size = args.batch_size
+
+        if not os.path.exists(self.target_path):
+            os.makedirs(self.target_path)
+
+    def load_data(self):
+        batch_data = []
+        batch_path = []
+        total = 0
+        num = 0
+        # First pass: count the frames so the final partial batch can be flushed.
+        for dirpath, dirnames, filenames in os.walk(self.frame_path):
+            total += len(filenames)
+        for dirpath, dirnames, filenames in os.walk(self.frame_path):
+            for f in filenames:
+                num += 1
+                filepath = os.path.join(dirpath, f)
+                img = self.load_image(filepath)
+                batch_data.append(img)
+                batch_path.append([dirpath, f])
+                # Flush on a full batch, or on the (possibly partial) last batch.
+                if len(batch_path) == self.batch_size or num == total:
+                    batch_data = np.asarray(batch_data)
+                    batch_path = np.asarray(batch_path)
+                    feature_map_batch = self.extract_feature(batch_data, batch_size=len(batch_data))
+                    self.save_features(feature_map_batch, batch_path)
+                    batch_data = []
+                    batch_path = []
+
+    def save_features(self, feature_map_batch, batch_path):
+        for i in range(len(batch_path)):
+            path_ = batch_path[i][0].split('/')
+            target_dir_path = ''
+            for l in path_:
+                if l.find('train') != -1:
+                    target_dir_path = '/train/'
+                    break
+                elif l.find('test') != -1:
+                    target_dir_path = '/test/'
+                    break
+            if not os.path.exists(self.target_path + target_dir_path):
+                os.makedirs(self.target_path + target_dir_path)
+            # e.g. frames/train/office/1/Lhand/Image1.png -> office_1_Lhand_Image1.npy
+            f_name = batch_path[i][0].replace(self.frame_path + target_dir_path, '')
+            f_name = f_name.replace('/', '_') + '_' + batch_path[i][1].replace('.png', '.npy')
+            target_path = os.path.join(self.target_path + target_dir_path, f_name)
+            print('saving feature:{0}'.format(target_path))
+            np.save(target_path, feature_map_batch[i])
+
+    def load_image(self, path):
+        print('loading image:{0}'.format(path))
+        img = skimage.io.imread(path)
+        img = img / 255.0
+        assert (0 <= img).all() and (img <= 1.0).all()
+        # Center-crop to a square, then resize to the VGG input size.
+        short_edge = min(img.shape[:2])
+        yy = int((img.shape[0] - short_edge) / 2)
+        xx = int((img.shape[1] - short_edge) / 2)
+        crop_img = img[yy: yy + short_edge, xx: xx + short_edge]
+        resized_img = skimage.transform.resize(crop_img, (224, 224))
+        return resized_img
+
+    def extract_feature(self, batch, batch_size):
+        with tf.device('/gpu:0'):
+            with tf.Graph().as_default():
+                with tf.Session() as sess:
+                    images = tf.placeholder("float", [batch_size, 224, 224, 3])
+                    vgg = vgg19.Vgg19()
+                    with tf.name_scope("content_vgg"):
+                        vgg.build(images)
+                    # The fc6 activations (4096-d) serve as the frame feature.
+                    feature_map_batch = sess.run(vgg.fc6, feed_dict={images: batch})
+                    return feature_map_batch
+
+if __name__ == "__main__":
+    print(args)
+    data = Data()
+    data.load_data()
diff --git a/obj_fc_align.py b/obj_fc_align.py
new file mode 100644
index 0000000..22da63c
--- /dev/null
+++ b/obj_fc_align.py
@@ -0,0 +1,308 @@
+import tensorflow as tf
+import os
+import random
+import time
+import numpy as np
+import argparse
+import math
+import sys
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='train/test the classifier on extracted features')
+    parser.add_argument('--source_path_feature', dest='source_path_feature', help='path of the feature map',
+                        default='./feature_fc', type=str)
+    parser.add_argument('--source_path_label', dest='source_path_label', help='path of the label',
+                        default='./labels', type=str)
+    parser.add_argument('--batch_size', dest='batch_size', help='batch_size',
+                        default=64, type=int)
+    parser.add_argument('--mode', dest='mode', help='train/test',
+                        default='train', type=str)
+    parser.add_argument('--model_path', dest='model_path', help='path of the model',
+                        default='./model', type=str)
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+
+class Data:
+    def __init__(self, mode):
+        self.feature_path = args.source_path_feature
+        self.label_path = args.source_path_label
+        self.batch_size = args.batch_size
+        self.feature_list_train_ = []
+        self.feature_list_validation_ = []
+        self.feature_list_test = []
+        self.mode = mode
+        # Split a validation set from the training set (training : validation = 19 : 1).
+        # Only left-hand files are listed; head/right-hand files are derived from them.
+        self.feature_list = os.listdir(os.path.join(self.feature_path, self.mode))
+        self.feature_list = [x for x in self.feature_list if x.find('Lhand') != -1]
+        if mode == 'train':
+            num = int(len(self.feature_list) * 0.95)
+            self.train_data_len = num
+            self.validation_data_len = len(self.feature_list) - num
+            self.feature_list_validation = self.feature_list[num:]
+            self.feature_list_train = self.feature_list[:num]
+            random.seed(time.time())
+            random.shuffle(self.feature_list_train)
+        elif mode == 'test':
+            self.test_data_len = len(self.feature_list)
+
+    def load_data(self, mode):
+        if mode == 'train':
+            if len(self.feature_list_train_) < self.batch_size:
+                # Refill and reshuffle once the epoch's list runs low.
+                self.feature_list_train_ = self.feature_list_train
+                random.seed(time.time())
+                random.shuffle(self.feature_list_train_)
+            batch_data_L = self.feature_list_train_[:self.batch_size]
+            self.feature_list_train_ = self.feature_list_train_[self.batch_size:]
+        elif mode == 'validation':
+            if len(self.feature_list_validation_) < self.batch_size:
+                self.feature_list_validation_ = self.feature_list_validation
+            batch_data_L = self.feature_list_validation_[:self.batch_size]
+            self.feature_list_validation_ = self.feature_list_validation_[self.batch_size:]
+        elif mode == 'test':
+            if len(self.feature_list_test) < self.batch_size:
+                self.feature_list_test = self.feature_list
+            batch_data_L = self.feature_list_test[:self.batch_size]
+            self.feature_list_test = self.feature_list_test[self.batch_size:]
+
+        batch_data_head = []
+        batch_label = []
+        batch_data_R = []
+
+        # The batch is listed by left-hand features; the matching head and
+        # right-hand feature files share the same frame name.
+        for path in batch_data_L:
+            batch_data_head.append(path.replace('Lhand', 'head'))
+            batch_data_R.append(path.replace('Lhand', 'Rhand'))
+        for path_list in [batch_data_L, batch_data_R]:
+            for path in path_list:
+                path = path.split('_')
+                path_ = os.path.join(self.label_path, path[0])
+                number = int(path[3].split('Image')[1].split('.npy')[0]) - 1
+                if self.mode == 'test':
+                    if path[0] == 'office' or path[0] == 'house':
+                        path[1] = str(int(path[1]) + 3)  # test office/house 1,2,3 => 4,5,6
+                    elif path[0] == 'lab':
+                        path[1] = str(int(path[1]) + 4)  # test lab 1,2,3,4 => 5,6,7,8
+                side = 'right' if path[2].find('Rhand') != -1 else 'left'
+                fa = int(np.load(os.path.join(path_, 'FA_' + side + path[1] + '.npy'))[number])    # free/active
+                ges = int(np.load(os.path.join(path_, 'ges_' + side + path[1] + '.npy'))[number])  # gesture
+                obj = int(np.load(os.path.join(path_, 'obj_' + side + path[1] + '.npy'))[number])  # object
+                batch_label.append([fa, ges, obj])
+        batch_label = np.asarray(batch_label)
+        batch_data_L = np.asarray([np.load(os.path.join(self.feature_path, self.mode, x)) for x in batch_data_L])
+        batch_data_R = np.asarray([np.load(os.path.join(self.feature_path, self.mode, x)) for x in batch_data_R])
+        batch_data_head = np.asarray([np.load(os.path.join(self.feature_path, self.mode, x)) for x in batch_data_head])
+
+        return batch_data_L, batch_label, batch_data_head, batch_data_R
+
+    def get_data_length(self):
+        if self.mode == 'train':
+            return self.train_data_len, self.validation_data_len
+        elif self.mode == 'test':
+            return self.test_data_len
+
+class Train:
+    def __init__(self):
+        self.batch_size = args.batch_size
+        self.model_path = args.model_path
+        self.epoch = 20
+        self.start_learning_rate = 0.0001
+        self.data_train = Data('train')
+        self.data_test = Data('test')
+
+    def train(self):
+        model = Model()
+        with tf.device('/gpu:0'):
+            with tf.Graph().as_default():
+                (feature_Lhand, feature_Rhand, feature_head, label_fa, label_ges, label_obj,
+                 logits_fa, logits_ges, logits_obj,
+                 label_onehot_fa, label_onehot_ges, label_onehot_obj) = model.build(keep_prob=0.5)
+                cross_entropy_fa = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
+                    labels=label_onehot_fa, logits=logits_fa, name='softmax_loss_fa'))
+                tf.add_to_collection('losses_fa', cross_entropy_fa)
+                cross_entropy_ges = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
+                    labels=label_onehot_ges, logits=logits_ges, name='softmax_loss_ges'))
+                tf.add_to_collection('losses_ges', cross_entropy_ges)
+                cross_entropy_obj = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
+                    labels=label_onehot_obj, logits=logits_obj, name='softmax_loss_obj'))
+                tf.add_to_collection('losses_obj', cross_entropy_obj)
+                # Each task loss = its cross-entropy plus the weight decay terms
+                # registered for that task in Model.fc_layer().
+                loss_fa = tf.reduce_sum(tf.get_collection('losses_fa'))
+                loss_ges = tf.reduce_sum(tf.get_collection('losses_ges'))
+                loss_obj = tf.reduce_sum(tf.get_collection('losses_obj'))
+                global_step = tf.Variable(0, trainable=False)
+                learning_rate = self.start_learning_rate
+                optimizer_1 = tf.train.AdamOptimizer(learning_rate=learning_rate)
+                optimizer_2 = tf.train.AdamOptimizer(learning_rate=learning_rate)
+                optimizer_3 = tf.train.AdamOptimizer(learning_rate=learning_rate)
+                train_optimizer_fa = optimizer_1.minimize(loss_fa)
+                train_optimizer_ges = optimizer_2.minimize(loss_ges)
+                train_optimizer_obj = optimizer_3.minimize(loss_obj, global_step=global_step)
+                tf.summary.scalar('loss_fa', loss_fa)
+                tf.summary.scalar('loss_ges', loss_ges)
+                tf.summary.scalar('loss_obj', loss_obj)
+                summary = tf.summary.merge_all()
+                with tf.Session() as sess:
+                    if not os.path.exists(self.model_path):
+                        os.makedirs(self.model_path)
+                    summary_writer = tf.summary.FileWriter('./graph', sess.graph)
+                    sess.run(tf.global_variables_initializer())
+                    with tf.device('/cpu:0'):
+                        saver = tf.train.Saver(max_to_keep=5)
+                    if len(os.listdir(self.model_path)) > 0:
+                        saver.restore(sess, tf.train.latest_checkpoint(self.model_path))
+                    print('Start training ......')
+                    data_len_train, data_len_validation = self.data_train.get_data_length()
+                    for i in range(int(data_len_train * self.epoch / self.batch_size)):
+                        features_Lhand, labels, features_head, features_Rhand = self.data_train.load_data('train')
+                        labels_fa = labels[:, 0]
+                        labels_ges = labels[:, 1]
+                        labels_obj = labels[:, 2]
+                        _, _, _, _loss_fa, _loss_ges, _loss_obj, step, summary_ = sess.run(
+                            [train_optimizer_fa, train_optimizer_ges, train_optimizer_obj,
+                             loss_fa, loss_ges, loss_obj, global_step, summary],
+                            feed_dict={feature_Lhand: features_Lhand, feature_Rhand: features_Rhand,
+                                       feature_head: features_head, label_fa: labels_fa,
+                                       label_ges: labels_ges, label_obj: labels_obj})
+                        summary_writer.add_summary(summary_, global_step=step)
+                        if i % int(data_len_train / self.batch_size) == 0:
+                            print('epoch {0}'.format(math.floor(i * self.batch_size / data_len_train) + 1))
+                        if step % 10 == 0:
+                            #print('Step: {0} Loss free/active: {1}'.format(step, _loss_fa))
+                            #print('Step: {0} Loss gesture: {1}'.format(step, _loss_ges))
+                            print('Step: {0} Loss object: {1}'.format(step, _loss_obj))
+                        if step % 100 == 0:
+                            print('Saving models ......')
+                            with tf.device('/cpu:0'):
+                                saver.save(sess, os.path.join(self.model_path, 'model.ckpt'), global_step=step)
+                            accuracy_fa_, accuracy_ges_, accuracy_obj_ = self.test(mode='validation', data_len=data_len_validation)
+                            print('=> Validation accuracy free/active: {0}%'.format(accuracy_fa_ * 100))
+                            print('=> Validation accuracy gesture: {0}%'.format(accuracy_ges_ * 100))
+                            print('=> Validation accuracy object: {0}%'.format(accuracy_obj_ * 100))
+
+    def test(self, mode, data_len=512):
+        if args.mode == 'test':
+            data_len = self.data_test.get_data_length()
+        model = Model()
+        with tf.device('/gpu:0'):
+            with tf.Graph().as_default():
+                (feature_Lhand, feature_Rhand, feature_head, label_fa, label_ges, label_obj,
+                 logits_fa, logits_ges, logits_obj,
+                 label_onehot_fa, label_onehot_ges, label_onehot_obj) = model.build(keep_prob=1)
+                prediction_fa = tf.argmax(logits_fa, axis=1)
+                prediction_ges = tf.argmax(logits_ges, axis=1)
+                prediction_obj = tf.argmax(logits_obj, axis=1)
+                # tf.metrics.accuracy keeps running totals: the update_* ops
+                # accumulate over batches, the accuracy_* tensors read the value.
+                accuracy_fa, update_accuracy_fa = tf.metrics.accuracy(labels=label_fa, predictions=prediction_fa)
+                tf.summary.scalar('accuracy_fa', accuracy_fa)
+                accuracy_ges, update_accuracy_ges = tf.metrics.accuracy(labels=label_ges, predictions=prediction_ges)
+                tf.summary.scalar('accuracy_ges', accuracy_ges)
+                accuracy_obj, update_accuracy_obj = tf.metrics.accuracy(labels=label_obj, predictions=prediction_obj)
+                tf.summary.scalar('accuracy_obj', accuracy_obj)
+
+                with tf.Session() as sess:
+                    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
+                    with tf.device('/cpu:0'):
+                        restorer = tf.train.Saver()
+                        restorer.restore(sess, tf.train.latest_checkpoint(self.model_path))
+                    for _ in range(int(data_len / self.batch_size)):
+                        if mode == 'test':
+                            features_Lhand, labels, features_head, features_Rhand = self.data_test.load_data('test')
+                        elif mode == 'train':
+                            features_Lhand, labels, features_head, features_Rhand = self.data_train.load_data('train')
+                        elif mode == 'validation':
+                            features_Lhand, labels, features_head, features_Rhand = self.data_train.load_data('validation')
+                        labels_fa = labels[:, 0]
+                        labels_ges = labels[:, 1]
+                        labels_obj = labels[:, 2]
+                        sess.run([update_accuracy_fa, update_accuracy_ges, update_accuracy_obj],
+                                 feed_dict={feature_Lhand: features_Lhand, feature_Rhand: features_Rhand,
+                                            feature_head: features_head, label_fa: labels_fa,
+                                            label_ges: labels_ges, label_obj: labels_obj})
+                    _accuracy_fa, _accuracy_ges, _accuracy_obj = sess.run([accuracy_fa, accuracy_ges, accuracy_obj])
+                    return _accuracy_fa, _accuracy_ges, _accuracy_obj
+
+class Model:
+    def __init__(self):
+        self.batch_size = args.batch_size
+
+    def build(self, keep_prob):
+        feature_Lhand = tf.placeholder(tf.float32, shape=[self.batch_size, 4096], name='feature_Lhand')
+        feature_Rhand = tf.placeholder(tf.float32, shape=[self.batch_size, 4096], name='feature_Rhand')
+        feature_head = tf.placeholder(tf.float32, shape=[self.batch_size, 4096], name='feature_head_input')
+        # Labels are stacked as [left-hand batch; right-hand batch], hence 2*batch_size.
+        label_fa = tf.placeholder(tf.int32, shape=[2 * self.batch_size], name='label_fa')
+        label_onehot_fa = tf.one_hot(label_fa, depth=2)
+        label_ges = tf.placeholder(tf.int32, shape=[2 * self.batch_size], name='label_ges')
+        label_onehot_ges = tf.one_hot(label_ges, depth=13)
+        label_obj = tf.placeholder(tf.int32, shape=[2 * self.batch_size], name='label_obj')
+        label_onehot_obj = tf.one_hot(label_obj, depth=24)
+        # Align the three streams by concatenating their fc6 features.
+        fc1 = tf.concat([feature_Lhand, feature_Rhand, feature_head], axis=1)
+        with tf.variable_scope('fc2'):
+            fc2 = self.fc_layer(fc1, [4096 * 3, 4096], [4096], wd=0.0, _class='all')
+            fc2 = tf.nn.relu(fc2)
+        with tf.variable_scope('dropout2'):
+            fc2 = tf.nn.dropout(fc2, keep_prob=keep_prob)
+        with tf.variable_scope('fc3_fa_L'):
+            fc3_fa_L = self.fc_layer(fc2, [4096, 2], [2], wd=0.0001, _class='fa')
+        with tf.variable_scope('fc3_ges_L'):
+            fc3_ges_L = self.fc_layer(fc2, [4096, 13], [13], wd=0.0001, _class='ges')
+        with tf.variable_scope('fc3_obj_L'):
+            fc3_obj_L = self.fc_layer(fc2, [4096, 24], [24], wd=0.0001, _class='obj')
+        with tf.variable_scope('fc3_fa_R'):
+            fc3_fa_R = self.fc_layer(fc2, [4096, 2], [2], wd=0.0001, _class='fa')
+        with tf.variable_scope('fc3_ges_R'):
+            fc3_ges_R = self.fc_layer(fc2, [4096, 13], [13], wd=0.0001, _class='ges')
+        with tf.variable_scope('fc3_obj_R'):
+            fc3_obj_R = self.fc_layer(fc2, [4096, 24], [24], wd=0.0001, _class='obj')
+        fc3_fa = tf.concat([fc3_fa_L, fc3_fa_R], 0)
+        fc3_ges = tf.concat([fc3_ges_L, fc3_ges_R], 0)
+        fc3_obj = tf.concat([fc3_obj_L, fc3_obj_R], 0)
+
+        return (feature_Lhand, feature_Rhand, feature_head, label_fa, label_ges, label_obj,
+                fc3_fa, fc3_ges, fc3_obj, label_onehot_fa, label_onehot_ges, label_onehot_obj)
+
+    def fc_layer(self, _input, kernel_shape, bias_shape, wd, _class):
+        w = tf.get_variable('weights', shape=kernel_shape, initializer=tf.contrib.layers.xavier_initializer())
+        b = tf.get_variable('bias', shape=bias_shape, initializer=tf.constant_initializer(0.0))
+        # Register this layer's weight decay with the loss collection(s) of the
+        # task(s) it belongs to.
+        weight_decay = tf.multiply(tf.nn.l2_loss(w), wd, name='weight_loss')
+        if _class == 'all':
+            tf.add_to_collection('losses_fa', weight_decay)
+            tf.add_to_collection('losses_ges', weight_decay)
+            tf.add_to_collection('losses_obj', weight_decay)
+        elif _class == 'fa':
+            tf.add_to_collection('losses_fa', weight_decay)
+        elif _class == 'ges':
+            tf.add_to_collection('losses_ges', weight_decay)
+        elif _class == 'obj':
+            tf.add_to_collection('losses_obj', weight_decay)
+
+        return tf.nn.xw_plus_b(x=_input, weights=w, biases=b)
+
+if __name__ == '__main__':
+    print(args)
+    train = Train()
+    if args.mode == 'train':
+        train.train()
+    elif args.mode == 'test':
+        accuracy_fa, accuracy_ges, accuracy_obj = train.test(mode='test')
+        #print('=>Testing accuracy free/active: {0}%'.format(accuracy_fa * 100))
+        #print('=>Testing accuracy gesture: {0}%'.format(accuracy_ges * 100))
+        print('=>Testing accuracy object: {0}%'.format(accuracy_obj * 100))
diff --git a/results/index.md b/results/index.md
index 96ce61c..9a274db 100644
--- a/results/index.md
+++ b/results/index.md
@@ -1,47 +1,82 @@
-# Your Name (id)
+# 鄭欽安 (103061148)
 
-#Project 5: Deep Classification
+# Homework 1: Deep Classification
 
 ## Overview
-The project is related to
-> quote
+This project performs object classification on the HandCam dataset.
 
 ## Implementation
-1. One
-    * item
-    * item
-2. Two
+1. Implemented in TensorFlow 1.2.1.
+
+2. A VGG-19 model pre-trained on ImageNet extracts a 4096-d fc6 feature for every frame.
+
+3. Model architecture
+
+   ![model](../README_files/model.png)
+
+4. Multi-task learning: the free/active and gesture labels help train the object classifier and reduce overfitting.
+
+5. The left-hand, right-hand, and head features are aligned (concatenated) so the three tasks share features for classification; see the sketch after this list.
+
+6. Details
+   * learning rate = 0.0001
+
+   * regularization hyperparameter (0.0001) on the last fc layers
+
+   * one dropout layer
+
+   * batch size = 128
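+
+A minimal sketch of the shared trunk and the three task heads (shapes follow `obj_fc_align.py`; the `tf.layers` calls are illustrative, not the exact training code):
+
+```
+import tensorflow as tf
+
+# Three 4096-d VGG fc6 features per frame: left hand, right hand, head view.
+lhand = tf.placeholder(tf.float32, [None, 4096])
+rhand = tf.placeholder(tf.float32, [None, 4096])
+head = tf.placeholder(tf.float32, [None, 4096])
+labels_obj = tf.placeholder(tf.int64, [None])
+
+# Align the three streams by concatenation, then share one fc layer.
+trunk = tf.layers.dense(tf.concat([lhand, rhand, head], axis=1), 4096,
+                        activation=tf.nn.relu)
+
+# One small head per task: free/active (2), gesture (13), object (24).
+logits_fa = tf.layers.dense(trunk, 2)
+logits_ges = tf.layers.dense(trunk, 13)
+logits_obj = tf.layers.dense(trunk, 24)
+
+# Each task keeps its own cross-entropy; minimizing the auxiliary losses
+# regularizes the shared trunk, which is the multi-task effect described above.
+loss_obj = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
+    labels=labels_obj, logits=logits_obj))
+```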
 
 ## Installation
-* Other required packages.
-* How to compile from source?
+* TensorFlow
+
+* Pre-trained model [VGG-19 tensorflow](https://github.com/machrisaa/tensorflow-vgg); download its weights [vgg19.npy](https://mega.nz/#!xZ8glS6J!MAnE91ND_WyfZ_8mvkuSa2YcA7q-1ehfSm-Q1fxOvvs)
+
+* Skimage
+
+* Extract features of the frames (the expected data layout is sketched after this list)
+```
+python extract_feature.py --source_path={data_path} --target_path={target_feature_path} --batch_size={batch_size}
+```
+
+* Training
+```
+python obj_fc_align.py --source_path_feature={target_feature_path} --source_path_label={label_path} --mode=train --batch_size={batch_size} --model_path={model_path}
+```
+
+* Testing
+```
+python obj_fc_align.py --source_path_feature={target_feature_path} --source_path_label={label_path} --mode=test --batch_size={batch_size} --model_path={model_path}
+```
+
+* Tensorboard
+```
+tensorboard --logdir=./graph
+```
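+
+The scripts assume roughly this layout under `--source_path` (inferred from the path handling in `extract_feature.py` and `obj_fc_align.py`; scene/sequence names are illustrative):
+
+```
+{data_path}/
+  frames/
+    train/office/1/Lhand/Image1.png   (also Rhand/ and head/ streams)
+    test/...
+  labels/
+    office/FA_left1.npy  ges_left1.npy  obj_left1.npy  ...
+```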
 
 ### Results
-### Results
+
+* loss
+
+![loss](../README_files/loss.png)
+
+* accuracy
+
+| testing accuracy |
+| :--------------: |
+| 65.84% |
diff --git a/vgg19.py b/vgg19.py
new file mode 100644
index 0000000..1fb0d44
--- /dev/null
+++ b/vgg19.py
@@ -0,0 +1,128 @@
+import os
+import tensorflow as tf
+
+import numpy as np
+import time
+import inspect
+
+VGG_MEAN = [103.939, 116.779, 123.68]
+
+
+class Vgg19:
+    def __init__(self, vgg19_npy_path=None):
+        if vgg19_npy_path is None:
+            path = inspect.getfile(Vgg19)
+            path = os.path.abspath(os.path.join(path, os.pardir))
+            path = os.path.join(path, "vgg19.npy")
+            vgg19_npy_path = path
+            print(vgg19_npy_path)
+
+        self.data_dict = np.load(vgg19_npy_path, encoding='latin1').item()
+        print("npy file loaded")
+
+    def build(self, rgb):
+        """
+        load variable from npy to build the VGG
+
+        :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1]
+        """
+
+        start_time = time.time()
+        print("build model started")
+        rgb_scaled = rgb * 255.0
+
+        # Convert RGB to BGR
+        red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled)
+        assert red.get_shape().as_list()[1:] == [224, 224, 1]
+        assert green.get_shape().as_list()[1:] == [224, 224, 1]
+        assert blue.get_shape().as_list()[1:] == [224, 224, 1]
+        bgr = tf.concat(axis=3, values=[
+            blue - VGG_MEAN[0],
+            green - VGG_MEAN[1],
+            red - VGG_MEAN[2],
+        ])
+        assert bgr.get_shape().as_list()[1:] == [224, 224, 3]
+
+        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
+        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
+        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
+
+        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
+        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
+        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
+
+        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
+        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
+        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
+        self.conv3_4 = self.conv_layer(self.conv3_3, "conv3_4")
+        self.pool3 = self.max_pool(self.conv3_4, 'pool3')
+
+        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
+        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
+        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
+        self.conv4_4 = self.conv_layer(self.conv4_3, "conv4_4")
+        self.pool4 = self.max_pool(self.conv4_4, 'pool4')
+
+        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
+        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
+        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
+        self.conv5_4 = self.conv_layer(self.conv5_3, "conv5_4")
+        self.pool5 = self.max_pool(self.conv5_4, 'pool5')
+
+        self.fc6 = self.fc_layer(self.pool5, "fc6")
+        assert self.fc6.get_shape().as_list()[1:] == [4096]
+        self.relu6 = tf.nn.relu(self.fc6)
+
+        self.fc7 = self.fc_layer(self.relu6, "fc7")
+        self.relu7 = tf.nn.relu(self.fc7)
+
+        self.fc8 = self.fc_layer(self.relu7, "fc8")
+
+        self.prob = tf.nn.softmax(self.fc8, name="prob")
+
+        self.data_dict = None
+        print(("build model finished: %ds" % (time.time() - start_time)))
+
+    def avg_pool(self, bottom, name):
+        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
+
+    def max_pool(self, bottom, name):
+        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
+
+    def conv_layer(self, bottom, name):
+        with tf.variable_scope(name):
+            filt = self.get_conv_filter(name)
+
+            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
+
+            conv_biases = self.get_bias(name)
+            bias = tf.nn.bias_add(conv, conv_biases)
+
+            relu = tf.nn.relu(bias)
+            return relu
+
+    def fc_layer(self, bottom, name):
+        with tf.variable_scope(name):
+            shape = bottom.get_shape().as_list()
+            dim = 1
+            for d in shape[1:]:
+                dim *= d
+            x = tf.reshape(bottom, [-1, dim])
+
+            weights = self.get_fc_weight(name)
+            biases = self.get_bias(name)
+
+            # Fully connected layer. Note that the '+' operation automatically
+            # broadcasts the biases.
+            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
+
+            return fc
+
+    def get_conv_filter(self, name):
+        return tf.constant(self.data_dict[name][0], name="filter")
+
+    def get_bias(self, name):
+        return tf.constant(self.data_dict[name][1], name="biases")
+
+    def get_fc_weight(self, name):
+        return tf.constant(self.data_dict[name][0], name="weights")