In [117]:
import tensorflow as tf
import numpy as np
from PIL import Image

num_classes = 200

In [141]:
# todo: use tf.contrib.layers.conv2d for 3D filter
def conv_relu(input, kernel_shape, bias_shape, stride = 2):
    strides = [stride, stride, stride, stride]
    weights = tf.get_variable("weights", kernel_shape, initializer=tf.random_normal_initializer())
    biases = tf.get_variable("biases", bias_shape, initializer=tf.constant_initializer(0.0))
    conv = tf.nn.conv2d(input, weights, strides=strides, padding='SAME')
    return tf.nn.relu(conv + biases)

def conv_layers(input_image):
    # Variables created here will be named "convX/weights", "convX/biases".
    with tf.variable_scope("conv1"):
        out_channels = 15
        output = conv_relu(input_image, [5, 5, 3, out_channels], [out_channels], stride=1)
    with tf.variable_scope("conv2"):
        old_out_channels = out_channels
        out_channels = 20
        output = conv_relu(output, [5, 5, old_out_channels, out_channels], [out_channels])
    with tf.variable_scope("conv3"):
        old_out_channels = out_channels
        out_channels = 3
        return conv_relu(output, [5, 5, old_out_channels, out_channels], [out_channels])
    
# https://github.com/tensorflow/tensorflow/issues/6011
def spp_layer(image, levels=[6, 3, 2, 1]):
    shape = image.get_shape().as_list()
    if shape[1] < levels[0] ** 2 or shape[2] < levels[0] ** 2:
        print(shape)
        print('Size must be greater than {:d}x{:d}'.format(levels[0], levels[0]))
        return None

    with tf.variable_scope('spp'):
        pool_outputs = []
        for level in levels:
            # todo: figure out why it is surrounded by 1
            window_size = [1] + [np.ceil(d / level).astype(np.int32) for d in shape[1:3]] + [1]
            strides = [1] + [np.floor(d / level + 1).astype(np.int32) for d in shape[1:3]] + [1]
            
            pool = tf.nn.max_pool(image, ksize=window_size, strides=strides, padding='SAME')
            pool_outputs.append(tf.reshape(pool, [shape[0], -1]))
        spp_pool = tf.concat(pool_outputs, axis=1)
    return spp_pool

def fc_layer(image, reuse):
    return tf.contrib.layers.fully_connected(image, num_classes, activation_fn=None, scope="fc", reuse=reuse)
    
tf.reset_default_graph()

def load(filename):
    file = open(filename, "r") 
    image_names = file.readlines()
    images = []
    labels = []
    counter = 1
    with tf.variable_scope("network") as scope:
        fc_reuse = False
        for name in image_names:
            label = int(name[:3])
            if label <= 1:
                im = Image.open("images/" + name.rstrip('\n'))
                H, W = im.size
                pixels = list(im.getdata())
                image = np.array([comp for pixel in pixels for comp in pixel]).reshape(-1, H, W, 3)
                image = tf.convert_to_tensor(image)
                image = tf.to_float(image)
                image = conv_layers(image)
                image = spp_layer(image)
                if not image is None:
                    image = fc_layer(image, fc_reuse)
                    image = tf.reshape(image, [-1])
                    images.append(image)
                    labels.append(label)
                    fc_reuse = True

                scope.reuse_variables()
#                 if counter >= 15:
#                     break
#                 counter += 1

    print(images)
    print(labels)
    return tf.convert_to_tensor(images), labels

images, labels = load('train.txt')
print( "Total number of variables used: ", np.sum([v.get_shape().num_elements() for v in tf.trainable_variables()]) )

[<tf.Tensor 'network/Reshape:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_1:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_2:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_3:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_4:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_5:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_6:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_7:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_8:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_9:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_10:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_11:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_12:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_13:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_14:0' shape=(200,) dtype=float32>, <tf.Tensor 'network/Reshape_15:0' shape=(200,) dtype=float32>, <tf

In [136]:
print()

Tensor("packed:0", shape=(30, 200), dtype=float32)


In [None]:
# FAILED SPPs
# # https://github.com/tensorflow/tensorflow/issues/6011
# def spp_layer(image, levels=[6, 3, 2, 1], name = 'SPP_layer'):
#     shape = image.get_shape()[1:3].as_list()
#     with tf.variable_scope(name):
#         pool_outputs = []
#         print(image.shape)
#         for level in levels:
#             window_size = [np.ceil(d / level).astype(np.int32) for d in shape]
#             strides = [np.floor(d / level + 1).astype(np.int32) for d in shape]
            
#             # todo: figure out why it is surrounded by 1 
#             ksize = [1, window_size[0], window_size[1], 1]
#             strides = [1, strides[0], strides[1], 1]
            
#             print(ksize)
#             print(strides)
            
#             pool = tf.nn.max_pool(image, ksize=ksize, strides=strides, padding='SAME')
#             pool_outputs.append(tf.reshape(pool, [shape[0], -1]))
#         spp_pool = tf.concat(pool_outputs, axis=1)
#         print(spp_pool)
#     return spp_pool

# # https://github.com/tensorflow/tensorflow/issues/6011
# def spp_layer(image, levels=[6, 3, 2, 1], name = 'SPP_layer'):
#     shape = image.get_shape().as_list()
#     with tf.variable_scope(name):
#         pool_outputs = []
#         for level in levels:
#             # todo: figure out why it is surrounded by 1 
#             window_size = [1] + [np.ceil(d / level).astype(np.int32) for d in shape[1:3]] + [1]
#             strides = [1, np.floor(shape[1] / level + 1).astype(np.int32), np.floor(shape[2] / level + 1), 1]
            
#             pool = tf.nn.max_pool(image, ksize=window_size, strides=strides, padding='SAME')
#             pool_outputs.append(tf.reshape(pool, [shape[0], -1]))
#         spp_pool = tf.concat(pool_outputs, axis=1)
#     return spp_pool

# MISC

# def conv_layers(input_image):
#     # Variables created here will be named "convX/weights", "convX/biases".
#     with tf.variable_scope("conv1"):
#         out_channels = 10
#         output = conv_relu(input_image, [15, 15, 3, out_channels], [out_channels])
#     with tf.variable_scope("conv2"):
#         old_out_channels = out_channels
#         out_channels = 15
#         output = conv_relu(output, [5, 5, old_out_channels, out_channels], [out_channels])
#     with tf.variable_scope("conv3"):
#         old_out_channels = out_channels
#         out_channels = 15
#         output = conv_relu(output, [5, 5, old_out_channels, out_channels], [out_channels])
#     with tf.variable_scope("conv4"):
#         old_out_channels = out_channels
#         out_channels = 3
#         return conv_relu(output, [5, 5, old_out_channels, out_channels], [out_channel