In [1]:
import tensorflow as tf
import numpy as np
from PIL import Image

num_classes = 200

def load(filename):
    file = open(filename, "r") 
    image_names = file.readlines()
    images = []
    labels = []
    for name in image_names:
        label = int(name[:3])
        if label <= 1:
            im = Image.open("images/" + name.rstrip('\n'))
            H, W = im.size
            pixels = list(im.getdata())
            if not type(pixels[0]) is int:
                # todo: right now we are discarding transparent images
                image = np.array([comp for pixel in pixels for comp in pixel]).reshape(-1, H, W, 3)
                images.append(image)
                # zero-index the label
                labels.append(label - 1)
    return images, labels

images_train, labels_train = load('train.txt')
# images_test, labels_test = load('test.txt')

print(len(images_train))
print(len(labels_train))

30
30


In [31]:
# todo: use tf.contrib.layers.conv2d for 3D filter
def conv_relu(input_image, kernel_shape, bias_shape, stride = 2):
    strides = [1, stride, stride, 1]
    weights = tf.get_variable("weights", kernel_shape, initializer=tf.random_normal_initializer())
    biases = tf.get_variable("biases", bias_shape, initializer=tf.constant_initializer(0.0))
    conv = tf.nn.conv2d(input_image, weights, strides=strides, padding='SAME')
    return tf.nn.relu(conv + biases)

def conv_layers(input_image):
    # Variables created here will be named "convX/weights", "convX/biases".
    with tf.variable_scope("conv1"):
        out_channels = 15
        output = conv_relu(input_image, [5, 5, 3, out_channels], [out_channels], stride=1)
    with tf.variable_scope("conv2"):
        old_out_channels = out_channels
        out_channels = 20
        output = conv_relu(output, [5, 5, old_out_channels, out_channels], [out_channels])
    with tf.variable_scope("conv3"):
        old_out_channels = out_channels
        out_channels = 3
        return conv_relu(output, [5, 5, old_out_channels, out_channels], [out_channels])

def max_pool_2d_nxn_regions(inputs, output_size):
    inputs_shape = tf.shape(inputs)
    h = tf.cast(tf.gather(inputs_shape, 1), tf.int32)
    w = tf.cast(tf.gather(inputs_shape, 2), tf.int32)
    
    pooling_op = tf.reduce_max
    
#         pooling_op = tf.reduce_mean

    result = []
    n = output_size
    for row in range(output_size):
        for col in range(output_size):
            # start_h = floor(row / n * h)
            start_h = tf.cast(tf.floor(tf.multiply(row / n, tf.cast(h, tf.float32))), tf.int32)
            # end_h = ceil((row + 1) / n * h)
            end_h = tf.cast(tf.ceil(tf.multiply((row + 1) / n, tf.cast(h, tf.float32))), tf.int32)
            # start_w = floor(col / n * w)
            start_w = tf.cast(tf.floor(tf.multiply(col / n, tf.cast(w, tf.float32))), tf.int32)
            # end_w = ceil((col + 1) / n * w)
            end_w = tf.cast(tf.ceil(tf.multiply((col + 1) / n, tf.cast(w, tf.float32))), tf.int32)
            pooling_region = inputs[:, start_h:end_h, start_w:end_w, :]
            pool_result = pooling_op(pooling_region, axis=(1, 2))
            result.append(pool_result)
    return result

# Modified from RikHeijdens on https://github.com/tensorflow/tensorflow/issues/6011
def spp_layer(inputs, dimensions=[3, 2, 1]):
    # todo: fix this
    # print(inputs.get_shape()[1] < tf.constant(36, dtype=tf.int32))


#     if tf.less(inputs.get_shape()[1], dimensions[0] ** 2) or tf.less(inputs.get_shape()[2], dimensions[0] ** 2):
#         print(shape)
#         print('Size must be greater than {:d}x{:d}'.format(dimensions[0], dimensions[0]))
#         return None
    pool_list = []
    for pool_dim in dimensions:
        pool_list += max_pool_2d_nxn_regions(inputs, pool_dim)
    return tf.concat(pool_list, axis=1)

# todo: might be able to move this into session
def fc_layer(image, reuse):
    return tf.contrib.layers.fully_connected(image, num_classes, activation_fn=None, scope="fc", reuse=reuse)


batch_size = 8
tf.reset_default_graph()
fc_reuse = False
with tf.variable_scope("network") as scope:
    image_placeholders = []
    label_placeholders = []

    logits = []
    logit_labels = []

    for i in range(batch_size):
        image = tf.placeholder(tf.float32, (1,None,None,3), name='image_%d'%(i))
        image_placeholders.append(image)
        label = tf.placeholder(tf.int32, name='label_%d'%(i))
        label_placeholders.append(label)
    
        logit = tf.to_float(image)
        logit = conv_layers(logit)
        logit = spp_layer(logit)

        if not logit is None:
            logit = fc_layer(logit, fc_reuse)
            logit = tf.reshape(logit, [-1])
            logits.append(logit)
            logit_labels.append(label)
            fc_reuse = True

        scope.reuse_variables()
    
# output = tf.identity(logits, name='output')
logits = tf.convert_to_tensor(logits)
test = tf.argmax(logits, 0)
# test_2 = tf.argmax(logits, 1)

# loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=logit_labels))
# regularization_loss = tf.losses.get_regularization_loss()
# total_loss = loss + 1e-6 * regularization_loss
# optimizer = tf.train.MomentumOptimizer(0.001, 0.9)
# with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
#     opt = optimizer.minimize(total_loss)
# correct = tf.equal(tf.argmax(logits, 0), logit_labels)
# accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
session = tf.Session()
session.run(tf.global_variables_initializer())

# session.run([accuracy], feed_dict={image: images_train[0]})
image_data = images_train[:batch_size]
label_data = labels_train[:batch_size]
fd = {**{i: d for i, d in zip(image_placeholders, image_data)}, **{i: d for i, d in zip(label_placeholders, label_data )}}
# accuracy_val, loss_val, _  = session.run([accuracy, total_loss, opt], feed_dict=fd)

test = session.run([test], feed_dict=fd)

print(test)
# print(loss_val)


[array([4, 0, 5, 1, 0, 0, 6, 4, 0, 3, 0, 0, 4, 2, 0, 0, 6, 0, 7, 4, 1, 3, 5,
       3, 0, 7, 4, 5, 4, 5, 5, 0, 0, 3, 0, 5, 0, 5, 0, 6, 6, 6, 4, 5, 5, 4,
       5, 0, 0, 5, 5, 6, 6, 6, 0, 0, 0, 6, 0, 6, 1, 5, 4, 7, 3, 7, 0, 5, 4,
       3, 2, 5, 0, 0, 0, 0, 6, 3, 0, 0, 2, 6, 0, 3, 4, 7, 0, 0, 5, 0, 0, 1,
       4, 0, 5, 5, 7, 4, 3, 6, 0, 0, 0, 4, 0, 0, 5, 2, 0, 6, 2, 0, 4, 2, 7,
       7, 5, 0, 0, 2, 5, 5, 0, 4, 4, 0, 3, 3, 5, 5, 3, 5, 0, 0, 6, 0, 5, 0,
       5, 6, 7, 2, 0, 2, 0, 0, 0, 0, 0, 5, 5, 0, 0, 5, 7, 0, 5, 5, 2, 5, 4,
       3, 0, 0, 3, 2, 5, 0, 0, 2, 3, 6, 5, 0, 0, 6, 4, 5, 5, 5, 0, 5, 0, 5,
       5, 5, 7, 5, 0, 4, 4, 1, 0, 3, 5, 4, 0, 5, 0, 7])]
