In [1]:
%matplotlib inline

import sys, os, time
import itertools
import math, random
import glob
import tensorflow as tf
from tensorflow.contrib.layers import flatten
import numpy as np
import cv2
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from IPython.display import Image, display
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Code from NestedNet START

#temp variables
BN_DECAY = 0.999
BN_EPSILON = 0.00001

# channel scheduling factors (now, 3 levels for conv scheduling)
l2r = 1.0/2.0    # actual density: (l2r^2) *100
l1r = 1.0/4.0    # actual density: (l1r^2) *100

def create_variables(name, shape):

    n1 = np.sqrt(6. / (shape[0] * shape[1] * l1r * (shape[-2] + shape[-1])))
    n2 = np.sqrt(6. / (shape[0] * shape[1] * l2r * (shape[-2] + shape[-1])))
    n3 = np.sqrt(6. / (shape[0] * shape[1] * (shape[-2] + shape[-1])))

    shape1 = [shape[0], shape[1], int(l1r * shape[2]), int(l1r * shape[3])]
    shape2_1 = [shape[0], shape[1], int((l2r - l1r) * shape[2]), int(l1r * shape[3])]
    shape2_2 = [shape[0], shape[1], int(l2r * shape[2]), int((l2r - l1r) * shape[3])]
    shape3_1 = [shape[0], shape[1], int((1. - l2r) * shape[2]), int(l2r * shape[3])]
    shape3_2 = [shape[0], shape[1], shape[2], int((1. - l2r) * shape[3])]

    lv1_variables = tf.get_variable(name + '_l1', initializer=tf.random_uniform(shape1, -n3, n3, tf.float32, seed=None))
    lv2_1_variables = tf.get_variable(name + '_l2_1', initializer=tf.random_uniform(shape2_1, -n3, n3, tf.float32, seed=None))
    lv2_2_variables = tf.get_variable(name + '_l2_2', initializer=tf.random_uniform(shape2_2, -n3, n3, tf.float32, seed=None))
    lv3_1_variables = tf.get_variable(name + '_l3_1', initializer=tf.random_uniform(shape3_1, -n3, n3, tf.float32, seed=None))
    lv3_2_variables = tf.get_variable(name + '_l3_2', initializer=tf.random_uniform(shape3_2, -n3, n3, tf.float32, seed=None))

    return lv1_variables, lv2_1_variables, lv2_2_variables, lv3_1_variables, lv3_2_variables

def output_layer(input1, input2, input3, num_labels):

    input_dim1 = input1.get_shape().as_list()[-1]
    input_dim2 = input2.get_shape().as_list()[-1]
    input_dim3 = input3.get_shape().as_list()[-1]

    fc_w1 = tf.get_variable('fc_weights_l1', shape=[input_dim1, num_labels], initializer=tf.initializers.variance_scaling(scale=1.0))
    fc_w2 = tf.get_variable('fc_weights_l2', shape=[input_dim2, num_labels], initializer=tf.initializers.variance_scaling(scale=1.0))
    fc_w3 = tf.get_variable('fc_weights_l3', shape=[input_dim3, num_labels], initializer=tf.initializers.variance_scaling(scale=1.0))

    fc_b1 = tf.get_variable(name='fc_bias_l1', shape=[num_labels], initializer=tf.zeros_initializer())
    fc_b2 = tf.get_variable(name='fc_bias_l2', shape=[num_labels], initializer=tf.zeros_initializer())
    fc_b3 = tf.get_variable(name='fc_bias_l3', shape=[num_labels], initializer=tf.zeros_initializer())

    fc_h1 = tf.matmul(input1, fc_w1) + fc_b1
    fc_h2 = tf.matmul(input2, fc_w2) + fc_b2
    fc_h3 = tf.matmul(input3, fc_w3) + fc_b3
    return fc_h1, fc_h2, fc_h3

def batch_normalization_layer(name, input_layer, dimension, is_training=True):

    beta = tf.get_variable(name + 'beta', dimension, tf.float32, initializer=tf.constant_initializer(0.0, tf.float32))
    gamma = tf.get_variable(name + 'gamma', dimension, tf.float32, initializer=tf.constant_initializer(1.0, tf.float32))
    mu = tf.get_variable(name + 'mu', dimension, tf.float32, initializer=tf.constant_initializer(0.0, tf.float32), trainable=False)
    sigma = tf.get_variable(name + 'sigma', dimension, tf.float32, initializer=tf.constant_initializer(1.0, tf.float32), trainable=False)

    if is_training is True:
        mean, variance = tf.nn.moments(input_layer, axes=[0, 1, 2])
        train_mean = tf.assign(mu, mu * BN_DECAY + mean * (1 - BN_DECAY))
        train_var = tf.assign(sigma, sigma * BN_DECAY + variance * (1 - BN_DECAY))

        with tf.control_dependencies([train_mean, train_var]):
            return tf.nn.batch_normalization(input_layer, mean, variance, beta, gamma, BN_EPSILON)
    else:
        bn_layer = tf.nn.batch_normalization(input_layer, mu, sigma, beta, gamma, BN_EPSILON)

    return bn_layer


def conv_layer(input_layer, filter_shape, stride, is_training):

    in_channel = input_layer.get_shape().as_list()[-1]

    bn_layer = batch_normalization_layer('l1_l2_l3', input_layer, in_channel, is_training)
    bn_layer = tf.nn.relu(bn_layer)

    n1 = np.sqrt(6. / (filter_shape[0] * filter_shape[1] * (filter_shape[-2] + l1r * filter_shape[-1])))
    n2 = np.sqrt(6. / (filter_shape[0] * filter_shape[1] * (filter_shape[-2] + l2r * filter_shape[-1])))
    n3 = np.sqrt(6. / (filter_shape[0] * filter_shape[1] * (filter_shape[-2] + filter_shape[-1])))
    filter1 = tf.get_variable('conv_l1',
                              initializer=tf.random_uniform([filter_shape[0], filter_shape[1], filter_shape[2], int(l1r*filter_shape[3])],
                                                            -n3, n3,   tf.float32, seed=None))
    filter2 = tf.get_variable('conv_l2',
                              initializer=tf.random_uniform([filter_shape[0], filter_shape[1], filter_shape[2], int((l2r-l1r)*filter_shape[3])],
                                                            -n3, n3,   tf.float32, seed=None))
    filter3 = tf.get_variable('conv_l3',
                              initializer=tf.random_uniform([filter_shape[0], filter_shape[1], filter_shape[2], int((1.-l2r)*filter_shape[3])],
                                                            -n3, n3,   tf.float32, seed=None))

    conv1 = tf.nn.conv2d(bn_layer, filter1, strides=[1, stride, stride, 1], padding='SAME')
    conv2 = tf.concat((conv1, tf.nn.conv2d(bn_layer, filter2, strides=[1, stride, stride, 1], padding='SAME')), 3)
    conv3 = tf.concat((conv2, tf.nn.conv2d(bn_layer, filter3, strides=[1, stride, stride, 1], padding='SAME')), 3)

    return conv1, conv2, conv3


def bn_relu_conv_layer(input1, input2, input3, filter_shape, stride, is_training):

    in_channel1 = input1.get_shape().as_list()[-1]
    in_channel2 = input2.get_shape().as_list()[-1]
    in_channel3 = input3.get_shape().as_list()[-1]

    bn_layer1 = batch_normalization_layer('l1', input1, in_channel1, is_training)
    bn_layer1 = tf.nn.relu(bn_layer1)
    bn_layer2 = batch_normalization_layer('l2', input2, in_channel2, is_training)
    bn_layer2 = tf.nn.relu(bn_layer2)
    bn_layer3 = batch_normalization_layer('l3', input3, in_channel3, is_training)
    bn_layer3 = tf.nn.relu(bn_layer3)

    filter1, filter2_1, filter2_2, filter3_1, filter3_2 = create_variables(name='conv', shape=filter_shape)

    conv1 = tf.nn.conv2d(bn_layer1, filter1, strides=[1, stride, stride, 1], padding='SAME')
    conv2 = tf.concat((tf.add(tf.nn.conv2d(bn_layer2[:, :, :, :int(l1r * filter_shape[2])], filter1, strides=[1, stride, stride, 1], padding='SAME'),
                              tf.nn.conv2d(bn_layer2[:, :, :, int(l1r * filter_shape[2]):int(l2r * filter_shape[2])], filter2_1, strides=[1, stride, stride, 1],
                                           padding='SAME')),
                       tf.nn.conv2d(bn_layer2, filter2_2, strides=[1, stride, stride, 1], padding='SAME')), 3)
    conv3 = tf.concat((tf.add(tf.nn.conv2d(bn_layer3[:, :, :, :int(l1r * filter_shape[2])], filter1, strides=[1, stride, stride, 1], padding='SAME'),
                              tf.nn.conv2d(bn_layer3[:, :, :, int(l1r * filter_shape[2]):int(l2r * filter_shape[2])], filter2_1, strides=[1, stride, stride, 1],
                                           padding='SAME')),
                       tf.nn.conv2d(bn_layer3[:, :, :, :int(l2r * filter_shape[2])], filter2_2, strides=[1, stride, stride, 1], padding='SAME')), 3)
    conv3 = tf.concat((tf.add(conv3, tf.nn.conv2d(bn_layer3[:, :, :, int(l2r * filter_shape[2]):], filter3_1, strides=[1, stride, stride, 1], padding='SAME')),
                       tf.nn.conv2d(bn_layer3, filter3_2, strides=[1, stride, stride, 1], padding='SAME')), 3)

    return conv1, conv2, conv3


def residual_block(input1, input2, input3, output_channel, wide_scale, is_training, first_block=False):

    input_channel = input3.get_shape().as_list()[-1]

    # When it's time to "shrink" the image size, we use stride = 2
    output_channel = int(output_channel * wide_scale)

    if input_channel * wide_scale == output_channel:
        increase_dim = True
        stride = 1
    else:
        if input_channel * 2 == output_channel:
            increase_dim = True
            stride = 2
        elif input_channel == output_channel:
            increase_dim = False
            stride = 1
        else:
            raise ValueError('Output and input channel does not match in residual blocks!!!')

    # The first conv layer of the first residual block does not need to be normalized and relu-ed.
    with tf.variable_scope('conv1_in_block'):
        if first_block:
            conv1, conv2, conv3 = conv_layer(input1, [3, 3, input_channel, output_channel], stride, is_training)
        else:
            conv1, conv2, conv3 = bn_relu_conv_layer(input1, input2, input3, [3, 3, input_channel, output_channel], stride, is_training)

    with tf.variable_scope('conv2_in_block'):
        conv1, conv2, conv3 = bn_relu_conv_layer(conv1, conv2, conv3, [3, 3, output_channel, output_channel], 1, is_training)

    # When the channels of input layer and conv2 does not match, we add zero pads to increase the
    #  depth of input layers
    if increase_dim is True:
        if input_channel * wide_scale == output_channel:
            if first_block:
                np0 = int((output_channel * l1r - input_channel) / 2)
                np1 = int((output_channel * l2r - input_channel) / 2)
                np2 = int((output_channel * 1 - input_channel) / 2)
                padded_input1 = tf.pad(input1, [[0, 0], [0, 0], [0, 0], [np0, np0]])
                padded_input2 = tf.pad(input2, [[0, 0], [0, 0], [0, 0], [np1, np1]])
                padded_input3 = tf.pad(input3, [[0, 0], [0, 0], [0, 0], [np2, np2]])
            else:
                np1 = int((output_channel - input_channel) / 2 * l1r)
                np2 = int((output_channel - input_channel) / 2 * l2r)
                np3 = int((output_channel - input_channel) / 2)
                padded_input1 = tf.pad(input1, [[0, 0], [0, 0], [0, 0], [np1, np1]])
                padded_input2 = tf.pad(input2, [[0, 0], [0, 0], [0, 0], [np2, np2]])
                padded_input3 = tf.pad(input3, [[0, 0], [0, 0], [0, 0], [np3, np3]])
        else:
            pooled_input1 = tf.nn.avg_pool(input1, ksize=[1, 2, 2, 1],
                                          strides=[1, 2, 2, 1], padding='VALID')
            padded_input1 = tf.pad(pooled_input1, [[0, 0], [0, 0], [0, 0], [int(input_channel*l1r) // 2,
                                                                            int(input_channel*l1r) // 2]])
            pooled_input2 = tf.nn.avg_pool(input2, ksize=[1, 2, 2, 1],
                                          strides=[1, 2, 2, 1], padding='VALID')
            padded_input2 = tf.pad(pooled_input2, [[0, 0], [0, 0], [0, 0], [int(input_channel*l2r) // 2,
                                                                            int(input_channel*l2r) // 2]])
            pooled_input3 = tf.nn.avg_pool(input3, ksize=[1, 2, 2, 1],
                                          strides=[1, 2, 2, 1], padding='VALID')
            padded_input3 = tf.pad(pooled_input3, [[0, 0], [0, 0], [0, 0], [input_channel // 2,
                                                                            input_channel // 2]])
    else:
        padded_input1 = input1
        padded_input2 = input2
        padded_input3 = input3

    output1 = conv1 + padded_input1
    output2 = conv2 + padded_input2
    output3 = conv3 + padded_input3

    return output1, output2, output3

# Code from NestedNet END

In [3]:
# Helper layer functions
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W, stride):
    return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

In [4]:
def nested_relu(input1, input2, input3, size):
	size1 = int(size*l1r)
	size2 = int(size*l2r) - int(size*l1r)
	size3 = size - int(size*l2r)

	bias1 = bias_variable(shape=(size1,))
	bias2 = tf.concat((bias1, bias_variable(shape=(size2,))),0)
	bias3 = tf.concat((bias2, bias_variable(shape=(size3,))),0)

	relu1 = tf.nn.relu(input1 + bias1)
	relu2 = tf.nn.relu(input2 + bias2)
	relu3 = tf.nn.relu(input3 + bias3)
	return relu1, relu2, relu3

def inference(x, is_training):
	with tf.variable_scope('conv1_in_block'):
		conv1_1, conv1_2, conv1_3 = conv_layer(x, [3,3,3,16], 1, is_training)

	relu1_1, relu1_2, relu1_3 = nested_relu(conv1_1, conv1_2, conv1_3, 16)
	
	with tf.variable_scope('conv2_in_block'):
		conv2_1, conv2_2, conv2_3 = bn_relu_conv_layer(relu1_1, relu1_2, relu1_3, [3, 3, 16, 16], 1, is_training)

	relu2_1, relu2_2, relu2_3 = nested_relu(conv2_1, conv2_2, conv2_3, 16)
	
	with tf.variable_scope('conv3_in_block'):
		conv3_1, conv3_2, conv3_3 = bn_relu_conv_layer(relu2_1, relu2_2, relu2_3, [3, 3, 16, 16], 1, is_training)

	relu3_1, relu3_2, relu3_3 = nested_relu(conv3_1, conv3_2, conv3_3, 16)

	with tf.variable_scope('fc1_in_block'):
		logits1, logits2, logits3 = output_layer(relu3_1, relu3_2, relu3_3, 3)

	return logits1, logits2, logits3

In [5]:
# Basic parameters
max_epochs = 25
# the path where our training data is stored
base_image_path = "img/"
# subdirectories in the images folder, each one representing a different class
image_types = ["red", "green", "yellow"]
# width and height of the images
input_img_x = 32
input_img_y = 32
# the ratio of training images to testing images
train_test_split_ratio = 0.9
# the minibatch size
batch_size = 32
# where we will save our best model
checkpoint_name = "model.ckpt"

In [6]:
# Model (Init input and output neurons)
# input neurons are shape of image which is (32 x 32 x 3)
x = tf.placeholder(tf.float32, shape=[None, input_img_x, input_img_y, 3])
# as many output neurons as classes - data is one-hot encoded
y_ = tf.placeholder(tf.float32, shape=[None, len(image_types)])
# probability that a neuron's output is kept furing dropout
keep_prob = tf.placeholder(tf.float32)

In [7]:
def conv_net(x):
    x_image = x

    # First three convolutional layers, of 16 3x3 filters

    # specify number of weights
    W_conv1 = weight_variable([3, 3, 3, 16])
    # specify number of bias variables or the variables that will
    # be added to weights after multiplying them by the activation
    b_conv1 = bias_variable([16]) 
    # specify the activation
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1, 1) + b_conv1)
    
    W_conv2 = weight_variable([3, 3, 16, 16])
    b_conv2 = bias_variable([16]) 
    h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2, 1) + b_conv2)

    W_conv3 = weight_variable([3, 3, 16, 16])
    b_conv3 = bias_variable([16])
    h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 1) + b_conv3)

    # Pooling layer
    h_pool4 = max_pool_2x2(h_conv3)
    n1, n2, n3, n4 = h_pool4.get_shape().as_list()
    W_fc1 = weight_variable([n2*n3*n4, 3])
    b_fc1 = bias_variable([3])

    # Flatten pool layer into a fully connected layer
    h_pool4_flat = tf.reshape(h_pool4, [-1, n2*n3*n4])
    logits = tf.matmul(h_pool4_flat, W_fc1) + b_fc1
    
    return logits

In [8]:
logits = conv_net(x)
logits1, logits2, logits3 = inference(x, is_training=True)

AttributeError: module 'tensorflow' has no attribute 'initializers'

In [None]:
sess = tf.InteractiveSession()

In [None]:
# Define loss function as computing softmax, and then cross entropy
loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=y_))
loss2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=y_))
loss3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits3, labels=y_))
loss_function = (loss1 + loss2 + loss3)/3

# Specify ptimizer takes a learning rate, and a loss function
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss=loss_function)

# Initialize all variables which will show if the model is valid
sess.run(tf.global_variables_initializer())

In [None]:
saver = tf.train.Saver()

time_start = time.time()

v_loss = least_loss = 99999999

# Load data
full_set = []

for im_type in image_types:
    for ex in glob.glob(os.path.join(base_image_path, im_type, "*")):
        im = cv2.imread(ex) # load img
        if not im is None:
            im = cv2.resize(im, (32, 32)) # resize to input size

            # Create an array representing our classes and set it
            one_hot_array = [0] * len(image_types)
            one_hot_array[image_types.index(im_type)] = 1
            assert(im.shape == (32, 32, 3))

            full_set.append((im, one_hot_array, ex))

random.shuffle(full_set) # shuffle data set

In [None]:
# Split data into a training and test set 

split_index = int(math.floor(len(full_set) * train_test_split_ratio))
train_set = full_set[:split_index]
test_set = full_set[split_index:]

# Ensure that training and test sets are a multiple of batch size
train_set_offset = len(train_set) % batch_size
test_set_offset = len(test_set) % batch_size
train_set = train_set[: len(train_set) - train_set_offset]
test_set = test_set[: len(test_set) - test_set_offset]

train_x, train_y, train_z = zip(*train_set)
test_x, test_y, test_z = zip(*test_set)

In [None]:
print("Starting training... [{} training examples]".format(len(train_x)))

v_loss = 9999999
train_loss = []
val_loss = []

for i in range(0, max_epochs):

    # Iterate over our training set
    for tt in range(0, int(len(train_x) / batch_size)):
        start_batch = batch_size * tt
        end_batch = batch_size * (tt + 1)
        train_step.run(feed_dict={x: train_x[start_batch:end_batch], y_: train_y[start_batch:end_batch]})
        ex_seen = "Current epoch, examples seen: {:20} / {} \r".format(tt * batch_size, len(train_x))
        sys.stdout.write(ex_seen.format(tt * batch_size))
        sys.stdout.flush()

    ex_seen = "Current epoch, examples seen: {:20} / {} \r".format((tt + 1) * batch_size, len(train_x))
    sys.stdout.write(ex_seen.format(tt * batch_size))
    sys.stdout.flush()

    t_loss = loss_function.eval(feed_dict={x: train_x, y_: train_y})
    v_loss = loss_function.eval(feed_dict={x: test_x, y_: test_y})
    
    train_loss.append(t_loss)
    val_loss.append(v_loss)
    
    sys.stdout.write("Epoch {:5}: loss: {:15.10f}, val. loss: {:15.10f}".format(i + 1, t_loss, v_loss))
    
    if v_loss < least_loss:
        sys.stdout.write(", saving new best model to {}".format(checkpoint_name))
        least_loss = v_loss
        filename = saver.save(sess, checkpoint_name)
        
    sys.stdout.write("\n")