In [1]:
import tensorflow as tf
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import time
import os
from PIL import Image
%config IPCompleter.greedy=True

In [2]:
# Target image geometry. Source images are resized to IMAGE_HEIGHT x IMAGE_WIDTH
# when the TFRecords files are written, and the network input size is derived
# from all three values.
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 64
IMAGE_CHANNELS = 3

In [3]:
# Dataset root; the 'train' and 'validation' sub-directory names are appended to it.
data_path = 'kaggle_inria/'

In [4]:
def createTFRecordsFile(src_dir, tfrecords_name):
    """Serialize a directory of per-class image folders into one TFRecords file.

    Every sub-directory of ``src_dir`` is treated as one class. Classes are
    numbered from 0 in sorted folder-name order, so the same folder names
    always map to the same labels no matter what order the OS lists them in.

    Each image is converted to RGB, resized to IMAGE_WIDTH x IMAGE_HEIGHT and
    stored as raw bytes under ``'image_raw'`` with its class index under
    ``'label'``.

    Args:
        src_dir: Directory containing one sub-directory per class.
        tfrecords_name: Path of the TFRecords file to create.

    Returns:
        ``(samples_size, classes_dict)``: the number of records written and a
        dict mapping class index to folder name.
    """
    samples_size = 0
    classes_dict = {}

    # os.listdir() gives no ordering guarantee; sorting keeps the label
    # assignment reproducible. Hidden entries (.DS_Store, .ipynb_checkpoints,
    # ...) and stray files are not classes.
    class_names = sorted(
        name for name in os.listdir(src_dir)
        if not name.startswith('.') and os.path.isdir(os.path.join(src_dir, name))
    )

    writer = tf.python_io.TFRecordWriter(tfrecords_name)
    try:
        for index, folder_name in enumerate(class_names):
            classes_dict[index] = folder_name
            class_path = os.path.join(src_dir, folder_name)

            for image_name in sorted(os.listdir(class_path)):
                if image_name.startswith('.'):
                    continue
                image_path = os.path.join(class_path, image_name)

                with Image.open(image_path) as img:
                    # The decoder reshapes every record to a 3-channel image,
                    # so grayscale / RGBA / palette files must be normalized
                    # to RGB. PIL's resize() takes (width, height).
                    img = img.convert('RGB').resize((IMAGE_WIDTH, IMAGE_HEIGHT))
                    img_raw = img.tobytes()

                example = tf.train.Example(
                    features=tf.train.Features(
                        feature={
                            'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[index])),
                            'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
                        }
                    )
                )
                writer.write(example.SerializeToString())
                samples_size += 1
    finally:
        # Always flush and release the file, even if an image fails to load.
        writer.close()

    print("totally %i samples" % samples_size)
    print(classes_dict)
    return samples_size, classes_dict

In [5]:
def decodeTFRecordsFile(tfrecords_name):
    """Build graph ops that read and decode one example from a TFRecords file.

    The file name is fed through ``tf.train.string_input_producer`` without an
    epoch limit, so the reader keeps cycling over the file. The returned
    tensors only produce values once queue runners have been started.

    Args:
        tfrecords_name: Path of the TFRecords file to read.

    Returns:
        ``(img, label)``: a float32 tensor of shape
        [IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS] with pixel values scaled
        to [-0.5, 0.5], and a scalar int32 class index.
    """
    file_queue = tf.train.string_input_producer([tfrecords_name])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string)
        }
    )
    img = tf.decode_raw(features['image_raw'], tf.uint8)
    # Use the shared channel constant so this stays in sync with the
    # network input size instead of a separate literal.
    img = tf.reshape(img, [IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS])
    # uint8 [0, 255] -> float32 [-0.5, 0.5]
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(features['label'], tf.int32)

    return img, label

In [6]:
def inputs(tfrecords_name, batch_size, shuffle=True):
    """Return batched ``(images, labels)`` tensors read from a TFRecords file.

    With ``shuffle`` set, examples go through a shuffling queue whose size is
    taken from the module-level ``train_samples_size``; otherwise they are
    batched in the order they are read.

    Args:
        tfrecords_name: Path of the TFRecords file to read.
        batch_size: Number of examples per batch.
        shuffle: Whether to shuffle examples before batching.

    Returns:
        ``(images, labels)`` batch tensors.
    """
    single_image, single_label = decodeTFRecordsFile(tfrecords_name)
    example = [single_image, single_label]

    if not shuffle:
        # Plain FIFO batching with a small two-batch buffer.
        images, labels = tf.train.batch(example,
                                        batch_size=batch_size,
                                        capacity=batch_size * 2)
        return images, labels

    # The queue holds at least train_samples_size examples before the first
    # batch is dequeued.
    images, labels = tf.train.shuffle_batch(example,
                                            batch_size=batch_size,
                                            capacity=train_samples_size + batch_size,
                                            min_after_dequeue=train_samples_size)
    return images, labels

In [7]:
def createPedestrianTrainTFRecordsFile():
    """Write 'pedestrian_train.tfrecords' from the 'train' folder under data_path.

    Returns:
        ``(samples_size, pedestrian_dict)`` as produced by createTFRecordsFile.
    """
    train_dir = data_path + 'train'
    print('createPedestrianTrainTFRecordsFile', train_dir)
    count, class_map = createTFRecordsFile(
        src_dir=train_dir,
        tfrecords_name='pedestrian_train.tfrecords',
    )
    print('createPedestrianTrainTFRecordsFile done')
    return count, class_map


def createPedestrianTestTFRecordsFile():
    """Write 'pedestrian_test.tfrecords' from the 'validation' folder under data_path.

    Returns:
        ``(samples_size, pedestrian_dict)`` as produced by createTFRecordsFile.
    """
    validation_dir = data_path + 'validation'
    print('createPedestrianTestTFRecordsFile', validation_dir)
    count, class_map = createTFRecordsFile(
        src_dir=validation_dir,
        tfrecords_name='pedestrian_test.tfrecords',
    )
    print('createPedestrianTestTFRecordsFile done')
    return count, class_map

In [8]:
# Build both TFRecords files and keep their sample counts.
# NOTE(review): pedestrian_dict is assigned by both calls, so the value that
# remains is the one from the validation set.
train_samples_size, pedestrian_dict = createPedestrianTrainTFRecordsFile()  # 3074 in the recorded output
test_samples_size, pedestrian_dict = createPedestrianTestTFRecordsFile()  # 354 in the recorded output

createPedestrianTrainTFRecordsFile kaggle_inria/train
totally 3074 samples
{0: 'no pedestrian', 1: 'pedestrian'}
createPedestrianTrainTFRecordsFile done
createPedestrianTestTFRecordsFile kaggle_inria/validation
totally 354 samples
{0: 'no pedestrian', 1: 'pedestrian'}
createPedestrianTestTFRecordsFile done


In [9]:
def init_weights(shape):
    """Create a weight variable of the given shape.

    Initial values come from a truncated normal distribution with a standard
    deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def init_bias(shape):
    """Create a bias variable of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve ``x`` with the filter bank ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2by2(x):
    """Apply max pooling with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

def convolutional_layer(input_x, shape):
    """Build a convolution + bias + ReLU layer.

    Args:
        input_x: Input tensor passed to conv2d.
        shape: Filter shape; its fourth entry is used as the bias length.

    Returns:
        The ReLU-activated convolution output.
    """
    kernel = init_weights(shape)
    out_channels = shape[3]
    bias = init_bias([out_channels])
    pre_activation = conv2d(input_x, kernel) + bias
    return tf.nn.relu(pre_activation)

def normal_full_layer(input_layer, size):
    """Build a fully connected layer without an activation.

    Args:
        input_layer: 2-D input tensor; its second dimension sets the weight
            matrix height.
        size: Number of output units.

    Returns:
        ``input_layer @ W + b`` as a tensor.
    """
    in_features = int(input_layer.get_shape()[1])
    weights = init_weights([in_features, size])
    biases = init_bias([size])
    projected = tf.matmul(input_layer, weights)
    return projected + biases

### Constants

In [10]:
# network parameters
# Length of one flattened image (height * width * channels).
num_input = IMAGE_HEIGHT * IMAGE_WIDTH * IMAGE_CHANNELS
# One class per entry in the class-index -> folder-name map.
num_classes = len(pedestrian_dict)

print(num_classes)

# When True, the training cell tries to restore a checkpoint before training.
useCkpt = False

# Number of optimization steps and examples per batch.
num_steps = 5000
batch_size = 50
# NOTE(review): update_step, train_acc_target and train_acc_target_cnt are not
# referenced in the visible cells — confirm whether they are still needed.
update_step = 100
# Intended reporting interval in steps — NOTE(review): confirm trainModel honours it.
display_step = 100
train_acc_target = 1
train_acc_target_cnt = train_samples_size / batch_size

2


In [11]:
# Input placeholder: one row of num_input float32 values per (flattened) image.
x = tf.placeholder(tf.float32,shape=[None,num_input],name='image')
print(x)

Tensor("image:0", shape=(?, 12288), dtype=float32)


In [12]:
# Un-flatten each row back to [height, width, channels] for the convolutional layers.
x_image = tf.reshape(x,[-1,IMAGE_HEIGHT,IMAGE_WIDTH,IMAGE_CHANNELS])
print(x_image)

Tensor("Reshape:0", shape=(?, 64, 64, 3), dtype=float32)


In [13]:
# First conv block: 6x6 filters, 3 input channels -> 32 feature maps, then 2x2 max pooling.
convo_1 = convolutional_layer(x_image,shape=[6,6,3,32])
convo_1_pooling = max_pool_2by2(convo_1)

In [14]:
# Second conv block: 6x6 filters, 32 -> 64 feature maps, then 2x2 max pooling.
convo_2 = convolutional_layer(convo_1_pooling,shape=[6,6,32,64])
convo_2_pooling = max_pool_2by2(convo_2)

In [15]:
# Flatten the pooled feature maps for the dense layer. The flattened width is
# read from the tensor's static shape (all dimensions except the batch one)
# rather than a literal, so it follows any change to the image size or to the
# number of filters in the conv layers.
convo_2_flat_size = int(np.prod(convo_2_pooling.get_shape().as_list()[1:]))
convo_2_flat = tf.reshape(convo_2_pooling,[-1,convo_2_flat_size])
# Dense layer with 1024 units followed by ReLU.
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat,1024))

In [16]:
# Output logits: one unit per class. Sized from num_classes so it always
# matches the width of the y_true placeholder.
y_pred = normal_full_layer(full_layer_one, num_classes)
print(y_pred)

Tensor("add_3:0", shape=(?, 2), dtype=float32)


In [17]:
# Saver used to restore a checkpoint (when useCkpt is set) and to write
# './saved_models/model.ckpt' after training.
# NOTE(review): created before the optimizer is built, so presumably the
# optimizer's own variables are not part of the checkpoint — confirm.
saver = tf.train.Saver()

In [18]:
# Ground-truth labels, fed as one-hot rows of width num_classes.
y_true = tf.placeholder(tf.float32,shape=[None,num_classes])

In [19]:
# Loss: softmax cross-entropy between labels and logits, averaged over the batch.
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true,logits=y_pred))

In [20]:
# Adam with a fixed learning rate of 1e-4; `train` is the op run once per training step.
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
train = optimizer.minimize(cross_entropy)

In [21]:
# Accuracy: fraction of rows whose highest-scoring class equals the class
# marked in the one-hot label.
correct_pred = tf.equal(tf.argmax(y_pred,1),tf.argmax(y_true,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [22]:
# Variable-initialization op; run once at the start of the training session.
init = tf.global_variables_initializer()

In [23]:
# NOTE(review): tfrecords_name is reassigned in the training cell before each
# inputs() call, so this value looks unused — confirm before removing.
tfrecords_name = 'pedestrian_test.tfrecords'

In [24]:
def trainModel():
    """Run ``num_steps`` optimization steps on batches from the training queue.

    Relies on module-level state: the open session ``sess``, the batch
    tensors ``images`` / ``labels``, the placeholders ``x`` / ``y_true``, and
    the ``train`` and ``accuracy`` ops.

    Every ``display_step`` steps the accuracy on the batch that was just
    trained on is printed. The existing ``accuracy`` op is reused for this;
    building fresh comparison ops inside the loop would add nodes to the
    graph on every report.
    """
    # Row k of the identity matrix is the one-hot encoding of class k.
    one_hot_rows = np.eye(num_classes)

    for i in range(num_steps):
        batch_x, y = sess.run([images, labels])

        batch_y = one_hot_rows[y]
        batch_x = np.reshape(batch_x, [len(y), num_input])
        feed = {x: batch_x, y_true: batch_y}

        # run optimization op (backprop)
        sess.run(train, feed_dict=feed)

        if i % display_step == 0:
            print('Currently on step {}'.format(i))
            print('Accuracy is:')
            print(sess.run(accuracy, feed_dict=feed))
            print('\n')
            
        

In [25]:
def testModel(images, labels):
    """Evaluate the model on the test set and print the overall accuracy.

    Relies on module-level state: the open session ``sess``, the placeholders
    ``x`` / ``y_true``, the ``accuracy`` op and ``test_samples_size``.

    Batches always arrive full because the input queue keeps cycling over the
    file. When ``test_samples_size`` is not a multiple of the batch size, the
    tail of the last batch is therefore made of records that were already
    scored. Only the still-unseen part of each batch is evaluated, so every
    test sample counts exactly once (this assumes the batches were built
    without shuffling).

    Args:
        images: Batch tensor of test images.
        labels: Batch tensor of test labels (class indices).

    Returns:
        The accuracy over the test set, or None when there are no test samples.
    """
    samples_untest = test_samples_size
    if samples_untest <= 0:
        # Nothing to evaluate; avoid dividing by zero below.
        print("No test samples to evaluate")
        return None

    one_hot_rows = np.eye(num_classes)
    correct_sum = 0.0
    test_sample_sum = 0

    while samples_untest > 0:
        test_images, y = sess.run([images, labels])

        # Drop the wrapped-around records at the end of the final batch.
        n_valid = min(len(y), samples_untest)
        test_images = np.reshape(test_images[:n_valid], [n_valid, num_input])
        test_labels = one_hot_rows[y[:n_valid]]

        acc = sess.run(accuracy, feed_dict={x: test_images, y_true: test_labels})
        # Weight each batch accuracy by the number of samples it covers.
        correct_sum += acc * n_valid
        samples_untest -= n_valid
        test_sample_sum += n_valid

    test_accuracy = correct_sum / test_sample_sum
    print("Testing accuracy = ", test_accuracy)
    return test_accuracy

In [26]:
# start training
# Directory that holds the model checkpoint; used both for the optional
# restore and for the save after training.
checkpoint_dir = './saved_models'
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

with tf.Session() as sess:
    # run the initializer
    sess.run(init)

    # train batch
    tfrecords_name = 'pedestrian_train.tfrecords'
    images, labels = inputs(tfrecords_name, batch_size, shuffle=True)
    # Coordinator + queue-runner threads that feed the training batches.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # Filled in once the test input pipeline is created.
    coord2 = None
    threads2 = []

    try:
        if useCkpt:
            # Resume from the latest checkpoint if one exists.
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

        # train the model
        print("Start training")
        trainModel()
        print("Training finish!")

        digit_probabilities = tf.nn.softmax(y_pred, name='prediction')
        print(y_pred)
        print(digit_probabilities)
        print(x_image)

        # Specify the graph nodes that we want to use later
        tf.add_to_collection('model', digit_probabilities)
        tf.add_to_collection('model_inputs', x_image)

        save_path = saver.save(sess, checkpoint_dir + '/model.ckpt')

        print("Model saved in path: %s" % save_path)

        # test batch
        tfrecords_name = 'pedestrian_test.tfrecords'
        images, labels = inputs(tfrecords_name, batch_size, shuffle=False)

        # Second coordinator for the queue runners of the test pipeline.
        coord2 = tf.train.Coordinator()
        threads2 = tf.train.start_queue_runners(sess=sess, coord=coord2)

        # test the model
        testModel(images, labels)
    finally:
        # Stop the input threads even when training or testing raised, so the
        # cell does not leave background threads running. The session itself
        # is closed by the `with` block.
        coord.request_stop()
        if coord2 is not None:
            coord2.request_stop()
        coord.join(threads)
        if coord2 is not None:
            coord2.join(threads2)

Start training
Currently on step 0
Accuracy is:
0.42


Currently on step 100
Accuracy is:
0.8


Currently on step 200
Accuracy is:
0.76


Currently on step 300
Accuracy is:
0.94


Currently on step 400
Accuracy is:
0.9


Currently on step 500
Accuracy is:
0.96


Currently on step 600
Accuracy is:
1.0


Currently on step 700
Accuracy is:
1.0


Currently on step 800
Accuracy is:
1.0


Currently on step 900
Accuracy is:
1.0


Currently on step 1000
Accuracy is:
1.0


Currently on step 1100
Accuracy is:
1.0


Currently on step 1200
Accuracy is:
1.0


Currently on step 1300
Accuracy is:
1.0


Currently on step 1400
Accuracy is:
1.0


Currently on step 1500
Accuracy is:
1.0


Currently on step 1600
Accuracy is:
1.0


Currently on step 1700
Accuracy is:
1.0


Currently on step 1800
Accuracy is:
1.0


Currently on step 1900
Accuracy is:
1.0


Currently on step 2000
Accuracy is:
1.0


Currently on step 2100
Accuracy is:
1.0


Currently on step 2200
Accuracy is:
1.0


Currently on step 2300
Accu