In [1]:
import numpy as np
import os
import glob
import tensorflow as tf
import math
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
# Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10

# Network Parameters
n_input = 9000 # MNIST data input (img shape: 50*60*3)
n_classes = 10 # MNIST total classes (0-9 digits)
dropout = 0.75 # Dropout, probability to keep units

In [3]:
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input], name = 'placeholder_x')
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32, name = 'keep_prob') #dropout (keep probability)

In [4]:
# Create model
def conv2d(img, w, b):
    return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(img, w,strides=[1, 1, 1, 1],padding='VALID'),b))

def max_pool(img, k):
    return tf.nn.max_pool(img, ksize=[1, k, k, 1],strides=[1, k, k, 1],padding='VALID')

In [5]:
# Store layers weight & bias

wc1 = tf.Variable(tf.random_normal([5, 5, 3, 32])) # 5x5 conv, 1 input, 32 outputs
wc2 = tf.Variable(tf.random_normal([5, 5, 32, 64])) # 5x5 conv, 32 inputs, 64 outputs
wd1 = tf.Variable(tf.random_normal([9*12*64, 1024])) # fully connected, 7*7*64 inputs, 1024 outputs
wout = tf.Variable(tf.random_normal([1024, n_classes])) # 1024 inputs, 10 outputs (class prediction)


bc1 = tf.Variable(tf.random_normal([32]))
bc2 = tf.Variable(tf.random_normal([64]))
bd1 = tf.Variable(tf.random_normal([1024]))
bout = tf.Variable(tf.random_normal([n_classes]))

In [6]:
# Construct model
_X = tf.reshape(x, shape=[-1, 50, 60, 3])


# Convolution Layer
conv1 = conv2d(_X,wc1,bc1)

# Max Pooling (down-sampling)
conv1 = max_pool(conv1, k=2)

# Apply Dropout
conv1 = tf.nn.dropout(conv1,keep_prob)


# Convolution Layer
conv2 = conv2d(conv1,wc2,bc2)

# Max Pooling (down-sampling)
conv2 = max_pool(conv2, k=2)

# Apply Dropout
conv2 = tf.nn.dropout(conv2, keep_prob)

In [7]:
# Fully connected layer
dense1 = tf.reshape(conv2, [-1, wd1.get_shape().as_list()[0]]) # Reshape conv2 output to fit dense layer input
dense1 = tf.nn.relu(tf.add(tf.matmul(dense1, wd1),bd1)) # Relu activation
dense1 = tf.nn.dropout(dense1, keep_prob) # Apply Dropout

# Output, class prediction
with tf.name_scope("pred"):
    pred = tf.add(tf.matmul(dense1, wout), bout)

#pred = conv_net(x, weights, biases, keep_prob)


regularizers = (tf.nn.l2_loss(wc1) + tf.nn.l2_loss(bc1)+(tf.nn.l2_loss(wc2) + tf.nn.l2_loss(bc2))+
                 (tf.nn.l2_loss(wd1) + tf.nn.l2_loss(bd1))+(tf.nn.l2_loss(wout) + tf.nn.l2_loss(bout)))

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) + 1e-6*regularizers
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [8]:
def get_pic(filename):
    img = Image.open(filename).convert('HSV')
    img = img.resize((50,60), Image.BILINEAR)
    arr = np.array(img)
    ## make a 1-dimensional view of arr
    flat_arr = arr.ravel()
    flat_arr= flat_arr.reshape((1,9000))
    return flat_arr

In [9]:
def get_label(filename):
    if filename[17]=='0':
        return np.array([1,0,0,0,0,0,0,0,0,0])
    if filename[17]=='1':
        return np.array([0,1,0,0,0,0,0,0,0,0])
    if filename[17]=='2':
        return np.array([0,0,1,0,0,0,0,0,0,0])
    if filename[17]=='3':
        return np.array([0,0,0,1,0,0,0,0,0,0])
    if filename[17]=='4':
        return np.array([0,0,0,0,1,0,0,0,0,0])
    if filename[17]=='5':
        return np.array([0,0,0,0,0,1,0,0,0,0])
    if filename[17]=='6':
        return np.array([0,0,0,0,0,0,1,0,0,0])
    if filename[17]=='7':
        return np.array([0,0,0,0,0,0,0,1,0,0])
    if filename[17]=='8':
        return np.array([0,0,0,0,0,0,0,0,1,0])
    if filename[17]=='9':
        return np.array([0,0,0,0,0,0,0,0,0,1])

In [10]:
def tget_label(filename):
    if filename[16]=='0':
        return np.array([1,0,0,0,0,0,0,0,0,0])
    if filename[16]=='1':
        return np.array([0,1,0,0,0,0,0,0,0,0])
    if filename[16]=='2':
        return np.array([0,0,1,0,0,0,0,0,0,0])
    if filename[16]=='3':
        return np.array([0,0,0,1,0,0,0,0,0,0])
    if filename[16]=='4':
        return np.array([0,0,0,0,1,0,0,0,0,0])
    if filename[16]=='5':
        return np.array([0,0,0,0,0,1,0,0,0,0])
    if filename[16]=='6':
        return np.array([0,0,0,0,0,0,1,0,0,0])
    if filename[16]=='7':
        return np.array([0,0,0,0,0,0,0,1,0,0])
    if filename[16]=='8':
        return np.array([0,0,0,0,0,0,0,0,1,0])
    if filename[16]=='9':
        return np.array([0,0,0,0,0,0,0,0,0,1])

In [11]:
def get_rank_pic(path,i):
    first = True
    for filename in glob.glob(os.path.join(path, '*.png'))[i:i+batch_size]:
        if first==False:
            rank_arr = flat_arr
        flat_arr = get_pic(filename)
        if first==False:
            flat_arr = np.vstack((rank_arr, flat_arr))
        first = False
    return flat_arr

In [12]:
def get_rank_lable(path,i):
    first = True
    for filename in glob.glob(os.path.join(path, '*.png'))[i:i+batch_size]:
        if first==False:
            rank_label = flat_label
        flat_label = get_label(filename)
        flat_label = flat_label.reshape((1,10))
        if first==False:
            flat_label = np.vstack((rank_label, flat_label))
        first = False
    return flat_label

In [13]:
def tget_rank_lable(path,i):
    first = True
    for filename in glob.glob(os.path.join(path, '*.png'))[i:i+batch_size]:
        if first==False:
            rank_label = flat_label
        flat_label = tget_label(filename)
        flat_label = flat_label.reshape((1,10))
        if first==False:
            flat_label = np.vstack((rank_label, flat_label))
        first = False
    return flat_label

In [14]:
# Initializing the variables
init = tf.global_variables_initializer()

In [15]:
# Launch the graph
path = 'rgbdata/training'
saver = tf.train.Saver()
sess=tf.Session()
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
    flat_arr = get_rank_pic(path,step)
    flat_label = get_rank_lable(path,step)
    #print flat_arr.shape
    #print flat_label.shape
    batch_xs = flat_arr
    batch_ys = flat_label
    # Fit training using batch data
    sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})
    if step % display_step == 0:
        # Calculate batch accuracy
        acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
        # Calculate batch loss
        loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
        print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) + ", Training Accuracy= " + "{:.5f}".format(acc)
    step += 1
print "Optimization Finished!"
# Test model
test_images = get_rank_pic('rgbdata/testing',0)
test_labels = tget_rank_lable('rgbdata/testing',0)
# Calculate accuracy for 256 mnist test images
print "Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_images, y: test_labels, keep_prob: 1.})

Iter 1280, Minibatch Loss= 6387220.500000, Training Accuracy= 0.23438
Iter 2560, Minibatch Loss= 6887334.500000, Training Accuracy= 0.32812
Iter 3840, Minibatch Loss= 3868294.000000, Training Accuracy= 0.46875
Iter 5120, Minibatch Loss= 3380660.500000, Training Accuracy= 0.50000
Iter 6400, Minibatch Loss= 2309599.500000, Training Accuracy= 0.54688
Iter 7680, Minibatch Loss= 1881872.875000, Training Accuracy= 0.61719
Iter 8960, Minibatch Loss= 1403372.750000, Training Accuracy= 0.66406
Iter 10240, Minibatch Loss= 1081899.750000, Training Accuracy= 0.73438
Iter 11520, Minibatch Loss= 916352.062500, Training Accuracy= 0.77344
Iter 12800, Minibatch Loss= 754082.687500, Training Accuracy= 0.75781
Iter 14080, Minibatch Loss= 700404.062500, Training Accuracy= 0.77344
Iter 15360, Minibatch Loss= 493137.718750, Training Accuracy= 0.81250
Iter 16640, Minibatch Loss= 499505.656250, Training Accuracy= 0.83594
Iter 17920, Minibatch Loss= 388595.812500, Training Accuracy= 0.86719
Iter 19200, Minibat

In [16]:
saver.save(sess, 'rgbcnnmodel')

'rgbcnnmodel'

In [17]:
def testing(filename):
    stest_images = get_pic(filename)
    print sess.run(tf.argmax(pred,1), feed_dict={x: stest_images, keep_prob: 1.})

In [18]:
pred

<tf.Tensor 'pred/Add:0' shape=(?, 10) dtype=float32>

In [19]:
filename = 'rgbdata/testing/2_216.png'
testing(filename)

[2]
