In [54]:
%load_ext autoreload
%autoreload 2

import collections
import cv2
import numpy as np
import os
import random
import reader 
import tensorflow as tf


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [55]:
class convolution_learner(object):
    def __init__(self, num_classes, height=28, width=28):
        self.num_classes = num_classes
        self.height = height
        self.width = width
        
        self.tf_sess = tf.InteractiveSession()
    
    def network(self, learning_rate=1e-4):
        # input and output
        x = tf.placeholder(tf.float32, shape=[None, self.height*self.width])
        self.i = x
        y_ = tf.placeholder(tf.float32, shape=[None, self.num_classes])
        self.l = y_

        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])

        x_image = tf.reshape(x, [-1, self.height, self.width, 1])
        # TODO: check order or height and width

        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)

        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])

        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)

        W_fc1 = weight_variable([(self.height/4)*(self.width/4)*64, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2, [-1, (self.height/4)*(self.width/4)*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
        self.keep_prob = keep_prob

        W_fc2 = weight_variable([1024, self.num_classes])
        b_fc2 = bias_variable([self.num_classes])

        output = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
        self.output = output

        cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(output), reduction_indices=[1]))

        # training
        self.train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
        #self.train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
        
        correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(output,1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        
        #initialize
        self.tf_sess.run(tf.initialize_all_variables())
        
    def test_accuracy(self, test_source):
        images, labels = test_source.all()
        return self.accuracy.eval(feed_dict={self.i: images, self.l: labels, self.keep_prob: 1.0})
        
    def train(self, num_iterations, train_source, test_source):
        for i in xrange(num_iterations):
            if i % 100 == 0: 
                print i, self.test_accuracy(test_source)
                images, _ = test_source.all()

            images, labels = train_source.next_batch(30)
            self.train_step.run(feed_dict={self.i: images, self.l: labels, self.keep_prob: 1.0})
        
        print self.test_accuracy(test_source)
        
    def inference(self, images):
        return self.output.eval(feed_dict={self.i: images, self.keep_prob: 1.0})

In [None]:
height = 28*2
width = 28*2

train_source = reader.reader('/home/lee/Desktop/rooms', height=height, width=width)
test_source = reader.reader('/home/lee/Desktop/rooms_test', height=height, width=width)


numClasses = train.num_classes()

learner = convolution_learner(numClasses, height=height, width=width)
learner.network(learning_rate=0.0001)

learner.train(5000, train_source, test_source)

In [53]:
print "----TRAINING----"
train.print_classes()
print "----TESTING----"
test.print_classes()
print ""

from sklearn.metrics import confusion_matrix

images, labels = test_source.all()
output = tf.argmax(learner.inference(images), 1).eval()
labels = tf.argmax(labels, 1).eval()


print "----Confusion Matrix----"
confusion_matrix(output, labels)

----TRAINING----
dining_room 42
kitchen 44
living_room 57
----TESTING----
dining_room 15
kitchen 15
living_room 15

----Confusion Matrix----


array([[ 5,  1,  8],
       [ 2, 13,  0],
       [ 8,  1,  7]])