# Gap / Char Classification
Testing Tensorflow with simple models (Sigmoid and Sofmax)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
# Helper functions - ploting and resizing
from ocr.helpers import implt

# Creating CSV
import glob
import csv

print("OpenCV: " + cv2.__version__)
print("Numpy: " + np.__version__)
print("TensorFlow: " + tf.__version__)

OpenCV: 3.1.0-dev
Numpy: 1.12.0
TensorFlow: 1.0.0-rc1


## Load Images and Lables in CSV

In [3]:
images = np.genfromtxt('data/gapdet/data.csv', delimiter=',')
labels = np.genfromtxt('data/gapdet/labels.csv', delimiter=',')
labels = np.reshape(labels, (len(labels), 1))

print("CSV data files loaded.")
print("Number of images: " + str(len(images)))

# Separate data into training/test dataset
div = int(0.8 * len(images))

trainData = images[0:div]
trainLabels = labels[0:div]
evalData = images[div:]
evalLabels = labels[div:]
print("Training images: %g" % div)

CSV data files loaded.
Number of images: 10043
Training images: 8034


# Create classifier

### Simoid

In [4]:
# Place holders for data and labels
x = tf.placeholder(tf.float32, [None, 1800], name = "x")
y_ = tf.placeholder(tf.float32, [None, 1], name = "y_")

# Variables
W = tf.Variable(tf.random_normal([1800, 1]), name = "W")
b = tf.Variable(tf.random_normal([1]), name = "b")

y = tf.matmul(x, W) + b

# Training Model
cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y, name="Cost"))
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

# Evaluating model
correct_prediction = tf.equal(tf.sigmoid(y) , y_)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# For saving and using model
activation = tf.sigmoid(tf.matmul(x, W) + b)
tf.add_to_collection("activation", activation)

for i in range(5000):
    # Training
    sess.run(train_step, feed_dict={x: trainData, y_: trainLabels})
    
    if i % 500 == 0:
        # Printing stats
        cost = cross_entropy.eval(feed_dict={x: trainData, y_: trainLabels})
        acc = accuracy.eval(feed_dict={x: trainData, y_: trainLabels})
        print("%d #  Cost: %g  Accuracy: %g" % (i, cost, acc))


# Saving model
saver = tf.train.Saver()
saver.save(sess, 'models/CGClassifier')
saver.export_meta_graph('models/CGClassifier.meta')

print("Eval accuracy: %g" % sess.run(accuracy, feed_dict={x: evalData, y_: evalLabels}))
print("Total accuracy: %g" % sess.run(accuracy, feed_dict={x: images, y_: labels}))

sess.close()

0 #  Cost: 896.013  Accuracy: 0.595096
500 #  Cost: 62.2901  Accuracy: 0.706622
1000 #  Cost: 31.6872  Accuracy: 0.691187
1500 #  Cost: 20.5937  Accuracy: 0.672268
2000 #  Cost: 16.4272  Accuracy: 0.668907
2500 #  Cost: 12.516  Accuracy: 0.657207
3000 #  Cost: 10.4846  Accuracy: 0.644635
3500 #  Cost: 9.44974  Accuracy: 0.631317
4000 #  Cost: 13.0443  Accuracy: 0.669032
4500 #  Cost: 6.65418  Accuracy: 0.599701
Eval accuracy: 0.503733
Total accuracy: 0.565269


### Sofmax

In [5]:
def sofmaxLabels(lab):
    """ Transform labels for sofmax classification """
    newLab = np.zeros((len(lab), 2), dtype=np.int8)
    for i in range(len(lab)):
        newLab[i][int(lab[i][0])] = 1
    return newLab

llabels = sofmaxLabels(labels)
trainLabels = llabels[0:div]
evalLabels = llabels[div:]

In [6]:
sess = tf.InteractiveSession()

# Placeholders for data and labels
x = tf.placeholder(tf.float32, [None, 1800])
y_ = tf.placeholder(tf.float32, [None, 2])

# Variables
W = tf.Variable(tf.random_normal([1800, 2]))
b = tf.Variable(tf.random_normal([2]))

y = tf.matmul(x,W) + b

# Cost function and optimizet
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

# Evaluation
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess.run(tf.global_variables_initializer())

for i in range(5000):
    # Training
    sess.run(train_step, feed_dict={x: trainData, y_: trainLabels})
    
    if i % 500 == 0:
        # Printing stats
        cost = cross_entropy.eval(feed_dict={x: trainData, y_: trainLabels})
        acc = accuracy.eval(feed_dict={x: trainData, y_: trainLabels})
        print("%d #  Cost: %g  Accuracy: %g" % (i, cost, acc))

    

print("Eval accuracy: %g" % accuracy.eval(feed_dict={x: evalData, y_: evalLabels}))
print("Total accuracy: %g" % accuracy.eval(feed_dict={x: images, y_: llabels}))

sess.close()

0 #  Cost: 1266.78  Accuracy: 0.63754
500 #  Cost: 68.9104  Accuracy: 0.790142
1000 #  Cost: 37.6502  Accuracy: 0.786532
1500 #  Cost: 31.6991  Accuracy: 0.826736
2000 #  Cost: 31.6826  Accuracy: 0.808439
2500 #  Cost: 19.9402  Accuracy: 0.835574
3000 #  Cost: 13.1255  Accuracy: 0.814538
3500 #  Cost: 11.0731  Accuracy: 0.822753
4000 #  Cost: 12.0895  Accuracy: 0.838935
4500 #  Cost: 15.069  Accuracy: 0.804456
Eval accuracy: 0.670981
Total accuracy: 0.806432
