In [1]:
import cv2
import os
import tensorflow as tf

In [2]:
import glob

In [68]:
import numpy as np

In [3]:
tf.app.flags.DEFINE_string('train_images_path', './datasets/images/train', 'Path to training images.')
tf.app.flags.DEFINE_string('test_images_path', './datasets/images/test', 'Path to training images.')
tf.app.flags.DEFINE_string('model_output_path', './datasets/output/', 'Path to model checkpoint.')

FLAGS = tf.app.flags.FLAGS

In [69]:
def read_captcha_images(image_path):
    if not os.path.exists(image_path):
        raise ValueError('image path is not exist')
        
    images = []
    labels = []
    images_path = os.path.join(image_path, '*.jpg')
    count = 0
    for image_file in glob.glob(images_path):
        count += 1
        if count % 5000 == 0:
            print('Load {} images.'.format(count))
        image = cv2.imread(image_file)
        
        #image = image[:, :, (2, 1, 0)] # change channel
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = int(image_file.split('_')[-1].split('.')[0])
        images.append(image)
        labels.append(label)
    images = np.array(images)
    labels = np.array(labels)
    return images, labels

In [5]:
def preprocess(inputs):
    preprocessed_inputs = tf.to_float(inputs)
    preprocessed_inputs = tf.subtract(preprocessed_inputs, 128.0)
    preprocessed_inputs = tf.div(preprocessed_inputs, 128.0)
    return preprocessed_inputs

In [79]:
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
    
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)



In [101]:
"""cnn 模型层级：
    ·conv1
    ·conv2
    ·pool3
    ·conv4
    ·conv5
    ·pool6
    ·conv7
    ·conv8
    ·fc9
    ·fc10"""
def cnn(x):
    #[n, 28, 28, 3]
    with tf.name_scope('reshape1'):
        x_image = tf.reshape(x, [-1, 28, 28, 3])
        
    #[n, 28, 28, 32]
    with tf.name_scope('conv1'):
        W_conv1 = weight_variable([3, 3, 3, 32])
        b_conv1 = bias_variable([32])
        layer_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    
     #[n, 28, 28, 32]
    with tf.name_scope('conv2'):
        W_conv2 = weight_variable([3, 3, 32, 32])
        b_conv2 = bias_variable([32])
        layer_conv2 = tf.nn.relu(conv2d(layer_conv1, W_conv2) + b_conv2)
        
    #[n, 14, 14, 32]
    with tf.name_scope('pool3'):
        layer_pool3 = max_pool_2x2(layer_conv2)
    
    #[n, 14, 14, 64]
    with tf.name_scope('conv4'):
        W_conv4 = weight_variable([3, 3, 32, 64])
        b_conv4 = bias_variable([64])
        layer_conv4 = tf.nn.relu(conv2d(layer_pool3, W_conv4) + b_conv4)
    
    #[n, 14, 14, 64]
    with tf.name_scope('conv5'):
        W_conv5 = weight_variable([3, 3, 64, 64])
        b_conv5 = bias_variable([64])
        layer_conv5 = tf.nn.relu(conv2d(layer_conv4, W_conv5) + b_conv5) 
    
    #[n, 7, 7, 64]
    with tf.name_scope('pool6'):
        layer_pool6 = max_pool_2x2(layer_conv5)
        
    #[n, 7, 7, 128]
    print(layer_pool6.shape)
    with tf.name_scope('conv7'):
        W_conv7 = weight_variable([3, 3, 64, 128])
        b_conv7 = bias_variable([128])
        layer_conv7 = tf.nn.relu(conv2d(layer_pool6, W_conv7) + b_conv7)
        
    #[n, 7, 7, 128]
    with tf.name_scope('conv8'):
        W_conv8 = weight_variable([3, 3, 128, 128])
        b_conv8 = bias_variable([128])
        layer_conv8 = tf.nn.relu(conv2d(layer_conv7, W_conv8) + b_conv8)
    
    #[n, 7, 7, 128]
    with tf.name_scope('reshape9'):
        layer_reshape9 = tf.reshape(layer_conv8, [-1, 7 * 7 * 128])
        
    #[n, 7, 7, 128]
    with tf.name_scope('fc10'):
        W_fc10 = weight_variable([7 * 7 * 128, 1024])
        b_fc10 = bias_variable([1024])
        layer_fc10 = tf.nn.relu(tf.matmul(layer_reshape9, W_fc10) + b_fc10)
        
    with tf.name_scope('dropout11'):
        dropout_prob = tf.placeholder(tf.float32)
        layer_dropout11 = tf.nn.dropout(layer_fc10, dropout_prob)
        
    with tf.name_scope('fc12'):
        W_fc12 = weight_variable([1024, 10])
        b_fc12 = bias_variable([10])
        y = tf.matmul(layer_dropout11, W_fc12) + b_fc12
        
    return y, dropout_prob

In [27]:
def loss(labels, predict_labels):
#     cross_entropy = tf.reduce_mean(
#         tf.nn.sparse_softmax_cross_entropy_with_logits(
#             logits=predict_labels, labels=labels))
    
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            labels = labels, logits = predict_labels)
    cross_entropy = tf.reduce_mean(cross_entropy)
    return cross_entropy

In [28]:
def next_batch_set(images, labels, batch_size=128):
    """Generate a batch training data.
    
    Args:
        images: A 4-D array representing the training images.
        labels: A 1-D array representing the classes of images.
        batch_size: An integer.
        
    Return:
        batch_images: A batch of images.
        batch_labels: A batch of labels.
    """
    indices = np.random.choice(len(images), batch_size)
    batch_images = images[indices]
    batch_labels = labels[indices]
    return batch_images, batch_labels

In [99]:
def main(_):
    
    x = tf.placeholder(tf.float32, shape=[None, 28, 28, 3], name='inputs')
    y_ = tf.placeholder(tf.int32, shape=[None], name='labels')
    
    x = preprocess(x)
    
    predict_labels, dropout_prob = cnn(x)
    
    with tf.name_scope('loss'):
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=predict_labels, labels=y_))

    
    with tf.name_scope('adam_optimizer'):
        #train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(0.1, global_step, 150, 0.9)
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        train_step = optimizer.minimize(cross_entropy, global_step)
    
    with tf.name_scope('accuracy'):
        logits = tf.nn.softmax(predict_labels)
        classes = tf.cast(tf.argmax(logits, axis=1), dtype=tf.int32)
        classes_ = tf.identity(classes, name='classes')
        accuracy = tf.reduce_mean(tf.cast(tf.equal(classes, y_), 'float'))
        
#         correct_prediction = tf.equal(tf.argmax(predict_labels, 1), y_)
#         correct_prediction = tf.cast(correct_prediction, tf.float32)
#         accuracy = tf.reduce_mean(correct_prediction)
        
    train_images, train_targets = read_captcha_images(FLAGS.train_images_path)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(1000):
            batch_images, batch_labels = next_batch_set(train_images, train_targets)
            train_dict = {x: batch_images, y_: batch_labels, dropout_prob: 1.0}
            sess.run(train_step, feed_dict=train_dict)
            
            if i % 100 == 0:
                loss_, acc_ = sess.run([cross_entropy, accuracy], feed_dict=train_dict)
                train_text = 'step: {}, loss: {}, acc: {}'.format(i+1, loss_, acc_)
                print(train_text)
#                 train_accuracy = accuracy.eval(feed_dict={
#                     x: batch_images, y_: batch_labels, dropout_prob: 1.0})
#                 print('step %d, training accuracy %g' % (i, train_accuracy))
                
            #train_step.run(feed_dict={x: batch_images, y_: batch_labels, dropout_prob: 0.5})

        test_images, test_targets = read_captcha_images(FLAGS.test_images_path)
        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: test_images, y_: test_targets, dropout_prob: 1.0}))

In [None]:
if __name__ == '__main__':
    tf.app.run(main=main)

(?, 7, 7, 64)
Load 5000 images.
Load 10000 images.
Load 15000 images.
Load 20000 images.
Load 25000 images.
Load 30000 images.
Load 35000 images.
Load 40000 images.
step: 1, loss: 3.218753188807743e+27, acc: 0.1171875
step: 101, loss: nan, acc: 0.1015625
step: 201, loss: nan, acc: 0.109375
step: 301, loss: nan, acc: 0.1171875
step: 401, loss: nan, acc: 0.0390625
step: 501, loss: nan, acc: 0.0703125
step: 601, loss: nan, acc: 0.0859375
step: 701, loss: nan, acc: 0.078125
step: 801, loss: nan, acc: 0.0859375
step: 901, loss: nan, acc: 0.078125
step: 1001, loss: nan, acc: 0.1484375
step: 1101, loss: nan, acc: 0.1640625
step: 1201, loss: nan, acc: 0.1015625
step: 1301, loss: nan, acc: 0.09375
step: 1401, loss: nan, acc: 0.140625
step: 1501, loss: nan, acc: 0.1328125
step: 1601, loss: nan, acc: 0.125
step: 1701, loss: nan, acc: 0.1328125
step: 1801, loss: nan, acc: 0.0859375
step: 1901, loss: nan, acc: 0.0546875
step: 2001, loss: nan, acc: 0.1015625
step: 2101, loss: nan, acc: 0.1015625
ste