In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

import cv2

import os
import time
from datetime import datetime


tf.logging.set_verbosity(tf.logging.INFO)

  from ._conv import register_converters as _register_converters


In [30]:
width = 640
height = 480
pix = [height, width]
num_chan = 3
num_class = 2

In [3]:
# torn: 0-1484
# untorn: 0-1779
num_torn = 1485
num_untorn = 1780
total_pix = width*height*num_chan

torn_data = np.zeros((num_torn, total_pix), dtype=np.float32)
print(torn_data.shape)
untorn_data = np.zeros((num_untorn, total_pix), dtype=np.float32)

(1485, 921600)


In [4]:
torn_idx = np.arange(num_torn)
untorn_idx = np.arange(num_untorn)
print(torn_idx)

np.random.shuffle(torn_idx)
np.random.shuffle(untorn_idx)

print(torn_idx)

[   0    1    2 ... 1482 1483 1484]
[ 809 1233 1088 ... 1408  850  243]


In [5]:
torn_label = np.full((num_torn,1), 1, dtype=np.float32)
untorn_label = np.full((num_untorn,1), 1, dtype=np.float32)

In [6]:
# Training 80%, evaluation 20%
# Training data rate
tr_rate = 0.8


num_tr_torn = int(tr_rate * num_torn)
num_ev_torn = num_torn - num_tr_torn
print(num_tr_torn, num_ev_torn)

num_tr_untorn = int(tr_rate * num_untorn)
num_ev_untorn = num_untorn - num_tr_untorn
print(num_tr_untorn, num_ev_untorn)

tr_torn_idx = torn_idx[:num_tr_torn]
ev_torn_idx = torn_idx[num_tr_torn:]

tr_untorn_idx = untorn_idx[:num_tr_untorn]
ev_untorn_idx = untorn_idx[num_tr_untorn:]


tr_idx = np.append(tr_torn_idx, tr_untorn_idx)
ev_idx = np.append(ev_torn_idx, ev_untorn_idx)

tr_label = np.append(np.full((num_tr_torn), 1, dtype=np.int32), np.full((num_tr_untorn), 0, dtype=np.int32))
ev_label = np.append(np.full((num_ev_torn), 1, dtype=np.int32), np.full((num_ev_untorn), 0, dtype=np.int32))

print(tr_label[num_tr_torn-1], tr_label[num_tr_torn])

1188 297
1424 356
1 0


In [7]:
# Shuffle
tr_shuf_idx = np.arange(tr_label.shape[0])
np.random.shuffle(tr_shuf_idx)

ev_shuf_idx = np.arange(ev_label.shape[0])
np.random.shuffle(ev_shuf_idx)


tr_data_shuf = np.zeros((tr_label.shape[0], total_pix), dtype = np.float32)
tr_label_shuf = np.zeros(tr_label.shape[0], dtype=np.int32)


ev_data_shuf = np.zeros((ev_label.shape[0], total_pix), dtype = np.float32)
ev_label_shuf = np.zeros(ev_label.shape[0], dtype=np.int32)

In [22]:
print(tr_shuf_idx[0:3])
print(tr_label[2566])
print(tr_idx[2566])

[ 150 1742 2566]
0
946


In [23]:
for i in range(tr_label.shape[0]):
    if tr_label[tr_shuf_idx[i]] == 1:
        tr_label_shuf[i] = 1
        img = cv2.imread("/dataset/torn/img-%04d.png" % tr_idx[tr_shuf_idx[i]]).flatten()
        tr_data_shuf[i][:] = img
        
    else:
        tr_label_shuf[i] = 0
        img = cv2.imread("/dataset/untorn/img-%04d.png" % tr_idx[tr_shuf_idx[i]]).flatten()      
        tr_data_shuf[i][:] = img

In [24]:
for i in range(ev_label.shape[0]):
    if ev_label[ev_shuf_idx[i]] == 1:
        ev_label_shuf[i] = 1
        img = cv2.imread("/dataset/torn/img-%04d.png" % ev_idx[ev_shuf_idx[i]]).flatten()
        ev_data_shuf[i][:] = img
        
    else:
        ev_label_shuf[i] = 0
        img = cv2.imread("/dataset/untorn/img-%04d.png" % ev_idx[ev_shuf_idx[i]]).flatten()
        ev_data_shuf[i][:] = img

In [25]:
today = datetime.today().strftime("%Y-%m-%d-%H-%M-%S")
print(today)

2018-04-26-08-54-10


In [26]:
today = datetime.today().strftime("%Y-%m-%d-%H-%M-%S")

np.savetxt("./data_index/tr_idx-%s.csv" % today, tr_shuf_idx, delimiter=',')
np.savetxt("./data_index/tr_label-%s.csv" % today, tr_label_shuf, delimiter=',')


np.savetxt("./data_index/ev_idx-%s.csv" % today, ev_shuf_idx, delimiter=',')
np.savetxt("./data_index/ev_label-%s.csv" % today, ev_label_shuf, delimiter=',')

In [27]:
def init_weight(shape):
    return tf.Varaible(tf.random_normal(shape, stddev=0.01))

In [31]:
X = tf.placeholder("float", [None, total_pix])
Y = tf.placeholder("float", [None, num_class])
p_keep = tf.placeholder("float")

In [40]:
def cnn_model_fn(features, keep_rate):
    
    """Model function for CNN."""
    # Input Layer
    # Reshape X to 4-D tensor: [batch_size, height, width, channels]
    # Our Fishing net image size is 640x480 and 3-channel (RGB)
    input_layer = tf.reshape(features, [-1, 480, 640, 3])

    # Convolutional Layer #1
    # Computes 48 features using a 5x5 filter with ReLU activation.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 480, 640, 3]
    # Output Tensor Shape: [batch_size, 480, 640, 48]
    conv1 = tf.layers.conv2d(
        
        inputs=input_layer,
        filters=48,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling Layer #1
    # First max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 480, 640, 48]
    # Output Tensor Shape: [batch_size, 240, 320, 48]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    #pool1 = tf.nn.dropout(pool1, keep_rate)
    

    # Convolutional Layer #2
    # Computes 96 features using a 5x5 filter.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 240, 320, 48]
    # Output Tensor Shape: [batch_size, 240, 320, 96]
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=96,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling Layer #2
    # Second max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 240, 320, 96]
    # Output Tensor Shape: [batch_size, 120, 160, 96]
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    #pool2 = tf.nn.dropout(pool2, keep_rate)

    
    
    
    # Input Tensor Shape: [batch_size, 120, 160, 96]
    # Output Tensor Shape: [batch_size, 120, 160, 96]    
    conv3 = tf.layers.conv2d(
        inputs=pool2,
        filters=96,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    
    
    # Input Tensor Shape: [batch_size, 120, 160, 96]
    # Output Tensor Shape: [batch_size, 60, 80, 96]
    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)
    #pool3 = tf.nn.dropout(pool3, keep_rate)
    
    
    
    # Input Tensor Shape: [batch_size, 60, 80, 96]
    # Output Tensor Shape: [batch_size, 60, 80, 96]    
    conv4 = tf.layers.conv2d(
        inputs=pool3,
        filters=96,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    
    
    # Input Tensor Shape: [batch_size, 60, 80, 96]
    # Output Tensor Shape: [batch_size, 30, 40, 96]
    pool4 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2)
    #pool4 = tf.nn.dropout(pool4, keep_rate)
    
    
    
    # Input Tensor Shape: [batch_size, 30, 40, 96]
    # Output Tensor Shape: [batch_size, 30, 40, 96]    
    conv5 = tf.layers.conv2d(
        inputs=pool4,
        filters=96,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    
    
    # Input Tensor Shape: [batch_size, 30, 40, 96]
    # Output Tensor Shape: [batch_size, 15, 20, 96]
    pool5 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[2, 2], strides=2)
    #pool5 = tf.nn.dropout(pool5, keep_rate)
    
    
    
    
    # Flatten tensor into a batch of vectors
    # Input Tensor Shape: [batch_size, 15, 20, 96]
    # Output Tensor Shape: [batch_size, 15 * 20 * 96]
    pool5_flat = tf.reshape(pool5, [-1, 15 * 20 * 96])
    


    # Dense Layer
    # Densely connected layer with 1024 neurons
    # Input Tensor Shape: [batch_size, 15 * 20 * 96]
    # Output Tensor Shape: [batch_size, 1024]
    dense = tf.layers.dense(inputs=pool5_flat, units=1024, activation=tf.nn.relu)
    
    
    dropout = tf.layers.dropout(inputs=dense, rate=(1-keep_rate))
    

    # Logits layer
    # Input Tensor Shape: [batch_size, 1024]
    # Output Tensor Shape: [batch_size, 1]
    logits = tf.layers.dense(inputs=dropout, units=2)

    return logits

In [41]:
py_x = cnn_model_fn(X, p_keep)
loss = tf.losses.sparse_softmax_cross_entropy(labels = Y, logits = py_x)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())


ValueError: Can not squeeze dim[1], expected a dimension of 1, got 2 for 'sparse_softmax_cross_entropy_loss/remove_squeezable_dimensions/Squeeze' (op: 'Squeeze') with input shapes: [?,2].

In [None]:
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.8
sess = tf.Session(config=config)

In [None]:
epoch = 1
batch_size = 128
keep_r = 0.5

In [None]:
tf.global_variables_initializer().run()

training_steps = int(epoch*tr_label_shuf.shape[0]/batch_size)

#tr_idx_select = np.arange(tr_label_shuf.shape[0])

# without random selection
for i in range(training_steps):
    if i != range(training_steps)[-1]:
        sess.run(optimizer, feed_dict={X: tr_data_shuf[i*batch_size:(i+1)*batch_size],
                                      Y: tr_label_shuf[i*batch_size:(i+1)*batch_size],
                                      keep_rate: keep_r})
    else:
        sess.run(optimizer, feed_dict={X: tr_data_shuf[i*batch_size:],
                                      Y: tr_label_shuf[i*batch_size:],
                                      keep_rate: keep_r})
    