In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
import tensorlayer as tl
import cv2

import os

tf.logging.set_verbosity(tf.logging.INFO)


  from ._conv import register_converters as _register_converters


In [2]:
def cnn_model_fn(features, labels, mode):
    
    """Model function for CNN."""
    # Input Layer
    # Reshape X to 4-D tensor: [batch_size, height, width, channels]
    # Our Fishing net image size is 640x480 and 3-channel (RGB)
    input_layer = tf.reshape(features["x"], [-1, 480, 640, 3])

    # Convolutional Layer #1
    # Computes 48 features using a 5x5 filter with ReLU activation.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 480, 640, 3]
    # Output Tensor Shape: [batch_size, 480, 640, 48]
    conv1 = tf.layers.conv2d(
        
        inputs=input_layer,
        filters=48,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling Layer #1
    # First max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 480, 640, 48]
    # Output Tensor Shape: [batch_size, 240, 320, 48]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional Layer #2
    # Computes 96 features using a 5x5 filter.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 240, 320, 48]
    # Output Tensor Shape: [batch_size, 240, 320, 96]
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=96,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling Layer #2
    # Second max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 240, 320, 96]
    # Output Tensor Shape: [batch_size, 120, 160, 96]
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    
    
    
    # Input Tensor Shape: [batch_size, 120, 160, 96]
    # Output Tensor Shape: [batch_size, 120, 160, 96]    
    conv3 = tf.layers.conv2d(
        inputs=pool2,
        filters=96,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    
    # Input Tensor Shape: [batch_size, 120, 160, 96]
    # Output Tensor Shape: [batch_size, 60, 80, 96]
    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)
    
    
    
    # Input Tensor Shape: [batch_size, 60, 80, 96]
    # Output Tensor Shape: [batch_size, 60, 80, 96]    
    conv4 = tf.layers.conv2d(
        inputs=pool3,
        filters=96,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    
    # Input Tensor Shape: [batch_size, 60, 80, 96]
    # Output Tensor Shape: [batch_size, 30, 40, 96]
    pool4 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2)
    
    
    
    # Input Tensor Shape: [batch_size, 30, 40, 96]
    # Output Tensor Shape: [batch_size, 30, 40, 96]    
    conv5 = tf.layers.conv2d(
        inputs=pool4,
        filters=96,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    
    # Input Tensor Shape: [batch_size, 30, 40, 96]
    # Output Tensor Shape: [batch_size, 15, 20, 96]
    pool5 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[2, 2], strides=2)
    
    
    
    
    # Flatten tensor into a batch of vectors
    # Input Tensor Shape: [batch_size, 15, 20, 96]
    # Output Tensor Shape: [batch_size, 15 * 20 * 96]
    pool5_flat = tf.reshape(pool5, [-1, 15 * 20 * 96])


    # Dense Layer
    # Densely connected layer with 1024 neurons
    # Input Tensor Shape: [batch_size, 15 * 20 * 96]
    # Output Tensor Shape: [batch_size, 1024]
    dense = tf.layers.dense(inputs=pool5_flat, units=1024, activation=tf.nn.relu)

    # Add dropout operation; 0.6 probability that element will be kept
    dropout = tf.layers.dropout(
        inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits layer
    # Input Tensor Shape: [batch_size, 1024]
    # Output Tensor Shape: [batch_size, 1]
    logits = tf.layers.dense(inputs=dropout, units=2)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    # sparse_softmax_cross_entropy cannot use one-hot encoding
    
    #loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

## Code Run


### Training
Batch size: 128  
Epoch: 5  
Shuffle: True  
Step: 20000

## Loading dataset
Load dataset and split them into training data and evaluation data

In [3]:
# torn: 0-1484
# untorn: 0-1779
num_torn = 1485
num_untorn = 1780
total_pix = 480*640*3

torn_data = np.zeros((num_torn, total_pix), dtype=np.float32)
print(torn_data.shape)
untorn_data = np.zeros((num_untorn, total_pix), dtype=np.float32)

(1485, 921600)


In [4]:
torn_idx = np.arange(num_torn)
untorn_idx = np.arange(num_untorn)
print(torn_idx)

np.random.shuffle(torn_idx)
np.random.shuffle(untorn_idx)

print(torn_idx)

[   0    1    2 ... 1482 1483 1484]
[ 259  612  262 ...  268  191 1242]


In [5]:
for i in range(num_torn):
    img = cv2.imread("/dataset/torn/img-%04d.png" % torn_idx[i]).flatten()
    torn_data[i][:] = img

In [6]:
for i in range(num_untorn):
    img = cv2.imread("/dataset/untorn/img-%04d.png" % untorn_idx[i]).flatten()
    untorn_data[i][:] = img

In [7]:
torn_label = np.full((num_torn,1), 1, dtype=np.float32)
untorn_label = np.full((num_untorn,1), 1, dtype=np.float32)

In [8]:
# 80% training, 20% evaluation
tr_rate = 0.8
num_tr_torn = int(tr_rate * num_torn)
num_ev_torn = num_torn - num_tr_torn
print(num_tr_torn, num_ev_torn)

num_tr_untorn = int(tr_rate * num_untorn)
num_ev_untorn = num_untorn - num_tr_untorn
print(num_tr_untorn, num_ev_untorn)

tr_torn = torn_data[:num_tr_torn][:]
ev_torn = torn_data[num_tr_torn:][:]
print(tr_torn.shape)
print(ev_torn.shape)

tr_untorn = untorn_data[:num_tr_untorn][:]
ev_untorn = untorn_data[num_tr_untorn:][:]
print(tr_untorn.shape)
print(ev_untorn.shape)



tr_data = np.append(tr_torn, tr_untorn, axis=0)
print(tr_data.shape)
ev_data = np.append(ev_torn, ev_untorn, axis=0)
print(ev_data.shape)

tr_label = np.append(np.full((num_tr_torn), 1, dtype=np.int32), np.full((num_tr_untorn), 0, dtype=np.int32))
ev_label = np.append(np.full((num_ev_torn), 1, dtype=np.int32), np.full((num_ev_untorn), 0, dtype=np.int32))
print(tr_label.shape[0], ev_label.shape[0])



1188 297
1424 356
(1188, 921600)
(297, 921600)
(1424, 921600)
(356, 921600)
(2612, 921600)
(653, 921600)
2612 653


In [9]:
shuf1 = np.arange(tr_label.shape[0])
np.random.shuffle(shuf1)
print(shuf1)

shuf2 = np.arange(ev_label.shape[0])
np.random.shuffle(shuf2)
print(shuf2)

tr_data_sh = np.zeros((tr_data.shape), dtype=np.float32)
tr_label_sh = np.zeros((tr_label.shape), dtype=np.int32)
ev_data_sh = np.zeros((ev_data.shape), dtype=np.float32)
ev_label_sh = np.zeros((ev_label.shape), dtype=np.int32)


for i in range(tr_label.shape[0]):
    tr_data_sh[i][:] = tr_data[shuf1[i]][:]
    tr_label_sh[i] = tr_label[shuf1[i]]
    
for i in range (ev_label.shape[0]):
    ev_data_sh[i][:] = ev_data[shuf2[i]][:]
    ev_label_sh[i] = ev_label[shuf2[i]]

[2048  315 1783 ... 1947 1249  333]
[306 238 308 597 421 182 507 264 111  27 194 569 386 564 433 496  93 454
 371 121 505 440 173 358 628 196 393 225  35 273 298 113 637 324 275 175
 245  34 644 456  48 171 253 410 568 115 519  87 367 417  64 170  26 477
 100 197 395 633 586 562 200 228  98 159 415 290 381  43 218 609 610  94
  67 470 573  58  62 281 233 212 474 337 279 214 599 198  49 260 160 150
  31  52 107 208  84 335 616 289 582  33  18 469 453 157  25 278 548 592
 388 276 428   7 219 452 486  73 118 321 575 130 295 429  92  29 541 262
 550  66 579 318  44  24 340 443 216 203 265 520 542   3 480 473 104 227
 221 312 420 108  89 561 571 618 296 315 524  15 525 494 412 334 430 338
 640 167 643 416 434 414 215 188  96  16 636 369  20 341 356 534  75 101
 413  71 376   5 372 605 283 348 450 593   8 206 451 501  79 399 596 254
 169 365 602   0 523 556 177 237 391 189 204 465 195 154 574 223 161 476
 119 374 502 387 392 540  32 551 611 327  59 134 181 135 570 139 583 271
 329 375 368 48

In [10]:
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.7
session = tf.Session(config=config)

# Create the Estimator
net_classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn, model_dir="/models/CNN2600-0510")

# Set up logging for predictions
# Log the values in the "Softmax" tensor with label "probabilities"
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log, every_n_iter=50)

batch_s = 16
epochs = 5
tr_steps = int(tr_data.shape[0]/batch_s*epochs)
in_steps = 1

for d in ['/device:GPU:0', '/device:GPU:1', '/device:GPU:2']:
    with tf.device(d):

        # Train the model
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": tr_data},
            y=tr_label,
            batch_size=batch_s,
            num_epochs=epochs,
            shuffle=True)
        net_classifier.train(
            input_fn=train_input_fn,
            steps=20000,
            hooks=[logging_hook])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3f63da30b8>, '_save_summary_steps': 100, '_session_config': None, '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_steps': None, '_log_step_count_steps': 100, '_num_worker_replicas': 1, '_task_type': 'worker', '_task_id': 0, '_master': '', '_global_id_in_cluster': 0, '_is_chief': True, '_model_dir': '/models/CNN2600-0510', '_evaluation_master': '', '_save_checkpoints_secs': 600, '_tf_random_seed': None, '_keep_checkpoint_max': 5, '_num_ps_replicas': 0, '_service': None}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /models/CNN2600-0510/model.ckpt.
INFO:tensorflow:probabilities = [[0.         1.

NanLossDuringTrainingError: NaN loss during training.

In [None]:
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.8
session = tf.Session(config=config)

ev_results = np.zeros((len(ev_label), 1))
with tf.device('/device:GPU:3'):

    # Evaluate the model and print results
    for i in range(len(ev_label)):
        eval_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": ev_data_sh[i:i+1]},
            y=ev_label_sh[i:i+1],
            num_epochs=1,
            shuffle=False)
        eval_results = net_classifier.evaluate(input_fn=eval_input_fn)
        
        ev_results[i] = eval_results["accuracy"]

In [None]:
print(np.mean(ev_results))

In [None]:
fail_idx = np.where(ev_results == 0)
print(fail_idx[0])
print(len(fail_idx[0]))

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

for i in range(len(fail_idx[0])):

    temp_img = ev_data_sh[fail_idx[0][i]][:].reshape([480, 640, 3])

    temp_img = 255-temp_img

    width = 12
    height = 12
    plt.figure(figsize=(width, height))
    plt.axis("off")
    plt.imshow(cv2.cvtColor(temp_img, cv2.COLOR_BGR2RGB))
    plt.show()