In [1]:
import tensorflow as tf
import inception_v3
from datagenerator import ImageDataGenerator
from tensorflow.contrib.data import Iterator
import numpy as np
from datetime import datetime
import time
import os
import json
# Number GPUs by PCI bus id and expose only device "0" to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

  from ._conv import register_converters as _register_converters


In [2]:
# Load the 5-fold split definition: a JSON object whose keys are
# 'train0'..'train4' and 'valid0'..'valid4', each mapping to a list of
# sample path strings.
# Use the same fold number for training and validation,
# e.g. fold 0 -> 'train0' for training and 'valid0' for validation.
# The context manager closes the file handle as soon as it has been read.
with open('/HS_code/ICROS_data/data_split/5fold_0802.txt', 'r') as data_txt:
    data_json = data_txt.read()
tr_data_dir = json.loads(data_json)
print(tr_data_dir.keys())

dict_keys(['train3', 'valid4', 'train4', 'train1', 'valid3', 'valid1', 'valid0', 'valid2', 'train0', 'train2'])


In [3]:
# ---- Experiment configuration ----
# Fold index and date tag; both are embedded in the file and directory names below.
num = 0
date = "20180810"

# Text files listing the training and validation samples for the selected fold.
train_file = '/HS_code/0_Code/train%d_HR_ICROS.txt' % num
val_file1 = '/HS_code/0_Code/valid%d_HR_ICROS.txt' % num

# Hyper-parameters.
batch_size, num_classes = 128, 2
learning_rate = 0.001
training_epochs = 50
display_step = 20  # write a TensorBoard summary every `display_step` training steps

# Output directories for tf.summary.FileWriter and for the model checkpoints.
# Both names are filled from the same (date, fold, fold, epochs) fields.
_run_fields = (date, num, num, training_epochs)
filewriter_path = "/HS_code/2_Result_TB/tensorboard_HS_%s_GoogLeNet_hr%d_valid%d_%depoch_0_001" % _run_fields
checkpoint_path = "/HS_code/1_Model_CP/checkpoints_HS_%s_GoogLeNet_hr%d_valid%d_%depoch_0_001" % _run_fields

In [4]:
# Build the TF1 graph: input pipeline, Inception-v3 classifier, loss,
# optimizer, accuracy, summaries and checkpoint saver.
with tf.device('/gpu:0'):
    # Dataset wrappers for the training and validation file lists.
    tr_data = ImageDataGenerator(train_file,
                                 mode='training',
                                 batch_size=batch_size,
                                 num_classes=num_classes,
                                 shuffle=True)
    val_data1 = ImageDataGenerator(val_file1,
                                   mode='inference',
                                   batch_size=batch_size,
                                   num_classes=num_classes,
                                   shuffle=False)

    # A single reinitializable iterator built from the training dataset's
    # structure; the initializer ops below switch it between the datasets.
    iterator = Iterator.from_structure(tr_data.data.output_types,
                                       tr_data.data.output_shapes)

    next_batch = iterator.get_next()

    # Ops for pointing the iterator at the training / validation dataset.
    training_init_op = iterator.make_initializer(tr_data.data)
    validation_init_op1 = iterator.make_initializer(val_data1.data)

    # Graph inputs. The leading dimension is fixed, so every fed batch must
    # contain exactly `batch_size` samples.
    # `y` holds one label vector per sample; argmax over axis 1 is taken as
    # the true class in the accuracy op below.
    x = tf.placeholder(tf.float32, [batch_size, 227, 227, 3])
    y = tf.placeholder(tf.float32, [batch_size, num_classes])

    # NOTE(review): dropout_keep_prob=0.5 is fixed at graph-construction time.
    # Whether dropout is switched off for validation depends on how the local
    # inception_v3 module handles training vs. inference mode -- confirm.
    net, net_points = inception_v3.inception_v3(x,
                                                num_classes=num_classes,
                                                dropout_keep_prob=0.5
                                                )

    # Op for calculating the loss (mean softmax cross-entropy over the batch).
    with tf.name_scope("cross_ent"):
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=net, labels=y))

    # Train op. `cost` is kept as a name but now aliases `loss` instead of
    # building a second, identical cross-entropy op in the graph.
    with tf.name_scope("train"):
        cost = loss
        train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Add the loss to summary
    tf.summary.scalar('cross_entropy', loss)

    # Fraction of samples in the batch whose argmax prediction matches the label.
    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(net, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Add the accuracy to the summary
    tf.summary.scalar('accuracy', accuracy)

    # Merge all summaries together
    merged_summary = tf.summary.merge_all()

    # Initialize the FileWriter
    writer = tf.summary.FileWriter(filewriter_path)

    # Initialize a saver for storing model checkpoints
    saver = tf.train.Saver()

    # Number of full batches per epoch. The counts are floored, so loops that
    # iterate over them never consume a trailing partial batch.
    train_batches_per_epoch = int(np.floor(tr_data.data_size / batch_size))
    val_batches_per_epoch1 = int(np.floor(val_data1.data_size / batch_size))
    print(train_batches_per_epoch)
    print(val_batches_per_epoch1)

Instructions for updating:
Use `tf.data.Dataset.from_tensor_slices()`.
Instructions for updating:
Replace `num_threads=T` with `num_parallel_calls=T`. Replace `output_buffer_size=N` with `ds.prefetch(N)` on the returned dataset.
28
7


In [5]:
# Training
# Train for `training_epochs` epochs, validate after every epoch, and save a
# checkpoint after the final epoch.
# `vaild_result` stores the mean validation accuracy of each epoch, shape
# (training_epochs, 1). The name is kept as-is because a later cell displays it.
vaild_result = np.zeros((training_epochs, 1))

config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
config.gpu_options.allow_growth = True

# The checkpoint directory must exist before saver.save() is called.
os.makedirs(checkpoint_path, exist_ok=True)

with tf.Session(config=config, graph=tf.get_default_graph()) as sess:
    sess.run(tf.global_variables_initializer())

    # Add the model graph to TensorBoard
    writer.add_graph(sess.graph)

    print("{} Start training...".format(datetime.now()))
    print("{} Open tensorboard --logdir={}".format(datetime.now(),
                                                      filewriter_path))

    for epoch in range(training_epochs):

        print("{} Epoch number: {}".format(datetime.now(), epoch+1))

        # Initialize iterator with the training dataset
        sess.run(training_init_op)

        for step in range(train_batches_per_epoch):

            # get next batch of data
            img_batch, label_batch = sess.run(next_batch)

            # And run the training op
            sess.run(train_op, feed_dict={x: img_batch, y: label_batch})

            # Generate summary with the current batch of data and write to file.
            # The global step is the number of training batches seen so far.
            if step % display_step == 0:
                s = sess.run(merged_summary, feed_dict={x: img_batch,
                                                        y: label_batch})
                writer.add_summary(s, epoch*train_batches_per_epoch + step)
                print("{} {} step".format(datetime.now(), step))

        # Validate the model on the entire validation set
        print("{} Start validation".format(datetime.now()))
        sess.run(validation_init_op1)
        test_acc = 0.
        test_count = 0
        # pre[b, i] is 1 if sample i of validation batch b was classified
        # correctly, 0 otherwise.
        pre = np.zeros((val_batches_per_epoch1, batch_size), dtype='uint32')

        for a in range(val_batches_per_epoch1):

            img_batch, label_batch = sess.run(next_batch)
            # Fetch both tensors from ONE forward pass so the batch accuracy
            # and the per-sample flags always describe the same predictions
            # (and the network is evaluated once instead of twice).
            acc, batch_correct = sess.run([accuracy, correct_prediction],
                                          feed_dict={x: img_batch,
                                                     y: label_batch})
            pre[test_count] = batch_correct

            # On the last epoch, print an identifier for every misclassified sample.
            if epoch == (training_epochs-1):
                for i in range(batch_size):
                    if not pre[test_count][i]:
                        # Offset of the sample within the validation pass: use the
                        # validation batch index `a`, not the stale training-loop
                        # variable `step`.
                        order = a*batch_size + i
                        # NOTE(review): the name is looked up in the 'train<num+1>'
                        # list of the split file, not 'valid<num>'; this also fails
                        # for the last fold (no 'train5' key). Confirm this list
                        # really matches the order of val_file1.
                        set_num = num+1
                        temp = tr_data_dir['train%d'%set_num][order]
                        # File name without extension, then the text between '(' and ')'.
                        name = temp.split('/')[-1].split('.')[0].split('(')[1].split(')')[0]
                        print(name)

            test_acc += acc
            test_count += 1

        # Mean accuracy over the validation batches; guard against an empty
        # validation set (fewer than batch_size samples).
        if test_count:
            test_acc /= test_count
        print("{} Validation Accuracy = {:.4f}".format(datetime.now(),
                                                       test_acc))
        vaild_result[epoch] = test_acc

        if epoch == training_epochs-1 :
            print("{} Saving checkpoint of model...".format(datetime.now()))
            # save checkpoint of the model
            checkpoint_name = os.path.join(checkpoint_path,
                                           'model_epoch'+str(epoch+1)+'.ckpt')
            save_path = saver.save(sess, checkpoint_name)

            print("{} Model checkpoint saved at {}".format(datetime.now(),
                                                           checkpoint_name))

2018-08-10 09:08:28.605049 Start training...
2018-08-10 09:08:28.606058 Open tensorboard --logdir=/HS_code/2_Result_TB/tensorboard_HS_20180810_GoogLeNet_hr0_valid0_50epoch_0_001
2018-08-10 09:08:28.606307 Epoch number: 1
2018-08-10 09:08:34.581492 0 step
2018-08-10 09:08:45.853959 20 step
2018-08-10 09:08:49.741648 Start validation
2018-08-10 09:08:53.249174 Validation Accuracy = 0.5312
2018-08-10 09:08:53.249376 Epoch number: 2
2018-08-10 09:08:55.682083 0 step
2018-08-10 09:09:06.915848 20 step
2018-08-10 09:09:10.833190 Start validation
2018-08-10 09:09:13.905429 Validation Accuracy = 0.5312
2018-08-10 09:09:13.905637 Epoch number: 3
2018-08-10 09:09:16.368209 0 step
2018-08-10 09:09:28.100426 20 step
2018-08-10 09:09:32.192301 Start validation
2018-08-10 09:09:35.286052 Validation Accuracy = 0.6998
2018-08-10 09:09:35.286245 Epoch number: 4
2018-08-10 09:09:37.870504 0 step
2018-08-10 09:09:49.755288 20 step
2018-08-10 09:09:54.085107 Start validation
2018-08-10 09:09:57.276763 Val

2018-08-10 09:22:26.268457 Validation Accuracy = 0.9710
2018-08-10 09:22:26.268673 Epoch number: 39
2018-08-10 09:22:28.817974 0 step
2018-08-10 09:22:41.318797 20 step
2018-08-10 09:22:45.556591 Start validation
2018-08-10 09:22:48.814862 Validation Accuracy = 0.9654
2018-08-10 09:22:48.815058 Epoch number: 40
2018-08-10 09:22:51.335028 0 step
2018-08-10 09:23:03.427077 20 step
2018-08-10 09:23:07.617350 Start validation
2018-08-10 09:23:10.866104 Validation Accuracy = 0.9777
2018-08-10 09:23:10.866291 Epoch number: 41
2018-08-10 09:23:13.360867 0 step
2018-08-10 09:23:25.084097 20 step
2018-08-10 09:23:29.173997 Start validation
2018-08-10 09:23:32.320306 Validation Accuracy = 0.9676
2018-08-10 09:23:32.320509 Epoch number: 42
2018-08-10 09:23:34.843656 0 step
2018-08-10 09:23:46.697072 20 step
2018-08-10 09:23:50.753426 Start validation
2018-08-10 09:23:54.036890 Validation Accuracy = 0.9855
2018-08-10 09:23:54.037095 Epoch number: 43
2018-08-10 09:23:56.537708 0 step
2018-08-10 09:

In [6]:
# Display the per-epoch validation accuracy filled in by the training loop
# (one row per epoch).
vaild_result

array([[0.53125   ],
       [0.53125   ],
       [0.69977679],
       [0.67522321],
       [0.75446429],
       [0.75334821],
       [0.67857143],
       [0.71540179],
       [0.72767857],
       [0.72544643],
       [0.765625  ],
       [0.796875  ],
       [0.79910714],
       [0.81584821],
       [0.76450893],
       [0.84598214],
       [0.88727679],
       [0.86049107],
       [0.87834821],
       [0.91741071],
       [0.92299107],
       [0.92633929],
       [0.94084821],
       [0.96651786],
       [0.95647321],
       [0.96428571],
       [0.95758929],
       [0.97433036],
       [0.96428571],
       [0.97433036],
       [0.96763393],
       [0.96986607],
       [0.97767857],
       [0.95758929],
       [0.97767857],
       [0.94084821],
       [0.96428571],
       [0.97098214],
       [0.96540179],
       [0.97767857],
       [0.96763393],
       [0.98549107],
       [0.97321429],
       [0.97879464],
       [0.9765625 ],
       [0.9765625 ],
       [0.984375  ],
       [0.970

# List the misclassified validation samples of the trained model.
# pre[b, i] is 1 if sample i of validation batch b is predicted correctly,
# 0 if it is wrong.
test_count = 0
config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
config.gpu_options.allow_growth = True
pre = np.zeros((val_batches_per_epoch1, batch_size), dtype='uint32')
with tf.Session(config=config, graph=tf.get_default_graph()) as sess:
    # A new session holds no trained weights. Restore the checkpoint written
    # after the final training epoch instead of re-initializing the variables,
    # which would evaluate a randomly initialized network.
    saver.restore(sess, os.path.join(checkpoint_path,
                                     'model_epoch'+str(training_epochs)+'.ckpt'))

    # One pass over the validation set is enough: `pre` has exactly
    # val_batches_per_epoch1 rows, so repeating the pass without resetting
    # test_count would index past the end of the array.
    sess.run(validation_init_op1)
    for step in range(val_batches_per_epoch1):
        # get next batch of data
        img_batch, label_batch = sess.run(next_batch)
        pre[test_count] = sess.run(correct_prediction, feed_dict={x: img_batch,
                                                                  y: label_batch})
        for i in range(batch_size):
            if not pre[test_count][i]:
                # Offset of the sample within the validation pass.
                order = step*batch_size + i
                # NOTE(review): the name is looked up in the 'train<num+1>' list
                # of the split file, not 'valid<num>'; this also fails for the
                # last fold (no 'train5' key). Confirm this list really matches
                # the order of val_file1.
                set_num = num+1
                temp = tr_data_dir['train%d'%set_num][order]
                # File name without extension, then the text between '(' and ')'.
                name = temp.split('/')[-1].split('.')[0].split('(')[1].split(')')[0]
                print(name)
        test_count += 1
            
