In [12]:
import numpy as np
import os
# Restrict which GPUs this process may see. The variable is set before
# TensorFlow is imported so it is already in place when the library loads.
os.environ["CUDA_VISIBLE_DEVICES"] = '2, 3'

import tensorflow as tf
# Session options: allocate GPU memory on demand instead of all at once.
# NOTE: the options only take effect for sessions created with
# tf.Session(config=config).
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

from tensorflow.contrib.data import Dataset, Iterator
import matplotlib.pyplot as plt
import glob
import re
from scipy import misc
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

### Loading Data

In [13]:
# Root of the dataset; frames and labels live in sibling sub-directories.
data_dir = './../brian/cedl/'
frame_dir = data_dir + 'frames/'
label_dir = data_dir + 'labels/'
# Recording locations; each one is a sub-directory of the train/test frame folders.
place = ['house', 'lab', 'office']

# One encoder per label group (FA, gesture, object).
# NOTE(review): none of the code visible in this notebook references these
# encoders (the label loaders build one-hot rows with np.eye) - confirm whether
# they are still needed.
FA_enc = OneHotEncoder()
ges_enc = OneHotEncoder()
obj_enc = OneHotEncoder()

def load_data():
    """Collect the paths of all training frames in a deterministic order.

    For every location in the module-level ``place`` list the function walks
    ``<frame_dir>train/<place>/<video>/<hand>/`` where ``<video>`` runs over
    1..3 (1..4 for locations whose name contains ``'lab'``) and ``<hand>`` is
    ``Lhand`` followed by ``Rhand``. Inside each folder the files are ordered
    by the integer formed from the digits in their file name, so ``Image2``
    sorts before ``Image10``.

    Returns:
        list[str]: one path per frame (images are not decoded here).

    Raises:
        OSError: if an expected folder does not exist.
        ValueError: if a file name in a folder contains no digits.
    """
    frame_paths = []
    for location in place:
        # The 'lab' location has four training videos, the others three.
        video_count = 4 if 'lab' in location else 3
        for video_id in range(1, video_count + 1):
            for hand in ("Lhand", "Rhand"):
                folder = frame_dir + "train/" + location + "/" + str(video_id) + "/" + hand + "/"
                # Raw string: '\D' in a normal literal is an invalid escape sequence.
                ordered = sorted(os.listdir(folder),
                                 key=lambda name: int(re.sub(r'\D', '', name)))
                frame_paths.extend(folder + name for name in ordered)
    return frame_paths

def load_label():
    """Load the training labels and return them as concatenated one-hot rows.

    Label files are read from ``<label_dir><place>/<task>_<side><n>.npy`` where
    ``<task>`` is ``FA``, ``ges`` or ``obj``, ``<side>`` is ``left`` then
    ``right`` and ``<n>`` runs over 1..3 for ``house`` and ``office`` and over
    1..4 for ``lab``. The iteration order (location, video, left/right) is the
    same order in which ``load_data`` lists the frame folders.

    Every file is flattened, the values are cast to ``int`` and one-hot encoded
    with 2 (FA), 13 (gesture) and 24 (object) classes.

    Returns:
        numpy.ndarray: array of shape ``(num_frames, 39)``; columns 0:2 are FA,
        2:15 gesture and 15:39 object.

    Raises:
        OSError: if a label file is missing.
        IndexError: if a label value is outside its class range.
    """
    videos_per_place = (('house', 3), ('lab', 4), ('office', 3))
    class_counts = (('FA', 2), ('ges', 13), ('obj', 24))

    encoded_groups = []
    for task, num_classes in class_counts:
        pieces = []
        for location, video_count in videos_per_place:
            for video_id in range(1, video_count + 1):
                for side in ('left', 'right'):
                    file_name = label_dir + location + "/" + task + "_" + side + str(video_id) + ".npy"
                    # ravel keeps the flattening behaviour of np.append.
                    pieces.append(np.ravel(np.load(file_name)))
        # One concatenate instead of repeated np.append (each append copies everything).
        class_ids = np.concatenate(pieces).astype(int)
        encoded_groups.append(np.eye(num_classes)[class_ids])

    FA_label = encoded_groups[0]
    print(type(FA_label), FA_label.shape)
    return np.concatenate(encoded_groups, axis=1)

def load_test():
    """Collect the paths of all test frames in a deterministic order.

    For every location in the module-level ``place`` list the function walks
    ``<frame_dir>test/<place>/<video>/<hand>/`` where ``<video>`` runs over
    1..3 (1..4 for locations whose name contains ``'lab'``) and ``<hand>`` is
    ``Lhand`` followed by ``Rhand``. Inside each folder the files are ordered
    by the integer formed from the digits in their file name.

    Returns:
        list[str]: one path per frame (images are not decoded here).

    Raises:
        OSError: if an expected folder does not exist.
        ValueError: if a file name in a folder contains no digits.
    """
    frame_paths = []
    for location in place:
        # The 'lab' location has four test videos, the others three.
        video_count = 4 if 'lab' in location else 3
        for video_id in range(1, video_count + 1):
            for hand in ("Lhand", "Rhand"):
                folder = frame_dir + "test/" + location + "/" + str(video_id) + "/" + hand + "/"
                # Raw string: '\D' in a normal literal is an invalid escape sequence.
                ordered = sorted(os.listdir(folder),
                                 key=lambda name: int(re.sub(r'\D', '', name)))
                frame_paths.extend(folder + name for name in ordered)
    return frame_paths

def load_test_label():
    """Load the test labels and return them as concatenated one-hot rows.

    Label files are read from ``<label_dir><place>/<task>_<side><n>.npy`` where
    ``<task>`` is ``FA``, ``ges`` or ``obj`` and ``<side>`` is ``left`` then
    ``right``. The test videos use the label indices that follow the training
    ones: 4..6 for ``house`` and ``office``, 5..8 for ``lab``. The iteration
    order (location, video, left/right) is the same order in which
    ``load_test`` lists the frame folders.

    Every file is flattened, the values are cast to ``int`` and one-hot encoded
    with 2 (FA), 13 (gesture) and 24 (object) classes.

    Returns:
        numpy.ndarray: array of shape ``(num_frames, 39)``; columns 0:2 are FA,
        2:15 gesture and 15:39 object.

    Raises:
        OSError: if a label file is missing.
        IndexError: if a label value is outside its class range.
    """
    video_ids_per_place = (
        ('house', range(4, 7)),
        ('lab', range(5, 9)),
        ('office', range(4, 7)),
    )
    class_counts = (('FA', 2), ('ges', 13), ('obj', 24))

    encoded_groups = []
    for task, num_classes in class_counts:
        pieces = []
        for location, video_ids in video_ids_per_place:
            for video_id in video_ids:
                for side in ('left', 'right'):
                    file_name = label_dir + location + "/" + task + "_" + side + str(video_id) + ".npy"
                    # ravel keeps the flattening behaviour of np.append.
                    pieces.append(np.ravel(np.load(file_name)))
        # One concatenate instead of repeated np.append (each append copies everything).
        class_ids = np.concatenate(pieces).astype(int)
        encoded_groups.append(np.eye(num_classes)[class_ids])

    FA_label = encoded_groups[0]
    print(type(FA_label), FA_label.shape)
    return np.concatenate(encoded_groups, axis=1)

def get_batch(image_path):
    """Decode and resize a collection of image files.

    Args:
        image_path: iterable of image file paths.

    Returns:
        list: one resized image per path, in input order. Each image is read
        with ``misc.imread`` and resized to 270 rows by 480 columns with
        ``misc.imresize``.
    """
    # NOTE(review): scipy.misc.imread/imresize were deprecated and later removed
    # from SciPy - confirm the pinned SciPy version still provides them.
    return [misc.imresize(misc.imread(path), [270, 480]) for path in image_path]

In [14]:
# Paths of all training frames; the images themselves are decoded later,
# batch by batch, by get_batch.
Data = load_data()

# Cell result: number of training frames found.
len(Data)

14992

In [15]:
# One row of concatenated one-hot labels (FA, gesture, object) per training frame.
labels = load_label()

# Width of a single label row (2 + 13 + 24 columns).
print(labels[0].shape)

<class 'numpy.ndarray'> (14992, 2)
(39,)


In [16]:
# Hold out 1% of the training frames for validation. X_* hold file paths,
# y_* the matching label rows; random_state fixes the split.
X_train, X_validate, y_train, y_validation = train_test_split(np.array(Data), labels, test_size=0.01, random_state=42)

### Model

In [17]:
from tensorflow.contrib.layers import flatten

def VGGNet(x):
    """Build the shared convolutional trunk and the three classification heads.

    The trunk is six 5x5, stride-1, VALID convolutions (each followed by ReLU)
    interleaved with four 2x2, stride-2 max-pools. For a 270x480x3 input the
    spatial sizes are::

        conv 3->16     266x476x16   pool -> 133x238x16
        conv 16->32    129x234x32   pool ->  64x117x32
        conv 32->64     60x113x64
        conv 64->128    56x109x128  pool ->  28x54x128
        conv 128->256   24x50x256
        conv 256->128   20x46x128   pool ->  10x23x128  (= 29440 features)

    The flattened features feed three independent heads (FA, gesture, object),
    each made of dense layers 29440 -> 1000 -> 100 -> num_classes with ReLU
    after the first two layers.

    Variables are created in the same order as in the original hand-unrolled
    version (trunk first, then the heads layer by layer), so automatically
    generated variable names are unchanged.

    Args:
        x: float tensor of shape ``(batch, 270, 480, 3)``. The first dense
            layer is hard-wired to 29440 inputs, so other spatial sizes fail.

    Returns:
        Tensor of shape ``(batch, 39)``: the softmax outputs of the FA (2),
        gesture (13) and object (24) heads concatenated along axis 1. These are
        probabilities, not logits; do not pass them to a loss that applies
        softmax internally.
    """
    # Initialisation hyperparameters shared by every weight tensor.
    mu = 0
    sigma = 0.1

    def _weights(shape):
        """Create a trainable weight tensor drawn from a truncated normal."""
        return tf.Variable(tf.truncated_normal(shape=shape, mean=mu, stddev=sigma))

    def _conv_relu(inputs, in_channels, out_channels):
        """Apply a 5x5 stride-1 VALID convolution plus bias, then ReLU."""
        kernel = _weights([5, 5, in_channels, out_channels])
        bias = tf.Variable(tf.zeros(out_channels))
        conv = tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1], padding='VALID') + bias
        return tf.nn.relu(conv)

    def _max_pool(inputs):
        """Halve the spatial resolution with a 2x2 stride-2 VALID max-pool."""
        return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    def _dense(inputs, in_units, out_units, activate=True):
        """Apply a fully connected layer, optionally followed by ReLU."""
        weights = _weights((in_units, out_units))
        bias = tf.Variable(tf.zeros(out_units))
        output = tf.matmul(inputs, weights) + bias
        return tf.nn.relu(output) if activate else output

    # Convolutional trunk (output sizes are for a 270x480x3 input).
    net = _conv_relu(x, 3, 16)        # 266x476x16
    net = _max_pool(net)              # 133x238x16
    net = _conv_relu(net, 16, 32)     # 129x234x32
    net = _max_pool(net)              # 64x117x32
    net = _conv_relu(net, 32, 64)     # 60x113x64
    net = _conv_relu(net, 64, 128)    # 56x109x128
    net = _max_pool(net)              # 28x54x128
    net = _conv_relu(net, 128, 256)   # 24x50x256
    net = _conv_relu(net, 256, 128)   # 20x46x128
    net = _max_pool(net)              # 10x23x128

    # Flatten: 10 * 23 * 128 = 29440 features per example.
    features = flatten(net)

    # Heads are built layer by layer in FA, gesture, object order so that the
    # variable creation order matches the original implementation.
    heads = [_dense(features, 29440, 1000) for _ in range(3)]
    heads = [_dense(head, 1000, 100) for head in heads]
    head_logits = [
        _dense(head, 100, num_classes, activate=False)
        for head, num_classes in zip(heads, (2, 13, 24))
    ]

    # Per-head softmax, then concatenate to a single (batch, 39) tensor.
    head_probabilities = [tf.nn.softmax(logit) for logit in head_logits]
    return tf.concat(head_probabilities, axis=1)

In [18]:
# Graph inputs.
# x: batch of RGB frames resized to 270x480 (the size produced by get_batch).
x = tf.placeholder(tf.float32, (None, 270, 480, 3))
# y: one label row per frame - concatenated one-hot groups for FA (2),
# gesture (13) and object (24). The original shape argument `(None)` is just
# None (not a tuple), which left the placeholder without any static shape.
y = tf.placeholder(tf.int32, (None, 2 + 13 + 24))


In [20]:
# training steps
rate = 0.001

# VGGNet returns the concatenated *softmax probabilities* of the three heads
# (columns 0:2 FA, 2:15 gesture, 15:39 object). The variable keeps the name
# `logits` because later cells refer to it.
logits = VGGNet(x)
labels_float = tf.cast(y, tf.float32)


def _cross_entropy_from_probabilities(labels, probabilities):
    """Per-example cross-entropy between one-hot labels and probabilities.

    tf.nn.softmax_cross_entropy_with_logits applies softmax internally, so
    feeding it the network's already-normalised outputs applied softmax twice.
    The loss is therefore computed directly from the probabilities; they are
    clipped away from zero to keep the logarithm finite.
    """
    clipped = tf.clip_by_value(probabilities, 1e-10, 1.0)
    return -tf.reduce_sum(labels * tf.log(clipped), axis=1)


cross_entropy_FA = _cross_entropy_from_probabilities(labels_float[:, 0:2], logits[:, 0:2])
cross_entropy_ges = _cross_entropy_from_probabilities(labels_float[:, 2:15], logits[:, 2:15])
cross_entropy_obj = _cross_entropy_from_probabilities(labels_float[:, 15:], logits[:, 15:])
# Total loss: sum of the three per-task mean cross-entropies.
loss_operation = tf.reduce_mean(cross_entropy_FA) + tf.reduce_mean(cross_entropy_ges) + tf.reduce_mean(cross_entropy_obj)
optimizer = tf.train.AdamOptimizer(learning_rate = rate)
training_operation = optimizer.minimize(loss_operation)

In [23]:
# Per-task correctness. The label groups live in *columns* (0:2 FA, 2:15
# gesture, 15:39 object), so the slices must be `[:, a:b]`; the previous
# `[a:b]` form sliced batch rows and compared argmaxes over all 39 columns.
correct_prediction_FA = tf.equal(tf.argmax(logits[:, 0:2], 1), tf.argmax(y[:, 0:2], 1))
correct_prediction_ges = tf.equal(tf.argmax(logits[:, 2:15], 1), tf.argmax(y[:, 2:15], 1))
correct_prediction_obj = tf.equal(tf.argmax(logits[:, 15:], 1), tf.argmax(y[:, 15:], 1))
# NOTE: this is the SUM of the three per-task accuracies, so it ranges over
# [0, 3]; divide by 3 when a single mean accuracy is wanted.
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction_FA, tf.float32)) +\
                        tf.reduce_mean(tf.cast(correct_prediction_ges, tf.float32)) +\
                        tf.reduce_mean(tf.cast(correct_prediction_obj, tf.float32))
saver = tf.train.Saver()
BATCH_SIZE = 16
epoch = 2

# Decode the (small) validation set once, up front; training batches are
# decoded on the fly instead.
X_validation = get_batch(X_validate)
def evaluate(X_data, y_data):
    """Run ``accuracy_operation`` over a dataset in mini-batches.

    The data is processed in consecutive chunks of ``BATCH_SIZE`` using the
    current default TensorFlow session. Each batch score is weighted by the
    number of examples in the batch so that a shorter final batch does not
    skew the result.

    Args:
        X_data: sliceable sequence of inputs fed to placeholder ``x``.
        y_data: sliceable sequence of labels fed to placeholder ``y``.

    Returns:
        The example-weighted average of ``accuracy_operation`` over the data.

    Raises:
        ZeroDivisionError: if ``X_data`` is empty.
    """
    session = tf.get_default_session()
    example_count = len(X_data)
    weighted_total = 0
    for start in range(0, example_count, BATCH_SIZE):
        stop = start + BATCH_SIZE
        inputs = X_data[start:stop]
        targets = y_data[start:stop]
        batch_score = session.run(accuracy_operation, feed_dict={x: inputs, y: targets})
        weighted_total += batch_score * len(inputs)
    return weighted_total / example_count

In [None]:
def shuffle(X, y):
    """Return ``X`` and ``y`` reordered by one shared random permutation.

    A single permutation of ``range(len(y))`` is drawn from NumPy's global
    random state and used to index both arrays, so matching rows stay paired.

    Args:
        X: array of inputs; must support indexing with an integer array.
        y: array of labels with the same leading length as ``X``.

    Returns:
        tuple: ``(X_shuffled, y_shuffled)``.
    """
    order = np.random.permutation(len(y))
    return tuple(array[order] for array in (X, y))

### Training

In [None]:
# The Saver does not reliably create missing parent directories, so make sure
# the checkpoint folder exists before the first save.
os.makedirs('./CheckPoint', exist_ok=True)

# Pass the session options built next to the imports (allow_growth); without
# `config=` they were created but never applied.
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    num_examples = len(X_train)

    print("Training...")
    print()
    for i in range(epoch):
        # New sample order every epoch; paths and labels are permuted together.
        X_train, y_train = shuffle(X_train, y_train)
        for offset in range(0, num_examples, BATCH_SIZE):
            end = offset + BATCH_SIZE
            batch_x_path, batch_y = X_train[offset:end], y_train[offset:end]
            # Images are decoded per batch to keep memory usage bounded.
            batch_x = get_batch(batch_x_path)
            sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})
            print(offset)

        # evaluate() picks up `sess` as the default session of this `with` block.
        validation_accuracy = evaluate(X_validation, y_validation)
        print("EPOCH {} ...".format(i+1))
        print("Validation Accuracy = {:.3f}".format(validation_accuracy))
        print()

        # Overwrite the checkpoint after every epoch.
        saver.save(sess, './CheckPoint/VGGNet')
        print("Model saved")

Training...

0
16
32
48
64
80
96
112
128
144
160
176
192
208
224
240
256
272
288
304
320
336
352
368
384
400
416
432
448
464
480
496
512
528
544
560
576
592
608
624
640
656
672
688
704
720
736
752
768
784
800
816
832
848
864
880
896
912
928
944
960
976
992
1008
1024
1040
1056
1072
1088
1104
1120
1136
1152
1168
1184
1200
1216
1232
1248
1264
1280
1296
1312
1328
1344
1360
1376
1392
1408
1424
1440
1456
1472
1488
1504
1520
1536
1552
1568
1584
1600
1616
1632
1648
1664
1680
1696
1712
1728
1744
1760
1776
1792
1808
1824
1840
1856
1872
1888
1904
1920
1936
1952
1968
1984
2000
2016
2032
2048
2064
2080
2096
2112
2128
2144
2160
2176
2192
2208
2224
2240
2256
2272
2288
2304
2320
2336
2352
2368
2384
2400
2416
2432
2448
2464
2480
2496
2512
2528
2544
2560
2576
2592
2608
2624
2640
2656
2672
2688
2704
2720
2736
2752
2768
2784
2800
2816
2832
2848
2864
2880
2896
2912
2928
2944
2960
2976
2992
3008
3024
3040
3056
3072
3088
3104
3120
3136
3152
3168
3184
3200
3216
3232
3248
3264
3280
3296
3312
3328
3344
3360
337

### Testing

In [None]:
# Paths and one-hot labels for every test frame.
X_test_path = load_test()
y_test_full = load_test_label()
# train_test_split returns (train part, test part) for each input, so with
# test_size=0.1 the names X_test_path / y_test receive a random 10% of the
# test frames and X_test1_test / y_test1 the remaining 90%.
# NOTE: X_test_path is rebound here from "all paths" to "the 10% subset".
X_test1_test, X_test_path, y_test1, y_test = train_test_split(np.array(X_test_path), y_test_full, test_size=0.1, random_state=42)
# Decode and resize only the selected 10% of the test frames.
X_test = get_batch(X_test_path)

In [None]:
# Pass the session options built next to the imports (allow_growth); without
# `config=` they were created but never applied.
with tf.Session(config=config) as sess:
    # latest_checkpoint returns None when the folder holds no checkpoint;
    # fail with an explicit message instead of a confusing restore error.
    checkpoint_path = tf.train.latest_checkpoint('./CheckPoint')
    if checkpoint_path is None:
        raise ValueError("No checkpoint found in './CheckPoint'; run the training cell first.")
    saver.restore(sess, checkpoint_path)
    test_accuracy = evaluate(X_test, y_test)
    print("Test Accuracy = {:.3f}".format(test_accuracy))