In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import math
import re
import random
import csv

  return f(*args, **kwds)


In [2]:
# lstm network for sighted people
# leave-one-out & cross-validation
# Network Parameters
num_input = 6 # number of used sensor data
timesteps = 100 # timesteps
num_hidden = 100 # hidden layer num of features
num_output= 1

In [3]:
#read sensor data from csv files
def read_sensor(filename):
    with open(filename, "r") as csvfile:
        reader = csv.reader(csvfile)
        #[17:20] rotationRateX, rotation-RateY, rotationRateZ, 
        #[20:23] userAccelerationX, userAc-celerationY, userAccelerationZ
        dataset = [row[17:23] for row in reader]
        return np.array(dataset)

# read ground trurh data from xml files
def read_xml(filename, length):
    f = open(filename)
    cts = f.read()
    f.close()

    p_foot = re.compile(r'<WhichFoot>(.*?)</WhichFoot>')
    all_foot = p_foot.findall(cts)

    p_time = re.compile(r'<Time>(.*?)</Time>')
    all_time = p_time.findall(cts)

    strike_times = []
    #L-1 R-0
    strike_times.append([0.0, 0.5])
    for i in range(len(all_foot)):
        if(all_foot[i]=='L'):
            strike_times.append([float(all_time[i]), 1])
        else:
            strike_times.append([float(all_time[i]), 0])
    strike_times[-1][1] = 0.5
    strike_times.append([length/25.0, 0.5])

    #transfer the ground truth data to binary signals
    #left steps toggle the signal from 0 to 1 
    #and right steps toggle the signal from 1 to 0
    window_y = []
    strike_index=0
    for i in range(length):
        if(i/25.0 >= strike_times[strike_index+1][0]):
            strike_index += 1
        window_y.append([strike_times[strike_index][1]])
    
    #change y to 0.5 in turn segments, which will be removed later
    p_info = re.compile(r'<StartTime>(.*?)</StartTime>\n\t<EndTime>(.*?)</EndTime>\n\t<NSteps>(.*?)</NSteps>\n\t<Direction>(.*?)</Direction>')
    all_info = p_info.findall(cts)#start_time, end_time, step_num, direction=turn

    for i in range(len(all_info)):
        if(all_info[i][3][:4]=='Turn'):
            #print(all_info[i])
            start_time = int(float(all_info[i][0])*25)
            end_time = int(float(all_info[i][1])*25)
            #print('start and end time : ', all_info[i][0], all_info[i][1])
            for t in range(start_time-1, end_time):
                window_y[t] = [0.5]
                
    #change y to 0.5 under feature label, which will be removed later
    p_feature = re.compile(r'<Feature>\n\t\t\t<StartTime>(.*?)</StartTime>\n\t\t\t<EndTime>(.*?)</EndTime>')
    all_feature = p_feature.findall(cts)#start_time, end_time, step_num, direction=turn
    
    #print(filename, length)
    for i in range(len(all_feature)):
        start_time = int(float(all_feature[i][0])*25)
        end_time = int(float(all_feature[i][1])*25)
        #print(start_time-1, end_time)
        if(end_time<=length):
            for t in range(start_time-1, end_time):
                window_y[t] = [0.5]
            
    return window_y

# add corresponding sensor data and label to training set
def add_data(path, person, phone_location, assistant):
    data_x = read_sensor('weallwalk/sensor/iPhoneSensors_T'+str(path)+'_ID'+str(person)+'_'+phone_location+'_'+assistant+'.csv')
    data_y = read_xml('weallwalk/xml/T'+str(path)+'_ID'+str(person)+'_'+assistant+'.xml', len(data_x))
    
    #remove the sensor data in turn segment and feature labels which is marked as 0.5
    split_x, split_x_part = [], []
    split_y, split_y_part = [], []
    for i in range(len(data_y)):
        if(data_y[i][0]!=0.5):
            split_y_part.append(data_y[i])
            split_x_part.append(data_x[i])
        else:
            if(len(split_y_part)>0):
                split_y.append(split_y_part)
                split_x.append(split_x_part)
                split_y_part = []
                split_x_part = []
    
    #windowed the data (window size = timesteps)
    data_x_seq, data_y_seq = [], []
    for i in range(len(split_x)):
        data_x_part, data_y_part = [], []
        for j in range(len(split_x[i])-timesteps):
            x = split_x[i][j:j+timesteps]
            y = split_y[i][j:j+timesteps]
            data_x_part.append(x)
            data_y_part.append(y)
        if(len(data_x_part)>0):
            data_x_seq.append(data_x_part)
            data_y_seq.append(data_y_part)
    
    return data_x_seq, data_y_seq

In [4]:
# training set (including validation)
# participant id 11, 12, 13, 14
all_train_data_list = []
for i in range(1,7):
    all_train_data_list.append([i, 11, '1L', 'NA'])
    all_train_data_list.append([i, 12, '1L', 'NA'])
    all_train_data_list.append([i, 13, '1L', 'NA'])
    all_train_data_list.append([i, 14, '1L', 'NA'])
#     step_data_list.append([i, 15, '1L', 'NA'])
    all_train_data_list.append([i, 11, '2R', 'NA'])
    all_train_data_list.append([i, 12, '2R', 'NA'])
    all_train_data_list.append([i, 13, '2R', 'NA'])
    all_train_data_list.append([i, 14, '2R', 'NA'])
#     step_data_list.append([i, 15, '2R', 'NA'])    
    
print(all_train_data_list)

[[1, 11, '1L', 'NA'], [1, 12, '1L', 'NA'], [1, 13, '1L', 'NA'], [1, 14, '1L', 'NA'], [1, 11, '2R', 'NA'], [1, 12, '2R', 'NA'], [1, 13, '2R', 'NA'], [1, 14, '2R', 'NA'], [2, 11, '1L', 'NA'], [2, 12, '1L', 'NA'], [2, 13, '1L', 'NA'], [2, 14, '1L', 'NA'], [2, 11, '2R', 'NA'], [2, 12, '2R', 'NA'], [2, 13, '2R', 'NA'], [2, 14, '2R', 'NA'], [3, 11, '1L', 'NA'], [3, 12, '1L', 'NA'], [3, 13, '1L', 'NA'], [3, 14, '1L', 'NA'], [3, 11, '2R', 'NA'], [3, 12, '2R', 'NA'], [3, 13, '2R', 'NA'], [3, 14, '2R', 'NA'], [4, 11, '1L', 'NA'], [4, 12, '1L', 'NA'], [4, 13, '1L', 'NA'], [4, 14, '1L', 'NA'], [4, 11, '2R', 'NA'], [4, 12, '2R', 'NA'], [4, 13, '2R', 'NA'], [4, 14, '2R', 'NA'], [5, 11, '1L', 'NA'], [5, 12, '1L', 'NA'], [5, 13, '1L', 'NA'], [5, 14, '1L', 'NA'], [5, 11, '2R', 'NA'], [5, 12, '2R', 'NA'], [5, 13, '2R', 'NA'], [5, 14, '2R', 'NA'], [6, 11, '1L', 'NA'], [6, 12, '1L', 'NA'], [6, 13, '1L', 'NA'], [6, 14, '1L', 'NA'], [6, 11, '2R', 'NA'], [6, 12, '2R', 'NA'], [6, 13, '2R', 'NA'], [6, 14, '2R'

In [5]:
#spliting training data and validation data
step_train_data_list = []
step_valid_data_list = []
for i in range(11, 15):
    step_train_data_list_part = [j for j in all_train_data_list if j[1]!=i]
    step_valid_data_list_part = [j for j in all_train_data_list if j[1]==i]
    print(len(step_train_data_list_part))
    print(step_train_data_list_part)
    print('-----')
    print(len(step_valid_data_list_part))
    print(step_valid_data_list_part)
    print('=====')
    step_train_data_list.append(step_train_data_list_part)
    step_valid_data_list.append(step_valid_data_list_part)

36
[[1, 12, '1L', 'NA'], [1, 13, '1L', 'NA'], [1, 14, '1L', 'NA'], [1, 12, '2R', 'NA'], [1, 13, '2R', 'NA'], [1, 14, '2R', 'NA'], [2, 12, '1L', 'NA'], [2, 13, '1L', 'NA'], [2, 14, '1L', 'NA'], [2, 12, '2R', 'NA'], [2, 13, '2R', 'NA'], [2, 14, '2R', 'NA'], [3, 12, '1L', 'NA'], [3, 13, '1L', 'NA'], [3, 14, '1L', 'NA'], [3, 12, '2R', 'NA'], [3, 13, '2R', 'NA'], [3, 14, '2R', 'NA'], [4, 12, '1L', 'NA'], [4, 13, '1L', 'NA'], [4, 14, '1L', 'NA'], [4, 12, '2R', 'NA'], [4, 13, '2R', 'NA'], [4, 14, '2R', 'NA'], [5, 12, '1L', 'NA'], [5, 13, '1L', 'NA'], [5, 14, '1L', 'NA'], [5, 12, '2R', 'NA'], [5, 13, '2R', 'NA'], [5, 14, '2R', 'NA'], [6, 12, '1L', 'NA'], [6, 13, '1L', 'NA'], [6, 14, '1L', 'NA'], [6, 12, '2R', 'NA'], [6, 13, '2R', 'NA'], [6, 14, '2R', 'NA']]
-----
12
[[1, 11, '1L', 'NA'], [1, 11, '2R', 'NA'], [2, 11, '1L', 'NA'], [2, 11, '2R', 'NA'], [3, 11, '1L', 'NA'], [3, 11, '2R', 'NA'], [4, 11, '1L', 'NA'], [4, 11, '2R', 'NA'], [5, 11, '1L', 'NA'], [5, 11, '2R', 'NA'], [6, 11, '1L', 'NA'],

In [6]:
batch_size = 256
# add data to training set
train_x_list, train_y_list = [], []
print(batch_size)

for step_train_data in step_train_data_list:
    data_x, data_y = [], []
    
    for i in step_train_data:
        data_x_segement, data_y_segement = add_data(i[0], i[1], i[2], i[3])
        for dx in data_x_segement:
            data_x.extend(dx)
        for dy in data_y_segement:
            data_y.extend(dy)
    
    print(len(data_x))
#     print(len(data_y))
    
    order = list(range(0,len(data_x),1))
    random.shuffle(order)

    train_x = [data_x[i] for i in order]
#     train_x.extend([train_x[i] for i in range(0, batch_size)])
    train_y = [data_y[i] for i in order]
#     train_y.extend([train_y[i] for i in range(0, batch_size)])

    print(len(train_x))
    
    train_x_list.append(train_x)
    train_y_list.append(train_y)

256
66949
66949
70644
70644
66239
66239
61833
61833


In [7]:
# add data to validation set
valid_x_list, valid_y_list = [], []

for step_valid_data in step_valid_data_list:
    valid_x, valid_y=[], []
    for i in step_valid_data:
        data_x_segement, data_y_segement = add_data(i[0], i[1], i[2], i[3])
        valid_x.extend(data_x_segement)
        valid_y.extend(data_y_segement)
    
    print(len(valid_x))
#     print(len(valid_y))
    
    valid_x_list.append(valid_x)
    valid_y_list.append(valid_y)

52
47
56
56


In [8]:
# testing set
# participant id 15
step_test_list = []
for i in range(1,7):
    step_test_list.append([i, 15, '1L', 'NA'])
    step_test_list.append([i, 15, '2R', 'NA'])    
        
print(step_test_list)

[[1, 15, '1L', 'NA'], [1, 15, '2R', 'NA'], [2, 15, '1L', 'NA'], [2, 15, '2R', 'NA'], [3, 15, '1L', 'NA'], [3, 15, '2R', 'NA'], [4, 15, '1L', 'NA'], [4, 15, '2R', 'NA'], [5, 15, '1L', 'NA'], [5, 15, '2R', 'NA'], [6, 15, '1L', 'NA'], [6, 15, '2R', 'NA']]


In [9]:
test_x, test_y=[], []
for i in step_test_list:
    data_x_segement, data_y_segement = add_data(i[0], i[1], i[2], i[3])
    test_x.extend(data_x_segement)
    test_y.extend(data_y_segement)
    
print(len(test_x))
print(len(test_y))

print([len(i) for i in test_y])

54
54
[30, 66, 725, 25, 30, 66, 725, 25, 63, 1037, 57, 8, 63, 1037, 57, 8, 1085, 220, 1085, 220, 148, 1024, 37, 971, 72, 787, 54, 707, 148, 1024, 37, 971, 72, 787, 54, 707, 289, 577, 87, 343, 231, 1049, 289, 577, 87, 343, 231, 1049, 554, 1091, 724, 554, 1091, 724]


In [10]:
NUM_LAYERS=2

def LstmCell():
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_hidden)#, forget_bias=1.0)
    cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=0.5)
    return cell

graph = tf.Graph()
with graph.as_default(), tf.device('/gpu:0'):
    # tf Graph input
    X = tf.placeholder("float", [None, timesteps, num_input])
    Y = tf.placeholder("float", [None, timesteps, num_output])
    
    # Define weights
    weights = {
        'out': tf.Variable(tf.random_normal([num_hidden, num_output]))
    }
    biases = {
        'out': tf.Variable(tf.random_normal([num_output]))
    }
    
    def RNN(x, weights, biases):
        x = tf.unstack(x, timesteps, 1)
        cell = tf.contrib.rnn.MultiRNNCell([LstmCell() for _ in range(NUM_LAYERS)])
        outputs, state = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
        outputs = tf.transpose(outputs, [1,0,2])

#         return tf.matmul(outputs[-2], weights['out']) + biases['out']
        ret = []
#         print(outputs.shape)
        for i in range(0, timesteps):
            ret.append(tf.matmul(outputs[i], weights['out']) + biases['out'])
            
        return ret
    
    logits = RNN(X, weights, biases)
    logits = tf.transpose(logits, [1,0,2])
#     print(len(logits))
    print(logits.shape)
    print(logits[0].shape)
    mean_train = tf.reduce_mean(X)
    
    loss = tf.reduce_mean(tf.squared_difference(logits, Y))

    optimizer = tf.train.AdamOptimizer(learning_rate = 0.001).minimize(loss)
    
    saver = tf.train.Saver()

(?, 100, 1)
(100, 1)


In [11]:
#calculate the accuarcy between real y and prediction _y
def cal_accuarcy(y, _y):
    count = 0
    total = 0
    for i in range(len(y[0])):
        total += 1
        if(y[0][i][0] == round(_y[0][i][0])):
            count += 1
    for i in range(1, len(y)):
        total += 1
        if(y[i][-1][0] == round(_y[i][-1][0])):
            count += 1;
    return(count*1.0/total)
#     print("train accuarcy : ", count/len(y))

In [13]:
training_steps = 2001
valid_loss_list, valid_logits_list = [], []
test_loss_list, test_logits_list = [], []
batch_start = 0

for i in range(0,len(valid_x_list)):
    batch_start=0
    print("cross validation ", i)
    train_x = train_x_list[i]
    train_y = train_y_list[i]
    train_length = len(train_x_list[i])-256
    valid_x = valid_x_list[i]
    valid_y = valid_y_list[i]
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(graph=graph, config=config) as session:
#         saver.restore(session, tf.train.latest_checkpoint('lstm_check'))
#         saver.restore(session, "lstm_check/my-model-gpu-error-metric-dropout2-wc-10fold-2.ckpt-00")
        tf.global_variables_initializer().run()
        print('Initialized')
        for step in range(training_steps):
            if(batch_start+batch_size>len(train_x)):
                print("epoch+1")
                order = list(range(0,len(train_x),1))
                random.shuffle(order)
                train_x = [train_x[i] for i in order]
                train_y = [train_y[i] for i in order]
                batch_start=0
            feed_dict = {X: train_x[batch_start:batch_start+batch_size], Y: train_y[batch_start:batch_start+batch_size]}
            _, l, predictions, m = session.run([optimizer, loss, logits, mean_train], feed_dict = feed_dict)
            batch_start += batch_size
            if (step % 200 == 0):
                train_accuarcy = cal_accuarcy(train_y[batch_start:batch_start+batch_size], predictions)
                print('Loss at step %d: %f, train accuarcy : %f' % (step, l, train_accuarcy))
                #train accuarcy
            if (step % 500 == 0):
                saver.save(session, 'lstm_check/my-model-gpu-error-metric-dropout2-wc-10fold-'+str(i+1)+'.ckpt', global_step=step)
            batch_start += batch_size
            if(batch_start>=train_length):
                batch_start -=train_length
        valid_loss, valid_logits = [], []
        for j in range(len(valid_x)):
            valid_loss_part, valid_logits_part = session.run([loss,logits], feed_dict={X: valid_x[j], Y: valid_y[j]})
            valid_loss.append(valid_loss_part)
            valid_logits.append(valid_logits_part)

        valid_loss_list.append(valid_loss)
        valid_logits_list.append(valid_logits)

        test_loss, test_logits = [], []
        for j in range(len(test_x)):
            test_loss_part, test_logits_part = session.run([loss,logits], feed_dict={X: test_x[j], Y: test_y[j]})
            test_loss.append(test_loss_part)
            test_logits.append(test_logits_part)

        test_loss_list.append(test_loss)
        test_logits_list.append(test_logits)

cross validation  0
Initialized
Loss at step 0: 1.307528, train accuarcy : 0.352113
Loss at step 200: 0.119049, train accuarcy : 0.526761
Loss at step 400: 0.099955, train accuarcy : 0.591549
Loss at step 600: 0.086865, train accuarcy : 0.560563
Loss at step 800: 0.083721, train accuarcy : 0.577465
Loss at step 1000: 0.070504, train accuarcy : 0.402817
Loss at step 1200: 0.065027, train accuarcy : 0.571831
Loss at step 1400: 0.065760, train accuarcy : 0.628169
Loss at step 1600: 0.062293, train accuarcy : 0.397183
Loss at step 1800: 0.056003, train accuarcy : 0.422535
Loss at step 2000: 0.059960, train accuarcy : 0.416901
cross validation  1
Initialized
Loss at step 0: 1.538159, train accuarcy : 0.183099
Loss at step 200: 0.127567, train accuarcy : 0.630986
Loss at step 400: 0.105386, train accuarcy : 0.569014
Loss at step 600: 0.092791, train accuarcy : 0.521127
Loss at step 800: 0.083204, train accuarcy : 0.602817
Loss at step 1000: 0.079383, train accuarcy : 0.614085
Loss at step 12

# Error Metric

In [14]:
m1u, m1o, m2u, m2o, m3u, m3o = [], [], [], [], [], []
    
for k in range(len(valid_logits_list)):
    
    valid_logits = valid_logits_list[k]
    valid_y = valid_y_list[k]    
    
    valid_y_seq_list, valid_logits_bin_list = [], []

    for t in valid_logits:  
        t_all = [i for i in t[0]]
        t_all.extend([i[-1] for i in t[1:]])
    #     print(len(t_all)-len(t))

        valid_logits_part = [[round(i[0])] for i in t_all]
        for i in range(1, len(valid_logits_part)-1):
            if(valid_logits_part[i-1][0]!=valid_logits_part[i][0] and valid_logits_part[i-1][0]==valid_logits_part[i+1][0]):
                valid_logits_part[i][0]=valid_logits_part[i-1][0]  

        valid_logits_bin_list.append(valid_logits_part)

    for t in valid_y:  
        t_all = [i for i in t[0]]
        t_all.extend([i[-1] for i in t[1:]])
        valid_y_seq_list.append(t_all)

    # print(valid_y_seq_list)

    count = 0
    total = 0
    for i in range(0, len(valid_y_seq_list)):
        total += len(valid_y_seq_list[i])
    #     print(len(valid_y_seq_list[i]), len(valid_logits_bin_list[i]))

        for j in range(0, len(valid_y_seq_list[i])):
            if(valid_y_seq_list[i][j][0] == valid_logits_bin_list[i][j][0]):
                count += 1

    print(count)
    print(total)
    print("valid accuarcy : ", count*1.0/total)
    
    #===========================================
    step_time_actual, step_time_predict= [], []
    step_time_actual_gap = []

    #valid_y_seq_list, valid_logits_bin_list
    for i in range(0, len(valid_y_seq_list)):
        step_time_actual_part, step_time_predict_part= [], []
        for j in range(1, len(valid_y_seq_list[i])):
            if(abs(valid_y_seq_list[i][j][0]-valid_y_seq_list[i][j-1][0])>0.5):
                step_time_actual_part.append(j)
        #     if(abs(valid_logits[i][0]-valid_logits[i-1][0])>0.5):
            if(abs(valid_logits_bin_list[i][j][0]-valid_logits_bin_list[i][j-1][0])>0.5):
                step_time_predict_part.append(j)
        step_time_actual.append(step_time_actual_part)
        step_time_predict.append(step_time_predict_part)

        step_time_actual_gap_part = []
        step_time_actual_gap_part.append(0)
    #     step_time_actual_gap_part.append(step_time_actual_part[0]/2.0)
        for i in range(1, len(step_time_actual_part)):
            step_time_actual_gap_part.append((step_time_actual_part[i-1]+step_time_actual_part[i])/2.0)
        step_time_actual_gap_part.append(step_time_actual_part[-1]*2)
        step_time_actual_gap.append(step_time_actual_gap_part)

    print('Step number for each segement')
    print([len(i) for i in step_time_actual])   
    print([len(i) for i in step_time_predict])
    
    #===================================================
    total_step_count = sum([len(i) for i in step_time_actual])
    metric1_undercount = 0
    metric2_undercount = 0
    metric3_undercount = 0
    metric1_overcount = 0
    metric2_overcount = 0
    metric3_overcount = 0
    
    metric1_overcount_list = []
    metric2_overcount_list = []
    metric3_overcount_list = []

    print(len(valid_y))

    for i in range(len(valid_y)):
        step_count = len(step_time_actual[i])
        undercount = 0
        overcount = 0
        
        metric1_overcount += len([t for t in step_time_predict[i] if t<step_time_actual[i][0]])
        for j in range(1, step_count):
            gap_count = [t for t in step_time_predict[i] if t>=step_time_actual[i][j-1] and t<step_time_actual[i][j]]
        #     print(gap_count)
            if(len(gap_count)>1):
                overcount += len(gap_count)-1
            if(len(gap_count)<1):
                undercount += 1
        gap_count = [t for t in step_time_predict[i] if t>=step_time_actual[i][-1]]
        if(len(gap_count)>1):
            overcount += len(gap_count)-1
        if(len(gap_count)<1):
            undercount += 1

        metric1_undercount += undercount
        metric1_overcount += overcount
        metric1_overcount_list.append(overcount)

        undercount = 0
        overcount = 0
        for j in range(1, len(step_time_actual_gap[i])):
            gap_count = [t for t in step_time_predict[i] if t>=step_time_actual_gap[i][j-1] and t<step_time_actual_gap[i][j]]
        #     print(gap_count)
            if(len(gap_count)>1):
                overcount += (len(gap_count)-1)
            if(len(gap_count)<1):
                undercount += 1    

        metric2_undercount += undercount
        metric2_overcount += overcount  
        metric2_overcount_list.append(overcount)

        diff = len(step_time_predict[i])-len(step_time_actual[i])
#         print(step_time_actual[i])
#         print(step_time_predict[i])
#         print('------')

        if(diff<0):
    #         print('segement undercount : ', 1-len(step_time_predict[i])*1.0/len(step_time_actual[i]))
            metric3_undercount -= diff
            metric3_overcount_list.append(0)
    #     if(diff>=0):
        else:
    #         print('segement overcount : ', 1-len(step_time_actual[i])*1.0/len(step_time_predict[i]))
            metric3_overcount += diff
            metric3_overcount_list.append(diff)

    print(len(metric1_overcount_list))
    print(sum(metric1_overcount_list))
    print(metric1_overcount_list)
    print(len(metric3_overcount_list))
    print(sum(metric3_overcount_list))
    print(metric3_overcount_list)

    print('error : ')
    print("total undercount metric 1:\t", metric1_undercount*1.0/total_step_count)
    print("total overcount metric 1:\t", metric1_overcount*1.0/total_step_count)
    print("total undercount metric 2:\t", metric2_undercount*1.0/total_step_count)
    print("total overcount metric 2:\t", metric2_overcount*1.0/total_step_count)
    print("total undercount metric 3:\t", metric3_undercount*1.0/total_step_count)
    print("total overcount metric 3:\t", metric3_overcount*1.0/total_step_count)
    
    m1u.append(metric1_undercount*1.0/total_step_count)
    m1o.append(metric1_overcount*1.0/total_step_count)
    m2u.append(metric2_undercount*1.0/total_step_count)
    m2o.append(metric2_overcount*1.0/total_step_count)
    m3u.append(metric3_undercount*1.0/total_step_count)
    m3o.append(metric3_overcount*1.0/total_step_count)
    
    print("==============================================================")

# filename = 'tmp/na_valid_timesteps='+str(timesteps)+'_trainingsteps='+str(training_steps)+'lr='+str(0.1)+'1.csv'
# with open(filename,"w") as csvfile: 
#     writer = csv.writer(csvfile)
#     writer.writerow(["cv0","cv1","cv2","cv3"])
#     writer.writerows([m1u, m1o, m2u, m2o, m3u, m3o])

17887
26754
valid accuarcy :  0.6685729236749645
Step number for each segement
[10, 58, 8, 10, 58, 8, 8, 84, 12, 7, 8, 84, 12, 7, 79, 22, 79, 22, 20, 80, 10, 79, 11, 60, 10, 54, 20, 80, 10, 79, 11, 60, 10, 54, 26, 44, 20, 27, 21, 82, 26, 44, 20, 27, 21, 82, 30, 77, 6, 30, 77, 6]
[12, 66, 8, 11, 58, 9, 13, 120, 17, 8, 11, 86, 12, 7, 86, 26, 80, 25, 23, 99, 16, 96, 12, 79, 12, 76, 22, 80, 12, 81, 12, 59, 11, 55, 29, 57, 23, 32, 30, 107, 28, 44, 23, 28, 22, 91, 39, 103, 5, 30, 80, 8]
52
52
407
[2, 14, 2, 1, 0, 1, 5, 47, 6, 1, 0, 9, 1, 1, 7, 6, 11, 7, 3, 23, 6, 16, 3, 30, 2, 30, 1, 5, 1, 7, 0, 1, 3, 7, 9, 19, 3, 8, 10, 34, 0, 3, 0, 0, 0, 8, 14, 36, 0, 0, 2, 2]
52
291
[2, 8, 0, 1, 0, 1, 5, 36, 5, 1, 3, 2, 0, 0, 7, 4, 1, 3, 3, 19, 6, 17, 1, 19, 2, 22, 2, 0, 2, 2, 1, 0, 1, 1, 3, 13, 3, 5, 9, 25, 2, 0, 3, 1, 1, 9, 9, 26, 0, 0, 3, 2]
error : 
total undercount metric 1:	 0.09682539682539683
total overcount metric 1:	 0.24973544973544973
total undercount metric 2:	 0.08042328042328042
total overc

In [16]:
m1u, m1o, m2u, m2o, m3u, m3o = [], [], [], [], [], []
    
for k in range(len(test_logits_list)):
    
    test_logits = test_logits_list[k]   
    
    test_y_seq_list, test_logits_bin_list = [], []

    for t in test_logits:  
        t_all = [i for i in t[0]]
        t_all.extend([i[-1] for i in t[1:]])
    #     print(len(t_all)-len(t))

        test_logits_part = [[round(i[0])] for i in t_all]
        for i in range(1, len(test_logits_part)-1):
            if(test_logits_part[i-1][0]!=test_logits_part[i][0] and test_logits_part[i-1][0]==test_logits_part[i+1][0]):
                test_logits_part[i][0]=test_logits_part[i-1][0]  

        test_logits_bin_list.append(test_logits_part)

    for t in test_y:  
        t_all = [i for i in t[0]]
        t_all.extend([i[-1] for i in t[1:]])
        test_y_seq_list.append(t_all)

    # print(test_y_seq_list)

    count = 0
    total = 0
    for i in range(0, len(test_y_seq_list)):
        total += len(test_y_seq_list[i])
    #     print(len(test_y_seq_list[i]), len(test_logits_bin_list[i]))

        for j in range(0, len(test_y_seq_list[i])):
            if(test_y_seq_list[i][j][0] == test_logits_bin_list[i][j][0]):
                count += 1

    print(count)
    print(total)
    print("test accuarcy : ", count*1.0/total)
    
    #===========================================
    step_time_actual, step_time_predict= [], []
    step_time_actual_gap = []

    #test_y_seq_list, test_logits_bin_list
    for i in range(0, len(test_y_seq_list)):
        step_time_actual_part, step_time_predict_part= [], []
        for j in range(1, len(test_y_seq_list[i])):
            if(abs(test_y_seq_list[i][j][0]-test_y_seq_list[i][j-1][0])>0.5):
                step_time_actual_part.append(j)
        #     if(abs(test_logits[i][0]-test_logits[i-1][0])>0.5):
            if(abs(test_logits_bin_list[i][j][0]-test_logits_bin_list[i][j-1][0])>0.5):
                step_time_predict_part.append(j)
        step_time_actual.append(step_time_actual_part)
        step_time_predict.append(step_time_predict_part)

        step_time_actual_gap_part = []
        step_time_actual_gap_part.append(0)
    #     step_time_actual_gap_part.append(step_time_actual_part[0]/2.0)
        for i in range(1, len(step_time_actual_part)):
            step_time_actual_gap_part.append((step_time_actual_part[i-1]+step_time_actual_part[i])/2.0)
        step_time_actual_gap_part.append(step_time_actual_part[-1]*2)
        step_time_actual_gap.append(step_time_actual_gap_part)

    print('Step number for each segement')
    print([len(i) for i in step_time_actual])   
    print([len(i) for i in step_time_predict])
    
    #===================================================
    total_step_count = sum([len(i) for i in step_time_actual])
    metric1_undercount = 0
    metric2_undercount = 0
    metric3_undercount = 0
    metric1_overcount = 0
    metric2_overcount = 0
    metric3_overcount = 0
    
    metric1_overcount_list = []
    metric2_overcount_list = []
    metric3_overcount_list = []

    print(len(test_y))

    for i in range(len(test_y)):
        step_count = len(step_time_actual[i])
        undercount = 0
        overcount = 0
        
        metric1_overcount += len([t for t in step_time_predict[i] if t<step_time_actual[i][0]])
        for j in range(1, step_count):
            gap_count = [t for t in step_time_predict[i] if t>=step_time_actual[i][j-1] and t<step_time_actual[i][j]]
        #     print(gap_count)
            if(len(gap_count)>1):
                overcount += len(gap_count)-1
            if(len(gap_count)<1):
                undercount += 1
        gap_count = [t for t in step_time_predict[i] if t>=step_time_actual[i][-1]]
        if(len(gap_count)>1):
            overcount += len(gap_count)-1
        if(len(gap_count)<1):
            undercount += 1

        metric1_undercount += undercount
        metric1_overcount += overcount
        metric1_overcount_list.append(overcount)

        undercount = 0
        overcount = 0
        for j in range(1, len(step_time_actual_gap[i])):
            gap_count = [t for t in step_time_predict[i] if t>=step_time_actual_gap[i][j-1] and t<step_time_actual_gap[i][j]]
        #     print(gap_count)
            if(len(gap_count)>1):
                overcount += (len(gap_count)-1)
            if(len(gap_count)<1):
                undercount += 1    

        metric2_undercount += undercount
        metric2_overcount += overcount  
        metric2_overcount_list.append(overcount)

        diff = len(step_time_predict[i])-len(step_time_actual[i])
#         print(step_time_actual[i])
#         print(step_time_predict[i])
#         print('------')

        if(diff<0):
    #         print('segement undercount : ', 1-len(step_time_predict[i])*1.0/len(step_time_actual[i]))
            metric3_undercount -= diff
            metric3_overcount_list.append(0)
    #     if(diff>=0):
        else:
    #         print('segement overcount : ', 1-len(step_time_actual[i])*1.0/len(step_time_predict[i]))
            metric3_overcount += diff
            metric3_overcount_list.append(diff)

    print(len(metric1_overcount_list))
    print(sum(metric1_overcount_list))
    print(metric1_overcount_list)
    print(len(metric3_overcount_list))
    print(sum(metric3_overcount_list))
    print(metric3_overcount_list)

    print('error : ')
    print("total undercount metric 1:\t", metric1_undercount*1.0/total_step_count)
    print("total overcount metric 1:\t", metric1_overcount*1.0/total_step_count)
    print("total undercount metric 2:\t", metric2_undercount*1.0/total_step_count)
    print("total overcount metric 2:\t", metric2_overcount*1.0/total_step_count)
    print("total undercount metric 3:\t", metric3_undercount*1.0/total_step_count)
    print("total overcount metric 3:\t", metric3_overcount*1.0/total_step_count)
    
    m1u.append(metric1_undercount*1.0/total_step_count)
    m1o.append(metric1_overcount*1.0/total_step_count)
    m2u.append(metric2_undercount*1.0/total_step_count)
    m2o.append(metric2_overcount*1.0/total_step_count)
    m3u.append(metric3_undercount*1.0/total_step_count)
    m3o.append(metric3_overcount*1.0/total_step_count)
    
    print("==============================================================")

# filename = 'tmp/na_test_timesteps='+str(timesteps)+'_trainingsteps='+str(training_steps)+'lr='+str(0.1)+'1.csv'
# with open(filename,"w") as csvfile: 
#     writer = csv.writer(csvfile)
#     writer.writerow(["cv0","cv1","cv2","cv3"])
#     writer.writerows([m1u, m1o, m2u, m2o, m3u, m3o])

12722
29468
test accuarcy :  0.431722546491109
Step number for each segement
[8, 10, 57, 7, 8, 10, 57, 7, 9, 80, 11, 6, 9, 80, 11, 6, 84, 22, 84, 22, 17, 82, 10, 80, 12, 63, 11, 57, 17, 82, 10, 80, 12, 63, 11, 57, 27, 46, 13, 31, 22, 82, 27, 46, 13, 31, 22, 82, 43, 82, 58, 43, 82, 58]
[12, 12, 59, 12, 9, 14, 56, 12, 11, 94, 13, 10, 13, 55, 11, 8, 88, 22, 73, 20, 19, 86, 10, 81, 12, 64, 12, 57, 16, 67, 16, 39, 12, 66, 7, 72, 29, 46, 12, 30, 25, 88, 30, 52, 12, 34, 13, 52, 48, 94, 60, 57, 93, 62]
54
54
625
[4, 5, 5, 4, 4, 7, 27, 5, 3, 21, 3, 3, 5, 27, 5, 3, 18, 2, 35, 9, 3, 10, 4, 6, 1, 8, 2, 2, 7, 32, 8, 17, 5, 34, 3, 32, 3, 3, 1, 1, 2, 15, 13, 23, 6, 12, 5, 24, 16, 23, 7, 27, 44, 31]
54
159
[4, 2, 2, 5, 1, 4, 0, 5, 2, 14, 2, 4, 4, 0, 0, 2, 4, 0, 0, 0, 2, 4, 0, 1, 0, 1, 1, 0, 0, 0, 6, 0, 0, 3, 0, 15, 2, 0, 0, 0, 3, 6, 3, 6, 0, 3, 0, 0, 5, 12, 2, 14, 11, 4]
error : 
total undercount metric 1:	 0.31407766990291264
total overcount metric 1:	 0.32233009708737864
total undercount metric 2:	 