In [None]:
# --- Environment configuration -------------------------------------------
# These settings are applied BEFORE TensorFlow is imported further down so
# that they are already in place when the library initialises.
import os
import warnings
# Silence every Python warning for the rest of the notebook.
warnings.filterwarnings('ignore')
# Suppress TensorFlow's native (C++) log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Enumerate GPUs by PCI bus id and expose only the device with index 1.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import pandas as pd
import numpy as np
from gtda.time_series import SlidingWindow
import matplotlib.pyplot as plt
from math import atan2, pi, sqrt, cos, sin, floor
# NOTE(review): wildcard import; import_oxiod_dataset, Cal_TE and
# Cal_len_meters used below presumably come from here -- confirm in data_utils.
from data_utils import *
import tensorflow as tf
from tensorflow.python.keras.backend import set_session
# TF1-style session: grow GPU memory on demand instead of reserving it all,
# and log which device each op is placed on.
config = tf.compat.v1.ConfigProto() 
config.gpu_options.allow_growth = True  
config.log_device_placement = True  
sess2 = tf.compat.v1.Session(config=config)
# Register the configured session as the one used by the Keras backend.
set_session(sess2)  
from tensorflow.keras.layers import Dense, MaxPooling1D, Flatten
from tensorflow.keras import Input, Model
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow.compat.v1.keras.backend as K
from tensorflow.keras.models import load_model
from tcn import TCN, tcn_full_summary
from sklearn.metrics import mean_squared_error
from mango.tuner import Tuner
from scipy.stats import uniform
from keras_flops import get_flops
import pickle
import csv
import random
import itertools
import quaternion
import math
# NOTE(review): wildcard import; convert_to_tflite_model, convert_to_cpp_model,
# HIL_controller, return_hardware_specs and get_model_memory_usage used below
# presumably come from here -- confirm in hardware_utils.
from hardware_utils import *
import time

## Import Training, Validation and Test Set

In [None]:
# Windowing parameters shared by the dataset loader and the trajectory metrics.
sampling_rate = 100  # presumably in Hz -- confirm against the dataset loader
window_size = 200    # samples per input window
stride = 10          # samples between the starts of consecutive windows

# Dataset directory. Set the TINYODOM_OXIOD_DIR environment variable to point
# at a local copy of the dataset instead of editing the notebook; the original
# path is kept as the default. os.path.join(..., '') guarantees the trailing
# separator that the original path literal carries.
f = os.path.join(
    os.environ.get('TINYODOM_OXIOD_DIR',
                   '/home/nesl/swapnil/TinyOdom/Human/datasets/oxiod/'),
    '')

In [None]:
# Training set.
# NOTE(review): type_flag=2 presumably selects the training split -- confirm
# in data_utils.import_oxiod_dataset.
(
    X, Y_disp, Y_head, Y_pos,
    x0_list, y0_list, size_of_each,
    x_vel, y_vel,
    head_s, head_c,
    X_orig,
) = import_oxiod_dataset(
    type_flag=2,
    useMagnetometer=True,
    useStepCounter=True,
    AugmentationCopies=0,
    dataset_folder=f,
    sub_folders=['handbag/', 'handheld/', 'pocket/', 'running/', 'slow_walking/', 'trolley/'],
    sampling_rate=sampling_rate,
    window_size=window_size,
    stride=stride,
    verbose=False,
)

In [None]:
# Validation set.
# NOTE(review): type_flag=3 presumably selects the validation split -- confirm
# in data_utils.import_oxiod_dataset.
(
    X_val, Y_disp_val, Y_head_val, Y_pos_val,
    x0_list_val, y0_list_val, size_of_each_val,
    x_vel_val, y_vel_val,
    head_s_val, head_c_val,
    X_orig_val,
) = import_oxiod_dataset(
    type_flag=3,
    useMagnetometer=True,
    useStepCounter=True,
    AugmentationCopies=0,
    dataset_folder=f,
    sub_folders=['handbag/', 'handheld/', 'pocket/', 'running/', 'slow_walking/', 'trolley/'],
    sampling_rate=sampling_rate,
    window_size=window_size,
    stride=stride,
    verbose=False,
)

In [None]:
# Test set.
# NOTE(review): type_flag=4 presumably selects the test split -- confirm
# in data_utils.import_oxiod_dataset.
(
    X_test, Y_disp_test, Y_head_test, Y_pos_test,
    x0_list_test, y0_list_test, size_of_each_test,
    x_vel_test, y_vel_test,
    head_s_test, head_c_test,
    X_orig_test,
) = import_oxiod_dataset(
    type_flag=4,
    useMagnetometer=True,
    useStepCounter=True,
    AugmentationCopies=0,
    dataset_folder=f,
    sub_folders=['handbag/', 'handheld/', 'pocket/', 'running/', 'slow_walking/', 'trolley/'],
    sampling_rate=sampling_rate,
    window_size=window_size,
    stride=stride,
    verbose=False,
)

## Training and NAS

In [None]:
# --- Target hardware and artefact names ---
device = "NUCLEO_F746ZG"                          # hardware name
model_name = 'TD_Oxiod_' + device + '.hdf5'       # Keras checkpoint file
dirpath = "/home/nesl/Mbed Programs/tinyodom_tcn/"  # hardware program directory
output_name = 'g_model.tflite'                    # TFLite export file
log_file_name = 'log_NAS_Oxiod_' + device + '.csv'  # per-design NAS log

# --- Search settings ---
HIL = True            # use real hardware (True) or a proxy (False)?
quantization = False  # use quantization or not?
model_epochs = 900    # epochs to train each model for
NAS_epochs = 50       # epochs for hyperparameter tuning

# Start every run from a clean slate: drop the CSV log and the pickled tuner
# results left behind by a previous run.
for _stale in (log_file_name, os.path.splitext(log_file_name)[0] + '.p'):
    if os.path.exists(_stale):
        os.remove(_stale)

# Write the CSV header; one row per evaluated design is appended later.
row_write = [
    'score', 'rmse_vel_x', 'rmse_vel_y', 'RAM', 'Flash', 'Flops', 'Latency',
    'nb_filters', 'kernel_size', 'dilations', 'dropout_rate',
    'use_skip_connections', 'norm_flag',
]
with open(log_file_name, 'a', newline='') as csvfile:
    csv.writer(csvfile).writerow(row_write)

In [None]:
def _rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions."""
    # sqrt(MSE) is used instead of mean_squared_error(..., squared=False):
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6.
    return np.sqrt(mean_squared_error(y_true, y_pred))


def _log_design_choice(row):
    """Print one evaluated design and append it as a row to the NAS CSV log."""
    print('Design choice:', row)
    with open(log_file_name, 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow(row)


def _train_and_validate(model, epochs, batch_size):
    """Fit `model` on the training set and return (rmse_vel_x, rmse_vel_y) on the validation set."""
    # The checkpoint keeps the weights with the lowest *training* loss.
    checkpoint = ModelCheckpoint(model_name, monitor='loss', verbose=1, save_best_only=True)
    model.fit(x=X, y=[x_vel, y_vel], epochs=epochs, shuffle=True,
              callbacks=[checkpoint], batch_size=batch_size)
    # Score the best checkpoint rather than the weights of the final epoch.
    best_model = load_model(model_name, custom_objects={'TCN': TCN})
    y_pred = best_model.predict(X_val)
    return _rmse(x_vel_val, y_pred[0]), _rmse(y_vel_val, y_pred[1])


def objective_NN(epochs=500,nb_filters=32,kernel_size=7,dilations=[1, 2, 4, 8, 16, 32, 64, 128],dropout_rate=0,
                 use_skip_connections=False,norm_flag=0):
    """Build, profile, train and score one TCN candidate for the NAS.

    The candidate is exported to TFLite and profiled either on the real
    target (``HIL`` true) or through proxies (file size, estimated memory,
    FLOPs). Only candidates that fit the device are trained; every candidate
    is appended to the CSV log.

    Parameters
    ----------
    epochs : int
        Training epochs for this candidate.
    nb_filters, kernel_size, dilations, dropout_rate, use_skip_connections
        Forwarded to the ``TCN`` layer.
    norm_flag : int
        When equal to 1 the TCN is built with ``use_batch_norm=True``.

    Returns
    -------
    float
        Score to be maximised. ``-5.0`` is returned for candidates that do
        not fit the device (or whose hardware profiling reported Flash == -1).
    """
    # Placeholders that are logged when the candidate is never trained.
    rmse_vel_x = 'inf'
    rmse_vel_y = 'inf'
    score = -5.0

    batch_size, timesteps, input_dim = 256, window_size, X.shape[2]
    i = Input(shape=(timesteps, input_dim))

    # Copy `dilations` so the layer never shares the mutable default list.
    tcn_kwargs = dict(nb_filters=nb_filters, kernel_size=kernel_size, dilations=list(dilations),
                      dropout_rate=dropout_rate, use_skip_connections=use_skip_connections)
    if norm_flag == 1:
        tcn_kwargs['use_batch_norm'] = True
    m = TCN(**tcn_kwargs)(i)

    # Treat the TCN output vector as a length-nb_filters, 1-channel sequence so it can be pooled.
    m = tf.reshape(m, [-1, nb_filters, 1])
    m = MaxPooling1D(pool_size=(2))(m)
    m = Flatten()(m)
    m = Dense(32, activation='linear', name='pre')(m)
    output1 = Dense(1, activation='linear', name='velx')(m)
    output2 = Dense(1, activation='linear', name='vely')(m)
    model = Model(inputs=[i], outputs=[output1, output2])
    opt = tf.keras.optimizers.Adam()
    model.compile(loss={'velx': 'mse','vely':'mse'},optimizer=opt)

    # Profile the untrained model: resource usage depends on the architecture, not the weights.
    Flops = get_flops(model, batch_size=1)
    convert_to_tflite_model(model=model,training_data=X,quantization=quantization,output_name=output_name)
    maxRAM, maxFlash = return_hardware_specs(device)

    if HIL:
        # Hardware-in-the-loop: measure on the real target.
        convert_to_cpp_model(dirpath)
        RAM, Flash, Latency, idealArenaSize, errorCode = HIL_controller(dirpath=dirpath,
                                                                       chosen_device=device,
                                                                       window_size=window_size,
                                                                       number_of_channels=input_dim,
                                                                       quantization=quantization)
        # Flash == -1 is treated as "profiling failed / model does not fit".
        fits_device = (Flash != -1)
    else:
        # Proxy mode: estimate resources on the host.
        Flash = os.path.getsize(output_name)
        RAM = get_model_memory_usage(batch_size=1,model=model)
        Latency = -1
        max_flops = (30e6)
        fits_device = (RAM < maxRAM and Flash < maxFlash)

    if fits_device:
        rmse_vel_x, rmse_vel_y = _train_and_validate(model, epochs, batch_size)
        model_acc = -(rmse_vel_x+rmse_vel_y)
        resource_usage = (RAM/maxRAM) + (Flash/maxFlash)
        # Compute cost: measured latency on hardware, normalised FLOPs otherwise.
        compute_cost = Latency if HIL else (Flops/max_flops)
        # NOTE(review): resource_usage enters with a positive weight, i.e. designs using
        # more of the available RAM/Flash score higher -- confirm this is intended.
        score = model_acc + 0.01*resource_usage - 0.05*compute_cost  #weigh each component as you like

    _log_design_choice([score, rmse_vel_x,rmse_vel_y,RAM,Flash,Flops,Latency,
                        nb_filters,kernel_size,dilations,dropout_rate,use_skip_connections,norm_flag])
    return score

In [None]:
import pickle 

def save_res(data, file_name):
    """Pickle `data` to `file_name`, overwriting any existing file.

    The file is opened in a context manager so the handle is flushed and
    closed even if pickling raises.
    """
    with open(file_name, "wb") as fh:
        pickle.dump(data, fh)
    
# Candidate dilation factors and the allowed number of dilation entries per TCN.
min_layer = 3
max_layer = 8
a_list = [1,2,4,8,16,32,64,128,256]

# Every non-empty subset of a_list, ordered by size and then lexicographically.
all_combinations = [
    combo
    for size in range(1, len(a_list) + 1)
    for combo in itertools.combinations(a_list, size)
]

# Keep only the subsets whose length lies within [min_layer, max_layer].
dil_list = [
    list(combo)
    for combo in all_combinations
    if min_layer <= len(combo) <= max_layer
]

# Search space handed to the tuner.
# NOTE(review): np.arange(0,1) yields only [0], so norm_flag == 1 (batch norm)
# is never sampled -- confirm this is intentional.
param_dict = dict(
    nb_filters=range(2,64),
    kernel_size=range(2,16),
    dropout_rate=np.arange(0.0,0.5,0.1),
    use_skip_connections=[True, False],
    norm_flag=np.arange(0,1),
    dil_list=dil_list,
)


def objfunc(args_list):
    """Evaluate a batch of hyperparameter dictionaries for the tuner.

    Each dictionary in `args_list` is unpacked into a call to `objective_NN`
    (trained for `model_epochs` epochs). Returns the list of scores in the
    same order as `args_list`.
    """
    started_at = time.time()
    scores = []

    for params in args_list:
        # Read every hyperparameter up front, then train/score the design.
        filters = params['nb_filters']
        kernel = params['kernel_size']
        dropout = params['dropout_rate']
        skip = params['use_skip_connections']
        norm = params['norm_flag']
        dilation_set = params['dil_list']

        score = objective_NN(epochs=model_epochs, nb_filters=filters, kernel_size=kernel,
                             dilations=dilation_set, dropout_rate=dropout,
                             use_skip_connections=skip, norm_flag=norm)
        scores.append(score)

        # Elapsed time is measured from the start of the batch, not per design.
        print('objective:', score, ' time:', time.time() - started_at)

    return scores

# Tuner settings: one design per iteration, NAS_epochs iterations,
# seeded with 5 random designs.
conf_Dict = {
    'batch_size': 1,
    'num_iteration': NAS_epochs,
    'initial_random': 5,
}
tuner = Tuner(param_dict, objfunc, conf_Dict)

# Run the search (higher score is better) and persist the raw tuner output
# next to the CSV log, using the same base name with a '.p' extension.
results = tuner.maximize()
all_runs = [results]
save_res(all_runs, log_file_name[0:-4]+'.p')

## Train the Best Model

In [None]:
# Hyperparameters of the best design found by the tuner. The keys mirror the
# search-space dictionary, where the dilation set is stored under 'dil_list'
# (there is no 'dilations' key, so reading it raised KeyError).
best_params = results['best_params']
nb_filters = best_params['nb_filters']
kernel_size = best_params['kernel_size']
dilations = best_params['dil_list']
dropout_rate = best_params['dropout_rate']
use_skip_connections = best_params['use_skip_connections']
norm_flag = best_params['norm_flag']

# Rebuild the same architecture that objective_NN evaluates during the search.
batch_size, timesteps, input_dim = 256, window_size, X.shape[2]
i = Input(shape=(timesteps, input_dim))
if(norm_flag==1):
    m = TCN(nb_filters=nb_filters,kernel_size=kernel_size,dilations=dilations,dropout_rate=dropout_rate,
            use_skip_connections=use_skip_connections,use_batch_norm=True)(i)
else:
    m = TCN(nb_filters=nb_filters,kernel_size=kernel_size,dilations=dilations,dropout_rate=dropout_rate,
            use_skip_connections=use_skip_connections)(i)

# Treat the TCN output vector as a length-nb_filters, 1-channel sequence so it can be pooled.
m = tf.reshape(m, [-1, nb_filters, 1])
m = MaxPooling1D(pool_size=(2))(m)
m = Flatten()(m)
m = Dense(32, activation='linear', name='pre')(m)
output1 = Dense(1, activation='linear', name='velx')(m)
output2 = Dense(1, activation='linear', name='vely')(m)
model = Model(inputs=[i], outputs=[output1, output2])
opt = tf.keras.optimizers.Adam()
model.compile(loss={'velx': 'mse','vely':'mse'},optimizer=opt)

# Keep the weights with the lowest training loss in `model_name`.
checkpoint = ModelCheckpoint(model_name, monitor='loss', verbose=1, save_best_only=True)
model.fit(x=X, y=[x_vel, y_vel],epochs=model_epochs, shuffle=True,callbacks=[checkpoint],batch_size=batch_size)

## Evaluate the Best Model

#### Velocity Prediction RMSE

In [None]:
# Evaluate the best checkpoint (lowest training loss) on the held-out test set.
model = load_model(model_name,custom_objects={'TCN': TCN})
y_pred = model.predict(X_test)
# RMSE is computed as sqrt(MSE): the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6.
rmse_vel_x = np.sqrt(mean_squared_error(x_vel_test, y_pred[0]))
rmse_vel_y = np.sqrt(mean_squared_error(y_vel_test, y_pred[1]))
print('Vel_X RMSE, Vel_Y RMSE:',rmse_vel_x,rmse_vel_y)

#### ATE and RTE Metrics

In [None]:
# Per-trajectory ATE / RTE on the test set.
#
# The test windows of all trajectories are concatenated; size_of_each_test[k]
# is the number of windows of trajectory k, so trajectory k occupies the slice
# [sum(size[:k]), sum(size[:k+1])). The previous version advanced the slice end
# by the size of the *current* trajectory instead of the next one, which
# produced wrong slices whenever trajectories differ in length.
ATE = []
RTE = []
ATE_dist = []
RTE_dist = []

# Factor that converts a per-window velocity target into a per-stride position increment.
vel_scale = (window_size-stride)/stride

b = 0
for i in range(len(size_of_each_test)):
    # Window range [a, b) of trajectory i.
    a = b
    b = a + size_of_each_test[i]

    X_test_sel = X_test[a:b,:,:]
    x_vel_test_sel = x_vel_test[a:b]
    y_vel_test_sel = y_vel_test[a:b]
    Y_head_test_sel = Y_head_test[a:b]
    Y_disp_test_sel = Y_disp_test[a:b]

    y_pred = model.predict(X_test_sel)

    # Ground-truth path: integrate the reference velocities from the start position.
    Gvx = []
    Gvy = []
    Lx = x0_list_test[i]
    Ly = y0_list_test[i]
    for j in range(len(x_vel_test_sel)):
        Lx = Lx + (x_vel_test_sel[j]/vel_scale)
        Ly = Ly + (y_vel_test_sel[j]/vel_scale)
        Gvx.append(Lx)
        Gvy.append(Ly)

    # Predicted path: integrate the network outputs from the same start position.
    Pvx = []
    Pvy = []
    Lx = x0_list_test[i]
    Ly = y0_list_test[i]
    for j in range(len(x_vel_test_sel)):
        Lx = Lx + (y_pred[0][j]/vel_scale)
        Ly = Ly + (y_pred[1][j]/vel_scale)
        Pvx.append(Lx)
        Pvy.append(Ly)

    at, rt, at_all, rt_all = Cal_TE(Gvx, Gvy, Pvx, Pvy,
                                    sampling_rate=sampling_rate,window_size=window_size,stride=stride)
    ATE.append(at)
    RTE.append(rt)
    ATE_dist.append(Cal_len_meters(Gvx, Gvy))
    # NOTE(review): 600 presumably limits the length to the first 600 windows
    # (60 s at stride/sampling_rate = 0.1 s per window) -- confirm in Cal_len_meters.
    RTE_dist.append(Cal_len_meters(Gvx, Gvy, 600))
    print('ATE, RTE, Trajectory Length, Trajectory Length (60 seconds)',ATE[i],RTE[i],ATE_dist[i],RTE_dist[i])

print('Median ATE and RTE', np.median(ATE),np.median(RTE))

#### Sample Trajectory Plotting

In [None]:
# You can use size_of_each_test to control the region to plot. Here we plot
# the first 900 windows of test trajectory `traj_idx`.
#
# The start position is looked up with the same explicit trajectory index that
# defines the slice. It was previously indexed with a loop variable `i` left
# over from an earlier cell, which only worked by accident of execution order.
traj_idx = 5
a = sum(size_of_each_test[0:traj_idx])
b = a+900

X_test_sel = X_test[a:b,:,:]
x_vel_test_sel = x_vel_test[a:b]
y_vel_test_sel = y_vel_test[a:b]
Y_head_test_sel = Y_head_test[a:b]
Y_disp_test_sel = Y_disp_test[a:b]

y_pred = model.predict(X_test_sel)

# Factor that converts a per-window velocity target into a per-stride position increment.
vel_scale = (window_size-stride)/stride

# Ground-truth path: integrate the reference velocities from the start position.
Gvx = []
Gvy = []
Lx = x0_list_test[traj_idx]
Ly = y0_list_test[traj_idx]
for j in range(len(x_vel_test_sel)):
    Lx = Lx + (x_vel_test_sel[j]/vel_scale)
    Ly = Ly + (y_vel_test_sel[j]/vel_scale)
    Gvx.append(Lx)
    Gvy.append(Ly)

# Predicted path: integrate the network outputs from the same start position.
Pvx = []
Pvy = []
Lx = x0_list_test[traj_idx]
Ly = y0_list_test[traj_idx]
for j in range(len(x_vel_test_sel)):
    Lx = Lx + (y_pred[0][j]/vel_scale)
    Ly = Ly + (y_pred[1][j]/vel_scale)
    Pvx.append(Lx)
    Pvy.append(Ly)

print('Plotting Trajectory of length (meters): ',Cal_len_meters(Gvx, Gvy))

plt.plot(Gvx,Gvy,label='Ground Truth',color='salmon')
plt.plot(Pvx,Pvy,label='TinyOdom',color='green',linestyle='-')
plt.grid()
plt.legend(loc='best')
plt.title('PDR - OxIOD Dataset')
plt.xlabel('East (m)')
plt.ylabel('North (m)')
plt.show()

#### Error Evolution

In [None]:
# Error evolution over the whole of test trajectory `traj_idx`.
#
# The start position is looked up with the same explicit trajectory index that
# defines the slice. It was previously indexed with a loop variable `i` left
# over from an earlier cell, which only worked by accident of execution order.
traj_idx = 5
a = sum(size_of_each_test[0:traj_idx])
b = a+size_of_each_test[traj_idx]

X_test_sel = X_test[a:b,:,:]
x_vel_test_sel = x_vel_test[a:b]
y_vel_test_sel = y_vel_test[a:b]
Y_head_test_sel = Y_head_test[a:b]
Y_disp_test_sel = Y_disp_test[a:b]

y_pred = model.predict(X_test_sel)

# Factor that converts a per-window velocity target into a per-stride position increment.
vel_scale = (window_size-stride)/stride

# Ground-truth path: integrate the reference velocities from the start position.
Gvx = []
Gvy = []
Lx = x0_list_test[traj_idx]
Ly = y0_list_test[traj_idx]
for j in range(len(x_vel_test_sel)):
    Lx = Lx + (x_vel_test_sel[j]/vel_scale)
    Ly = Ly + (y_vel_test_sel[j]/vel_scale)
    Gvx.append(Lx)
    Gvy.append(Ly)

# Predicted path: integrate the network outputs from the same start position.
Pvx = []
Pvy = []
Lx = x0_list_test[traj_idx]
Ly = y0_list_test[traj_idx]
for j in range(len(x_vel_test_sel)):
    Lx = Lx + (y_pred[0][j]/vel_scale)
    Ly = Ly + (y_pred[1][j]/vel_scale)
    Pvx.append(Lx)
    Pvy.append(Ly)

at, rt, at_all, rt_all = Cal_TE(Gvx, Gvy, Pvx, Pvy,
                                sampling_rate=sampling_rate,window_size=window_size,stride=stride)

# NOTE(review): the time axis is hard-coded to span 0-60 s regardless of how
# many error samples Cal_TE returns -- confirm this matches rt_all.
x_ax = np.linspace(0,60,len(rt_all))
print('Plotting for trajectory of length (meters): ',Cal_len_meters(Gvx, Gvy))

plt.plot(x_ax,rt_all,label='TinyOdom',color='green',linestyle='-')
plt.legend()
plt.xlabel('Time (seconds)')
plt.ylabel('Position Error (m)')
plt.title('PDR - OxIOD Dataset')
plt.grid()
plt.show()

## Deployment

#### Conversion to TFLite

In [None]:
# Export the evaluated model to TFLite. The training windows `X` are passed as
# `training_data`, exactly as objective_NN does during the search; `X_tr` was
# never defined in this notebook and raised NameError.
convert_to_tflite_model(model=model,training_data=X,quantization=quantization,output_name='g_model.tflite')

#### Conversion to C++

In [None]:
# Generate the C++ model for the hardware program directory `dirpath`.
# NOTE(review): presumably consumes the TFLite file written by the previous
# cell -- confirm in hardware_utils.convert_to_cpp_model.
convert_to_cpp_model(dirpath)