In [40]:
import pandas as pd
import numpy as np
from numpy import linalg
import os, sys

from utility.praser import deleteNAN
from matplotlib import pyplot as plt
from scipy import stats

# Build Error Dataset

In [41]:
# meta-data
data_types = ['cardboard', 'foam', 'metal', 'plastic', 'wooden-cabinet', 'wooden-shelf']
meas_numbers = 6
type_encoding = {
    'nothing':0,
    'cardboard':1, 
    'foam':2, 
    'metal':3, 
    'plastic':4, 
    'wooden-cabinet':5, 
    'wooden-shelf':6
}

In [42]:
# collecting all relevant files
df_list = []
pos_list = []
for data_type in data_types:
    for num in range(1,meas_numbers):
        # collect csv
        path = f"../../my_data/identification-dataset/nlos/anTag/{data_type}/data{num}/{data_type}-anTag-data{num}_data.csv"
        df = pd.read_csv(path)
        df['material'] = data_type
        # add material encoding
        df['encoded_material'] = type_encoding[df['material'].unique()[0]]
        # save df
        df_list.append(df)

        # collect pos
        folder = f"../../my_data/identification-dataset/nlos/anTag/{data_type}/data{num}/"
        # search for txt files in the folder
        for file in os.listdir(folder):
            if file.endswith(".txt"):
                pose_txt = os.path.join(folder, file)
        
        f = open(pose_txt,"r")
        pos=[];    quat=[]       # position and quaternion
        for line in f:
            x = line.split(",")
            if len(x) == 4:
                arr_x = [float(x[1]), float(x[2]), float(x[3])]
                pos.append(arr_x)
            if len(x) == 5:
                q_x = [float(x[1]), float(x[2]), float(x[3]), float(x[4])]
                quat.append(q_x)

        # calculate values
        pos = np.array(pos)     # [an1_p, an2_p, tag_p]
        quat = np.array(quat)   # [an1_quat, an2_quat, tag_quat]
        an1_p = pos[0,:]; 
        an2_p = pos[1,:]; 
        tag_p = pos[2,:]
        obs_up = pos[3:,:]
        obs_bt = np.copy(obs_up);  obs_bt[:,2] = 0 
        obstacle = np.concatenate((obs_up, obs_bt), axis=0)

        # add to list
        pos_list.append([pos, quat, an1_p, an2_p, tag_p, obs_up, obs_bt, obstacle])


In [43]:
# splitting and relabeling the data
splitted_df_list = []
for df, pos in zip(df_list, pos_list):
    # extract data
    tdoa12 = deleteNAN(np.array(df['tdoa12']))

    # compute ground truth (gt)
    tag_p = pos[4]
    an1_p = pos[2]
    an2_p = pos[3]

    gt_d_12 = linalg.norm(an2_p - tag_p) - linalg.norm(an1_p - tag_p)
    gt_an = linalg.norm(an1_p - an2_p)

    # compute the tdoa12 err
    err12 = tdoa12 - gt_d_12

    df['error'] = err12
    splitted_df_list.append(df)


In [44]:
# builing the final df
df = pd.concat(splitted_df_list)
df.drop(columns=['material'])
df.dropna(inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 59871 entries, 0 to 1989
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   tdoa12            59871 non-null  float64
 1   tdoa21            59871 non-null  float64
 2   snr_an1           59871 non-null  float64
 3   power_dif_an1     59871 non-null  float64
 4   snr_an2           59871 non-null  float64
 5   power_dif_an2     59871 non-null  float64
 6   an1_rx_snr        59871 non-null  float64
 7   an1_rx_powerdif   59871 non-null  float64
 8   an1_tof           59871 non-null  float64
 9   an2_rx_snr        59871 non-null  float64
 10  an2_rx_powerdif   59871 non-null  float64
 11  an2_tof           59871 non-null  float64
 12  material          59871 non-null  object 
 13  encoded_material  59871 non-null  int64  
 14  error             59871 non-null  float64
dtypes: float64(13), int64(1), object(1)
memory usage: 7.3+ MB


In [45]:
# storing the df
df.to_csv('error-identification-dataset.csv', index=False,  encoding='utf-8')