## 0. Imports and Functions

In [2]:
import pandas as pd
import os

In [3]:
def read_data_files(data_dir:str="R6", base_dir:str="../data/"):
    """
    Read the CSV files of every matching data folder into time-normalised DataFrames.

    Every sub-folder of ``base_dir`` whose name starts with ``data_dir`` is scanned.
    For each ``.csv`` file a ``time`` column (seconds) is built from
    ``header.stamp.secs`` and ``header.stamp.nsecs``, the bookkeeping columns are
    dropped, and every remaining column except ``time`` is prefixed with
    ``"<i>_"``, where ``i`` is the position of the file in ``os.listdir`` of its
    folder (non-CSV entries are counted too, and ``os.listdir`` order is arbitrary).
    Finally ``time`` is shifted so that the earliest first-row timestamp over all
    files becomes 0.

    Args:
        data_dir (str, optional): Prefix of the folder name(s) to read. Defaults to "R6".
        base_dir (str, optional): Directory that contains the data folders.
            Defaults to "../data/".

    Returns:
        all_sensors (list): One DataFrame per CSV file that was read.
        name (list): File name of the CSV behind each DataFrame, in the same order.

    Raises:
        KeyError: If a CSV lacks one of the required header columns.
    """
    all_sensors = []
    name = []
    # Earliest first-row timestamp seen so far; None until a non-empty CSV is read.
    start_time = None
    for folder in os.listdir(base_dir):
        path = os.path.join(base_dir, folder)
        # Skip non-matching entries and plain files that merely share the prefix.
        if not folder.startswith(data_dir) or not os.path.isdir(path):
            continue
        for i, filename in enumerate(os.listdir(path)):
            if not filename.endswith(".csv"):
                continue
            df = pd.read_csv(os.path.join(path, filename))
            df["time"] = df["header.stamp.secs"] + df["header.stamp.nsecs"]/1e9
            if not df.empty:
                first_time = df.loc[0, "time"]
                if start_time is None or first_time < start_time:
                    start_time = first_time
            # "child_frame_id" is only present in some files, so its absence is tolerated.
            df = df.drop(columns=["child_frame_id"], errors="ignore")
            df = df.drop(columns=["Time", "header.seq", "header.frame_id", "header.stamp.secs", "header.stamp.nsecs"])
            prefix = str(i) + "_"
            all_sensors.append(df.rename(columns=lambda col: col if col == "time" else prefix + col))
            name.append(filename)
    # Normalize the time (to start from 0)
    if start_time is not None:
        for sensor in all_sensors:
            sensor["time"] = sensor["time"].sub(start_time)
    return all_sensors, name



## 1. Obtain data

In [4]:
# Load every CSV found in the data folders whose name starts with "R6":
# df_list holds one DataFrame per file, name_list the matching file names.
df_list, name_list = read_data_files("R6")

## 2. Join Data

In [5]:
# Build the input table: start from the first frame and outer-join the others on "time".
df_entrada = df_list[0]
# The frame read from "odometry-filtered_map.csv" is the only one left out of the join.
excluded_idx = name_list.index("odometry-filtered_map.csv")
for idx, sensor_df in enumerate(df_list):
    if idx == excluded_idx:
        continue
    # NOTE(review): idx 0 is not skipped, so df_list[0] is also merged with itself,
    # which duplicates its columns with "_x"/"_y" suffixes. Later cells select
    # "0_pose.pose.position.x_x", so this is kept as-is; confirm whether it is intended.
    df_entrada = df_entrada.merge(sensor_df, how="outer", on="time")
# Index by time and order the rows chronologically.
df_entrada = df_entrada.set_index("time").sort_index()

In [6]:
# Preview the first 20 rows of five columns, each with a different file-index prefix.
df_entrada[
    [
        "0_pose.pose.position.x_x",
        "6_pose.pose.position.x",
        "4_orientation.x",
        "3_pose.pose.position.x",
        "5_latitude",
    ]
].head(20)

Unnamed: 0_level_0,0_pose.pose.position.x_x,6_pose.pose.position.x,4_orientation.x,3_pose.pose.position.x,5_latitude
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,,-3.497493,,,
0.004433,,,,384.128444,
0.007797,-1.653783,,,,43.357153
0.022076,,,,384.12521,
0.049337,,-3.476576,,,
0.051913,,,,384.118611,
0.055698,,,,,
0.068434,,,-0.046049,384.114745,
0.088763,,,,384.109914,
0.1,,-3.477195,,,


## 3. Métodos de Interpolación

### 3.1. Método Anterior

In [7]:
# "Previous value" method: carry the last known reading forward, drop the rows that
# still contain a NaN, then remove every column whose dtype is not float64.
met_anterior = (
    df_entrada
    .ffill()
    .dropna()
    .pipe(lambda filled: filled.drop(columns=filled.select_dtypes(exclude=["float64"]).columns))
)
# Persist the resulting input table next to the notebook.
met_anterior.to_csv("R6_entrada_anterior.csv")

In [138]:
# Preview the first 20 forward-filled rows for the same five columns shown for df_entrada.
met_anterior[
    [
        "0_pose.pose.position.x_x",
        "6_pose.pose.position.x",
        "4_orientation.x",
        "3_pose.pose.position.x",
        "5_latitude",
    ]
].head(20)

Unnamed: 0_level_0,0_pose.pose.position.x_x,6_pose.pose.position.x,4_orientation.x,3_pose.pose.position.x,5_latitude
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.068434,-1.653783,-3.476576,-0.046049,384.114745,43.357153
0.088763,-1.653783,-3.476576,-0.046049,384.109914,43.357153
0.1,-1.653783,-3.477195,-0.046049,384.109914,43.357153
0.111923,-1.653783,-3.477195,-0.046049,384.10383,43.357153
0.1164,-1.653783,-3.477195,-0.049903,384.10383,43.357153
0.122096,-1.653783,-3.477195,-0.049903,384.101169,43.357153
0.149322,-1.653783,-3.459518,-0.049903,384.101169,43.357153
0.151912,-1.653783,-3.459518,-0.049903,384.092602,43.357153
0.155433,-1.653783,-3.459518,-0.049903,384.092602,43.357153
0.164388,-1.653783,-3.459518,-0.046565,384.089018,43.357153


In [143]:
#Añadir lineas de tiempo de entrada para conseguir interpolación
df_salida = pd.merge(met_anterior.reset_index()["time"], df_list[2], how='outer', on='time')
df_salida = df_salida.set_index("time").sort_index(ascending=True).interpolate(method="values")
#Borrar las lineas que no están en la entrada para que la longitud de entrada y salida sea igual
df_salida = pd.merge(df_salida.reset_index(), met_anterior.reset_index()["time"], how="right", on="time").set_index("time")
df_salida.to_csv("R6_salida.csv")

  df_salida = df_salida.set_index("time").sort_index(ascending=True).interpolate(method="values")


In [142]:
# Display every column of df_salida except the last one.
df_salida.iloc[:,:-1]

Unnamed: 0,time,2_pose.pose.position.x,2_pose.pose.position.y,2_pose.pose.position.z,2_pose.pose.orientation.x,2_pose.pose.orientation.y,2_pose.pose.orientation.z,2_pose.pose.orientation.w,2_pose.covariance,2_twist.twist.linear.x,2_twist.twist.linear.y,2_twist.twist.linear.z,2_twist.twist.angular.x,2_twist.twist.angular.y,2_twist.twist.angular.z
0,0.068434,-3.132471,-8.010657,-0.396838,-0.003879,0.011943,0.306024,0.951940,,3.871326e-01,-8.868303e-04,0.0,0.0,0.0,0.124097
1,0.088763,-3.102076,-8.002796,-0.383493,-0.003865,0.011939,0.307045,0.951612,"(0.1166715677656245, -0.0002609015436593346, 0...",4.066517e-01,-1.420183e-03,0.0,0.0,0.0,0.139510
2,0.100000,-3.107441,-8.002164,-0.388062,-0.003853,0.011934,0.307859,0.951349,,4.207627e-01,-1.153898e-03,0.0,0.0,0.0,0.164429
3,0.111923,-3.113133,-8.001494,-0.392909,-0.003840,0.011929,0.308723,0.951069,,4.357346e-01,-8.713696e-04,0.0,0.0,0.0,0.190869
4,0.116400,-3.115271,-8.001242,-0.394730,-0.003835,0.011927,0.309048,0.950964,,4.413569e-01,-7.652724e-04,0.0,0.0,0.0,0.200798
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149284,1736.282808,-5.478260,-9.566387,-0.364581,-0.001775,0.004915,0.307667,0.951480,,-1.901418e-149,-1.216407e-151,0.0,0.0,0.0,0.000384
149285,1736.288743,-5.470067,-9.564912,-0.360469,-0.001775,0.004913,0.307667,0.951480,"(0.10304303429722178, -3.720507646421011e-07, ...",-3.090798e-162,-1.977298e-164,0.0,0.0,0.0,0.000527
149286,1736.292831,-5.473234,-9.565471,-0.361926,-0.001774,0.004912,0.307667,0.951480,,-2.711866e-162,-1.734881e-164,0.0,0.0,0.0,0.000469
149287,1736.311913,-5.488021,-9.568080,-0.368730,-0.001771,0.004906,0.307668,0.951479,,-9.429427e-163,-6.032355e-165,0.0,0.0,0.0,0.000201


### 3.2. Método de Interpolación

In [93]:
# Interpolation method: fill the gaps of df_entrada by interpolating against the
# numeric values of its index (method="values").
met_interp = df_entrada.interpolate(method="values")

  met_interp = df_entrada.interpolate(method="values")


In [94]:
# Preview the first 20 interpolated rows for the same five columns shown for df_entrada.
met_interp[
    [
        "0_pose.pose.position.x_x",
        "6_pose.pose.position.x",
        "4_orientation.x",
        "3_pose.pose.position.x",
        "5_latitude",
    ]
].head(20)

Unnamed: 0_level_0,0_pose.pose.position.x_x,6_pose.pose.position.x,4_orientation.x,3_pose.pose.position.x,5_latitude
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,,-3.497493,,,
0.004433,,-3.495614,,384.128444,
0.007797,-1.653783,-3.494188,,384.127827,43.357153
0.022076,-1.649731,-3.488134,,384.12521,43.357153
0.049337,-1.641996,-3.476576,,384.119181,43.357153
0.051913,-1.641266,-3.476608,,384.118611,43.357153
0.055698,-1.640192,-3.476654,,384.117725,43.357153
0.068434,-1.636578,-3.47681,-0.046049,384.114745,43.357153
0.088763,-1.63081,-3.477058,-0.047682,384.109914,43.357153
0.1,-1.627621,-3.477195,-0.048585,384.106962,43.357153


### 1.1. Process Data 

1704343625.79219

In [53]:
def points_out_of_std(df:pd.DataFrame)->pd.DataFrame:
    """
    Flag the rows whose jump from the previous row exceeds the column's standard deviation.

    Only numeric columns are analysed; non-numeric columns (e.g. strings) are left
    out of the result because neither a difference nor a standard deviation can be
    computed for them.

    Args:
        df (pd.DataFrame): Frame to analyse.

    Returns:
        pd.DataFrame: Boolean frame with the index of ``df`` and its numeric columns.
        A cell is True when the absolute difference to the previous row is greater
        than the standard deviation of that column. The first row is always False,
        since it has no previous row to compare with.
    """
    numeric = df.select_dtypes(include="number")
    std = numeric.std()  # per-column standard deviation (never negative)
    difference = numeric.diff().abs()  # absolute step between consecutive rows
    return difference.gt(std)  # compared column by column
     

In [54]:
# One boolean "jump larger than std" frame per loaded sensor frame.
comp_list = [points_out_of_std(sensor_df) for sensor_df in df_list]

In [58]:
# Per column: True if at least one row of the frame at position 4 was flagged.
comp_list[4].any()

header.stamp.secs       False
header.stamp.nsecs       True
pose.pose.position.x    False
pose.pose.position.y    False
pose.pose.position.z    False
dtype: bool

### 2. Salida

In [128]:
#Añadir lineas de tiempo de entrada para conseguir interpolación
df_salida = pd.merge(df_entrada.reset_index()["time"], df_list[2], how='outer', on='time')
df_salida = df_salida.set_index("time").sort_index(ascending=True).interpolate(method="values")
#Borrar las lineas que no están en la entrada para que la longitud de entrada y salida sea igual
df_salida = pd.merge(df_salida.reset_index(), df_entrada.reset_index()["time"], how="right", on="time")

  df_salida = df_salida.set_index("time").sort_index(ascending=True).interpolate(method="values")
