In [48]:
import pandas as pd
import json
import os
import time

Read data files

In [49]:
test_folder = 'test_data_22_05'
start_file = '2024-05-22 21_44_15.135255.json'

# Get all files after the start file (including it).
# os.listdir() returns names in arbitrary order, so the listing is sorted
# first and the start file is looked up in the *sorted* list. Looking it up
# in the unsorted listing would give an index that does not match the sorted
# order and could start the slice at the wrong file.
# Raises ValueError if start_file is not present in test_folder.
files = sorted(os.listdir(test_folder))
files = files[files.index(start_file):]

In [50]:
# Gather the 'data' entries of every selected file into a single flat list,
# in the same order as `files`.
json_data = []
for file_name in files:
    file_path = os.path.join(test_folder, file_name)
    with open(file_path, 'r') as handle:
        loaded = json.load(handle)
    json_data.extend(loaded['data'])


Format time

In [51]:
start_time = json_data[0]['t']
end_time = json_data[-1]['t']

# Make every timestamp relative to the first data point (modifies json_data in place)
for point in json_data:
    point['t'] = point['t'] - start_time

# Total time of recorded data, i.e. the relative timestamp of the last point
total_duration = json_data[-1]['t']
print(time.strftime('%H:%M:%S', time.gmtime(total_duration)))

00:15:30


Map dictionary items to bus numbers

In [52]:
devices = ["L_ARM_ESP32", "R_ARM_ESP32", "L_LEG_NRF52", "R_LEG_NRF52"]

# device_data[device][bus][slot] is a list with one entry per data point.
# Two buses per device:
#   bus 0: two MPUs and one QMC
#   bus 1: one MPU and one QMC
device_data = {d: [[[], [], []], [[], []]] for d in devices}

for point in json_data:
    for device in devices:
        buses = device_data[device]

        # Add empty data points for the missing data: every slot first gets a
        # None so all lists stay the same length as json_data; readings that
        # exist overwrite that last entry below.
        for bus in buses:
            for slot in bus:
                slot.append(None)

        if device not in point['v']:
            continue

        device_point = point['v'][device]

        # Remove missing MPU data.
        # Build a filtered list instead of popping while looping over the
        # original indices: popping shrinks the list, which raised IndexError
        # on later indices and could skip a None that followed another None.
        device_point['mpu'] = [m for m in device_point['mpu'] if m is not None]

        # Legs don't have qmc data
        if device_point['qmc'] is None:
            device_point['qmc'] = []

        mpu_len = len(device_point['mpu'])
        qmc_len = len(device_point['qmc'])

        # Assign the data points that exist.
        # Bus 0: first two MPU readings and the first QMC reading.
        if mpu_len > 0:
            buses[0][0][-1] = device_point['mpu'][0]
        if mpu_len > 1:
            buses[0][1][-1] = device_point['mpu'][1]
        if qmc_len > 0:
            buses[0][2][-1] = device_point['qmc'][0]

        # Bus 1: third MPU reading and the second QMC reading.
        if mpu_len > 2:
            buses[1][0][-1] = device_point['mpu'][2]
        if qmc_len > 1:
            buses[1][1][-1] = device_point['qmc'][1]

In [None]:
# Inspect the first sensor (bus 0, slot 0) of the first device:
# report how many of its entries are still None, then display the entries.
first_sensor_points = device_data[devices[0]][0][0]
missing_count = sum(1 for entry in first_sensor_points if entry is None)
print(f'# of missing points: {missing_count}')
first_sensor_points

Map bus numbers to location names

In [54]:
# For each device: one list of starting column names per bus, in slot order.
# Each name is "<column number> <label ending in the first axis>".
device_locations = {
    'L_ARM_ESP32': [['8 Acc LUA^ accX', '17 Acc BACK accX', '83 IMU LUA magneticX'], ['14 Acc LH accX', '96 IMU LLA magneticX']],
    'R_ARM_ESP32': [['11 Acc RUA_ accX', '23 Acc RWR accX', '57 IMU RUA magneticX'], ['5 Acc HIP accX', '44 IMU BACK magneticX']],
    'L_LEG_NRF52': [['103 IMU L-SHOE EuX'], []],
    # 'R_LEG_NRF52': [['20 Acc RKN_ accX', '119 IMU R-SHOE EuX'], []],
    
    # FIXME: This is only for that run on 22/05 when the 2nd mpu was not connected
    # Either RKN or R-SHOE was lost. Need to check data and compare with L-SHOE, also check that stepping movement we did with each foot
    'R_LEG_NRF52': [['119 IMU R-SHOE EuX'], []],
}

# Accelerometer locations whose readings are additionally written to an IMU
# column group (acc, gyro, <skipped magnetic>, quaternion).
acc_to_imu = {
    '8 Acc LUA^ accX': '77 IMU LUA accX',
    '17 Acc BACK accX': '38 IMU BACK accX',
    '11 Acc RUA_ accX': '51 IMU RUA accX',
}

# NOTE(review): not read anywhere in this cell; kept because other cells may use it.
is_euler = ['103 IMU L-SHOE EuX']

UPPER_AXES = ['X', 'Y', 'Z']
LOWER_AXES = ['x', 'y', 'z']
QUATERNION_SUFFIXES = ['1', '2', '3', '4']


def _empty_reading():
    """Return a fresh all-None stand-in for a missing data point."""
    return {'q': [None] * 4, 'e': [None] * 3, 'g': [None] * 3, 'a': [None] * 3, 'm': [None] * 3}


def _split_column_name(full_name):
    """Split '<number> <label>' into (int(number), label)."""
    number_text, _, label = full_name.partition(' ')
    return int(number_text), label


def _add_columns(columns, readings, key, first_index, label, suffixes):
    """Add one column per suffix to `columns`.

    Column k is named '<first_index + k> <label minus its last char + suffix>'
    and holds round(reading[key][k]) for every reading, or None where that
    component is None.
    """
    for k, suffix in enumerate(suffixes):
        columns[f'{first_index + k} {label[:-1] + suffix}'] = [
            round(r[key][k]) if r[key][k] is not None else None for r in readings
        ]


device_columns = {}

for device, bus_locations in device_locations.items():
    for i, slot_locations in enumerate(bus_locations):
        for j, location in enumerate(slot_locations):
            if location == '':
                continue

            raw_readings = device_data[device][i][j]

            # Decide the sensor type from the first reading that actually
            # exists. The placeholder for missing points carries every key
            # ('a' and 'm' included), so using it as the sample would classify
            # a QMC slot as an MPU whenever its first point is missing.
            # If no reading exists at all, fall back to the placeholder.
            sample_point = next((r for r in raw_readings if r is not None), None)
            if sample_point is None:
                sample_point = _empty_reading()

            # Normalize device data into a local list (missing points become
            # all-None placeholders). device_data itself is left untouched so
            # that re-running this cell detects sensor types the same way.
            readings = [r if r is not None else _empty_reading() for r in raw_readings]

            column_index, column_label = _split_column_name(location)

            if 'a' in sample_point:
                # MPU

                if 'SHOE EuX' in column_label:
                    # Euler angles
                    _add_columns(device_columns, readings, 'e', column_index, column_label, UPPER_AXES)

                    # Skip the three Nav_A columns, then move on to Body_A
                    column_index += 6
                    column_label = column_label.replace('Eu', 'Body_A')

                    # Body acceleration (these columns use lowercase axis suffixes)
                    _add_columns(device_columns, readings, 'a', column_index, column_label, LOWER_AXES)

                    column_label = column_label.replace('Body_A', 'AngVelBodyFrame')
                    column_index += 3

                    # Body angular velocity
                    _add_columns(device_columns, readings, 'g', column_index, column_label, UPPER_AXES)

                    # Ignore Nav angular velocity and compass
                    continue

                # Acceleration
                _add_columns(device_columns, readings, 'a', column_index, column_label, UPPER_AXES)

                # IMU
                if location in acc_to_imu:
                    imu_index, imu_label = _split_column_name(acc_to_imu[location])

                    # Acceleration
                    _add_columns(device_columns, readings, 'a', imu_index, imu_label, UPPER_AXES)

                    imu_label = imu_label.replace('acc', 'gyro')
                    imu_index += 3

                    # Gyro
                    _add_columns(device_columns, readings, 'g', imu_index, imu_label, UPPER_AXES)

                    # Skip the three Magnetic columns, then move on to Quaternion
                    imu_index += 6
                    imu_label = imu_label.replace('gyro' + UPPER_AXES[0], 'Quaternion1')

                    # Quaternion
                    _add_columns(device_columns, readings, 'q', imu_index, imu_label, QUATERNION_SUFFIXES)

            elif 'm' in sample_point:
                # QMC

                # Magnetic
                _add_columns(device_columns, readings, 'm', column_index, column_label, UPPER_AXES)



In [None]:
# Quick sanity check: number of generated columns, their names, and the
# length of one known column.
column_names = list(device_columns)
print(len(column_names))
print(column_names)
print(len(device_columns['83 IMU LUA magneticX']))

Add time column in ms

In [56]:
# Scale each point's relative timestamp by 1000 and round to an integer
time_column = []
for point in json_data:
    time_column.append(round(point['t'] * 1000))

# Preview the first ten values
print(time_column[:10])

[0, 1024, 2339, 3704, 4631, 5752, 6825, 7897, 8970, 10140]


Fill in all the missing columns with empty values

In [57]:
def _column_number(column_name):
    """Return the leading integer of a '<number> <label>' column name."""
    return int(column_name.split(' ')[0])


column_numbers = list(map(_column_number, device_columns))

# Every column number from 2 to 250 that has no data yet gets an all-None
# column, named by its number only
taken_numbers = set(column_numbers)
for number in range(2, 251):
    if number in taken_numbers:
        continue
    device_columns[str(number)] = [None] * len(time_column)

# Rebuild the dictionary with the columns ordered by their column number
sorted_columns = dict(
    sorted(device_columns.items(), key=lambda item: _column_number(item[0]))
)

device_columns = sorted_columns

In [58]:
# Replace any literal False left in the columns with 0, reporting each hit
for column_values in device_columns.values():
    for position, value in enumerate(column_values):
        if value is not False:
            continue

        print('False!')
        column_values[position] = 0

Combine columns into a csv and export

In [59]:
# Assemble the table: the time column first, followed by every device column
# in its current order
frame_columns = {'1 MILLISEC': time_column}
frame_columns.update(device_columns)
df = pd.DataFrame(frame_columns, dtype='Int64')

# Write the table as space-separated values, without the index and with
# missing values written as 'NaN'
df.to_csv('test_data_22_05.csv', sep=' ', index=False, na_rep='NaN')