This is a notebook meant to extract GPMF (GoPro Metadata Format) data from GoPro footage. 
The relevant data streams to extract are: 
ACCL: accelerometer data (in 3 dimensions, in m/s^2), 
GYRO: gyroscope data (in 3 dimensions, in radians/sec), and 
MAGN: magnetometer data (in microtesla), which can be used to correct drifting sensor data.

This is done using telemetry_parser from GitHub, which is written in Rust but exposed as a Python module.
We build a wheel so that this module can be installed with 'pip install' for the preprocessing extractor.

In [None]:
import pandas as pd
# Show which pandas version this kernel has loaded.
print(pd.__version__)

In [2]:
import telemetry_parser
import pandas as pd
import numpy as np
from config import config
from pprint import pprint

In [None]:
# Open the recording with telemetry_parser and print its camera and model attributes.
video_path = config.DATA_DIR / 'GH010045.MP4'
tp = telemetry_parser.Parser(str(video_path))
for label, attr_name in (('Camera: ', 'camera'), ('Model: ', 'model')):
    print(label, getattr(tp, attr_name))

# Unformatted alternative, kept for reference: return all telemetry as an array of dicts.
# print('Telemetry', tp.telemetry())

# Request the telemetry with the values formatted with units etc.
print('Telemetry formatted')

readable_telemetry = tp.telemetry(human_readable=True)
single_sample = readable_telemetry[19]     # original note: index unit is amount of seconds

print(type(single_sample))
accl_entry = single_sample['Accelerometer']
print(accl_entry['Data'])
# pprint(single_sample)
# print(tuple(bytes.fromhex(key[2:]).decode('ASCII') for key in single_sample['Default'].keys() if key.startswith('0x')))

# Alternative, kept for reference: return only gyro and accel with timestamps,
# normalized to a single orientation and scaled to deg/s and m/s2.
# print('Normalized IMU', tp.normalized_imu())

We want a function that loops through the parser output one second at a time. It uses a regex to extract the relevant accelerometer information and stores it in a pandas DataFrame. We hardcode the timestamps, as these are not provided.

In [None]:
# Parser
import re

def parse_IMU(file_contents):               # str to list[dict[str, float]]
    """Extract every ``Vector3 { x: ..., y: ..., z: ... }`` triple from a string.

    Parameters
    ----------
    file_contents : str
        Text containing zero or more ``Vector3 { x: .., y: .., z: .. }`` entries.

    Returns
    -------
    list[dict[str, float]]
        One ``{"x": ..., "y": ..., "z": ...}`` dict per matched vector, in
        order of appearance. An empty list when nothing matches.

    Notes
    -----
    Components may be plain decimals (``-9.81``, ``3``) or use scientific
    notation (``1e-07``, ``-2.5E3``). The exponent form has to be accepted:
    otherwise a vector containing such a value does not match at all and is
    dropped without any error, which shortens the sample list.
    Vectors with a non-numeric component (for example ``NaN``) are still not
    matched and are therefore skipped.
    """
    # One signed number: digits with optional fraction, or a bare fraction,
    # followed by an optional exponent.
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    # Pattern matching one complete Vector3 entry, capturing x, y and z.
    pattern = (
        r"Vector3\s*\{\s*x:\s*(" + number + r"),"
        r"\s*y:\s*(" + number + r"),"
        r"\s*z:\s*(" + number + r")\s*\}"
    )

    # Convert every match (a tuple of three strings) into a dict of floats.
    return [
        {"x": float(x), "y": float(y), "z": float(z)}
        for x, y, z in re.findall(pattern, file_contents)
    ]

# IMU data of the single sample selected earlier

raw_accl_text = single_sample['Accelerometer']['Data']
print(type(raw_accl_text))   # debugging aid: type of the raw accelerometer data

parsed_data_accl = parse_IMU(raw_accl_text)
parsed_data_gyro = parse_IMU(single_sample['Gyroscope']['Data'])

print(type(parsed_data_accl))           # debugging aid: type of the parsed result

# ---------------------------------------------------------------------------

# Disabled check, kept for reference: test if all the lists are the same length

# for i in range(len(tp.telemetry(human_readable = True)) - 1):
#     single_sample = tp.telemetry(human_readable = True)[i+1]
#     parsed_data_accl = parse_IMU((single_sample['Accelerometer']['Data']))
#     parsed_data_gyro = parse_IMU((single_sample['Gyroscope']['Data']))     
#     if len(parsed_data_accl) != len(parsed_data_gyro):
#         print(len(parsed_data_accl))
#         print('and')
#         print(len(parsed_data_gyro))


# Building the DataFrame

In [None]:
import numpy as np
import pandas as pd

def parser_to_df(list_of_dicts):
    """Wrap a list of record dicts in a pandas DataFrame.

    Parameters
    ----------
    list_of_dicts : list[dict]
        Records to tabulate; the dict keys become the column names.

    Returns
    -------
    pandas.DataFrame
        The result of passing ``list_of_dicts`` to the DataFrame constructor.
    """
    return pd.DataFrame(list_of_dicts)

# Start building the combined accelerometer + gyroscope table.

kolommen = ['TIMESTAMP', 'ACCL_x', 'ACCL_y', 'ACCL_z', 'GYRO_x', 'GYRO_y', 'GYRO_z']

# Fetch the telemetry list once instead of calling tp.telemetry() again on
# every loop iteration.
telemetry_samples = tp.telemetry(human_readable=True)

length = len(telemetry_samples) - 1  # should be number of seconds in the video
print(length)

# Collect plain row lists and build the DataFrame once at the end; this is
# faster than appending rows to a DataFrame one by one.
all_rows = []
skipped_entries = []  # telemetry indices whose ACCL and GYRO sample counts differ

# Iterate over the telemetry entries, each treated as one second of footage.
# Index 0 is never read: the loop starts at entry 1
# (presumably entry 0 carries no IMU samples -- TODO confirm).
for i in range(length):
    temp = telemetry_samples[i + 1]

    # Parse accelerometer and gyroscope text into lists of {'x', 'y', 'z'} dicts.
    temp_accl = parse_IMU(temp['Accelerometer']['Data'])
    temp_gyro = parse_IMU(temp['Gyroscope']['Data'])

    # Author's note: expected to have 197 elements each, verified for sample videos.
    if len(temp_accl) != len(temp_gyro):
        # The samples cannot be paired row by row; record the entry so the
        # gap is reported instead of disappearing silently.
        skipped_entries.append(i + 1)
        continue

    # Hand-made timestamps: spread the samples evenly over [i, i + 1).
    # endpoint=False keeps i + 1 out of this second, so that timestamp is not
    # emitted twice (once here and once as the start of the next second).
    timestamp_temp = np.linspace(i, i + 1, len(temp_accl), endpoint=False)

    # One output row per paired accelerometer/gyroscope sample.
    for stamp, accl, gyro in zip(timestamp_temp, temp_accl, temp_gyro):
        all_rows.append([
            stamp,
            accl['x'], accl['y'], accl['z'],
            gyro['x'], gyro['y'], gyro['z'],
        ])

if skipped_entries:
    print(
        f"Skipped {len(skipped_entries)} telemetry entries with mismatched "
        f"ACCL/GYRO lengths: {skipped_entries}"
    )

imu_data_df = pd.DataFrame(all_rows, columns=kolommen)
display(imu_data_df)


# Earlier versions

In [89]:
# Run parse_IMU over every telemetry entry except index 0.
# The telemetry list is fetched once up front instead of calling
# tp.telemetry() twice per loop iteration.
telemetry_samples = tp.telemetry(human_readable=True)
for i in range(len(telemetry_samples) - 1):
    single_sample = telemetry_samples[i + 1]
    
    # Parse accelerometer and gyroscope data
    parsed_data_accl = parse_IMU(single_sample['Accelerometer']['Data'])
    parsed_data_gyro = parse_IMU(single_sample['Gyroscope']['Data'])
    
    # # Check if lengths are the same (author's note: all checked to be true)
    # if len(parsed_data_accl) != len(parsed_data_gyro):
    #     print(f"Data lengths do not match at index {i + 1}:")
    #     print(f"Accelerometer length: {len(parsed_data_accl)}")
    #     print(f"Gyroscope length: {len(parsed_data_gyro)}")


In [83]:
# parser 2

# import re

# def parse_IMU(file_contents):  # str -> list[dict[str, float]]
#     data = []
    
#     # Remove the square brackets at the start and end of the string
#     file_contents = file_contents.strip()[1:-1].strip()
    
#     # Regex pattern to match the Vector3 format
#     pattern = r"Vector3\s*\{\s*x:\s*(-?\d+\.?\d*),\s*y:\s*(-?\d+\.?\d*),\s*z:\s*(-?\d+\.?\d*)\s*\}"
    
#     # Find all matches in the string
#     matches = re.findall(pattern, file_contents)
    
#     # Convert matches to a list of dictionaries
#     for match in matches:
#         x, y, z = map(float, match)  # Convert values to floats
#         data.append({"x": x, "y": y, "z": z})
    
#     return data


In [None]:
# def parser_to_df(list_of_dicts):
#     # print(type(list_of_dicts))
#     df_temp = pd.DataFrame(list_of_dicts)                         # build 4x1 dataframe
    
#     # print(type(df_temp))
    
#     # print(df_temp.head())
#     return df_temp 

# list_sampledict = [{'x': 1, 'y': 2, 'z': 3}, {'x': 4, 'y': 5, 'z': 6}]
# parser_to_df(list_sampledict)

In [None]:
# kolommen = ['TIMESTAMP', 'ACCL_x', 'ACCL_y', 'ACCL_z', 'GYRO_x', 'GYRO_y', 'GYRO_z']
# imu_data_df = pd.DataFrame(columns=kolommen)
# display(imu_data_df)

# length = len(tp.telemetry(human_readable = True)) - 1    # should be number of seconds in the video
# print(length)

# for i in range(length):
#     time_temp = i
#     temp = tp.telemetry(human_readable = True)[i+1]
    
#     temp_accl = parse_IMU(temp['Accelerometer']['Data'])                   #lists of dicts
#     temp_gyro = parse_IMU(temp['Gyroscope']['Data'])

#     print(type(temp_accl))
    
#     temp_parsed_accl = parser_to_df(temp_accl)
#     temp_parsed_gyro = parser_to_df(temp_gyro)              # temporary dataframes to append to the big dataframe

#     print(type(temp_parsed_accl))
    
#     # print(len(temp_parsed_accl))
#     # print('and')
#     # print(len(temp_parsed_gyro))
    
#     if len(temp_parsed_accl) == len(temp_parsed_gyro):          # 197 elements
#         timestamp_temp = np.linspace(time_temp,time_temp + 1,len(temp_parsed_accl))
#         for j in range(len(temp_parsed_accl)):  
#             accl_x, accl_y, accl_z = temp_parsed_accl[j]['x'], temp_parsed_accl[j]['y'], temp_parsed_accl[j]['z']
#             gyro_x, gyro_y, gyro_z = temp_parsed_gyro[j]['x'], temp_parsed_gyro[j]['y'], temp_parsed_gyro[j]['z']
        
#         # new row for each data point
#             new_row = pd.DataFrame([[timestamp_temp[j], accl_x, accl_y, accl_z, gyro_x, gyro_y, gyro_z]], columns=kolommen)
            
#             imu_data_df = pd.concat([imu_data_df, new_row], ignore_index=True)  # # Append the new row to the imu_data_df DataFrame

# # total number of rows in the imu_data_df should be 197x(number of seconds in video)
# display(imu_data_df)

In [None]:
# attempt 2 (CHATGPT aided)

import pandas as pd
import numpy as np

# Define the column names
kolommen = ['TIMESTAMP', 'ACCL_x', 'ACCL_y', 'ACCL_z', 'GYRO_x', 'GYRO_y', 'GYRO_z']

# Fetch the telemetry once rather than on every loop iteration.
telemetry_samples = tp.telemetry(human_readable=True)

# Get the length of the telemetry data
length = len(telemetry_samples) - 1    # should be number of seconds in the video
print(length)

# Initialize an empty list to hold rows of data
rows = []

# Iterate through the telemetry data (index 0 is never read)
for i in range(length):
    time_temp = i
    temp = telemetry_samples[i + 1]

    # The human-readable 'Data' fields are text, so the vectors have to be
    # extracted with parse_IMU before a DataFrame can be built from them.
    temp_parsed_accl = parser_to_df(parse_IMU(temp['Accelerometer']['Data']))
    temp_parsed_gyro = parser_to_df(parse_IMU(temp['Gyroscope']['Data']))

    # Only pair the samples when both sensors delivered the same number of them
    if len(temp_parsed_accl) == len(temp_parsed_gyro):  # author's note: 197 elements
        # Spread the timestamps evenly over [time_temp, time_temp + 1) so that
        # consecutive seconds neither overlap nor share a timestamp.
        timestamp_temp = np.linspace(
            time_temp, time_temp + 1, len(temp_parsed_accl), endpoint=False
        )

        # Walk the two frames row by row (DataFrame[j] would look up a column).
        paired_rows = zip(
            timestamp_temp,
            temp_parsed_accl.itertuples(index=False),
            temp_parsed_gyro.itertuples(index=False),
        )
        for stamp, accl, gyro in paired_rows:
            # Append each row of data to the rows list
            rows.append([stamp, accl.x, accl.y, accl.z, gyro.x, gyro.y, gyro.z])

# After the loop, create the DataFrame from the list of rows
imu_data_df = pd.DataFrame(rows, columns=kolommen)

# Print the resulting DataFrame
print(imu_data_df)


In [None]:
# ast.dump(ast.parse(repr({'a': 'b', 'c': 1}), mode='eval'))

In [None]:
# attempt 3 (CHATGPT aided)

import pandas as pd
import numpy as np

# Column layout of the output table: a timestamp, then the three axes of
# the accelerometer and of the gyroscope.
kolommen = [
    'TIMESTAMP',
    'ACCL_x', 'ACCL_y', 'ACCL_z',
    'GYRO_x', 'GYRO_y', 'GYRO_z',
]

# Placeholder frame that carries only the column headers.
imu_data_df = pd.DataFrame(columns=kolommen)
print(imu_data_df)

# Number of telemetry entries minus one
telemetry_entries = tp.telemetry(human_readable=True)
length = len(telemetry_entries) - 1    # should be number of seconds in the video
print(length)

# Accumulator for the output rows.
rows = []

# Function to parse Vector3 data
def parser_to_df(list_of_dicts):
    """Convert a sequence of 3-axis samples into a DataFrame with x/y/z columns.

    Parameters
    ----------
    list_of_dicts : iterable
        Samples to tabulate. Each element may be either a dict with ``'x'``,
        ``'y'`` and ``'z'`` keys or an object exposing ``.x``, ``.y`` and
        ``.z`` attributes. Accepting dicts keeps this definition usable by
        code written against the earlier ``parser_to_df`` in this notebook,
        which it replaces once this cell has run.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns ``x``, ``y``, ``z``. An empty
        DataFrame is returned, after printing the error, when an element has
        neither the keys nor the attributes, or when the input is not
        iterable.
    """
    parsed_data = []
    try:
        for vector in list_of_dicts:
            if isinstance(vector, dict):
                # Record already in dict form: pick the three components by key.
                parsed_data.append({'x': vector['x'], 'y': vector['y'], 'z': vector['z']})
            else:
                # Vector-like object: read the components as attributes.
                parsed_data.append({'x': vector.x, 'y': vector.y, 'z': vector.z})
    except (AttributeError, KeyError, TypeError) as e:
        # Best effort: report malformed input and fall back to an empty frame.
        print("Error in parser_to_df:", e)
        return pd.DataFrame()

    # Convert to DataFrame
    return pd.DataFrame(parsed_data)

# Iterate through the telemetry data (index 0 is never read).
# The telemetry list is fetched once rather than on every loop iteration.
telemetry_samples = tp.telemetry(human_readable=True)
for i in range(length):
    time_temp = i
    temp = telemetry_samples[i + 1]
    temp_accl = temp['Accelerometer']['Data']
    temp_gyro = temp['Gyroscope']['Data']
    
    # Print the accelerometer and gyroscope data for debugging
    print("temp_accl:", temp_accl)
    print("temp_gyro:", temp_gyro)

    # The 'Data' fields are text, so extract the vectors with parse_IMU and
    # build the frames directly from the resulting list of dicts.
    temp_parsed_accl = pd.DataFrame(parse_IMU(temp_accl))
    temp_parsed_gyro = pd.DataFrame(parse_IMU(temp_gyro))
    
    # Only pair the samples when both sensors delivered the same number of them
    if len(temp_parsed_accl) == len(temp_parsed_gyro):  # author's note: 197 elements
        # Spread the timestamps evenly over [time_temp, time_temp + 1) so that
        # consecutive seconds neither overlap nor share a timestamp.
        timestamp_temp = np.linspace(
            time_temp, time_temp + 1, len(temp_parsed_accl), endpoint=False
        )
        
        # Walk the accelerometer and gyroscope frames row by row, in step
        paired_rows = zip(
            timestamp_temp,
            temp_parsed_accl.itertuples(index=False),
            temp_parsed_gyro.itertuples(index=False),
        )
        for stamp, accl, gyro in paired_rows:
            # Append each row of data to the rows list
            rows.append([stamp, accl.x, accl.y, accl.z, gyro.x, gyro.y, gyro.z])

# After the loop, create the DataFrame from the list of rows
imu_data_df = pd.DataFrame(rows, columns=kolommen)

# Show the resulting DataFrame
display(imu_data_df)


In [None]:
# bytes.fromhex('4d494e46').decode('ASCII')

# 'ACCL'.encode('ASCII').hex()