# Data analysis script

## Input
a single file received from running `grim dump` on the payload

## Output
- FILL THIS IN

In [89]:
import struct
import base64
from collections import namedtuple
from typing import Tuple, List
import pandas as pd
import os


## Data opening and parsing
- open a file to bytes
- bytes to lists of data

In [6]:
SEPARATOR = "********\n"
filename = 'data/all.cap'

In [7]:
def file_to_bytes(filename: str) -> Tuple[bytes, bytes, bytes, bytes, bytes]:
    with open(filename, 'r') as f:
        parts = f.read().split(SEPARATOR)
        if len(parts) != 5:
            print("wrong number of parts. did you remove all extra lines on top and bottom")
        byte_parts = [base64.b64decode(part) for part in parts]
        return tuple(byte_parts)

In [8]:
SlowData = namedtuple('SlowData', ['timestamp', 'humidity', 'temperature', 'grim_voltage', 'grim_current', 'load_cell_voltage', 'load_cell_current', 'bat_voltage', 'bat_current'])
FastData = namedtuple('FastData', ['timestamp', 'accx', 'accy', 'accz', 'gyrox', 'gyroy', 'gyroz', 'pressure'])
ADCData = namedtuple('ADCData', ['timestamp', 'reading'])
# PreIMUData is FastData
PreALTData = namedtuple('PreALTData', ['timestamp', 'pressure', 'temperature'])


In [62]:
slow_units = SlowData('ms', '% humidity', 'degrees C', 'mV', 'mA', 'mV', 'mA', 'mV', 'mA')
fast_units = FastData('ms', 'm/s^2', 'm/s^2', 'm/s^2', 'rad/s',  'rad/s',  'rad/s', 'kPa')
adc_units = ADCData('ms', 'LSB')
pre_altitude_units = PreALTData('ms', 'kPa', 'degrees C')

In [63]:
slow_fmt='IffHHHHHH' # timestamp, humid, temp, (voltage, current) * 3
fast_fmt = 'Ifffffff' # timestamp accxyz, gyro xyz, press
adc_fmt = 'Iiiiiiiiiii' # timestamp + 10 int32s
imu_boost_detect_fmt = fast_fmt
alt_boost_detect_fmt = 'Iff' # timestamp, press, temp

In [22]:
slow_bs, fast_bs, adc_bs, pre_imu_bs, pre_alt_bs = file_to_bytes(filename)

In [64]:
def interpolate_adc(entries: List[List]) -> List[ADCData]:
    l = []
    for i, entry in enumerate(entries[:-1]):
        start_time = entries[i][0]
        period = entries[i+1][0] - entries[i][0]
        per = period / 10.0
        for j, sample in enumerate(entry[1:]): 
            l.append(ADCData(start_time + j * per, sample))
    return l

In [65]:
slow_lists = list(struct.iter_unpack(slow_fmt, slow_bs))
fast_lists = list(struct.iter_unpack(fast_fmt, fast_bs))
adc_lists = list(struct.iter_unpack(adc_fmt, adc_bs))
pre_imu_lists = list(struct.iter_unpack(imu_boost_detect_fmt, pre_imu_bs))
pre_alt_lists = list(struct.iter_unpack(alt_boost_detect_fmt, pre_alt_bs))


In [66]:
def unit_slow_data(l: List) -> SlowData:
    dr = SlowData(*l)
    du = SlowData(dr.timestamp, dr.humidity, dr.temperature, 1.25 * dr.grim_voltage, 1.25 * dr.grim_voltage, 1.25 * dr.load_cell_voltage, 1.25 * dr.load_cell_current, 1.25 * dr.bat_voltage, 1.25 * dr.bat_current)
    return du

In [68]:
slow_data = [unit_slow_data(l) for l in slow_lists]
fast_data = [FastData(*l) for l in fast_lists]
adc_data = interpolate_adc(adc_lists)

# filter out timestamp = 0 (unwritten) entries
# order by timestamp bc the circular buffer may not begin with the earliest entry
pre_imu_data = sorted([FastData(*l) for l in pre_imu_lists if l[0] != 0], key = lambda d : d.timestamp)
pre_alt_data = sorted([PreALTData(*l) for l in pre_alt_lists if l[0] != 0], key = lambda d : d.timestamp)

In [71]:
# calculate average period of each reading thread
print(f"{len(slow_data)} slow entries. {400 / len(slow_data)} second period")
print(f"{len(fast_data)} fast entries. {1000 * 400 / len(fast_data)} ms period")
print(f"{len(adc_data)} adc entries. {1000 * 400 / len(adc_data)} ms period")


400 slow entries. 1.0 second period
113171 fast entries. 3.5344743794788416 ms period
1822500 adc entries. 0.2194787379972565 ms period


In [113]:
slow_df = pd.DataFrame(slow_data)
fast_df = pd.DataFrame(fast_data)
adc_df = pd.DataFrame(adc_data)

pre_imu_df = pd.DataFrame(pre_imu_data)
pre_alt_df = pd.DataFrame(pre_alt_data)

In [110]:
def add_units_to_df(df: pd.DataFrame, units):
    new_names = {}
    for unit, (series_name, _) in zip(units, df.items()):
        new_names[series_name] = f"{series_name} ({unit})"
    df.rename(columns=new_names, inplace=True)

In [114]:
add_units_to_df(slow_df, slow_units)
add_units_to_df(fast_df, fast_units)
add_units_to_df(adc_df, adc_units)
add_units_to_df(pre_alt_df, pre_altitude_units)
add_units_to_df(pre_imu_df, fast_units)

In [115]:
slow_df

Unnamed: 0,timestamp (ms),humidity (% humidity),temperature (degrees C),grim_voltage (mV),grim_current (mA),load_cell_voltage (mV),load_cell_current (mA),bat_voltage (mV),bat_current (mA)
0,2969,36.833984,30.299999,3287.50,3287.50,4762.50,60.00,4788.75,6.25
1,3969,36.265625,30.309999,3288.75,3288.75,4763.75,60.00,4788.75,8.75
2,4969,36.080078,30.309999,3285.00,3285.00,4762.50,60.00,4788.75,8.75
3,5969,36.000977,30.330000,3287.50,3287.50,4766.25,58.75,4790.00,7.50
4,6969,36.664062,30.350000,3287.50,3287.50,4762.50,60.00,4787.50,8.75
...,...,...,...,...,...,...,...,...,...
395,397969,25.417969,38.130001,3286.25,3286.25,4620.00,60.00,4640.00,7.50
396,398969,24.836914,38.119999,3286.25,3286.25,4618.75,57.50,4641.25,6.25
397,399969,25.131836,38.099998,3286.25,3286.25,4615.00,58.75,4638.75,6.25
398,400969,25.748047,38.110001,3287.50,3287.50,4618.75,58.75,4641.25,6.25


In [119]:
if not os.path.exists('out'):
    os.makedirs('out')
slow_df.to_csv('out/slow.csv', index=False)
fast_df.to_csv('out/fast.csv', index=False)
adc_df.to_csv('out/adc.csv', index=False)

pre_imu_df.to_csv('out/pre_imu.csv', index=False)
pre_alt_df.to_csv('out/pre_alt.csv', index=False)