# Data analysis script

## Input
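A single capture file produced by running `grim dump` on the payload. It must contain exactly five base64 sections separated by `********` lines, with any extra lines above and below them removed.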

## Output
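- Five CSV files written to the output directory: `slow.csv`, `fast.csv`, `adc.csv`, `pre_imu.csv` and `pre_alt.csv`. Column headers carry their units, and timestamps are relative to the first entry of the pre-boost IMU buffer.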

In [19]:
import struct
import base64
from collections import namedtuple
from typing import Tuple, List
import pandas as pd
import os


## Data opening and parsing
- Read the capture file and base64-decode each section into bytes
- Unpack each section's bytes into lists of records

In [20]:
# Line that delimits the base64 sections inside the capture file.
SEPARATOR = "********\n"
# Capture file to analyse.
filename = 'data/lall2.cap'
# Directory the CSV files are written to. The identifier's spelling is kept
# as-is because the export cell refers to it by this exact name.
ouptut_dir = 'out2'

In [21]:
def file_to_bytes(filename: str) -> Tuple[bytes, bytes, bytes, bytes, bytes]:
    """Read a capture file and base64-decode each of its sections.

    The file is read as text and split on ``SEPARATOR``; every resulting
    piece is passed through ``base64.b64decode``.  A warning is printed when
    the split does not produce exactly five pieces, but decoding still goes
    ahead and the returned tuple then holds however many pieces were found.
    """
    with open(filename, 'r') as capture:
        text = capture.read()

    sections = text.split(SEPARATOR)
    if len(sections) != 5:
        print("wrong number of parts. did you remove all extra lines on top and bottom")
    return tuple(map(base64.b64decode, sections))

In [22]:
# Record layouts for each data stream.  Records are built positionally from
# unpacked tuples, so the field order here is significant.
SlowData = namedtuple(
    'SlowData',
    'timestamp humidity temperature '
    'grim_voltage grim_current '
    'load_cell_voltage load_cell_current '
    'bat_voltage bat_current',
)
FastData = namedtuple(
    'FastData',
    'timestamp accx accy accz gyrox gyroy gyroz pressure',
)
ADCData = namedtuple('ADCData', 'timestamp reading')
# PreIMUData is FastData
PreALTData = namedtuple('PreALTData', 'timestamp pressure temperature')


In [23]:
# Unit label for every field of each record type, keyed by field name.
slow_units = SlowData(
    timestamp='ms',
    humidity='% humidity',
    temperature='degrees C',
    grim_voltage='mV',
    grim_current='mA',
    load_cell_voltage='mV',
    load_cell_current='mA',
    bat_voltage='mV',
    bat_current='mA',
)
fast_units = FastData(
    timestamp='ms',
    accx='m/s^2',
    accy='m/s^2',
    accz='m/s^2',
    gyrox='rad/s',
    gyroy='rad/s',
    gyroz='rad/s',
    pressure='kPa',
)
adc_units = ADCData(timestamp='ms', reading='LSB')
pre_altitude_units = PreALTData(
    timestamp='ms',
    pressure='kPa',
    temperature='degrees C',
)

In [24]:
# struct layout of a single record in each section of the capture.
# No byte-order prefix is given, so struct uses native order and alignment.
slow_fmt = 'I' + 'f' * 2 + 'H' * 6  # timestamp, humid, temp, (voltage, current) * 3
fast_fmt = 'I' + 'f' * 7  # timestamp, acc xyz, gyro xyz, press
adc_fmt = 'I' + 'i' * 10  # timestamp + 10 int32s
imu_boost_detect_fmt = fast_fmt  # same layout as the fast stream
alt_boost_detect_fmt = 'I' + 'f' * 2  # timestamp, press, temp

In [25]:
# Decode the capture into its five raw byte sections, in file order:
# slow, fast, ADC, then the IMU and altimeter boost-detect buffers.
slow_bs, fast_bs, adc_bs, pre_imu_bs, pre_alt_bs = file_to_bytes(filename)

In [26]:
def interpolate_adc(entries: List[List]) -> List[ADCData]:
    """Give every ADC sample its own timestamp by linear interpolation.

    Each entry is ``(timestamp, sample, sample, ...)``.  The gap between an
    entry's timestamp and the next entry's timestamp is cut into ten equal
    steps, and the k-th sample of the entry is stamped
    ``timestamp + k * step``.  The last entry has no successor to measure a
    gap against, so its samples are not emitted.
    """
    samples = []
    for current, following in zip(entries, entries[1:]):
        t0 = current[0]
        step = (following[0] - t0) / 10.0
        samples.extend(
            ADCData(t0 + k * step, reading)
            for k, reading in enumerate(current[1:])
        )
    return samples

In [27]:
# Unpack every section into a list of fixed-size record tuples, one tuple
# per repetition of the section's struct layout.
slow_lists = list(struct.Struct(slow_fmt).iter_unpack(slow_bs))
fast_lists = list(struct.Struct(fast_fmt).iter_unpack(fast_bs))
adc_lists = list(struct.Struct(adc_fmt).iter_unpack(adc_bs))
pre_imu_lists = list(struct.Struct(imu_boost_detect_fmt).iter_unpack(pre_imu_bs))
pre_alt_lists = list(struct.Struct(alt_boost_detect_fmt).iter_unpack(pre_alt_bs))


In [28]:
def unit_slow_data(l: List) -> SlowData:
    """Convert one raw slow-stream record into a record with scaled readings.

    Args:
        l: The unpacked values of one slow record, in ``SlowData`` field order.

    Returns:
        A ``SlowData`` whose timestamp, humidity and temperature are passed
        through unchanged and whose six voltage/current fields are each
        multiplied by 1.25.

    Each scaled field is derived from its own raw field.  Previously the
    ``grim_current`` slot was filled from ``grim_voltage``, which discarded
    the current reading and duplicated the voltage.
    """
    # NOTE(review): 1.25 is presumably the sensor's LSB size that turns raw
    # counts into the mV / mA listed in slow_units -- confirm for current.
    scale = 1.25
    raw = SlowData(*l)
    return SlowData(
        raw.timestamp,
        raw.humidity,
        raw.temperature,
        scale * raw.grim_voltage,
        scale * raw.grim_current,
        scale * raw.load_cell_voltage,
        scale * raw.load_cell_current,
        scale * raw.bat_voltage,
        scale * raw.bat_current,
    )

In [29]:
# Turn the raw tuples into typed records.
slow_data = list(map(unit_slow_data, slow_lists))
fast_data = [FastData._make(row) for row in fast_lists]
adc_data = interpolate_adc(adc_lists)

# The boost-detect buffers are circular: slots whose timestamp is 0 were
# never written and are dropped, and the remainder is sorted by timestamp
# because the buffer may not start at its earliest entry.
pre_imu_data = [FastData._make(row) for row in pre_imu_lists if row[0] != 0]
pre_imu_data.sort(key=lambda record: record.timestamp)
pre_alt_data = [PreALTData._make(row) for row in pre_alt_lists if row[0] != 0]
pre_alt_data.sort(key=lambda record: record.timestamp)

In [30]:
# Report the entry count and mean period of each reader thread.
# NOTE(review): the 400 is presumably the capture length in seconds -- confirm.
slow_count = len(slow_data)
fast_count = len(fast_data)
adc_count = len(adc_data)
print(f"{slow_count} slow entries. {400 / slow_count} second period")
print(f"{fast_count} fast entries. {1000 * 400 / fast_count} ms period")
print(f"{adc_count} adc entries. {1000 * 400 / adc_count} ms period")


20 slow entries. 20.0 second period
5432 fast entries. 73.63770250368188 ms period
87640 adc entries. 4.564125969876769 ms period


In [37]:
# One DataFrame per stream, built directly from the record lists.
slow_df, fast_df, adc_df = (
    pd.DataFrame(records) for records in (slow_data, fast_data, adc_data)
)

pre_imu_df, pre_alt_df = (
    pd.DataFrame(records) for records in (pre_imu_data, pre_alt_data)
)

In [32]:
def add_units_to_df(df: pd.DataFrame, units):
    """Append a unit suffix to the column labels of ``df``, in place.

    Columns and ``units`` are paired by position, and each paired column
    ``name`` is relabelled ``"name (unit)"``.  Pairing stops at the shorter
    of the two, so surplus columns keep their labels.  Returns nothing.
    """
    labelled = {
        column: f"{column} ({unit})"
        for unit, column in zip(units, df.columns)
    }
    df.rename(columns=labelled, inplace=True)

In [41]:
# Rebase every stream so that T=0 is the first entry of the boost accel
# (pre-IMU) buffer.  The origin is read once, before any frame is shifted.
start = pre_imu_df['timestamp'][0]

for frame in (slow_df, fast_df, adc_df, pre_imu_df, pre_alt_df):
    frame['timestamp'] = frame['timestamp'] - start

In [33]:
# Suffix each frame's column labels with the units of its record type.
# The pre-IMU frame shares the fast stream's layout, hence fast_units.
for frame, frame_units in (
    (slow_df, slow_units),
    (fast_df, fast_units),
    (adc_df, adc_units),
    (pre_alt_df, pre_altitude_units),
    (pre_imu_df, fast_units),
):
    add_units_to_df(frame, frame_units)

In [44]:
# Bare expression: the notebook renders the pre-IMU frame as the cell output.
pre_imu_df

Unnamed: 0,timestamp,accx,accy,accz,gyrox,gyroy,gyroz,pressure
0,97541,3.455000,10.088000,5.857000,2.035,1.891,4.161,0.0
1,97543,2.172000,8.752000,6.752000,2.243,1.957,4.381,0.0
2,97545,0.205000,7.250000,7.800000,2.376,2.073,4.619,0.0
3,97549,-2.842000,4.187000,11.064000,2.665,2.196,4.819,0.0
4,97551,-4.699000,1.646000,12.346000,2.752,2.273,4.900,0.0
...,...,...,...,...,...,...,...,...
115,97820,-0.612000,18.136999,-29.837999,4.163,-6.337,-5.698,0.0
116,97822,6.570000,25.172001,-38.973999,4.370,-7.657,-5.716,0.0
117,97824,11.901000,33.834000,-43.376999,3.795,-4.285,-5.088,0.0
118,97828,21.454000,44.237999,-46.410999,3.387,-2.820,-3.679,0.0


In [35]:
# Create the output directory when it is missing.  exist_ok=True replaces the
# separate existence check, which could race with another process creating
# the directory between the check and the makedirs call.
os.makedirs(ouptut_dir, exist_ok=True)

# Write one CSV per stream; the row index is omitted from the files.
slow_df.to_csv(os.path.join(ouptut_dir, 'slow.csv'), index=False)
fast_df.to_csv(os.path.join(ouptut_dir, 'fast.csv'), index=False)
adc_df.to_csv(os.path.join(ouptut_dir, 'adc.csv'), index=False)

pre_imu_df.to_csv(os.path.join(ouptut_dir, 'pre_imu.csv'), index=False)
pre_alt_df.to_csv(os.path.join(ouptut_dir, 'pre_alt.csv'), index=False)