## Design an analysis and combine data from many flights
### Persona: Analyst


### Import module to retrieve data instances

In [None]:
import pandas as pd
from opal import kinds
# Rebind `kinds` from the imported module to whatever `load()` returns;
# from here on `kinds` no longer refers to the `opal.kinds` module.
kinds = kinds.load()

# Handle for the "tip_translated" kind; the cells below use it to list,
# inspect and read translated instances.
translated_kind = kinds.lookup("tip_translated")

### Iterate over all translated data and group instance IDs by the dataset they were derived from

In [None]:
%%time
# Group translated instance IDs by the instance they were derived from:
#   datasets[<derived_from id>][<translated_type>] = <translated instance id>
datasets = {}
for t_id in translated_kind.list_instances():
    t_meta = translated_kind.read_instance_metadata(t_id)
    parsed_id = t_meta["derived_from"]
    # setdefault creates the per-source mapping the first time a source is seen.
    translations = datasets.setdefault(parsed_id, {})
    translations[t_meta["translated_type"]] = t_id

print(f"Dataset count: {len(datasets)}")

### Define analytics and plotting functions

Analytics:

- Join engine turbine speed ARINC429 word table with navigation data from 1553 message table
- Define cruise altitude as the altitude during periods in which its smoothed rate of change is small and the aircraft is well above ground level
- Return a new table with turbine speed and cruise altitude

Plot:

- Sanity check
- Cruise altitude samples overlaid on the general altitude trace over time

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Use a larger default font so plot text stays legible at the large figure
# sizes used in this notebook.
plt.rcParams.update({"font.size":18})

# Column names used by the analytics and plotting functions below.
altitude = "NAV-25"  # altitude column of the 1553 "NAV" table
altitude_valid = "NAV-0111"  # "NAV" column used as a row mask (the 'altitude valid' bit)
rpm = "N1_RPM_ACTUAL"  # turbine-speed column of the ARINC429 table
# Rolling-mean window, in samples, used to smooth the altitude rate.
# NOTE(review): 0.04 looks like the 1553 sample period in seconds (giving a
# ~1 s window) -- confirm against the data source.
_SMOOTHING_WINDOW = int(1 / 0.04)
# Largest smoothed altitude rate (altitude units per second) still treated as level flight.
_MAX_LEVEL_RATE = 0.25
# A gap longer than this (nanoseconds) between cruise samples starts a new cruise segment.
_CRUISE_GAP_NS = 10 * 10**9


def _load_cruise_samples(t_1553, t_429):
    """Load one flight and keep only the samples taken at cruise altitude.

    Shared by ``get_rpm_at_cruise_altitude`` and ``plot_cruise_altitude`` so
    both use exactly the same cruise-selection rules.

    Args:
        t_1553: instance ID of the translated 1553 data ("NAV" table is read).
        t_429: instance ID of the translated ARINC429 data
            ("Engine_Fan_RPM_N1_ACTUAL_40" table is read).

    Returns:
        A ``(df_1553, cruise)`` tuple: the "NAV" table restricted to rows
        where the altitude-valid flag is set, and a frame with columns
        ``time``, ``altitude``, ``rpm`` and ``diff_altitude`` holding only
        the cruise samples.
    """
    # get the 1553 and ARINC429 data
    df_1553 = translated_kind.read(t_1553, "NAV")
    df_429 = translated_kind.read(t_429, "Engine_Fan_RPM_N1_ACTUAL_40")

    # filter 1553 by the 'altitude valid' bit
    df_1553 = df_1553[df_1553[altitude_valid]]

    # join 1553 and ARINC429 data by time: interleave both tables, carry the
    # latest RPM reading forward onto the altitude rows, then drop the rows
    # that still lack a value (RPM-only rows and altitude rows before the
    # first RPM reading)
    joined = pd.concat(
        [df_1553[["time", altitude]], df_429[["time", rpm]]]
    ).sort_values("time")
    # .ffill() replaces the deprecated fillna(method="ffill")
    joined[rpm] = joined[rpm].ffill()
    joined = joined.dropna().copy()

    # calculate change in altitude over time (time is in nanoseconds),
    # then smooth out the altitude derivative
    altitude_rate = joined[altitude].diff() / (joined["time"].diff() / 10**9)
    joined["diff_altitude"] = altitude_rate.rolling(_SMOOTHING_WINDOW).mean()

    # select only where difference in altitude is small enough
    joined = joined[joined["diff_altitude"].abs() < _MAX_LEVEL_RATE]

    # ... and the aircraft is sufficiently above what we think is ground
    # level: the higher of the mean altitudes of the first and last 10 level
    # samples, doubled
    start_altitude = joined[altitude].iloc[:10].mean()
    end_altitude = joined[altitude].iloc[-10:].mean()
    min_altitude = max(start_altitude, end_altitude)
    joined = joined[joined[altitude] > min_altitude * 2]

    return df_1553, joined.dropna()


def get_rpm_at_cruise_altitude(t_1553, t_429):
    """Return mean altitude and turbine speed for each cruise segment of a flight.

    Args:
        t_1553: instance ID of the translated 1553 data.
        t_429: instance ID of the translated ARINC429 data.

    Returns:
        DataFrame indexed by ``group`` (cruise-segment number, starting at 0)
        with the columns ``altitude`` and ``rpm`` averaged over the segment.
        Empty when the flight has no cruise samples.
    """
    _, cruise = _load_cruise_samples(t_1553, t_429)

    # group periods of time when it was cruising, take the average;
    # .assign builds a new frame instead of writing into a filtered one
    cruise = cruise.assign(
        group=(cruise["time"].diff() > _CRUISE_GAP_NS).cumsum()
    )
    return cruise.groupby("group")[[altitude, rpm]].mean()


def plot_cruise_altitude(t_1553, t_429):
    """Plot a flight's altitude trace with the detected cruise samples on top.

    Sanity check for the cruise selection: cruise samples are drawn as an
    orange scatter and the altitude of every valid 1553 row as a line, both
    against time.

    Args:
        t_1553: instance ID of the translated 1553 data.
        t_429: instance ID of the translated ARINC429 data.
    """
    df_1553, cruise = _load_cruise_samples(t_1553, t_429)

    ax = cruise.plot(kind="scatter", x="time", y=altitude, c="tab:orange", label="Cruise Altitude [ft]", figsize=(13,10))
    df_1553.plot(x="time", y=altitude, ax=ax, label="General Altitude [ft]")
    ax.set_ylabel("Altitude [ft]")
    ax.set_xlabel("Time [Epoch, ns]")

    
# Sanity-check plot for one flight. Use the first dataset that has BOTH
# translations; taking the very first key blindly fails with a KeyError when
# that dataset lacks one of them.
ch10_name = next(
    (name for name, ds in datasets.items() if "1553" in ds and "arinc429" in ds),
    None,
)
if ch10_name is None:
    # LookupError is the common base of the IndexError/KeyError raised before.
    raise LookupError("no dataset has both '1553' and 'arinc429' translations")
plot_cruise_altitude(datasets[ch10_name]["1553"], datasets[ch10_name]["arinc429"])

### Calculate dataframes of RPM and cruise altitude for _all_ translated data sets, plot

In [None]:
%%time

from dask.distributed import Client, progress
import dask.bag
import dask.dataframe
import os

# Fan the per-flight analysis out over a local dask cluster and gather the
# per-flight tables.
with Client(n_workers=16, processes=True) as client:
    # Behind JupyterHub the dashboard is reached through the hub proxy;
    # elsewhere (no JUPYTERHUB_SERVICE_PREFIX) print the cluster's own link
    # instead of failing with a KeyError before any work has started.
    dashboard_port = client.cluster.dashboard_link.split(':')[-1].split('/')[0]
    service_prefix = os.environ.get("JUPYTERHUB_SERVICE_PREFIX")
    if service_prefix is not None:
        dashboard_link = f"https://opal.metrostar.cloud{service_prefix}proxy/{dashboard_port}/status"
    else:
        dashboard_link = client.cluster.dashboard_link
    print(dashboard_link)

    ds_bag = dask.bag.from_sequence(datasets.values())
    # only flights with both translations can be analysed
    ds_bag = ds_bag.filter(lambda ds: '1553' in ds and 'arinc429' in ds)
    ds_bag = ds_bag.map(lambda ds: get_rpm_at_cruise_altitude(ds["1553"], ds["arinc429"]))
    future = client.compute(ds_bag)
    progress(future, notebook=False)
    frames = future.result()

# The results are plain pandas objects held locally, so the workers can be
# released before plotting.
if frames:
    df = pd.concat(frames)
    df.plot(
        kind="scatter", x=altitude, y=rpm,
        xlabel="Cruise Altitude [ft]", ylabel="Engine Turbine Speed [RPM, percent max.]",
        title="Turbine Speed at Cruise Altitude (all flights)",
        figsize=(15, 10)
    )
else:
    # pd.concat([]) raises ValueError; keep `df` defined and say why there is no plot.
    df = pd.DataFrame(columns=[altitude, rpm])
    print("No dataset has both '1553' and 'arinc429' translations; nothing to plot.")