## Import Python Module to Create/Upload New Data Instances
### (Persona: Data Engineer)

Use the Metaflow-based data model.

In [None]:
import s3fs
import os
import json
from opal.weave.create_index import create_index_from_s3
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np

In [None]:
# Filesystem handle used for all S3 access in this notebook. The endpoint URL
# comes from the S3_ENDPOINT environment variable (KeyError if it is not set).
endpoint_url = os.environ['S3_ENDPOINT']
s3 = s3fs.S3FileSystem(client_kwargs={'endpoint_url': endpoint_url})

# What data do I have?

### Create an index of my data store using Weave.

<img src="resources/weave to minio.drawio.png">

In [None]:
# Build the Weave index for the full demo bucket and display it.
full_bucket_name = 'basket-data-with-arinc'
index = create_index_from_s3(full_bucket_name)
index

In [None]:
# How many baskets of each type does the index contain?
index['basket_type'].value_counts()

# Where did my data come from?

### Provenance tracking example using Metaflow and TIP.

In [None]:
# Index the provenance demo bucket as it stands before the parse flow below is run.
provenance_bucket = 'provenance-demo'
small_index = create_index_from_s3(provenance_bucket)
small_index

In [None]:
# Run the ch10 parse flow (`run` subcommand) against the 'provenance-demo' bucket.
# NOTE(review): `--n 1` presumably limits the run to a single ch10 file -- confirm
# against the flow's parameter definitions.
!python /home/jovyan/opal/data-engineering-resources/NASA_ch10_flows/parse_nasa_ch10s_flow.py run --bucket_name provenance-demo --n 1

In [None]:
# Re-index the provenance demo bucket to see what the parse flow added.
provenance_bucket = 'provenance-demo'
small_index = create_index_from_s3(provenance_bucket)
small_index

In [None]:
# Run the ch10 translate flow twice against the 'provenance-demo' bucket:
# once with the flow's default data type, once explicitly for ARINC429.
# NOTE(review): the default data type is presumably MILSTD1553 (the reset cell
# later removes a 'ch10_translated_MILSTD1553' basket) -- confirm in the flow.
!python /home/jovyan/opal/data-engineering-resources/NASA_ch10_flows/translate_nasa_ch10s_flow.py run --bucket_name provenance-demo --n 1
!python /home/jovyan/opal/data-engineering-resources/NASA_ch10_flows/translate_nasa_ch10s_flow.py run --bucket_name provenance-demo --data_type ARINC429 --n 1

In [None]:
# Re-index the provenance demo bucket to see what the translate flows added.
provenance_bucket = 'provenance-demo'
small_index = create_index_from_s3(provenance_bucket)
small_index

In [None]:
# Read the basket_metadata.json stored alongside the first ARINC429-translated
# basket listed in small_index, and display its contents.
is_arinc = small_index['basket_type'] == 'ch10_translated_ARINC429'
arinc_basket = small_index[is_arinc]
arinc_path = arinc_basket['address'].iloc[0]
arinc_metadata_path = os.path.join(arinc_path, 'basket_metadata.json')
with s3.open(arinc_metadata_path, 'rb') as metadata_file:
    arinc_metadata = json.load(metadata_file)
arinc_metadata

### A look back at the complete index.

In [None]:
# Redisplay the full index built from the 'basket-data-with-arinc' bucket.
index

<img src="./resources/ch10_flow.png" width="600">

In [None]:
# Pick one MILSTD1553-translated basket at random as the starting point for the
# lineage walk. A fixed random_state keeps the pick -- and therefore the parent
# lookups that follow -- identical on every Restart & Run All.
is_translated_1553 = index.basket_type == 'ch10_translated_MILSTD1553'
my_translated_data = index[is_translated_1553].sample(random_state=42)
my_translated_data

In [None]:
# Index rows whose uuid appears in the sampled basket's parent_uuids entry.
parent_uuid_list = my_translated_data['parent_uuids'].iloc[0]
my_parents = index[index['uuid'].isin(parent_uuid_list)]
my_parents

In [None]:
# Step one generation further back: from the parsed-ch10 parent to the basket(s)
# it was produced from.
# Select the parsed parent by basket type rather than by row position: the row
# order of my_parents follows the index and is not guaranteed, and a positional
# lookup fails with a bare IndexError when there are fewer than two parents.
parsed_parents = my_parents[my_parents.basket_type == 'ch10_parsed']
if parsed_parents.empty:
    raise LookupError(
        "No 'ch10_parsed' basket found among the parents of the selected "
        "translated basket; cannot trace back to the original ch10."
    )
parsed_data = parsed_parents.iloc[0]
original_ch10 = index[index.uuid.isin(parsed_data.parent_uuids)]
original_ch10

# How do I access my data?

### View a portion of the data contained in a ch10.

<img src="resources/weave to pandas.drawio.png">

In [None]:
# All baskets in the index that carry this label.
target_label = '652200104211052'
my_data = index[index['label'] == target_label]
my_data

In [None]:
# Load the NAV parquet file from the first MILSTD1553-translated basket in
# my_data, reading it through the s3 filesystem handle.
translated_1553_rows = my_data[my_data['basket_type'] == 'ch10_translated_MILSTD1553']
path_1553 = os.path.join(
    translated_1553_rows['address'].iloc[0],
    'parsed_data_translated',
    'NAV.parquet',
    '00.parquet',
)
df_1553 = pd.read_parquet(path_1553, filesystem=s3)
df_1553

In [None]:
# Drop invalid lat/long/altitude measurements: keep only rows where both
# flag columns are set.
valid_mask = df_1553["NAV-0110"] & df_1553["NAV-0111"]
where_valid = df_1553[valid_mask]
plt.rcParams["font.size"] = 18

# Scatter plot of position, coloured by the NAV-25 column.
fig, ax = plt.subplots()
ax = where_valid.plot.scatter(
    x="NAV-23",
    y="NAV-21",
    c="NAV-25",
    s=1,
    cmap="viridis",
    title="Aircraft Position (1553)",
    figsize=(15, 10),
    ax=ax,
)

ax.set_aspect("equal")
ax.set_xlabel("Longitude [deg]")
ax.set_ylabel("Latitude [deg]")

# The second axes on the figure is the colorbar that pandas adds for `c`.
colorbar_ax = fig.get_axes()[1]
colorbar_ax.set_ylabel("Altitude [ft]")
plt.show()

### Reset TIP example.

In [None]:
# Reset the TIP example: delete the derived baskets recorded in small_index
# for the 'tail_35_pilot_slider_1' label.
index_1 = small_index[small_index.label == 'tail_35_pilot_slider_1']

# Derived basket types to remove, in the order parse -> translate.
derived_basket_types = (
    'ch10_parsed',
    'ch10_translated_ARINC429',
    'ch10_translated_MILSTD1553',
)

for derived_type in derived_basket_types:
    matching_addresses = index_1[index_1.basket_type == derived_type].address
    if matching_addresses.empty:
        # Nothing of this type is indexed (e.g. the flow was never run or the
        # cell is being re-run after a refresh) -- skip instead of raising.
        continue
    # Only the first matching basket is removed.
    basket_path = matching_addresses.iloc[0]
    # small_index may be stale, so the path can already be gone from S3.
    if s3.exists(basket_path):
        s3.rm(basket_path, recursive=True)

In [None]:
# Re-index the provenance demo bucket to confirm what remains after the reset.
provenance_bucket = 'provenance-demo'
small_index = create_index_from_s3(provenance_bucket)
small_index