# EnvSensor_explorer

This notebook serves as an introduction to working with the environmental sensor data. It will open one file and make a few plots.

The environmental sensor file structure is based on NASA's Earth Science Data Systems (ESDS) ASCII file format guidelines.
Information on the ESDS guidelines is available at     
https://www.earthdata.nasa.gov/esdis/esco/standards-and-practices/ascii-file-format-guidelines-for-earth-science-data   


File organization is expected to follow this pattern:

pilot_data_root           
&emsp;environment    
&emsp;&emsp;manifest.tsv    
&emsp;&emsp;environmental_sensor    
&emsp;&emsp;&emsp;leelab_anura    
&emsp;&emsp;&emsp;&emsp;0001    
&emsp;&emsp;&emsp;&emsp;&emsp;0001_ENV.csv     
&emsp;&emsp;&emsp;&emsp;0002    
&emsp;&emsp;&emsp;&emsp;&emsp;0002_ENV.csv   
&emsp;&emsp;&emsp;&emsp;... etc.

In [None]:
from datetime import datetime, timedelta

import matplotlib.axes as plt_axes  # to catch the class
import matplotlib.axis as plt_axis  # to catch the class
import matplotlib.dates as mdates  # to use ConciseDateFormatter
import matplotlib.pyplot as plt  # need to make the plots
import pandas as pd

## custom path -- change to match your file structure

In [None]:
# Root folder of the dataset; the cells below build the manifest path and the
# sensor-file path by appending to this string.
data_root = "/Volumes/data/datasets/AIREADI/YEAR2"  # change this to your own path

# Read the manifest

In [None]:
# The manifest lives at a fixed location underneath the dataset root.
manifest_path = f"{data_root}/environment/manifest.tsv"
print(manifest_path)

In [None]:
# The manifest is a tab-separated file, hence sep="\t".
df = pd.read_csv(manifest_path, sep="\t")
# List the manifest's column names.
print(df.columns)

In [None]:
# Year-2 manifests name the identifier column participant_id; year-3 manifests
# renamed it to person_id so it matches clinical_data and table joins are easier.
# Use whichever of the two this manifest provides.
id_field = "participant_id" if "participant_id" in df.columns else "person_id"

print(f"In this dataset, the id_field is: {id_field}")

In [None]:
# Count the distinct values in the identifier column chosen above.
df[id_field].nunique()  # number of unique participants

In [None]:
# Preview the first rows of the manifest.
df.head()

In [None]:
# optionally view only a few columns
key_columns = [id_field, "sensor_location", "sensor_sampling_extent_in_days"]

# Show the first two manifest rows, restricted to those columns.
df.loc[:, key_columns].head(2)

# Select a set of data to explore

ESDS format contains all data in a single file with a defined header and body   
 * header
    - all lines begin with #
    - the first line tells us how many header lines there are
 * body
    - first line of the body lists the field names
    - all subsequent data is in observation rows
    
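Note that the file path read from the manifest (column `env_sensor_filepath`) already includes the file extension, so nothing needs to be appended before opening it.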

In [None]:
pid = 1001  # select a participant ID

# Filtering keeps the manifest's original row labels, so a label lookup such as
# `[0]` only works when the matching row happens to be labelled 0. Pick the
# first match by position instead.
pid_paths = df.loc[df[id_field] == pid, "env_sensor_filepath"]
if pid_paths.empty:
    # Fail with a readable message rather than an opaque indexing error.
    raise ValueError(f"{id_field} {pid} was not found in the manifest")
pid_csv = pid_paths.iloc[0]
print(f"{pid} full path: {pid_csv}")

In [None]:
# Join the dataset root and the manifest's file path by plain string
# concatenation; no separator is inserted between the two parts.
env_path = data_root + pid_csv
print(env_path)

In [None]:
# All files currently share the same header length, so this lookup is not
# strictly required, but reading the count from the file future-proofs the notebook.
with open(env_path, "r") as env_file:
    first_line = env_file.readline()  # expect "# header_lines: 45"

# The header-line count is whatever follows the last colon on the first line.
num_header_lines = int(first_line.rpartition(":")[2])
print(num_header_lines)

## read and explore the data

This example does not plot all of the data columns. Instead, it shows examples of 
 - plotting single columns. &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Examples: PM2.5, VOC
 - plotting columns together. &nbsp;Example: all of the spectral channels, color coded by wavelength

See the full list of EnvSensor columns for other data to explore.

In [None]:
# Load the observation rows: skip the header lines counted above and parse the
# "ts" column as timestamps using its fixed format.
df = pd.read_csv(
    env_path,
    skiprows=num_header_lines,
    parse_dates=["ts"],
    date_format="%Y-%m-%d %H:%M:%S",
)
print(df.shape)

In [None]:
# List the column names read from the sensor file.
print(f"EnvSensor columns: {df.columns}")

In [None]:
# Summary statistics for the sensor data columns.
df.describe()

## plot non-spectral data

In [None]:
# Controls the y-axis of the PM2.5 / VOC plot in the next cell: False pins each
# panel to a fixed range, True leaves the y-limits unset so they follow the data.
yaxis_adjusts_to_data = False

In [None]:
# Two stacked panels in one figure: PM2.5 on top, VOC underneath.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8, 6))
fig.suptitle("Snapshot - plot 2 of the data columns\n\n")
pm_ax, voc_ax = ax[0], ax[1]

# --- upper panel: PM2.5 ---
df.plot.line(x="ts", y="pm2.5", ax=pm_ax, legend=False)
if not yaxis_adjusts_to_data:
    pm_ax.set_title("PM2.5 - yaxis [0, 7000]")
    pm_ax.set_ylim(0, 7000)
else:
    pm_ax.set_title("PM2.5 - yaxis scales with data")
# Only the lower panel carries time labels, so hide the upper panel's
# x tick labels and its x-axis label.
pm_ax.tick_params(labelbottom=False)
pm_ax.get_xaxis().get_label().set_visible(False)

# --- lower panel: VOC ---
df.plot.line(x="ts", y="voc", ax=voc_ax, legend=False)
if not yaxis_adjusts_to_data:
    voc_ax.set_title("VOC - yaxis [0, 505]")
    voc_ax.set_ylim(0, 505)
else:
    voc_ax.set_title("VOC - yaxis scales with data")

# The pyplot calls below act on the current axes.
plt.tick_params(
    axis="x",  # only the x-axis is affected
    which="both",  # major and minor ticks alike
    bottom=True,  # draw ticks along the bottom edge
    top=False,  # no ticks along the top edge
    labelbottom=True,  # show tick labels along the bottom edge
)

plt.xlabel("timestamp")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()

## plot spectral data

In [None]:
def create_spectral_plot(
    df, plot_ht=3, ax=None, add_legend=True, yaxis_adjusts_to_data=False, verbose=False
):
    """Plot the spectral light channels against time on a single axes.

    One line is drawn per channel column, coloured to suggest the channel's
    wavelength and labelled with that wavelength in the legend.

    Parameters
    ----------
    df : pandas.DataFrame
        Sensor data. Must contain a "ts" column (used as the x values) and the
        channel columns lch0-lch3 and lch6-lch11.
    plot_ht : float, default 3
        Figure height in inches. Only used when ``ax`` is None and a new
        10-inch-wide figure is created.
    ax : matplotlib.axes.Axes, optional
        Existing axes to draw on. When None, a new figure and axes are created.
    add_legend : bool, default True
        When falsy, the legend that pandas adds is removed again.
    yaxis_adjusts_to_data : bool, default False
        When False the y-axis is fixed to [0, 1]; when True the y-limits are
        left unset so they follow the data.
    verbose : bool, default False
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that owns the axes that were drawn on.

    Raises
    ------
    TypeError
        If ``ax`` is given but is not a matplotlib Axes (for example the array
        of axes returned by ``plt.subplots(2, 1)``).
    """
    if ax is None:
        fig, ax = plt.subplots(1, figsize=(10, plot_ht))
    elif isinstance(ax, plt_axes.Axes):
        # isinstance also accepts Axes subclasses, which an exact type
        # comparison would reject.
        fig = ax.get_figure()
    else:
        raise TypeError(
            f"ax must be a matplotlib Axes or None, got {type(ax).__name__}"
        )

    # (column, line colour, legend label) in drawing order.
    # NOTE(review): the channel-to-wavelength mapping is carried over from the
    # earlier version of this function, which flagged it as unverified --
    # confirm against the sensor documentation.
    # lch4 and lch5 are not used. The flicker column "ff" is not plotted here.
    channel_specs = [
        ("lch0", "#800080", "415 nm"),  # f1 - purple
        ("lch1", "#000080", "445 nm"),  # f2 - navy
        ("lch2", "#809FFF", "480 nm"),  # f3 - medium blue
        ("lch3", "#BFCFFF", "515 nm"),  # f4 - light blue
        ("lch6", "#008000", "555 nm"),  # f5 - green
        # golden yellow: distinct from the green trace and still readable on a
        # white background (pure #FFFF00 is hard to see)
        ("lch7", "#FFD700", "590 nm"),  # f6 - yellow
        ("lch8", "#FFA600", "630 nm"),  # f7 - orange
        ("lch9", "#FF0000", "680 nm"),  # f8 - red
        ("lch11", "#800000", "NIR 910 nm"),  # near infrared - brown
        ("lch10", "#BFBFBF", "All (no filter)"),  # clear / unfiltered - gray
    ]
    for column, line_color, legend_label in channel_specs:
        df.plot.line(x="ts", y=column, ax=ax, color=line_color, label=legend_label)

    if not yaxis_adjusts_to_data:
        ax.set_ylim(0, 1)
    # x tick labels are hidden on this axes.
    ax.tick_params(labelbottom=False)

    if not add_legend:
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

    # Lay out the figure that owns `ax`, which is not necessarily pyplot's
    # current figure when the caller supplied the axes.
    fig.tight_layout()
    return fig

In [None]:
# Draw the spectral channels on a new figure (ax=None) with the legend shown
# and the fixed y-axis range (yaxis_adjusts_to_data=False).
fig = create_spectral_plot(
    df, plot_ht=3, ax=None, add_legend=True, yaxis_adjusts_to_data=False, verbose=False
)

In [None]:
# Signals that every cell above has finished running.
print("Done")