# WAS Data Exploration

This notebook creates interactive visualization tools for exploring the repository of Whole Air Samples (WAS) cans from the NASA Student Airborne Research Program (SARP) field campaigns (2009-2023).

::::{grid}
:gutter: 3

:::{grid-item-card} Scatter Plot
For comparing two species
:::

:::{grid-item-card} Map
For viewing the spatial distribution of a species
:::

::::

## Setup

In [1]:
!pip install openpyxl -qq

In [10]:
import pandas as pd
import geopandas as gpd
import hvplot.pandas  # noqa: F401 -- imported for its side effect: registers the .hvplot accessor
import geoviews as gv
import holoviews as hv
import panel as pn  # the alias must be `pn`: all widget and layout code below calls pn.*
hv.extension('bokeh')

In [3]:
# Route pandas' plotting API through hvplot
pd.set_option('plotting.backend', 'hvplot')

In [5]:
# Load the WAS workbook; skipfooter=6 leaves the last six rows of the sheet unparsed.
df = pd.read_excel(
    'data/WAS_SARP_2009_2021_noground.xlsx',
    skipfooter=6,
)

In [6]:
# 1. discard columns that contain nothing but NaN
# 2. combine the `date` and `OpenTime` columns into a single `datetime` column
df = (
    df
    .dropna(axis='columns', how='all')
    .assign(
        datetime=lambda frame: pd.to_datetime(
            frame['date'].astype(str) + ' ' + frame['OpenTime'].astype(str)
        )
    )
)

In [11]:
# Preview the cleaned table, including the new `datetime` column
df

Unnamed: 0,Snake#,Can#,Flight,Flight or Ground,Jday,date,OpenTime,CloseTime,Julian Open Time,Julian Closing Time,...,4-Ethyltoluene,2-Ethyltoluene,"1,3,5-Trimethylbenzene","1,2,4-Trimethylbenzene","1,2,3-Trimethylbenzene",alpha-Pinene,beta-Pinene,CO (ppbv).1,AltP_feet.1,datetime
0,2317,8198,1,Flight,202,2009-07-21,19:21:38,19:22:35,40015.80669,40015.80735,...,0.001000,0.001000,24.000000,92.000000,116.000000,0.001,0.001,,6151.0,2009-07-21 19:21:38
1,2302,7167,1,Flight,202,2009-07-21,19:24:00,19:24:57,40015.80833,40015.80899,...,4.000000,0.001000,32.000000,124.000000,171.000000,3.000,0.001,,3772.0,2009-07-21 19:24:00
2,2315,6354,1,Flight,202,2009-07-21,19:26:00,19:26:57,40015.80972,40015.81038,...,0.001000,0.001000,5.000000,9.000000,6.000000,0.001,0.001,,1600.0,2009-07-21 19:26:00
3,2303,7226,1,Flight,202,2009-07-21,19:29:15,19:30:10,40015.81198,40015.81262,...,0.001000,0.001000,8.000000,19.000000,19.000000,0.001,0.001,,1221.0,2009-07-21 19:29:15
4,2314,6269,1,Flight,202,2009-07-21,19:31:00,19:31:53,40015.81319,40015.81381,...,0.001000,0.001000,0.001000,9.000000,6.000000,0.001,5.000,,1220.0,2009-07-21 19:31:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3729,5610,6099,Flight 06,Flight,345,2021-12-11,00:39:01,00:39:22,44541.02709,44541.02734,...,0.001000,0.001000,0.001000,0.001000,0.001000,0.001,0.001,92.593962,4026.0,2021-12-11 00:39:01
3730,5623,9483,Flight 06,Flight,345,2021-12-11,00:42:02,00:42:26,44541.02919,44541.02947,...,0.001000,0.001000,0.001000,0.001000,0.001000,0.001,0.001,71.025390,11266.0,2021-12-11 00:42:02
3731,5608,7156,Flight 06,Flight,345,2021-12-11,00:44:00,00:44:48,44541.03056,44541.03111,...,0.001000,0.001000,0.001000,0.001000,0.001000,0.001,0.001,71.136102,11280.0,2021-12-11 00:44:00
3732,5609,6182,Flight 06,Flight,345,2021-12-11,00:46:00,00:46:25,44541.03194,44541.03223,...,5.370182,5.108404,5.246699,7.737759,7.506552,0.001,0.001,66.378233,11281.0,2021-12-11 00:46:00


In [12]:
# Names of the numeric columns, used as the options of the selection widgets.
# select_dtypes(include='number') keeps only integer/float columns. A plain
# `dtype != 'object'` test would also admit datetime columns such as `datetime`,
# whose min/max cannot be converted with float() when computing color limits.
numeric_cols = df.select_dtypes(include='number').columns.tolist()

Create a GeoDataFrame whose point geometry is built from each sample's `Longitude` and `Latitude` columns.

In [13]:
# Attach a point geometry (x = Longitude, y = Latitude, CRS EPSG:4326) to every row of df
geo_df = gpd.GeoDataFrame(
    data=df,
    geometry=gpd.points_from_xy(x=df['Longitude'], y=df['Latitude']),
    crs='epsg:4326',
)

## Scatter Plot

In [9]:
# Variant of the scatter plot with fixed, manually entered colorbar limits
# (inputs default to 400 and 500). Every line below is commented out, so
# running this cell does nothing.

# x = pn.widgets.Select(name='x', options=numeric_cols, value='CH4 (ppmv)')
# y = pn.widgets.Select(name='y', options=numeric_cols, value='AltP_meters')
# color_by_select = pn.widgets.Select(name='color by', options=numeric_cols, value='CO2_ppm')

# color_min = pn.widgets.FloatInput(name='color_min', value=400, step=1e-1, start=0, end=5000)
# color_max = pn.widgets.FloatInput(name='color_max', value=500, step=1e-1, start=0, end=5000)

# def points(ds, x, y, colorby, color_min, color_max):
#     return ds.data.hvplot.points(
#         x=x, y=y, color=colorby, cmap='viridis', 
#         hover_cols=['datetime', 'index'], width=800, 
#         clim=(color_min, color_max),
#     )

# pn.Column(pn.Row(pn.Column(x, y, color_by_select),
#                  pn.Column(color_min, color_max)),
#           hv.Dataset(df).apply(points, x=x, y=y, 
#                                colorby=color_by_select, 
#                                color_min=color_min, 
#                                color_max=color_max))

In [14]:
# Widgets

# Drop-downs choosing the columns that drive the x axis, the y axis and the marker color
x = pn.widgets.Select(
    name='x',
    value='CH4 (ppmv)',
    options=numeric_cols,
)
y = pn.widgets.Select(
    name='y',
    value='AltP_meters',
    options=numeric_cols,
)
color_by_select = pn.widgets.Select(
    name='color by',
    value='CO2_ppm',
    options=numeric_cols,
)

# Helpers returning the data range of a column of `df`
def get_color_min(colorby):
    """Return the smallest value in column ``colorby`` of ``df`` as a Python float."""
    column_low = df[colorby].min()
    return float(column_low)


def get_color_max(colorby):
    """Return the largest value in column ``colorby`` of ``df`` as a Python float."""
    column_high = df[colorby].max()
    return float(column_high)

# Color-limit inputs, initialised to the data range of the current "color by" column.
# No start/end bounds are set on purpose: the values are derived from the data, and
# a fixed 0-5000 window does not cover every numeric column (altitudes in feet and
# Julian times exceed 5000, and nothing guarantees a column minimum of at least 0).
color_min = pn.widgets.FloatInput(
    name='color_min',
    value=get_color_min(color_by_select.value),
    step=1e-1,
)
color_max = pn.widgets.FloatInput(
    name='color_max',
    value=get_color_max(color_by_select.value),
    step=1e-1,
)

# Keep the limit inputs in sync with the "color by" selection.
# Each function depends on color_by_select: when the selection changes it resets its
# input widget to the data range of the newly selected column and returns that widget,
# so the function itself can be placed in a layout.
@pn.depends(color_by_select)
def update_color_min(colorby):
    """Set ``color_min`` to the minimum of column ``colorby`` and return the widget."""
    color_min.value = get_color_min(colorby)
    return color_min

@pn.depends(color_by_select)
def update_color_max(colorby):
    """Set ``color_max`` to the maximum of column ``colorby`` and return the widget."""
    color_max.value = get_color_max(colorby)
    return color_max

# Plotting function
def points(ds, x, y, colorby, color_min, color_max):
    """Build the scatter plot for the current widget selections.

    Parameters
    ----------
    ds : object exposing the table to plot as ``ds.data``
    x, y : names of the columns placed on the x and y axes
    colorby : name of the column mapped to marker color
    color_min, color_max : lower and upper limit of the color scale

    Returns
    -------
    Whatever ``ds.data.hvplot.points`` returns for these options.
    """
    color_limits = (color_min, color_max)
    plot_options = {
        'x': x,
        'y': y,
        'color': colorby,
        'cmap': 'viridis',
        'hover_cols': ['datetime', 'index'],
        'width': 800,
        'clim': color_limits,
    }
    return ds.data.hvplot.points(**plot_options)

# Layout: column selectors beside the color-limit inputs, linked scatter plot underneath.
# To serve this as a standalone app instead, assign the Column to a name such as
# `layout` and call `layout.servable(title="WAS Data Exploration App")`.
pn.Column(
    pn.Row(
        pn.Column(x, y, color_by_select),
        pn.Column(update_color_min, update_color_max),
    ),
    hv.Dataset(df).apply(
        points,
        x=x,
        y=y,
        colorby=color_by_select,
        color_min=color_min,
        color_max=color_max,
    ),
)

## Map

In [15]:
# Map controls. NOTE(review): none of these three widgets is referenced again in this cell.
color = pn.widgets.Select(
    name='color',
    options=numeric_cols,  # alternative considered: df.columns.tolist()
)
float_input = pn.widgets.FloatInput(
    name='FloatInput',
    value=5.0,
    step=0.1,
    start=0,
    end=1000,
)
size = pn.widgets.IntSlider(name='size', start=5, end=15)

# Point layer built from the geometry column only, after dropping rows with missing geometry
poly_plot = gv.Points(geo_df[['geometry']].dropna()).opts(
    cmap='inferno',
    colorbar=True,
    size=12,
    width=600,
    height=600,
    toolbar='above',
    tools=['hover'],
)

# Overlay the sample points on OpenStreetMap tiles
gv.tile_sources.OSM * poly_plot