Interactive CC Feature KDE Plots

In [None]:
# Base Packages
import numpy as np
import pandas as pd
import holoviews as hv
# Imported for its side effect only: nothing references `hvplot` by name, but
# the `.hvplot` accessor used on pandas objects in the plotting cell relies on it.
import hvplot.pandas
from bokeh.io import output_notebook

# Route Bokeh output to the notebook and select Bokeh as the HoloViews backend.
output_notebook()
hv.extension('bokeh')

import panel as pn
# NOTE(review): `throttled=True` presumably makes slider-type widgets update only
# on release rather than continuously — confirm against the Panel docs.
pn.extension(throttled=True)

In [None]:
# Import TEVA output files (one named sheet from each workbook)
ccs = pd.read_excel('ccs_2DOC_CAMELS.xlsx', sheet_name='CCEA_Low')
dnfs = pd.read_excel('dnfs_2DOC_CAMELS.xlsx', sheet_name='DNFEA_Low')
# Import observation data
data = pd.read_csv('test_observations.csv')
# Grab the column names of the observation data.
# NOTE(review): the previous comment said these come from the CC output, but the
# code reads them from `data`; `feature_names` is not used in the cells shown here.
feature_names = list(data.columns)

In [None]:
# Functions
def flatten(xss):
    '''
    Collapses a list of lists into a single numpy array, keeping the
    elements in their original order.
    '''
    flat = []
    for inner in xss:
        flat.extend(inner)
    return np.array(flat)


def parse_dnf(dnfs):
    '''
    Lists, for every dnf row, the ccs that make it up.

    Columns from position 12 onward are read as cc indicator columns: a cc
    belongs to a dnf when its indicator equals 1. The first three characters
    of each matching column label are dropped before the label is returned.
    The number of rows is taken from the 'mask' column.
    '''
    n_dnfs = len(dnfs['mask'])
    ccs_per_dnf = []
    for row_pos in range(n_dnfs):
        indicators = dnfs.iloc[row_pos].iloc[12:]
        member_labels = indicators[indicators == 1].index.values.tolist()
        ccs_per_dnf.append([label[3:] for label in member_labels])
    return ccs_per_dnf


def parse_cc(ccs):
    '''
    Creates a list of the features composing each cc.

    Columns from position 12 onward are read as feature columns. A feature
    belongs to a cc when its cell is neither missing (NaN) nor zero.

    Parameters
    ----------
    ccs : pandas.DataFrame
        CC table with one row per cc.

    Returns
    -------
    list of list
        For each row of ``ccs``, in row order, the labels of the feature
        columns holding a non-missing, non-zero value.
    '''
    cc_features = []
    for i in range(len(ccs)):
        # fillna returns a new Series here, so the row slice is never mutated
        # in place. The redundant ``method=None`` keyword is intentionally not
        # passed: pandas has deprecated the ``method`` argument of fillna.
        cc_values = ccs.iloc[i].iloc[12:].fillna(0)
        # Missing cells are now 0, so a single != 0 test drops both cases.
        cc_features.append(cc_values[cc_values != 0].index.tolist())
    return cc_features


# def fitness_contours(n_grid, dnfs, ccs):
#     '''
#     Interpolate fitness values within plot domain using linear triangular interpolator.
    
#         n_grid      number of grid points
#         dnfs        TEVA dnf output excel file
#         ccs         TEVA cc output excel file

#     Returns:
#         x           x coordinates of mesh
#         y           y coordinates of mesh
#         z           interpolated fitness masked array

#     Can be passed into the Bokeh contour renderer. Example:
#                 # fitness contours
#                 # x, y, z = fitness_contours(1000, dnfs, ccs)
#     '''
#     x = np.linspace(0, 1, n_grid)
#     y = np.linspace(0, 1, n_grid)
#     xplot, yplot = np.meshgrid(x, y)
#     triangles = tri.Triangulation(pd.concat([dnfs['cov'],ccs['cov']]),
#                                   pd.concat([dnfs['ppv'], ccs['ppv']]))
#     fitness = pd.concat([dnfs['fitness'], ccs['fitness']])
#     interpolator = tri.LinearTriInterpolator(triangles, fitness)
#     z = interpolator(xplot, yplot)

#     return (x, y, z)


In [None]:
# ccs composing each dnf, flattened into one array (a cc label repeats once
# for every dnf that contains it)
all_ccs = flatten(parse_dnf(dnfs))

# features composing each cc (one list per row of `ccs`, in row order)
cc_features = parse_cc(ccs)

# flatten lists
# all_ccs_flat = flatten(all_ccs)
# cc_features_flat = flatten(cc_features)

# unique lists: np.unique returns the de-duplicated labels in sorted order
unique_ccs = (np.unique(all_ccs))
# unique_features = (np.unique(cc_features_flat))

In [None]:
# Function for updating plots
def feature_plotter(selected_cc):
    '''
    Builds one KDE plot per feature of the selected cc and returns them
    wrapped in a Panel FlexBox.

    Reads the notebook-level `cc_features` (list of feature names per cc) and
    `data` (observations) rather than taking them as arguments.

    NOTE(review): `selected_cc` is used as a positional index into
    `cc_features`; this assumes the cc labels offered by the dropdown match
    row positions in `ccs` — confirm.
    '''
    kde_panels = []
    for feature in cc_features[selected_cc]:
        # Missing observations are dropped before the density is estimated.
        kde = data[feature].dropna().hvplot.kde(height=200, width=300, hover=False)
        # Each feature keeps its own axis ranges; the Bokeh toolbar is hidden.
        kde.opts(shared_axes=False, toolbar=None)
        kde_panels.append(kde)
    return pn.FlexBox(objects=kde_panels)

# Widget for selecting CC to plot
# The unique cc labels are cast to int and sorted numerically for the dropdown.
dropdown_options = list(np.sort(unique_ccs.astype(int)))
cc_select = pn.widgets.Select(options=dropdown_options, width=75, name='CC', description='Select a CC to view features.')

# Bind function to widget: the widget is passed to feature_plotter as its
# `selected_cc` argument
dynamic_subplots = pn.bind(feature_plotter, cc_select)

# Create layout: the dropdown followed by the bound KDE plots, in one row
app = pn.Row(cc_select, dynamic_subplots)

# NOTE(review): presumably serves the app in a separate browser tab instead of
# rendering it inline in the notebook — confirm this is the intended display mode.
app.show()


In [None]:
# scratch
# Print the text representation of the assembled layout for quick inspection.
print(app)