In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

In [None]:
# Load the raw export; the first CSV column becomes the row index.
df = pd.read_csv("test_output.csv", index_col=0)

In [None]:
# Keep only rows whose index label does not contain the substring "Unnamed".
df = df.loc[~df.index.str.contains("Unnamed")]

In [None]:
# Row/column count after dropping the "Unnamed" rows.
df.shape

In [None]:
# Preview the first rows.
df.head()

In [None]:
def list_val_default(list_, default=0):
    """Return the second item of ``list_``, or ``default`` when it has fewer than two items."""
    if len(list_) <= 1:
        return default
    return list_[1]

In [None]:
# Second "."-separated piece of each index label; 0 when the label has no ".".
day_n = df.index.str.split(".").map(list_val_default)

In [None]:
# Text before the first "." in each index label (later passed to pd.to_datetime).
date = df.index.str.split(".").map(lambda x: x[0])

In [None]:
# New index: the parsed date plus 4 hours per unit of the numeric suffix held in day_n.
# NOTE(review): the 4-hour spacing is hard-coded — confirm it matches how repeated dates were sampled.
df.index = pd.to_datetime(date) + day_n.map(lambda t: pd.Timedelta(hours= 4 * int(t)))

In [None]:
# First piece of each column label when split on a double space, as a positionally indexed Series.
metric_names = df.columns.str.split("  ").map(lambda l: l[0]).to_series().reset_index(drop=True)

In [None]:
# Second piece of each column label (NaN when the label has no double space), as a positionally indexed Series.
metric_ranges = df.columns.str.split("  ").map(lambda l: list_val_default(l, np.nan)).to_series().reset_index(drop=True)

### units and ranges

In [None]:
# Two-column frame: each metric name next to the range text taken from the same column label.
dfm = pd.concat([metric_names, metric_ranges], axis=1)

In [None]:
# Label the two columns, index by metric name and keep only the range text
# as a Series.
dfm = (
    dfm.set_axis(["metric", "range"], axis="columns")
    .set_index("metric")["range"]
)

In [None]:
# Keep only the first entry for each metric name.
dfm = dfm[~dfm.index.duplicated()]

In [None]:
# Drop ranges containing any of the listed markers (presumably qualitative results rather than numeric limits).
# NOTE(review): str.contains yields NaN for missing ranges and astype(bool) maps NaN to True,
# so entries with no range text appear to be dropped here as well — confirm that is intended.
dfm = dfm[~dfm.str.contains('|'.join(("Not", "NEG", "TRACE", "Non", "NOT", "Negative", "Normal", "OCC")), regex=True).astype(bool)]

In [None]:
# Tokenise each remaining range string on single spaces.
dfm_lists = dfm.dropna().str.split(" ").dropna()

In [None]:
def parse_metric_range(elem_list):
    """Turn the space-separated tokens of a reference-range string into a dict.

    Recognised layouts:

    * ``[units]`` -> ``{"units": units}``.
    * ``[limit, units]`` where ``limit`` starts with ``<``, ``<=``, ``>`` or
      ``>=`` -> the units plus a float ``"max"`` (for ``<``/``<=``) or
      ``"min"`` (for ``>``/``>=``). Without a comparator only the units are
      recorded.
    * ``[low, "-", high, *unit_tokens]`` -> ``"min"``/``"max"`` kept as the
      raw strings, plus ``"units"`` (all trailing tokens joined by a space)
      when anything follows the upper bound.
    * Three or more tokens with no standalone ``"-"`` -> handled as
      ``[limit, units]`` with every token after the first joined into the
      units.

    Args:
        elem_list: List of string tokens.

    Returns:
        dict containing any of the keys ``"units"``, ``"min"``, ``"max"``;
        empty for an empty token list.

    Raises:
        ValueError: If a one-sided limit is not numeric after commas and
            ``"_x"`` have been removed.
    """
    metric_dict = {}
    n_tokens = len(elem_list)
    if n_tokens == 0:
        return metric_dict

    if n_tokens >= 3:
        if "-" not in elem_list:
            # Not a "low - high" range: first token is the limit, the rest are units.
            return parse_metric_range([elem_list[0], " ".join(elem_list[1:])])
        metric_dict["min"] = elem_list[0]
        metric_dict["max"] = elem_list[2]
        if n_tokens > 3:
            # Units may span several tokens; keep all of them.
            metric_dict["units"] = " ".join(elem_list[3:])
        return metric_dict

    metric_dict["units"] = elem_list[-1]
    if n_tokens == 2:
        limit = elem_list[0]
        # Two-character comparators are tested first so "<=" is not read as "<".
        for comparator, key in (("<=", "max"), (">=", "min"), ("<", "max"), (">", "min")):
            if limit.startswith(comparator):
                # Strip the same noise the notebook removes from range bounds
                # so values such as "1,000" parse instead of raising.
                number = limit[len(comparator):].replace(",", "").replace("_x", "")
                metric_dict[key] = float(number)
                break
    return metric_dict

In [None]:
# Parse every token list and assemble one row per metric, with the keys
# returned by parse_metric_range as columns.
dfm = pd.DataFrame(dict(dfm_lists.map(parse_metric_range))).T

In [None]:
def _strip_limit_noise(value):
    """Remove commas and "_x" from string limits; return any other value unchanged."""
    if isinstance(value, str):
        return value.replace(",", "").replace("_x", "")
    return value


# The limit columns mix raw strings (range bounds) with floats that
# parse_metric_range already produced for one-sided limits such as "<5".
# Pandas `.str` methods return NaN for non-string entries, which would erase
# those floats, so strings are cleaned element-wise and numbers pass through.
dfm["min"] = dfm["min"].map(_strip_limit_noise).astype(float)
dfm["max"] = dfm["max"].map(_strip_limit_noise).astype(float)

### clean names

In [None]:
# Keep only the text before the first double space in each column label.
df.columns = df.columns.str.split("  ").map(lambda l: l[0])

In [None]:
# Lower-case the metric names in both frames (dfm is later looked up by df's column names).
df.columns = df.columns.str.lower()
dfm.index = dfm.index.str.lower()

In [None]:
# Replace spaces with underscores in both sets of metric names.
df.columns = df.columns.str.replace(" ", "_")
dfm.index = dfm.index.str.replace(" ", "_")

In [None]:
# Long format of the float64 columns: one row per (index value, column) pair, with rows containing nulls removed.
dfl = df.select_dtypes(np.float64).reset_index().melt(id_vars="index").dropna()

In [None]:
# Display the long-format frame.
dfl

In [None]:
# Wide frame restricted to the float64 columns.
dfn = df.select_dtypes(np.float64)

In [None]:
# Size of the numeric frame before the date filter.
dfn.shape

In [None]:
# Keep rows whose index is strictly later than 2021-01-01.
dfn = dfn[dfn.index > "2021-1-1"]

In [None]:
# Size of the numeric frame after the date filter.
dfn.shape

In [None]:
# Drop repeated column names, keeping the first occurrence of each.
dfn = dfn.loc[:,~dfn.columns.duplicated()]

In [None]:
# Threshold on the number of non-null values per column, used when filtering sparse columns.
MIN_VALS = 5

In [None]:
# Flag columns with fewer than MIN_VALS non-null values.
# The mask is deliberately left in dfn's column order: a boolean Series used
# to index `dfn.columns` is applied by position, so sorting it first would
# pair each flag with the wrong column.
cols_lt_min = dfn.isnull().sum(axis=0) > len(dfn) - MIN_VALS
cols_lt_min.value_counts()

In [None]:
# Keep only columns with at least MIN_VALS non-null values. `thresh` makes
# pandas count per column itself, so the selection cannot be thrown off by
# the ordering of a separately computed mask.
dfn = dfn.dropna(axis="columns", thresh=MIN_VALS)

In [None]:
# Fill gaps that lie between observed values (nothing is extrapolated past
# the first/last observation). Interpolation runs along row order, so the
# rows are sorted by timestamp first; on an unsorted frame it would blend
# values from timestamps that are not actually adjacent.
dfn = dfn.sort_index().interpolate(limit_area="inside")

In [None]:
# Order rows by their index (timestamps).
dfn = dfn.sort_index()

In [None]:
# Number of subplot columns in the figure grid.
COLS = 3

In [None]:
from math import ceil

In [None]:
# Grid rows needed to give every column of dfn its own subplot.
rows = ceil(len(dfn.columns) / COLS)

In [None]:
# Units per metric: metrics without recorded units are skipped (otherwise the
# title would read "<name> nan"), and repeated metric names collapse to their
# first entry so every lookup yields a single value.
_units = dfm["units"].dropna()
_units = _units[~_units.index.duplicated()]

# Subplot title = column name followed by its units when known, else just the name.
titles = dfn.columns.map(lambda col: f"{col} {_units[col]}" if col in _units.index else col)

In [None]:
# Remove every "_x" occurrence from the titles.
titles = titles.str.replace("_x", "")

In [None]:
# Display the subplot titles.
titles

In [None]:
# Display the columns that will be plotted.
dfn.columns

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Line colour for each reference limit drawn on a subplot.
EXTREMA_COLORS = {"min": "orange", "max": "red"}

fig = make_subplots(
    rows=rows,
    cols=COLS,
    shared_xaxes=True,
    vertical_spacing=0.01,
    subplot_titles=titles,
)

# One subplot per metric, filling the grid left-to-right, then top-to-bottom.
for position, col in enumerate(dfn.columns):
    row_offset, col_offset = divmod(position, COLS)
    # Plotly subplot coordinates are 1-based.
    grid_row, grid_col = row_offset + 1, col_offset + 1

    series_trace = go.Scatter(x=dfn.index, y=dfn[col], line_shape='linear')
    fig.add_trace(series_trace, row=grid_row, col=grid_col)

    if col not in dfm.index:
        continue

    # Draw a horizontal line for each reference limit that is defined.
    for extrema in ("min", "max"):
        lim = dfm[extrema][col]
        if np.isnan(lim):
            continue
        fig.add_hline(
            y=lim,
            line_width=3,
            line_color=EXTREMA_COLORS[extrema],
            row=grid_row,
            col=grid_col,
        )

In [None]:
# Save the figure to an HTML file.
# NOTE(review): this runs before the cell that sets the figure's height, width
# and title — confirm the export is meant to precede those layout settings.
fig.write_html("ehr_1_19_2021.html")

In [None]:
# Size the figure from the grid dimensions: 300 px per row, 500 px per column.
fig.update_layout(
    height=300 * rows,
    width=500 * COLS,
    title_text="Stacked Subplots with Shared X-Axes"
)
# Link every subplot's x-axis so zooming/panning one moves them all.
fig.update_xaxes(matches='x')
# Export now that size, title and axis linking are set, so the saved HTML
# matches what is displayed; a file written before this point does not
# include these layout settings.
fig.write_html("ehr_1_19_2021.html")
fig.show()

In [None]:
# Object-dtype (non-numeric) columns, omitting rows where every one of them is null.
df.select_dtypes("O").dropna(how="all")