## Displaying ED predictions

- an end in itself
- a template for how to build a Dash display

Load standard libraries plus those specified in pyproject.toml (managed by Poetry)

In [None]:
import os
import pandas as pd
import numpy as np
import json
from pathlib import Path
import datetime
import warnings

from sqlalchemy import create_engine

import plotly.graph_objects as go
import plotly.express as px
from ridgeplot import ridgeplot

Run this file as if it was in the 'app' folder so that it's easier to migrate your code later on

In [None]:
# The notebook is expected to start in the 'notebooks' folder, a sibling of 'app'.
# Working from inside 'app' means project imports and relative paths behave here
# the same way they do in the app code, which eases migrating code later on.
# The location is resolved against the current working directory because a
# notebook has no reliable __file__ to anchor on.
app_path = Path.cwd().resolve().parent / 'app'
assert app_path.exists(), (
    f"'app' folder not found at {app_path}; start this notebook from the notebooks folder"
)
os.chdir(app_path)
print(Path.cwd())

Now import the 'app' and configuration settings

In [None]:
# Project-local imports: these rely on the working directory being the 'app'
# folder (set with os.chdir earlier in the notebook), so they cannot move to the
# import cell at the top.
import app
from config.config import ConfigFactory
# Configuration object; later cells read conf.DEV and conf.SECRETS from it
conf = ConfigFactory.factory()

Run a query against the live data if in production

In [None]:
# conf.DEV selects the data source used below: truthy -> saved CSV extract,
# falsy -> query against the database
print(conf.DEV)

In [None]:
CSV_FILE = 'ed_agg_30d_sample.csv'
SQL_FILE = 'ed_predictor_agg.sql'

# Both locations are relative to the working directory set earlier in the notebook
csv_path = f"../data/secret/{CSV_FILE}"
sql_path = f"../notebooks/sql/{SQL_FILE}"

if not conf.DEV:
    # Connection settings are looked up in conf.SECRETS, in this order
    secret_keys = (
        "EMAP_DB_HOST",
        "EMAP_DB_NAME",
        "EMAP_DB_PORT",
        "EMAP_DB_USER",
        "EMAP_DB_PASSWORD",
    )
    uds_host, uds_name, uds_port, uds_user, uds_passwd = (
        conf.SECRETS[key] for key in secret_keys
    )

    # Construct the PostgreSQL connection
    emapdb_engine = create_engine(
        f"postgresql://{uds_user}:{uds_passwd}@{uds_host}:{uds_port}/{uds_name}"
    )

    # Run the stored query, then save the result to the file the DEV branch reads
    q = Path(sql_path).read_text()
    df = pd.read_sql_query(q, emapdb_engine)
    df.to_csv(csv_path, index=False)
else:
    # DEV: work from the saved CSV extract instead of querying the database
    df = pd.read_csv(csv_path)


Prepare the data 
- prepare the 'hour' the model ran so you can compare 8h prediction models from the same time of day
- convert date to time offset in days

In [None]:
def _floor_to_even_hour(value):
    """Return `value` as a timezone-naive datetime floored to an even hour.

    `value` may be anything `pd.Timestamp` accepts, e.g. a datetime/Timestamp
    or a string such as those produced when the CSV extract is read back
    without date parsing. Timezone information is dropped; the wall-clock
    year/month/day/hour fields are kept as they are.
    """
    ts = pd.Timestamp(value)
    return datetime.datetime(ts.year, ts.month, ts.day, 2 * (ts.hour // 2))


# Handle timezones by dropping them, and floor to the even hour at or before the extract.
# pd.to_datetime guarantees a datetime64 column so the .dt accessors below work.
df['extract_dttm'] = pd.to_datetime(df['extract_dttm'].apply(_floor_to_even_hour))

# extract_dttm now sits exactly on an hour, so the hour needs no further rounding
df['hour'] = df.extract_dttm.dt.hour
df['dow'] = df.extract_dttm.dt.dayofweek  # Monday = 0, Sunday = 6
# Calendar date of the extract. Rounding to the nearest day would move afternoon
# extracts onto the following day and make 'date' disagree with 'dow'.
df['date'] = df.extract_dttm.dt.date
# Age of each extract relative to the most recent one, rounded to whole days
df['days'] = (df.extract_dttm.max() - df.extract_dttm).round('1D').dt.days.astype(int)

# Missing probabilities become 0; values are rounded to 5 decimal places
df['probs'] = df['probs'].fillna(value=0).round(decimals=5)

In [None]:
# Preview only the first rows: the extract lives under data/secret, so avoid
# rendering more of it into the saved notebook output than is needed
df.head()

### Simple plot of probability distribution for one forecast
- return `df0`: a dataframe with the most recent forecast for all patients
- then prepare a simple plot of the probability distribution

In [None]:
# Keep only the rows that belong to the most recent extract
latest_extract_dttm = df.extract_dttm.max()
df0 = df.loc[df.extract_dttm == latest_extract_dttm]

In [None]:
# Order by predicted admissions so the line plot runs left to right.
# Rebinding (instead of inplace=True) avoids mutating a frame that was sliced
# out of df, which is what triggers pandas' SettingWithCopyWarning.
df0 = df0.sort_values(by='num_adm_pred')

In [None]:
# Probability of each predicted admission count for the most recent forecast
latest_forecast_trace = go.Scatter(
    x=df0.num_adm_pred,
    y=df0.probs,
    mode='lines+markers',
)
go.Figure(data=latest_forecast_trace)

### Plot multiple forecasts made at the same time of day (and day of the week) over the last XX-ish days

In [None]:
# dfr = "df recent": forecasts made at the same hour and on the same day of the
# week as the first row of df0
reference_row = df0.iloc[0]
same_hour = df.hour == reference_row['hour']
same_dow = df.dow == reference_row['dow']
dfr = df.loc[same_hour & same_dow, ['days', 'num_adm_pred', 'probs']]


This uses the [ridgeplot](https://github.com/tpvasconcelos/ridgeplot) package, which in turn uses plotly. The data must be prepared as a series of (2, n) arrays, one per density, where 'n' is the same for all densities.

Prepare a list of days (should be multiples of 7 into the past)

In [None]:
# Distinct day offsets, ordered from the largest offset (oldest forecast) down
days = np.sort(dfr.days.unique())[::-1]

In [None]:
# Largest grid row kept per density (.loc slicing is inclusive, so 41 rows at most)
MAX_ADM_SHOWN = 40

# Common x grid so that every density has the same length.
# The +1 keeps the largest predicted count itself on the grid.
num_adm_max = int(dfr.num_adm_pred.max())
skeleton = pd.DataFrame({'num_adm_pred': range(num_adm_max + 1)})

densities = []
labels = []
for day in days:
    day_probs = dfr.loc[dfr.days == day, ['num_adm_pred', 'probs']]
    # Left merge onto the grid: counts with no prediction that day get probability 0
    day_probs = skeleton.merge(day_probs, how='left', on='num_adm_pred').fillna(0)
    # Skip days that carry no probability mass at all
    if day_probs.probs.sum() == 0:
        continue
    # Truncate the tail, then transpose to a (2, n) array: row 0 = counts, row 1 = probabilities
    densities.append(day_probs.loc[:MAX_ADM_SHOWN].to_numpy().transpose())
    labels.append(day)
res = np.asarray(densities)

In [None]:
# Styling options for the ridgeline plot, kept apart from the data arguments
ridge_style = dict(
    colorscale='portland',
    colormode='mean-minmax',
    spacing=1/5,
)
fig = ridgeplot(densities=res, labels=labels, **ridge_style)

In [None]:
# Apply every layout setting in a single call, then render the figure
fig.update_layout(
    showlegend=False,
    autosize=False,
    width=800,
    height=400,
    xaxis_title='Inpatient bed demand',
    yaxis_title=f"Predictions from days past<br>(i.e. up to {max(labels)} days ago)",
    template='plotly_white',
)
fig.show()