# Model comparison (single step) - Dash/Plotly

This notebook is meant as a visualization of the comparison of the different model performances. The comparison metric is the error in predicting incidence at `t+7` (as we have done for single step models in the other notebooks). The predictions shown for multistep models are just the ones for `t+7`.

To run this notebook you will need to install node.js and the plotly extension for jupyterlab:
```bash
curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
sudo apt-get install -y nodejs
jupyter labextension install jupyterlab-plotly
pip install jupyterlab-dash
```

In [1]:
import os
import sys

# Location of the project's `src` directory. The default is the original
# author's checkout; set the COVID_DL_SRC environment variable to run the
# notebook from another machine without editing this cell.
SRC_DIR = os.environ.get("COVID_DL_SRC", "/home/iheredia/ignacio/covid/covid-dl/src")
sys.path.insert(0, SRC_DIR)  # project modules take precedence on the import path

from paths import PATHS
from data.base import make_splits, get_data, single_X_y

In [2]:
import pathlib

import matplotlib
import matplotlib.pylab as plt
import numpy as np
import pandas as pd

import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
from jupyter_dash import JupyterDash
import plotly.express as px
import plotly.graph_objects as go

In [3]:
# Load the dataset (with trend features requested), split it without
# normalisation and build the single-step feature/target frames.
dataset = get_data(trend=True)
splits = make_splits(dataset, norm=False)
X, y = single_X_y(splits)

# Split boundaries: leading element of the first index entry of the 'val' and
# 'test' feature frames, i.e. end of training date and end of validation date.
# NOTE(review): assumes the index is (date, province) with rows ordered by
# date — confirm against make_splits.
training_end, validation_end = (
    splits[part]['X'].index[0][0] for part in ('val', 'test')
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [4]:
# Load predictions
# Every file matching `single_step/pred_*` is read as a CSV indexed by
# (date, province) and the resulting frames are joined column-wise.
# The paths are sorted because `Path.glob` yields them in arbitrary,
# filesystem-dependent order; without sorting, the column order of `pred`
# (and with it dropdown defaults and trace order further down) could change
# from one run or machine to the next.
pred_files = sorted(pathlib.Path('single_step').glob('pred_*'))
if not pred_files:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate".
    raise FileNotFoundError("No prediction files matching 'single_step/pred_*' were found")
pred = pd.concat(
    [pd.read_csv(f, index_col=['date', 'province']) for f in pred_files],
    axis=1,
)

# TSNE embedding

In [5]:
# The 2-D t-SNE embedding of X is read from a cached CSV rather than being
# recomputed. The commented-out lines below document how that file was
# generated; uncomment them to rebuild the cache.

# from sklearn.manifold import TSNE

# X_emb = TSNE(n_components=2).fit_transform(X)
# X_emb = pd.DataFrame(X_emb, index=X.index, columns=['x', 'y'])
# X_emb.to_csv("single_step/tsne.csv")

# Cached embedding, indexed by (date, province)
X_emb = pd.read_csv("single_step/tsne.csv", index_col=['date', 'province'])

In [6]:
# Index levels of the embedding: level 0 holds the dates, level 1 the provinces
dates_dt = pd.to_datetime(X_emb.index.get_level_values(0))
provinces = X_emb.index.get_level_values(1)

# Choices for the colouring dropdown: four fixed entries plus one
# "<model> error" entry per prediction column
options = ['dates', 'training set', 'provinces', 'incidence 7 (t+7)']
options += [f'{c} error' for c in pred.columns]

# Hover title (index entries joined with a space) and hover values
# (every feature/target/prediction column formatted with two decimals)
hnames = X.index.map(' '.join)
join_df = pd.concat([X, y, pred], axis=1)
hdata = {str(c): join_df[c].map('{:.2f}'.format) for c in join_df.columns}

In [7]:
# Dash app for the t-SNE embedding: a dropdown selects the colouring variable
# and the graph underneath is filled in by the callback registered on this app.
app = JupyterDash(__name__)

app.layout = html.Div(children=[
    html.H1("TSNE embedding"),
    dcc.Dropdown(
        id='dropdown',
        options=[dict(label=opt, value=opt) for opt in options],
        value='incidence 7 (t+7)',
        clearable=False,
    ),
    dcc.Graph(
        id='graph',
        # width and height both use viewport-height units
        style=dict(width='100vh', height='100vh'),
    ),
])

@app.callback(
    Output('graph', 'figure'),
    [Input('dropdown', 'value')]
)
def update_figure(var):
    """Redraw the t-SNE scatter plot, colouring the points by `var`.

    Parameters
    ----------
    var : str
        Value selected in the dropdown: 'dates', 'training set', 'provinces',
        'incidence 7 (t+7)' or '<model> error', where <model> is a column
        of `pred`.

    Returns
    -------
    plotly.graph_objects.Figure
        Scatter of the embedding coordinates with the requested colouring.

    Raises
    ------
    ValueError
        If `var` is not one of the supported options.
    """
    error_suffix = ' error'

    # Arguments shared by every colouring variant
    common = {'x': X_emb['x'],
              'y': X_emb['y'],
              'opacity': 0.7,
              'hover_name': hnames,
              'hover_data': hdata,
              'template': 'seaborn',
             }

    if var == 'dates':
        # Days elapsed since the first entry of the embedding index
        fig = px.scatter(color=(dates_dt - dates_dt[0]).days,
                         color_continuous_scale='plasma',
                         **common)

    elif var == 'training set':
        # Use the embedding's own dates (`dates_dt`). The previous code read a
        # global `dates` that is only created in a later cell, so this branch
        # raised NameError on a fresh kernel. `pd.to_datetime` makes the
        # comparison work whether `training_end` is a string or a timestamp.
        fig = px.scatter(color=(dates_dt < pd.to_datetime(training_end)),
                         **common)

    elif var == 'provinces':
        fig = px.scatter(color=provinces,
                         **common)

    elif var == 'incidence 7 (t+7)':
        fig = px.scatter(color=y['incidence 7 (t+7)'],
                         color_continuous_scale='RdYlGn_r',
                         **common)

    elif var.endswith(error_suffix):
        # Strip only the trailing suffix, so a model whose name itself
        # contains " error" is still resolved correctly.
        c = var[:-len(error_suffix)]
        fig = px.scatter(color=np.abs(y['incidence 7 (t+7)'] - pred[c]),
                         color_continuous_scale='matter',
                         **common)

    else:
        raise ValueError(f'Missing key: {var!r}')

    fig.layout.uirevision = True  # keep zoom levels between changes

    return fig

# Launch the embedding app on port 8070, displayed inline in the notebook.
app.run_server(mode='inline', port=8070)
# Alternative: open the app outside the notebook output.
# app.run_server(mode='external', port=8070)  # bigger window

In [8]:
# Compare points
# Features, target and predictions of two hand-picked samples, shown side by
# side (one column per sample) with two-decimal formatting.
with pd.option_context('display.float_format', '{:.2f}'.format):
    combined = pd.concat([X, y, pred], axis=1)
    picked = combined.loc[[('2021-01-02','Segovia'), ('2020-12-24','Soria')]]
    print(picked.T)

date                        2021-01-02    2020-12-24
province                       Segovia         Soria
external risk           10130522716.00 5385685238.00
external risk (acc)               0.08         -0.08
external risk (vel)               0.13         -0.04
flux intra               1137668472.00 1057751920.00
flux intra (acc)                 -0.01         -0.02
flux intra (vel)                 -0.00         -0.03
incidence 7                     187.00         30.00
incidence 7 (acc)                -0.14         -0.03
incidence 7 (vel)                 0.08          0.17
incidence 7 (t+7)               417.00         34.00
Gradient Boost                  252.00         63.00
Feedforward                     272.00         68.00
Feedforward (multistep)         262.00         59.00


# Province time series

In [9]:
# Dash app for per-province time series: the dropdown selects a province and
# the graph is filled in by the callback registered on this app.
app = JupyterDash(__name__)
app.layout = html.Div(children=[
    html.H1("Province trend"),
    dcc.Dropdown(
        id='dropdown',
        options=[dict(label=name, value=name) for name in provinces.unique()],
        value=provinces[0],
        clearable=False,
    ),
    dcc.Graph(id='graph'),
])

@app.callback(
    Output('graph', 'figure'),
    [Input('dropdown', 'value')]
)
def update_figure(province):
    """Plot the incidence, its t+7 target and all model predictions for one province.

    Parameters
    ----------
    province : str
        Province selected in the dropdown (a value of index level 1).

    Returns
    -------
    plotly.graph_objects.Figure
        Line chart with a dashed vertical line at `training_end`.
    """
    def for_province(frame):
        # Cross-section on index level 1, dropping that level from the result
        return frame.xs(province, level=1, drop_level=True)

    observed = for_province(X)
    target = for_province(y)
    forecasts = for_province(pred)

    traces = [
        go.Scatter(x=observed.index, y=observed['incidence 7'], name='incidence 7'),
        go.Scatter(x=target.index, y=target['incidence 7 (t+7)'], name='incidence 7 (t+7)'),
    ]
    traces.extend(
        go.Scatter(x=forecasts.index, y=forecasts[col], name=col)
        for col in forecasts.columns
    )

    fig = go.Figure(layout={'template': 'seaborn'})
    for trace in traces:
        fig.add_trace(trace)

    # Mark the end of the training period
    fig.add_vline(x=training_end, line_width=3, line_dash='dash', line_color='black')
    fig.update_layout(hovermode='x unified')

    return fig

# Launch the province-trend app on port 8075, displayed inline in the notebook.
app.run_server(mode='inline', port=8075)
# Alternative: open the app outside the notebook output.
# app.run_server(mode='external', port=8075)  # bigger

# Provinces comparison

In [10]:
# Absolute error of every model's prediction against the t+7 incidence
diff = (
    pred
    .subtract(y['incidence 7 (t+7)'], axis='index')
    .abs()
)
dates = diff.index.get_level_values(0)

# Boolean row masks over `diff`, keyed by the name of the date range
dranges = {}
dranges['train'] = dates < training_end
dranges['val'] = (dates >= training_end) & (dates < validation_end)
dranges['test'] = dates >= validation_end
dranges['val+test'] = dates >= training_end

In [11]:
# All models at the same time
# Dash app with display toggles, a date-range dropdown and a graph that is
# filled in by the callback registered on this app.
app = JupyterDash(__name__)
app.layout = html.Div(children=[
    html.H1("Results by provinces"),
    dcc.Checklist(
        id='checklist',
        options=[dict(label=flag, value=flag) for flag in ('logscale', 'sort')],
        value=[],
    ),
    dcc.Dropdown(
        id='dropdown',
        options=[dict(label=name, value=name) for name in dranges],
        value='val+test',
        clearable=False,
    ),
    dcc.Graph(
        id='graph',
        style=dict(width='100vh', height='100vh'),
    ),
])

@app.callback(
    Output('graph', 'figure'),
    # All inputs go in ONE list: passing a second positional list puts it in
    # the `state` slot of the classic callback signature.
    [Input('dropdown', 'value'),
     Input('checklist', 'value')]
)
def update_figure(k, options):
    """Polar chart of every model's mean absolute error per province.

    Parameters
    ----------
    k : str
        Key of `dranges` selecting the date range to average over.
    options : list of str
        Ticked checklist values; 'sort' orders the provinces by their mean
        error across models, 'logscale' makes the radial axis logarithmic.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # Rows of the selected range -> mean error, laid out as provinces x models
    dft = diff[dranges[k]]
    dft = dft.unstack().mean(axis=0).unstack(0)
    if 'sort' in options:
        args = dft.mean(axis=1).argsort()
        dft = dft.iloc[args]
    # Repeat the first row at the end so each polar line closes on itself.
    # `DataFrame.append` was removed in pandas 2.0, so use `pd.concat`.
    dft = pd.concat([dft, dft.iloc[[0]]])

    fig = go.Figure()
    for c in dft.columns:
        fig.add_trace(go.Scatterpolar(
            r=dft[c],
            theta=dft.index,
            name=c,
            fill='tonext'
        ))

    fig.layout.template = 'seaborn'
    fig.layout.title = 'Error in predicted incidence (smaller is better)'
    if 'logscale' in options:
        fig.layout.polar.radialaxis.type = "log"

    return fig

# Launch the all-models comparison app on port 8072, displayed inline in the notebook.
app.run_server(mode='inline', port=8072)
# Alternative: open the app outside the notebook output.
# app.run_server(mode='external', port=8072)  # bigger

In [18]:
# Compare two models
# Dash app with a log-scale toggle, a date-range dropdown and two model
# dropdowns (defaulting to the first and second column of `pred`); the graph
# is filled in by the callback registered on this app.
app = JupyterDash(__name__)
app.layout = html.Div(children=[
    html.H1("Results by provinces"),
    dcc.Checklist(
        id='options',
        options=[dict(label='logscale', value='logscale')],
        value=[],
    ),
    dcc.Dropdown(
        id='dataset',
        options=[dict(label=name, value=name) for name in dranges],
        value='val+test',
        clearable=False,
    ),
    dcc.Dropdown(
        id='model1',
        options=[dict(label=model, value=model) for model in pred.columns],
        value=pred.columns[0],
        clearable=False,
    ),
    dcc.Dropdown(
        id='model2',
        options=[dict(label=model, value=model) for model in pred.columns],
        value=pred.columns[1],
        clearable=False,
    ),
    dcc.Graph(
        id='graph',
        style=dict(width='100vh', height='100vh'),
    ),
])

@app.callback(
    Output('graph', 'figure'),
    # All inputs go in ONE list: passing further positional lists puts them in
    # the `state` slot of the classic callback signature.
    [Input('dataset', 'value'),
     Input('model1', 'value'),
     Input('model2', 'value'),
     Input('options', 'value')]
)
def update_figure(k, m0, m1, options):
    """Polar chart comparing the per-province mean absolute error of two models.

    Provinces are ordered by the error difference `m0 - m1`. The band between
    the two error curves is filled green where `m0` has the smaller error and
    red where it does not; the area below both curves is filled grey.

    Parameters
    ----------
    k : str
        Key of `dranges` selecting the date range to average over.
    m0, m1 : str
        Columns of `pred` (model names) to compare.
    options : list of str
        Ticked checklist values; 'logscale' makes the radial axis logarithmic.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # Select dates -> mean error, laid out as provinces x models
    dft = diff[dranges[k]]
    dft = dft.unstack().mean(axis=0).unstack(0)

    # Compare columns and sort
    order = dft[m0] - dft[m1]
    args = order.argsort()
    dft = dft.iloc[args]
    # `args` holds integer positions, so reorder with `.iloc`; plain `order[args]`
    # on a label-indexed Series relies on a deprecated positional fallback.
    order = order.iloc[args]

    fig = go.Figure()

    # Add fancy coloring
    # Outer radius of each coloured band: the larger of the two errors where
    # the condition holds, 0 elsewhere. Built with `where` instead of
    # `tmp[col].update(...)`, which mutates a column through a chained access
    # and has no effect under pandas Copy-on-Write.
    m0_wins_r = dft[m1].where(order < 0, 0)
    m0_loses_r = dft[m0].where(order >= 0, 0)

    fig.add_trace(go.Scatterpolar(
        r=m0_wins_r,
        theta=dft.index,
        text=f'{m0} wins',
        hoverinfo='text',
        line={'color': 'rgba(0,0,0,0.)'},
        showlegend=False,
        fill='toself',
        fillcolor='rgba(26,150,65,0.5)',
    ))
    fig.add_trace(go.Scatterpolar(
        r=m0_loses_r,
        theta=dft.index,
        # Must be `text`, not `name`: with hoverinfo='text' only `text` is shown
        text=f'{m0} loses',
        hoverinfo='text',
        line={'color': 'rgba(0,0,0,0.)'},
        showlegend=False,
        fill='toself',
        fillcolor='rgba(202,16,16,0.5)',
        connectgaps=False
    ))
    # Opaque grey fill up to the smaller error, hiding the inner part of the
    # coloured bands so that only the gap between the two models stays coloured
    fig.add_trace(go.Scatterpolar(
        r=dft[[m0, m1]].min(axis=1),
        theta=dft.index,
        name='',
        line={'color': 'rgba(0,0,0,0.)'},
        showlegend=False,
        fill='toself',
        fillcolor='rgba(211,211,211,1.)',
    ))

    # Add the proper traces
    colors = ['rgba(23, 102, 171, 1)', 'rgba(152, 61, 145, 1)']
    for color, modelname in zip(colors, [m0, m1]):
        fig.add_trace(go.Scatterpolar(
            r=dft[modelname],
            theta=dft.index,
            name=modelname,
            line={'color': color},
            hoverinfo='skip'
        ))

    fig.layout.template = 'seaborn'
    fig.layout.title = 'Error in predicted incidence (smaller is better)'
    if 'logscale' in options:
        fig.layout.polar.radialaxis.type = "log"

    return fig

# Launch the two-model comparison app on port 8071, displayed inline in the notebook.
app.run_server(mode='inline', port=8071)
# Alternative: open the app outside the notebook output.
# app.run_server(mode='external', port=8071)  # bigger