In [42]:
import sys

In [43]:
# Add the parent directory to the import path; the `utils.process_based_model`
# import further down presumably resolves through it (assumes the notebook is
# run from a sub-folder of the project — TODO confirm).
sys.path.append('..')

In [44]:
import numpy as np
import pandas as pd
import plotnine as p9
from tqdm.auto import tqdm
from mizani.breaks import date_breaks
from mizani.formatters import custom_format
from utils.process_based_model import run_model

In [45]:
# Global plotnine defaults for every figure in this notebook: 150 dpi rendering
# and the black-and-white theme. Individual plots below override dpi via p9.theme().
p9.options.set_option('dpi', 150)
# Last expression of the cell, so whatever theme_set() returns is shown as output.
p9.theme_set(p9.theme_bw())

<plotnine.themes.theme_bw.theme_bw at 0x197d9ccfa50>

## Fitting and running the model for every location and variant

In [49]:
# Beta variants to run/plot; the strings are reused as folder and file-name parts.
models = ['seasonal', 'constant', 'temperature']

# Locations that are currently processed.
regions = ['Lombardia']

# Period identifiers available for each location.
region_periods = {
    'Lombardia': ['1', '2'],
    # Locations that are currently switched off:
    # 'Barcelona': ['1', '2', '3'],
    # 'Thuringen': ['1', '2'],
    # 'Catalunya': ['1', '2', '3'],
}

# Display labels used in figure facets and legends.
models_map = {
    'constant': r'Constant $\beta$',
    'temperature': r'$\beta(T)$',
    'seasonal': r'Seasonal $\beta$',
}
periods_map = {
    '1': 'First Wave',
    '2': 'Second Wave',
    '3': 'Third Wave',
}

In [None]:
# Run the process-based model once per (beta variant, region, period) and store
# each result table as a CSV.
#
# Fixes relative to the previous version of this cell:
#   * `beta_type` now follows the loop variable. It used to be hard-coded to
#     'temperature', so every variant ran the same model while still being saved
#     under the 'seasonal' / 'constant' names.
#   * The output file name no longer ends in '_.csv'; it now matches the
#     '{region}_{period}_{model}.csv' pattern the loading cell reads.
# NOTE(review): assumes run_model accepts every entry of `models` as `beta_type`
# — confirm against utils.process_based_model.
for beta in tqdm(models, leave=False):
    for region in tqdm(regions, leave=False):
        for period in tqdm(region_periods[region], leave=False):
            # Parameters are looked up per variant, region and period.
            params_path = f'../data/process_based_model/parameters/{beta}/{region}{period}.pkl'
            res_df = run_model(beta_type=beta,
                               location=region,
                               params_path=params_path,
                               n_iter=20,
                               seed=23)
            res_df.to_csv(f'../results/tables/process_based_model/{region}_{period}_{beta}.csv',
                          index=False)

### Loading model results

In [55]:
# Maps the '*_fit' labels to the model names used in this notebook.
# NOTE(review): not referenced by any other cell visible here.
name_map = {
    'non_t_fit': 'constant',
    'seasonal_fit': 'seasonal',
    't_fit': 'temperature',
}

In [63]:
# Read one result table per (model, region, period), tag each with its display
# labels, and stack everything into a single frame with a datetime `date` column.
# Alternative source (tables written by the model-run cell):
#source_folder = '../results/tables/process_based_model/'
source_folder = '../data/process_based_model/pre_optimized_results'

model_frames = []
for model in models:
    for region in regions:
        for period in region_periods[region]:
            run_results = pd.read_csv(f'{source_folder}/{region}_{period}_{model}.csv')
            model_frames.append(
                run_results.assign(model=models_map[model],
                                   region=region,
                                   period=periods_map[period])
            )

# ignore_index=True gives the same fresh 0..n-1 index as concat + reset_index(drop=True).
model_dfs = pd.concat(model_frames, ignore_index=True)
model_dfs = model_dfs.rename(columns={'Date': 'date'})
model_dfs = model_dfs.assign(date=lambda stacked: pd.to_datetime(stacked.date))

In [65]:
# Read the training-data CSV of every region, tag it with the region name, and
# replace the raw `Date` column by a parsed datetime `date` column.
real_data_df = pd.concat([
    pd.read_csv(f'../data/process_based_model/training_data/{region}Data.csv')
    .assign(region=region)
    for region in regions
])
real_data_df = (
    real_data_df
    .assign(date=lambda observed: pd.to_datetime(observed.Date))
    .drop(columns='Date')
)

### Generating figures

In [66]:
# Join model output with the observed data on date and region, then normalise
# every column name to lower case with spaces replaced by underscores.
full_df = (
    model_dfs
    .merge(real_data_df, on=['date', 'region'])
    .rename(columns=lambda col: col.lower().replace(' ', '_'))
)

In [68]:
# Inspect the merged frame; the rendered output below is truncated to the
# first and last rows.
full_df

Unnamed: 0,unnamed:_0,date,s,e,i,q,r,d,p,total_cases,...,model,region,period,confirmed,deaths,recovered,people_vaccinated,newcases,temperature,active
0,0,2021-12-01,8889711.9,34247.1,32281.6,31948.5,868430.0,34754.3,169200.6,935132.8,...,Seasonal $\beta$,Lombardia,First Wave,935047.0,34396.0,868430.0,8236002.0,2503.0,5.168486,32221.0
1,1,2021-12-02,6049947.0,71538.7,35869.4,27763.9,868459.4,41035.4,2965960.2,937258.7,...,Seasonal $\beta$,Lombardia,First Wave,937667.0,34413.0,868577.0,8239794.0,2620.0,5.202569,34677.0
2,2,2021-12-03,4530894.7,96342.7,42382.2,26434.0,868536.7,44703.2,4451280.5,939673.9,...,Seasonal $\beta$,Lombardia,First Wave,940476.0,34428.0,870664.0,8243555.0,2809.0,4.331253,35384.0
3,3,2021-12-04,3586938.5,115072.0,50661.8,26756.0,868657.9,47128.9,5365358.9,942542.8,...,Seasonal $\beta$,Lombardia,First Wave,943466.0,34447.0,872025.0,8247245.0,2990.0,3.332513,36994.0
4,4,2021-12-05,2917708.5,131224.6,60426.5,28437.1,868839.5,48873.8,6005064.0,946150.4,...,Seasonal $\beta$,Lombardia,First Wave,946094.0,34455.0,872791.0,8249686.0,2628.0,3.198350,38848.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1177,390,2022-12-27,1004825.2,105955.8,266789.2,72723.0,3896753.9,49246.7,4664280.2,4018723.6,...,$\beta(T)$,Lombardia,Second Wave,4052405.0,44593.0,3965068.0,8763439.0,812.0,7.487624,42744.0
1178,391,2022-12-28,1003151.1,106350.7,267190.7,72989.9,3902724.1,49246.7,4658920.8,4024960.7,...,$\beta(T)$,Lombardia,Second Wave,4056786.0,44671.0,3971172.0,8763461.0,4381.0,7.199748,40943.0
1179,392,2022-12-29,1001370.6,106838.0,267559.2,73208.4,3908753.2,49246.7,4653597.9,4031208.3,...,$\beta(T)$,Lombardia,Second Wave,4059723.0,44688.0,3975162.0,8763476.0,2937.0,7.623839,39873.0
1180,393,2022-12-30,999619.8,107244.0,267999.3,73491.7,3914711.4,49246.7,4648261.1,4037449.8,...,$\beta(T)$,Lombardia,Second Wave,4062838.0,44741.0,3980368.0,8763498.0,3115.0,7.888979,37729.0


In [71]:
# Reshape the merged frame to long format: one row per
# (region, date, period, model, variable) with a `value` column and a `std`
# column, plus a `prediction_type` flag telling model output from observed data.
# NOTE: this rebinds `full_df`, so the cell cannot be re-run without re-running
# the merge cell first.
full_df = (
    full_df
    .melt(id_vars=['date', 'period', 'region', 'model'])
    # Keep the model compartments, their std_* companions and the observed series.
    .loc[lambda long_df: long_df.variable.isin([
        'q', 'r', 'd', 'total_cases',
        'std_q', 'std_r', 'std_d', 'std_total_cases',
        'active', 'deaths', 'recovered', 'confirmed',
    ])]
    # Names containing 'std' carry the spread, all others the central value.
    .assign(variable_type=lambda long_df: (long_df.variable
                                           .str.contains('std', regex=False)
                                           .map({True: 'std', False: 'mean'})))
    # Strip the 'std_' prefix so value and spread share one variable name.
    .assign(variable=lambda long_df: long_df.variable.str.split('std_').str[-1])
    .pivot(index=['region', 'date', 'period', 'model', 'variable'],
           columns='variable_type',
           values='value')
    .reset_index()
    # Model compartments are predictions; everything else is observed data.
    .assign(prediction_type=lambda wide_df: (wide_df.variable
                                             .isin(['q', 'd', 'r', 'total_cases'])
                                             .map({True: 'predicted', False: 'true'})))
    # Give each compartment the name of the observed series it is compared with.
    .assign(variable=lambda wide_df: wide_df.variable.replace(
        {'q': 'active', 'd': 'deaths', 'r': 'recovered', 'total_cases': 'confirmed'}
    ))
    .rename(columns={'mean': 'value'})
)

In [72]:
def human_format(num):
    """Format a number with three significant digits and a magnitude suffix.

    The value is rounded to three significant digits and divided by 1000 until
    it is below 1000 (or the largest suffix is reached); trailing zeros and a
    dangling decimal point are stripped, e.g. ``1234 -> '1.23k'``.

    Parameters
    ----------
    num : int or float
        Value to format.

    Returns
    -------
    str
        The formatted value followed by one of '', 'k', 'M', 'B', 'T'.
        Values of 1e15 and above stay in 'T' units (``1e15 -> '1000T'``)
        instead of raising ``IndexError``; infinities are returned as
        ``'inf'`` / ``'-inf'`` instead of looping forever.
    """
    suffixes = ['', 'k', 'M', 'B', 'T']
    num = float('{:.3g}'.format(num))
    # Dividing an infinity never brings it below 1000, so return it directly.
    if num in (float('inf'), float('-inf')):
        return str(num)
    magnitude = 0
    # Stop at the last suffix so the lookup below cannot run out of range.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'),
                         suffixes[magnitude])

In [73]:
def formatter(x):
    """Return the `human_format` label of every value in `x` as a list.

    Used as the `labels` callable of the y scale in the compartment figures.
    """
    return [human_format(tick) for tick in x]

In [76]:
# Compartment trajectories (predicted vs. true) for every configured region.
# Two figures are written per region: index 0 shows active / deaths / recovered,
# index 1 shows confirmed cases.
#
# Fixes relative to the previous version of this cell:
#   * The group index `i` is now part of the file name. Both figures used to be
#     saved as '{region}_model.pdf', so the second overwrote the first.
#   * The loop runs over `regions` from the configuration cell instead of a
#     hard-coded list, so enabling a region there is enough.
for region in regions:
    for i, variables in enumerate([['active', 'deaths', 'recovered'], ['confirmed']]):
        (full_df
          .loc[lambda dd: dd.region==region]
          .loc[lambda dd: dd.variable.isin(variables)]
          .pipe(lambda dd: p9.ggplot(dd)
               + p9.aes('date', 'value', color='variable', group='variable + prediction_type',
                        fill='variable')
               # Line type separates predicted from true series.
               + p9.geom_line(p9.aes(linetype='prediction_type'), size=.8)
               + p9.scale_fill_manual(['#D13E3F', 'black', '#5CBA61'])
               + p9.scale_color_manual(['#D13E3F', 'black', '#5CBA61'])
               # Band of +/- one `std` around the value, predicted rows only.
               + p9.geom_ribbon(p9.aes(ymin='value - std', ymax='value + std'), alpha=.3,
                                color=None, data=dd.loc[dd.prediction_type=='predicted'])
               # NOTE(review): this draws every series again as a dashed line on top
               # of the layer above; kept as-is, confirm whether it is intended.
               + p9.geom_line(p9.aes(y='value'), linetype='dashed', size=.8)
               + p9.scale_x_datetime(breaks=date_breaks('50 days'))
               + p9.scale_y_continuous(labels=formatter)
               # One panel per model/period; one column per period.
               + p9.facet_wrap(['model', 'period'], scales='free', ncol=dd.period.nunique())
               + p9.theme(dpi=125, figure_size=(2.5 * dd.period.nunique(), 5),
                          subplots_adjust={'wspace':0.25, 'hspace': 0.35},
                          axis_text_x=p9.element_text(size=7))
               + p9.labs(x='Date', y='Individuals in compartment', title=region,
                         fill='Compartment', color='Compartment', linetype='')
               )
        ).draw().savefig(f'../results/figures/process_based_model/{region}_model_{i}.pdf',
                         bbox_inches='tight')



In [77]:
# Population per region; used below as the denominator of the per-100,000 rates.
populations = {
    'Lombardia': 10060574,
    # Regions that are currently switched off:
    # 'Barcelona': 2227000,
    # 'Catalunya': 7780479,
    # 'Thuringen': 2392000,
}

In [78]:
# New cases per 100,000, predicted vs. true, one panel per region/model,
# saved to model_fits.pdf.
# For every (region, period, model, prediction_type) group the 'confirmed' series
# is differenced, smoothed with a centred 7-row rolling mean and scaled by the
# population of the group's region. The same is done for value +/- std to get
# the ribbon bounds.
(full_df
 .loc[lambda dd: dd.variable=='confirmed']
 .groupby(['region', 'period', 'model', 'prediction_type'])
 # `new_cases` is passed as a callable while the min/max columns are computed
 # directly from the group frame; all three use the same diff/rolling/scale recipe.
 .apply(lambda dd: dd.assign(new_cases=lambda dd: 
                dd.value.diff().rolling(window=7, center=True).mean() /
                             populations[dd.region.unique()[0]] * 100_000,
        new_cases_max=(dd.value + dd['std']).diff().rolling(window=7, center=True).mean() /
                             populations[dd.region.unique()[0]] * 100_000,
        new_cases_min=(dd.value - dd['std']).diff().rolling(window=7, center=True).mean() /
                             populations[dd.region.unique()[0]] * 100_000
                            )
       )
 # Drop the index produced by groupby/apply.
 .reset_index(drop=True)
 # Replace the prediction-type values by the labels shown in the legend.
 .replace({'predicted': 'Predicted new cases',
           'true': 'True new cases'})
  .pipe(lambda dd: p9.ggplot(dd) 
        + p9.aes('date', 'new_cases', group='period + prediction_type', color='prediction_type',
                 alpha='prediction_type') 
        + p9.geom_line(size=1)
        # Band between the series derived from value - std and value + std.
        # NOTE(review): presumably only meaningful for predicted rows — confirm
        # that `std` is missing for the true series.
        + p9.geom_ribbon(p9.aes(ymax='new_cases_max', ymin='new_cases_min'), alpha=.5,
                         fill='#5CBA61', color=None)
        + p9.scale_color_manual(['#5CBA61', 'black'])
        + p9.scale_alpha_manual([1, .7])
        + p9.facet_wrap(['region', 'model'], ncol=3, scales='free_y')
        + p9.theme(figure_size=(8, 6), dpi=150, subplots_adjust={'wspace': 0.15},
                   legend_position='bottom')
        + p9.scale_x_datetime(breaks=date_breaks('6 months'))
        + p9.labs(x='Date', y='New cases per 100,000', color='')
        # Hide the legend of the alpha aesthetic.
        + p9.guides(alpha=False)

       ).draw().savefig('../results/figures/process_based_model/model_fits.pdf',
                        bbox_inches='tight')

)



In [82]:
# Mean squared error between predicted and true new cases per 100,000, shown as
# one horizontal bar per model and faceted by region/period; saved to
# model_residuals.pdf.
#
# Fix relative to the previous version of this cell: the axis labels were
# attached to the wrong aesthetics. coord_flip() draws the `x` aesthetic (model)
# on the vertical axis and the `y` aesthetic (residuals) on the horizontal one,
# so 'Mean Squared Error' must label `y`, not `x`.
(full_df
 .loc[lambda dd: dd.variable=='confirmed']
 .groupby(['region', 'period', 'model', 'prediction_type'])
  # Difference of the 'confirmed' series, centred 7-row rolling mean, scaled by
  # the population of the group's region.
  .apply(lambda dd: dd.assign(new_cases=lambda dd: 
                 dd.value.diff().rolling(window=7, center=True).mean() 
                              / populations[dd.region.unique()[0]] * 100_000))
  .reset_index(drop=True)
  # diff() and the rolling window leave missing values at the group edges.
  .loc[lambda dd: dd.new_cases.notna()]
  # Put predicted and true side by side so they can be subtracted row-wise.
  .pivot(index=['region', 'date', 'period', 'model', 'variable'], columns='prediction_type', values='new_cases')
  # Despite the name, `residuals` holds the squared error; its mean is the MSE.
  .assign(residuals=lambda dd: (dd['predicted'] - dd['true']).pow(2))
  .reset_index()
  .groupby(['region', 'model', 'period'], as_index=False)
  .residuals.mean()
  .pipe(lambda dd: p9.ggplot(dd) 
        + p9.aes('model', 'residuals', fill='model')
        + p9.geom_col()
        + p9.facet_wrap(['region', 'period'], ncol=3, scales='free_x')
        + p9.scale_fill_manual(['#5CBA61', 'gray', 'gray'])
        + p9.coord_flip()
        + p9.theme(subplots_adjust={'hspace': 0.45}, figure_size=(6, 8))
        + p9.labs(x='', y='Mean Squared Error')
        + p9.guides(fill=False)
).draw().savefig('../results/figures/process_based_model/model_residuals.pdf')
)

