# Model Report

In [1]:
import warnings
from pathlib import Path
from time import time

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotnine as gg
import pymc3 as pm
import seaborn as sns
from src.command_line_interfaces import simulation_based_calibration_cli as sbc_cli
from src.context_managers import set_directory
from src.data_processing import common as dphelp
from src.modeling import pymc3_analysis as pmanal
from src.modeling import pymc3_sampling_api as pmapi
from src.modeling import sampling_pymc3_models as sampling
from src.modeling.sampling_pymc3_models import SamplingArguments
from src.modeling.simulation_based_calibration_helpers import SBCFileManager
from src.plot.color_pal import SeabornColor

notebook_tic = time()

warnings.simplefilter(action="ignore", category=UserWarning)

gg.theme_set(gg.theme_classic())
%config InlineBackend.figure_format = "retina"

RANDOM_SEED = 847
np.random.seed(RANDOM_SEED)

pymc3_cache_dir = Path("..", "models", "modeling_cache", "pymc3_model_cache")

Parameters for papermill:

- `MODEL`: which model was tested
- `SBC_RESULTS_DIR`: directory containing results of many rounds of SBC
- `NUM_SIMULATIONS`: the number of simulations; will be used to check that all results are found

## Setup

### Papermill parameters

In [2]:
# Placeholder defaults for the papermill parameters; a later cell assigns the
# actual values for the run. Note that the default `NUM_SIMULATIONS` of -1 does
# not pass the positivity check performed further below.
MODEL = ""  # which model was tested
SBC_RESULTS_DIR = ""  # directory containing results of many rounds of SBC
NUM_SIMULATIONS = -1  # the number of simulations

In [3]:
# Parameters
# Values used for this run; these re-assign the placeholder defaults set in the
# previous cell (presumably injected by papermill -- confirm against the pipeline).
MODEL = "crc_model_one"
SBC_RESULTS_DIR = "temp/crc_model_one"
NUM_SIMULATIONS = 5

### Prepare and validate papermill parameters

Look up the model class specified by the `MODEL` parameter.

In [4]:
# Map the `MODEL` name onto its `ModelOption` member, then fetch the matching class.
model_option = sbc_cli.ModelOption[MODEL]
ModelClass = sbc_cli.get_model_class(model_option)

Check that the directory passed for the results of the rounds of SBC exists and is a directory.

In [5]:
# `SBC_RESULTS_DIR` is resolved two directory levels above the working directory.
sbc_results_dir = Path("../..", SBC_RESULTS_DIR)

# Check existence before directory-ness so that a missing path and a path that
# is not a directory are reported as two distinct failures.
assert sbc_results_dir.exists(), f"SBC results path '{sbc_results_dir}' does not exist."
assert sbc_results_dir.is_dir(), f"SBC results path '{sbc_results_dir}' is not a directory."

Confirm that there is a positive number of simulations.

In [6]:
# Report the offending value so that a bad parameter is easy to diagnose when
# the notebook is executed non-interactively.
assert NUM_SIMULATIONS > 0, f"NUM_SIMULATIONS must be positive, got {NUM_SIMULATIONS}."

## Read in all results

In [7]:
# Visit the subdirectories in sorted order: `Path.iterdir()` yields entries in
# arbitrary order, and `res` (kept after the loop for inspection) should be the
# same on every run.
num_sbc_results_found = 0
for sbc_dir in sorted(sbc_results_dir.iterdir()):
    sbc_fm = SBCFileManager(sbc_dir)
    # Stop at the first subdirectory whose output is incomplete.
    if not sbc_fm.all_data_exists():
        raise Exception(f"Not all output from '{sbc_fm.dir.name}' exist.")
    res = sbc_fm.get_sbc_results()
    num_sbc_results_found += 1

# Use `NUM_SIMULATIONS` to confirm that the results of every simulation were
# found; this also guarantees that `res` is defined for the cells below.
assert num_sbc_results_found == NUM_SIMULATIONS, (
    f"Expected results from {NUM_SIMULATIONS} simulations, "
    f"but found {num_sbc_results_found} in '{sbc_results_dir}'."
)

In [8]:
# `res` is whatever the final iteration of the loading loop assigned, so this
# summary comes from a single results subdirectory, not from all of them.
res.posterior_summary

Unnamed: 0_level_0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
parameter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
μ_g,0.144,0.338,-0.519,0.731,0.011,0.008,966.0,902.0,
μ_α[0],0.571,0.319,0.040,1.232,0.010,0.007,1023.0,906.0,
μ_α[1],1.642,0.460,0.760,2.475,0.016,0.011,875.0,947.0,
μ_α[2],1.486,0.397,0.709,2.200,0.013,0.009,1009.0,963.0,
μ_α[3],-1.373,0.534,-2.309,-0.304,0.017,0.012,1037.0,852.0,
...,...,...,...,...,...,...,...,...,...
μ[146],1.477,0.236,1.039,1.906,0.008,0.005,941.0,944.0,
μ[147],1.365,0.227,0.957,1.800,0.007,0.005,923.0,941.0,
μ[148],2.071,0.233,1.614,2.505,0.007,0.005,1060.0,895.0,
μ[149],1.487,0.231,1.069,1.924,0.007,0.005,971.0,905.0,


---

In [9]:
# Stop the timer started in the first cell and report the elapsed time in minutes.
notebook_toc = time()
elapsed_minutes = (notebook_toc - notebook_tic) / 60
print(f"execution time: {elapsed_minutes:.2f} minutes")

execution time: 0.03 minutes


In [10]:
# Record the environment the notebook ran in: date of last update, Python and
# IPython versions, machine details, hostname, git branch, and the versions of
# the imported packages.
%load_ext watermark
%watermark -d -u -v -iv -b -h -m

Last updated: 2021-04-01

Python implementation: CPython
Python version       : 3.9.2
IPython version      : 7.21.0

Compiler    : GCC 9.3.0
OS          : Linux
Release     : 3.10.0-1062.el7.x86_64
Machine     : x86_64
Processor   : x86_64
CPU cores   : 32
Architecture: 64bit

Hostname: compute-a-16-163.o2.rc.hms.harvard.edu

Git branch: simulation-based-calibration

matplotlib: 3.3.4
pandas    : 1.2.3
plotnine  : 0.7.1
numpy     : 1.20.1
pymc3     : 3.11.1
seaborn   : 0.11.1
arviz     : 0.11.2

