# Generate the Dataset

We want to predict possible future evolutions of the pandemic, given the simulator parameters that have been able to reproduce, more or less, the trend of the real data over the past year.
These parameters have been computed in the previous notebook.

In [None]:
import json

import covasim as cv
import numpy as np
import pandas as pd
import seaborn as sns
from pyDOE import lhs

from src.data import get_regional_data
from src.interventions import get_sampling_interventions

# Read back the calibrated parameters that the previous notebook stored on disk.
with open('../res/parameters.json') as params_file:
    j = json.load(params_file)

initial_params = j['initial_params']
intervention_params = j['intervention_params']

# Product of the simulator's `pop_size` and `pop_scale` parameters.
simulated_population = initial_params['pop_size'] * initial_params['pop_scale']
# 4.46e6 is presumably the real population of the region -- TODO confirm.
# The ratio between it and the simulated population is forwarded to the loader.
df = get_regional_data(4.46e6 / simulated_population)

## Latin-Hypercube Sampling

* beta
* pop_infected
* init_zone
* actuated_zone
* num_tests

TODO: Description

In [None]:
# Shape of the design. Each row is one sampled scenario; the simulated horizon
# below grows with the number of columns, so each column presumably stands for
# one consecutive time interval -- TODO confirm.
N_INTERVALS = 11
N_SAMPLES = 5
# Zone labels, in the order the unit interval is partitioned into equal bins.
ZONES = ('W', 'Y', 'O', 'R')

# pyDOE's lhs draws from numpy's global RNG: seed it so that re-running this
# cell (and therefore the whole dataset generation) yields the same design.
np.random.seed(0)
unit_samples = pd.DataFrame(lhs(n=N_INTERVALS, samples=N_SAMPLES))

# Bin every value of [0, 1) into one of the zones; the `min` keeps a value of
# exactly 1.0 in the last bin instead of turning it into a missing label.
samples = unit_samples.apply(
    lambda column: column.map(lambda v: ZONES[min(int(len(ZONES) * v), len(ZONES) - 1)])
)
samples

We will consider a shorter time interval: about a month, centered on today.

In [None]:
# Length of one interval, in days; the day labels below span two of them.
time_interval = 14
# Simulation outputs tracked for every labelled day.
result_cols = ['n_severe', 'n_critical', 'cum_diagnoses', 'cum_deaths']
# Day labels 1 .. 2 * time_interval (inclusive).
sim_days = range(1, 2 * time_interval + 1)
# Column names grouped by day: every tracked output for day 1, then day 2, ...
data_cols = [
    f'{col}{day}'
    for day in sim_days
    for col in result_cols
]

In [None]:
# 251 = (2020-11-01) - (2020-02-24)
# The horizon covers those 251 days plus one `time_interval`-long period per
# column of the sampled design.
initial_params['n_days'] = 251 + samples.shape[1] * time_interval

# One pandas Series per sampled scenario; the next cell stacks them into a frame.
data = []
for idx, s in samples.iterrows():
    intervs = get_sampling_interventions(s, intervention_params, time_interval)
    # The row index doubles as the random seed of the simulation.
    sim = cv.Sim(pars={**initial_params, 'rand_seed': idx}, interventions=intervs, datafile=df)
    sim.run()
    # Interleave the tracked outputs day by day (every result_cols entry for one
    # day, then the next day, ...), i.e. the same ordering as `data_cols`.
    raw = [sim.results[col].values for col in result_cols]
    raw = [item for daily in zip(*raw) for item in daily]
    # Keep exactly one trailing value per entry of `data_cols`, so the column
    # names assigned afterwards line up with the data.
    # NOTE(review): this takes the window from the end of the run -- confirm it
    # is the intended period.
    data.append(pd.Series(raw[-len(data_cols):]))
    print(sim)

In [None]:
# Wide frame: raise the display limit so every column is shown.
pd.options.display.max_columns = 120

# Stack the per-scenario Series as rows (one row per scenario, one column per
# day/output pair) and label the columns.
# The frame gets its own name on purpose: `df` holds the regional data passed
# as `datafile` to the simulations, and overwriting it here would make a re-run
# of the simulation cell use the wrong data.
sim_results = pd.concat(data, axis=1).transpose()
sim_results.columns = data_cols
sim_results

Finally, we can collect the results in a dataframe.

In [None]:
# TODO: build the final `dataset` frame (the expression below is still a
# placeholder), then save and display it.
# NOTE(review): the parameters are read from '../res/' earlier in this notebook,
# while the path below is relative to the current directory -- confirm the
# intended output location.
#dataset = pd.???
#dataset.to_csv('res/dataset.csv')
#dataset
