# SEIRX model runs for prevention measures investigation in schools

In [1]:
import pandas as pd
import numpy as np
from os.path import join
import os
import sys
import json

from scseirx import analysis_functions as af

currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir) 

import data_creation_functions as dcf

# parallelisation functionality
from multiprocess import Pool
import psutil
from tqdm import tqdm

## Simulation settings

In [2]:
# Baseline prevention measures that are always in place. The measure
# screening simulations add further measures on top of these.
with open('params/intervention_screening_delta_measures.json', 'r') as measures_file:
    measures = json.load(measures_file)

# Simulation parameters, namely the
# - base transmission risk (calibrated from household transmissions)
# - subclinical transmission modifier (literature value)
# - exposure duration, time until symptoms and infection duration
#   (literature values)
# - age symptom discount (fit to empirical observations)
# - age transmission discount (from the calibration)
# - weights of the transmission risks for intermediate & far contacts
#   (from the calibration)
with open('params/intervention_screening_delta_simulation_parameters.json', 'r') as parameter_file:
    simulation_params = json.load(parameter_file)

# Characteristics (# classes, # students / class, # teachers) of the
# "average" school for every school type. They were determined in interviews
# with Austrian teachers and from statistics about Austrian schools
# (year 2017/18, page 10: https://www.bmbwf.gv.at/Themen/schule/schulsystem/gd.html)
# NOTE: "students" is the number of students per class, not per school.
with open('params/intervention_screening_delta_school_characteristics.json', 'r') as characteristics_file:
    school_characteristics = json.load(characteristics_file)

## Simulation function

In [3]:
def run(params):
    '''
    Runs an ensemble of simulations for one parameter combination and labels
    the result with the parameter values that were used. To be run in parallel
    on many workers. Note: the I/O paths (dst, contact_network_src) and the
    model configuration (measures, simulation_params, school_characteristics)
    are read from module-level variables, because only the parameter values
    that are being screened should be passed to the function via the parallel
    processing interface.

    Parameters:
    -----------
    params : iterable
        Iterable with exactly ten entries, in this order: N_runs, school_type,
        index_case, ttype, s_screen_interval, t_screen_interval, student_mask,
        teacher_mask, half_classes, ventilation_mod. All of them are forwarded
        to dcf.run_ensemble.

    Returns:
    --------
    row : return value of dcf.run_ensemble
        The ensemble results with one additional entry per screened parameter
        (school_type, index_case, test_type, student_screen_interval,
        teacher_screen_interval, student_mask, teacher_mask, half_classes,
        ventilation_mod).
        NOTE(review): judging from the notebook output this is a DataFrame
        with one row per simulation run -- confirm against dcf.run_ensemble.
    '''

    # extract the simulation parameters from the parameter list
    N_runs, school_type, index_case, ttype, s_screen_interval, \
        t_screen_interval, student_mask, teacher_mask, half_classes, \
        ventilation_mod = params

    # Make sure the sub-directory for this school type exists below dst.
    # exist_ok=True makes the call safe if another worker (or an earlier run)
    # already created it; missing parent directories are created as well.
    os.makedirs(join(dst, school_type), exist_ok=True)

    # run the ensemble with the given parameter combination and school type
    row = dcf.run_ensemble(N_runs, school_type, measures,
            simulation_params, school_characteristics, contact_network_src,
            dst, index_case, ttype, s_screen_interval, t_screen_interval,
            student_mask, teacher_mask, half_classes, ventilation_mod)

    # label the results with the parameter combination they belong to
    row['school_type'] = school_type
    row['index_case'] = index_case
    row['test_type'] = ttype
    row['student_screen_interval'] = s_screen_interval
    row['teacher_screen_interval'] = t_screen_interval
    row['student_mask'] = student_mask
    row['teacher_mask'] = teacher_mask
    row['half_classes'] = half_classes
    row['ventilation_mod'] = ventilation_mod

    return row

## Single measures

### Screening parameters

In [4]:
# number of simulation runs per ensemble
N_runs = 500
# school types for which the single measures are screened
school_types = ['primary', 'primary_dc', 'lower_secondary',
                'lower_secondary_dc', 'upper_secondary', 'secondary']
# the remaining screening parameters are stored in a table that is read from
# file; every row of the table contributes one entry per school type below
screening_params = pd.read_csv(join('screening_params', 'single_measures.csv'))

# One parameter tuple per (school type, table row) pair. The entries are in
# the order in which run() unpacks them; the test type is the same for all
# combinations.
params_single = [
    (N_runs, school_type,
     measure['index_case'],
     'same_day_antigen',
     dcf.format_none_column(measure['s_screen_interval']),
     dcf.format_none_column(measure['t_screen_interval']),
     measure['s_mask'],
     measure['t_mask'],
     measure['half_classes'],
     measure['ventilation_modification'])
    for school_type in school_types
    for _, measure in screening_params.iterrows()
]

print('there are {} different parameter combinations'
      .format(len(params_single)))

there are 108 different parameter combinations


### Run simulations

In [7]:
%%time
# paths for data I/O. run() reads both names as module-level variables, so
# they are defined before the worker pool is created.
contact_network_src = '../../../data/contact_networks/representative_schools'
dst = '../../../data/intervention_measures_delta/simulation_results'

number_of_cores = psutil.cpu_count(logical=True)

# Collect the results of every ensemble in a list and concatenate them once
# at the end: DataFrame.append() was removed in pandas 2.0 and copied the
# whole table on every call. Using the pool as a context manager shuts the
# parallel workers down even if one of the simulations raises.
ensemble_frames = []
with Pool(number_of_cores) as pool:
    for ensmbl_results in tqdm(pool.imap_unordered(func=run,
                        iterable=params_single), total=len(params_single)):
        ensemble_frames.append(ensmbl_results)

results_single = pd.concat(ensemble_frames).reset_index(drop=True)

# move the columns that identify the parameter combination to the front
index_cols = ['school_type', 'index_case', 'test_type',
              'student_screen_interval', 'teacher_screen_interval',
              'student_mask', 'teacher_mask', 'half_classes',
              'ventilation_mod']
other_cols = [c for c in results_single.columns if c not in index_cols]
results_single = results_single[index_cols + other_cols]

results_single.to_csv(join(dst,'intervention_measures_single_{}.csv'\
                   .format(N_runs)), index=False)
results_single.head(3)

100%|██████████| 108/108 [8:30:29<00:00, 283.60s/it]   


CPU times: user 4.97 s, sys: 1.12 s, total: 6.09 s
Wall time: 8h 30min 30s


Unnamed: 0,school_type,index_case,test_type,student_screen_interval,teacher_screen_interval,student_mask,teacher_mask,half_classes,ventilation_mod,N_diagnostic_tests,...,run,seed,student_family_member_transmissions,student_student_transmissions,student_teacher_transmissions,teacher_student_transmissions,teacher_teacher_transmissions,tests_per_day_per_agent,transmissions,undetected_infections
0,primary,student,same_day_antigen,,,False,False,True,1.0,0.0,...,1.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,6.0,0.0
1,primary,student,same_day_antigen,,,False,False,True,1.0,0.0,...,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,primary,student,same_day_antigen,,,False,False,True,1.0,15.0,...,3.0,3.0,8.0,3.0,3.0,14.0,4.0,0.001158,38.0,0.0


## Measure packages

### Screening parameters

In [19]:
# number of simulation runs per ensemble
N_runs = 500
# school types for which the measure packages are screened, listed in the
# reverse of the order used for the single measures
school_types = ['primary', 'primary_dc', 'lower_secondary',
                'lower_secondary_dc', 'upper_secondary', 'secondary']
school_types = school_types[::-1]
# the remaining screening parameters are stored in a table that is read from
# file; every row of the table contributes one entry per school type below
screening_params = pd.read_csv(join('screening_params', 'measure_packages.csv'))

# One parameter tuple per (school type, table row) pair. The entries are in
# the order in which run() unpacks them; the test type is the same for all
# combinations.
params_packages = [
    (N_runs, school_type,
     package['index_case'],
     'same_day_antigen',
     dcf.format_none_column(package['s_screen_interval']),
     dcf.format_none_column(package['t_screen_interval']),
     package['s_mask'],
     package['t_mask'],
     package['half_classes'],
     package['ventilation_modification'])
    for school_type in school_types
    for _, package in screening_params.iterrows()
]

print('there are {} different parameter combinations'
      .format(len(params_packages)))

there are 120 different parameter combinations


### Run simulations

In [18]:
%%time
# paths for data I/O. run() reads both names as module-level variables, so
# they are defined before the worker pool is created.
contact_network_src = '../../../data/contact_networks/representative_schools'
dst = '../../../data/intervention_measures_delta/simulation_results'

# Leave two logical cores free for other work, but always use at least one
# worker: psutil.cpu_count() may return None and the subtraction would
# otherwise yield a non-positive pool size on machines with two or fewer
# logical cores, which Pool rejects.
number_of_cores = max(1, (psutil.cpu_count(logical=True) or 1) - 2)

# Collect the results of every ensemble in a list and concatenate them once
# at the end: DataFrame.append() was removed in pandas 2.0 and copied the
# whole table on every call. Using the pool as a context manager shuts the
# parallel workers down even if one of the simulations raises.
ensemble_frames = []
with Pool(number_of_cores) as pool:
    for ensmbl_results in tqdm(pool.imap_unordered(func=run,
                    iterable=params_packages), total=len(params_packages)):
        ensemble_frames.append(ensmbl_results)

results_packages = pd.concat(ensemble_frames).reset_index(drop=True)

# move the columns that identify the parameter combination to the front
index_cols = ['school_type', 'index_case', 'test_type',
              'student_screen_interval', 'teacher_screen_interval',
              'student_mask', 'teacher_mask', 'half_classes',
              'ventilation_mod']
other_cols = [c for c in results_packages.columns if c not in index_cols]
results_packages = results_packages[index_cols + other_cols]

# NOTE(review): unlike the single-measure results, this file name has an
# underscore before ".csv" -- kept unchanged because other code may rely on
# the name.
results_packages.to_csv(join(dst,'intervention_measures_packages_{}_.csv'\
                   .format(N_runs)), index=False)
results_packages.head(3)

100%|██████████| 36/36 [1:21:09<00:00, 135.27s/it]


CPU times: user 1.11 s, sys: 184 ms, total: 1.3 s
Wall time: 1h 21min 10s


Unnamed: 0,school_type,index_case,test_type,student_screen_interval,teacher_screen_interval,student_mask,teacher_mask,half_classes,ventilation_mod,N_diagnostic_tests,...,run,seed,student_family_member_transmissions,student_student_transmissions,student_teacher_transmissions,teacher_student_transmissions,teacher_teacher_transmissions,tests_per_day_per_agent,transmissions,undetected_infections
0,lower_secondary_dc,student,same_day_antigen,,,False,True,False,0.36,1.0,...,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.000372,1.0,0.0
1,lower_secondary_dc,student,same_day_antigen,,,False,True,False,0.36,1.0,...,2.0,2.0,3.0,0.0,0.0,0.0,0.0,0.000372,3.0,0.0
2,lower_secondary_dc,student,same_day_antigen,,,False,True,False,0.36,0.0,...,3.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


## Aggregate results

In [20]:
# Combine the ensemble results of every school type into one file per school
# type. Source and destination are the same directory, i.e. the combined
# files are written next to the simulation results they are read from.
src = '../../../data/intervention_measures_delta/simulation_results'
dst = src
for stype in school_types:
    print(stype)
    # NOTE(review): presumably loads all ensemble results stored for this
    # school type below src -- confirm against dcf.get_data
    data = dcf.get_data(stype, src)
    # start with an empty "measure" column. NOTE(review): the return values of
    # the two dcf.set_* calls are not used, so they presumably fill the column
    # in place -- confirm against the dcf implementation
    data['measure'] = np.nan
    dcf.set_individual_measures(data)
    dcf.set_measure_packages(data)
    data.to_csv(join(dst, '{}_combined_ensembles.csv'.format(stype)), index=False)

secondary
upper_secondary
lower_secondary_dc
lower_secondary
primary_dc
primary


## Extract observables

In [21]:
dst = '../../../data/intervention_measures_delta/simulation_results'

turnovers = {'same':0, 'one':1, 'two':2, 'three':3}
bmap = {True:'T', False:'F'}

params = params_single + params_packages

for stype in school_types:
    print(stype)
    spath_ensmbl = join(dst,'{}'.format(stype))
    
    observables = pd.DataFrame()
    for N_runs, pstype, index_case, ttype, s_screen_interval, t_screen_interval, \
        student_mask, teacher_mask, half_classes, ventilation_mod in params:
        
        if pstype != stype:
            continue
            
        turnover, _, test = ttype.split('_')
        turnover = turnovers[turnover]
        
        measure_string = '{}_test-{}_turnover-{}_index-{}_tf-{}_sf-{}_tmask-{}'\
        .format(stype, test, turnover, index_case[0], t_screen_interval,
                s_screen_interval, bmap[teacher_mask]) +\
                '_smask-{}_half-{}_vent-{}'\
        .format(bmap[student_mask], bmap[half_classes], ventilation_mod)
        
        ensmbl = pd.read_csv(join(spath_ensmbl, measure_string + '.csv'))
        ensmbl = ensmbl.drop(columns=['Unnamed: 0'])
        
        row = {'test_type':test,
               'turnover':turnover,
               'index_case':index_case,
               'student_screen_interval':s_screen_interval,
               'teacher_screen_interval':t_screen_interval,
               'student_mask':student_mask,
               'teacher_mask':teacher_mask,
               'half_classes':half_classes,
               'ventilation_modification':ventilation_mod}
        
        N = len(ensmbl)
        ensmbl = ensmbl[ensmbl['infected_agents'] > 0]
        N_red = len(ensmbl)
        row.update({'no_outbreak':(N - N_red) / N})
        for col in ensmbl.columns:
            row.update(af.get_statistics(ensmbl, col))
        observables = observables.append(row, ignore_index=True)

    # calculate the number of tests per day and agent in the school
    observables['N_tests_per_day_per_agent'] = \
    (observables['N_diagnostic_tests_median'] + observables['N_preventive_tests_median']) /\
    observables['duration_median'] / observables['N_school_agents_median']
    
    screen_cols = ['test_type', 'turnover', 'index_case', 'student_screen_interval',
            'teacher_screen_interval', 'student_mask', 'teacher_mask',
            'half_classes', 'ventilation_modification']

    other_cols = [c for c in observables.columns if c not in screen_cols]
    observables = observables[screen_cols + other_cols]
    
    for col in ['infected_teachers_median', 'infected_students_median', 
                        'infected_family_members_median',
                        'infected_teachers_0.90', 'infected_students_0.90', 
                        'infected_family_members_0.90']:
        observables[col] = observables[col].round(0).astype(int)
    
    observables.to_csv(join(dst, '{}_observables'.format(stype) + '.csv'))

secondary
upper_secondary
lower_secondary_dc
lower_secondary
primary_dc
primary
