# Analyze simulation results based on country-wide network
* underlying network: entire country = Austria
* analyze effects (lost patients, average displacement and free capacity) in single states and on country level
* patient steps are restricted to a maximum distance (`max_dist`) from the starting doctor
* the simulation makes up to `max_dist_trials` attempts to find a doctor within reach; if none is found, it falls back to some other doctor

In [1]:
import pandas as pd
import numpy as np
import os
from os.path import join

In [2]:
def analyze_results(iterations,shocks,remov,alpha,max_steps,threshold,kd,max_dist,max_dist_trials,min_pats):
    '''
    Combine the simulation output of all specialties into one normalised table.

    Steps:
    * read the doctor-info file of every specialty and sum patients, capacity
      and number of doctors per state (state = first digit of `gemeinde`)
    * write these per-state totals to an Excel file in `results/`
    * read the simulation result file of every specialty, renumber the runs
      and express lost patients / free capacity in % of the state and
      country totals
    * add the share of removed doctors and the share of the initially free
      state capacity that has been filled up

    Parameters
    ----------
    iterations : int
        Number of simulation runs; part of the result file name and used to
        renumber the `run` column.
    shocks, remov, alpha, max_steps, kd, max_dist, max_dist_trials, min_pats
        Simulation settings; only used to build the result file name, so
        they must be formatted exactly like in the file name (e.g. 0.0 vs 0).
    threshold : float
        Capacity threshold; part of both the doctor-info and the result
        file names.

    Returns
    -------
    pandas.DataFrame
        One row per simulation record with the additional columns `state`
        (name), `specialty`, `lost_patients_country`, `lost_patients_state`,
        `num_unique_docs`, `perc_docs_removed` and `remaining_FC_filled`;
        `free_capacity_country` / `free_capacity_state` are converted to %.

    Errors raised by `pandas.read_csv` / `DataFrame.to_excel` (e.g. missing
    input files) are not handled here.
    '''

    # list of all specialties
    doctors = ['IM','KI','PSY','ORTR','RAD','DER','URO','HNO','CH','NEU','AU','GGH','AM']
    states = {'state':['Burgenland','Kärnten','Niederösterreich','Oberösterreich','Salzburg','Steiermark',
                       'Tirol','Vorarlberg','Wien'],
             'state_id':[1,2,3,4,5,6,7,8,9]}
    network = 'Österreich' # what is the underlying network for doc connections

    # information selection criteria
    patient_type = 'total'
    capacity_type = 'hour-based'
    timeframe = 'quarterly'

    ### dataframe with total patient numbers and unique doc numbers per state and per specialty
    N = pd.DataFrame.from_dict(states)
    N.set_index('state',inplace=True)

    for specialization in doctors:
        # the file does not depend on the state, so read it once per specialty
        dinfo = pd.read_csv('data/doctor_info_bez='+network+'_spec='+specialization+'_ptype='+patient_type+
                            '_ctype='+capacity_type+'_tf='+timeframe+'_th='+str(threshold)+'.csv',
                            usecols=['number_of_patients','capacity','gemeinde'])
        gemeinde = dinfo.gemeinde.astype(str)
        # doctors of each state, in the row order of N
        docs_by_state = [dinfo[gemeinde.str.startswith(str(state_id))] for state_id in N.state_id]
        # assign whole columns at once so every column gets the dtype of its
        # values (no integer placeholder that has to be upcast afterwards)
        N[specialization+'_total'] = [docs.number_of_patients.sum() for docs in docs_by_state]
        N[specialization+'_cap_total'] = [docs.capacity.sum() for docs in docs_by_state]
        N[specialization+'_unique_docs'] = [len(docs) for docs in docs_by_state]

    N.to_excel('results/states_doc_info_{}_{}_{}.xlsx'.format(patient_type, capacity_type, timeframe))

    # Check results for single states based on entire network
    # * save all simulation data in one dataframe for seaborn plots
    # * sum up all lost patients from previous shocks (only possible on
    #   country-level, missing info on patients residence)

    # columns that come first in the result (created empty if a file lacks them)
    leading_columns = ['run','shock','avg_displacement','N_lost_patients','N_lost_patients_summed',
                       'free_capacity_country','free_capacity_state','disconnected_capacity',
                       'state','specialty','lost_patients_country','incorrect_displacements',
                       'N_lost_patients_state_summed']
    state_frames = []

    ### for all specialists and states, add info on LP and FC
    for doc in doctors:
        ### read in the data file
        sim_params = 'patient_dynamics_iter{}_shocks{}_remove{}_alpha{}_maxs{}_th{}_kd{}_maxdist{}_maxdisttrials{}_minpats{}_{}_Final_.csv'\
                    .format(iterations, shocks, remov, alpha, max_steps, threshold, kd,max_dist,max_dist_trials,min_pats,doc)
        dta_load = pd.read_csv('results/'+sim_params)
        dta_load = dta_load.reset_index(drop=True)

        ### change SIM run number: consecutive blocks of 9 states * max shock rows
        rows_per_run = 9*int(dta_load.shock.max())
        if rows_per_run > 0:
            row_pos = np.arange(len(dta_load))
            # half-open blocks: only rows of the first `iterations` runs are
            # relabelled, no row is shared between two neighbouring runs
            in_runs = row_pos < iterations*rows_per_run
            dta_load.loc[in_runs,'run'] = row_pos[in_runs]//rows_per_run + 1

        country_patients = N.loc[:,doc+'_total'].sum()
        country_capacity = N.loc[:,doc+'_cap_total'].sum()

        for state_name, state_id in N.state_id.items():
            dta2 = dta_load[dta_load.state==state_id].copy()
            dta2['state'] = state_name
            dta2['specialty'] = doc
            dta2['lost_patients_country'] = dta2.N_lost_patients_summed/country_patients*100
            dta2['lost_patients_state'] = dta2.N_lost_patients_state_summed/N.loc[state_name,doc+'_total']*100
            dta2['free_capacity_country'] = dta2.free_capacity_country/country_capacity*100
            dta2['free_capacity_state'] = dta2.free_capacity_state/N.loc[state_name,doc+'_cap_total']*100
            state_frames.append(dta2)

    # one concat instead of growing the frame inside the loop
    dta = pd.concat(state_frames,ignore_index=True)
    dta = dta.reindex(columns=leading_columns+[col for col in dta.columns if col not in leading_columns])

    ### add number of unique doctors per specialty
    docs_per_specialty = {doc: N.loc[:,doc+'_unique_docs'].sum() for doc in doctors}
    dta['num_unique_docs'] = dta.specialty.map(docs_per_specialty)

    ### the % of removed doctors based on total number
    dta['perc_docs_removed'] = dta.shock / dta.num_unique_docs * 100

    ### calculate remaining free capacity filling up
    # largest free capacity observed for each (specialty, state) pair
    max_free = dta.groupby(['specialty','state'],sort=False)['free_capacity_state'].transform('max')
    # free capacity in % of that maximum; undefined (NaN) if the maximum is not positive
    free_share = (dta.free_capacity_state * (100/max_free)).where(max_free > 0)
    dta['remaining_FC_filled'] = 100-free_share
    return dta

## Set parameters of SIM to analyze

In [3]:
# --- size of the experiment ---
iterations = 100        # simulation runs
shocks = 5000           # upper limit of doctors that get removed
remov = 1               # doctors removed per step

# --- patient behaviour ---
alpha = 0.0             # teleportation probability (must match the file name: try 0 or 0.0 on error)
max_steps = 10          # steps a patient may take before counting as lost
max_dist = 100          # maximum travelling distance
max_dist_trials = 10    # attempts to find a doc within max_dist before some other doc is used

# --- network construction ---
threshold = 0.9         # capacity threshold for calculation
kd = 1                  # keep disconnected doctors (0 = no, 1 = yes)
min_pats = 2            # minimum number of patients for a valid connection in the adj. matrix

In [4]:
### analyse the simulation output that belongs to the selected parameter set
res = analyze_results(
    iterations=iterations,
    shocks=shocks,
    remov=remov,
    alpha=alpha,
    max_steps=max_steps,
    threshold=threshold,
    kd=kd,
    max_dist=max_dist,
    max_dist_trials=max_dist_trials,
    min_pats=min_pats,
)

In [5]:
### store the combined results table as CSV, leaving out the row index
res.to_csv(path_or_buf='results/DF_results_Final.csv', index=False)