In [1]:
import pandas as pd
import numpy as np

### Overview

Compare output.txt with the printed compartment numbers.


1. number of infected equal for each timepoint -> done (failing)
2. Nobody is in schools/... at unreasonable time points (e.g. 3 o'clock in the morning) -> done (not failing, but sometimes unreasonable)

3. Infected severe are in hospital (ID 05) -> done (passed)
4. Infected critical are at ICU (ID 06) -> done (passed)
5. Dead agents are at the graveyard (ID 10) -> done (passed)

6. Time since transmission >= 0 for all infected/exposed -> is true if 1. is not failing
7. Search for maximal transmission value -> done
8. Time since transmission is either increasing over time or set to 0 if person is recovered : not systematically checked, but some individual agents passed

### Import data

In [2]:
output_path = '../../../output/output3.txt'

In [3]:
# Rows differ in length: count the space-separated fields on every line so
# the widest row can later define the number of columns.
with open(output_path, 'r') as temp_f:
    col_count = [line.count(" ") + 1 for line in temp_f]

In [4]:
max(col_count)

8462

In [5]:
# placeholder integer column labels: 0 .. max(col_count) - 1
column_names = list(range(max(col_count)))

In [24]:
# Ragged, space-separated file: supplying explicit column labels pads short
# rows with NaN; column 0 is kept as text.
output_df = pd.read_csv(
    output_path,
    sep=" ",
    header=None,
    names=column_names,
    dtype={0: 'str'},
)

In [25]:
output_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,8452,8453,8454,8455,8456,8457,8458,8459,8460,8461
0,1000,337,0.0,0,1.0,0.0,2.0,1.0,141.0,0.0,...,,,,,,,,,,
1,301,337,0.0,0,1.0,0.0,2.0,0.0,3.0,0.0,...,,,,,,,,,,
2,302,337,0.0,0,1.0,0.0,2.0,0.0,3.0,0.0,...,,,,,,,,,,
3,303,337,0.0,0,1.0,0.0,2.0,0.0,3.0,0.0,...,,,,,,,,,,
4,304,337,0.0,0,1.0,0.0,2.0,0.0,3.0,0.0,...,,,,,,,,,,


In [26]:
# Largest value found in any column except column 0 (column 0 was read as
# str above, so it is sliced off before taking the overall maximum).
output_df.max()[1:].max()

883.0

### Helper function

In [27]:
# NOTE(review): ad-hoc sample row (position 34). No later cell in view reads
# this variable; the helper below takes its own `location_row` argument, so
# this is presumably a leftover from developing it -- confirm and remove.
location_row = output_df.iloc[34]

In [72]:
# Do not use the death compartment (graveyard, location ID "1000") for this test.
# Column 0 holds the location ID as a string, so filter the ROWS on that column.
# (The previous mask was built from row 0 instead of column 0 and only dropped
# the graveyard because it happened to be the first row.)
output_df_wo_graveyard = output_df.loc[output_df[0] != "1000"].copy()

In [73]:
def get_number_of_infections_per_timepoint_one_location(location_row):
    """Summarise one raw output row as agents / infected agents per timepoint.

    The row is read positionally with this layout::

        [location_id, n_timesteps,
         timepoint, n_agents, (agent_id, time_since_transmission) * n_agents,
         timepoint, n_agents, ...]

    An agent is counted as infected when its time-since-transmission value
    is >= 0.

    Parameters
    ----------
    location_row : pandas.Series or array-like
        One row of the raw output table, padded with NaN at the end.

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationID``, ``timepoint``, ``n_agents``, ``n_infected``,
        one row per parsed timepoint (empty if nothing could be parsed).

    Notes
    -----
    Parsing stops at the declared number of timesteps, at the first NaN
    timepoint (padding), or at the end of the row -- whichever comes first.
    """
    columns = ["LocationID", "timepoint", "n_agents", "n_infected"]

    values = np.asarray(location_row)
    location_id = values[0]
    output_timesteps = int(values[1])

    # Collect plain records and build the frame once; appending to a
    # DataFrame row by row re-allocates on every insert.
    records = []
    timestep_col_id = 2  # position of the current timepoint entry

    for _ in range(output_timesteps):
        # A row without NaN padding ends right after its last agent pair.
        if timestep_col_id + 1 >= len(values):
            break

        timepoint = values[timestep_col_id]
        if np.isnan(timepoint):
            break

        # number of agents is stored directly after the timepoint
        n_agents = int(values[timestep_col_id + 1])

        # (agent_id, time_since_transmission) pairs follow; the transmission
        # times sit at every second position starting one past the first id.
        first_agent_col = timestep_col_id + 2
        next_timestep_col = first_agent_col + 2 * n_agents
        transmission_times = values[first_agent_col + 1:next_timestep_col:2]
        n_infected = sum(1 for value in transmission_times if value >= 0)

        records.append([location_id, timepoint, n_agents, n_infected])
        timestep_col_id = next_timestep_col

    return pd.DataFrame(records, columns=columns)

In [74]:
get_number_of_infections_per_timepoint_one_location(output_df_wo_graveyard.iloc[1])

Unnamed: 0,LocationID,timepoint,n_agents,n_infected
0,0302,0.0,0,0
1,0302,1.0,0,0
2,0302,2.0,0,0
3,0302,3.0,0,0
4,0302,4.0,0,0
...,...,...,...,...
332,0302,332.0,30,0
333,0302,333.0,6,0
334,0302,334.0,11,0
335,0302,335.0,12,0


In [75]:
def get_number_of_infection_per_timepoint(output_df):
    """Apply the per-location summary to every row and stack the results.

    Parameters
    ----------
    output_df : pandas.DataFrame
        Raw output table, one location per row.

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationID``, ``timepoint``, ``n_agents``, ``n_infected``.
        Each location keeps its own 0..k index, so index labels repeat
        across locations.
    """
    columns = ["LocationID", "timepoint", "n_agents", "n_infected"]

    per_location = []
    for i in range(len(output_df)):
        df_res_row = get_number_of_infections_per_timepoint_one_location(output_df.iloc[i])
        # Empty frames add no rows; leaving them out keeps concat from
        # degrading dtypes.
        if not df_res_row.empty:
            per_location.append(df_res_row)

    if not per_location:
        return pd.DataFrame(columns=columns)

    # Single concat instead of re-copying the accumulated frame per location.
    return pd.concat(per_location)

In [76]:
# runtime ~ 1-2 minutes
df_transformed = get_number_of_infection_per_timepoint(output_df_wo_graveyard)

In [81]:
df_transformed = df_transformed.astype({'n_agents': 'int', "n_infected": "int"})

In [108]:
df_transformed_w_graveyard = get_number_of_infection_per_timepoint(output_df)

In [109]:
df_transformed_w_graveyard = df_transformed_w_graveyard.astype({'n_agents': 'int', "n_infected": "int"})

# Evaluation

### Number of Infections per timepoint

In [82]:
n_days = 14

In [83]:
print("Number of infected people over all locations per hour:")
# Pick the two numeric columns BEFORE aggregating so the string LocationID
# column is not summed as well, then show every 24th timepoint.
df_transformed.groupby("timepoint")[['n_agents', 'n_infected']].sum().loc[[i*24 for i in range(0,n_days)],:]

Number of infected people over all locations per hour:


Unnamed: 0_level_0,n_agents,n_infected
timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,884,8
24.0,883,5
48.0,883,5
72.0,883,5
96.0,883,5
120.0,883,0
144.0,883,0
168.0,883,0
192.0,883,0
216.0,883,0


### Nobody is in schools/... during unreasonable timepoints

In [84]:
# 1 -> School
# 2 -> Work
# 3 -> Social Event
# 4 -> Basic Shop

In [85]:
df_transformed["hour_of_day"] = df_transformed["timepoint"].apply(lambda x: int(x)%24)

In [117]:
df_transformed["Location_Type"] = df_transformed['LocationID'].apply(lambda x: x[0:2])

In [118]:
# school
df_sub = df_transformed.loc[df_transformed["Location_Type"].isin(["01"])].copy()

In [119]:
len(df_sub["timepoint"].unique())

337

In [120]:
df_sub.groupby("hour_of_day").sum()[["n_agents", "n_infected"]]

Unnamed: 0_level_0,n_agents,n_infected
hour_of_day,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
5,0,0
6,0,0
7,153,0
8,352,0
9,484,0


In [121]:
# work
df_sub = df_transformed.loc[df_transformed["Location_Type"].isin(["02"])].copy()

len(df_sub["timepoint"].unique())

337

In [122]:
df_sub.groupby("hour_of_day").sum()[["n_agents", "n_infected"]]

Unnamed: 0_level_0,n_agents,n_infected
hour_of_day,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
5,0,0
6,0,0
7,1609,5
8,3403,5
9,5017,5


In [123]:
# social event
df_sub = df_transformed.loc[df_transformed["Location_Type"].isin(["03"])].copy()

len(df_sub["timepoint"].unique())

337

In [124]:
df_sub.groupby("hour_of_day").sum()[["n_agents", "n_infected"]]

Unnamed: 0_level_0,n_agents,n_infected
hour_of_day,Unnamed: 1_level_1,Unnamed: 2_level_1
0,957,1
1,881,1
2,881,1
3,881,0
4,881,0
5,881,0
6,881,0
7,881,0
8,881,0
9,881,0


In [125]:
# basic shop
df_sub = df_transformed.loc[df_transformed["Location_Type"].isin(["04"])].copy()

len(df_sub["timepoint"].unique())

337

In [126]:
df_sub.groupby("hour_of_day").sum()[["n_agents", "n_infected"]]

Unnamed: 0_level_0,n_agents,n_infected
hour_of_day,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
5,0,0
6,0,0
7,0,0
8,0,0
9,158,0


### Infections in hospital == infected severe?

In [127]:
# 5 -> Hospital

In [128]:
df_sub = df_transformed.loc[df_transformed["Location_Type"].isin(["05"])].copy()

In [132]:
# per-timepoint totals of agents and infected agents over all hospital locations
n_inf_hospital = df_sub.groupby("timepoint").sum()[['n_agents', 'n_infected']]

In [133]:
# subselect every 24h
n_inf_hospital.loc[[i*24 for i in range(0,n_days)],:]

Unnamed: 0_level_0,n_agents,n_infected
timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0,0
24.0,0,0
48.0,0,0
72.0,0,0
96.0,0,0
120.0,0,0
144.0,0,0
168.0,0,0
192.0,0,0
216.0,0,0


### Infections at ICU == infected critical?

In [134]:
# 06 -> ICU
df_sub = df_transformed.loc[df_transformed["Location_Type"].isin(["06"])].copy()

In [135]:
# per-timepoint totals of agents and infected agents over all ICU locations
n_inf_ICU = df_sub.groupby("timepoint").sum()[['n_agents', 'n_infected']]

In [136]:
# subselect every 24h
n_inf_ICU.loc[[i*24 for i in range(0,n_days)],:]

Unnamed: 0_level_0,n_agents,n_infected
timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0,0
24.0,0,0
48.0,0,0
72.0,0,0
96.0,0,0
120.0,0,0
144.0,0,0
168.0,0,0
192.0,0,0
216.0,0,0


### Persons at graveyard == death?

In [137]:
df_transformed_w_graveyard["Location_Type"] = df_transformed_w_graveyard['LocationID'].apply(lambda x: x[0:2])

In [139]:
# 10 -> graveyard
df_sub = df_transformed_w_graveyard.loc[df_transformed_w_graveyard["Location_Type"].isin(["10"])].copy()

# number of agents at the graveyard per timepoint
n_graveyard = df_sub.groupby("timepoint").sum()[['n_agents']]

In [140]:
# subselect every 24h
n_graveyard.loc[[i*24 for i in range(0,n_days)],:]

Unnamed: 0_level_0,n_agents
timepoint,Unnamed: 1_level_1
0.0,0
24.0,1
48.0,1
72.0,1
96.0,1
120.0,1
144.0,1
168.0,1
192.0,1
216.0,1


## Time since transmission

In [141]:
def tidy_row(location_row):
    """Unpack one raw output row into long format, one record per agent.

    The row is read positionally with this layout::

        [location_id, n_timesteps,
         timepoint, n_agents, (agent_id, time_since_transmission) * n_agents,
         timepoint, n_agents, ...]

    Parameters
    ----------
    location_row : pandas.Series or array-like
        One row of the raw output table, padded with NaN at the end.

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationID``, ``timepoint``, ``n_agents``, ``agent_id``,
        ``time_since_transmission``; one row per (timepoint, agent) pair.
        Timepoints without agents contribute no rows.

    Notes
    -----
    Parsing stops at the declared number of timesteps, at the first NaN
    timepoint (padding), or at the end of the row -- whichever comes first.
    """
    columns = ["LocationID", "timepoint", "n_agents", "agent_id", "time_since_transmission"]

    values = np.asarray(location_row)
    location_id = values[0]
    # Cast like the per-location summary helper does: the count may arrive
    # as a float, which range() rejects.
    output_timesteps = int(values[1])

    # Collect plain records and build the frame once; appending to a
    # DataFrame row by row re-allocates on every insert.
    records = []
    timestep_col_id = 2  # position of the current timepoint entry

    for _ in range(output_timesteps):
        # A row without NaN padding ends right after its last agent pair.
        if timestep_col_id + 1 >= len(values):
            break

        timepoint = values[timestep_col_id]
        if np.isnan(timepoint):
            break

        # number of agents is stored directly after the timepoint
        n_agents = int(values[timestep_col_id + 1])

        # agent ids and transmission times alternate after the agent count
        first_agent_col = timestep_col_id + 2
        next_timestep_col = first_agent_col + 2 * n_agents
        agent_block = values[first_agent_col:next_timestep_col]

        for agent_id, time_since_transmission in zip(agent_block[0::2], agent_block[1::2]):
            records.append([location_id, timepoint, n_agents, agent_id, time_since_transmission])

        timestep_col_id = next_timestep_col

    return pd.DataFrame(records, columns=columns)

In [142]:
def tidy_output_df(output_df):
    """Convert the whole raw output table to long format.

    Parameters
    ----------
    output_df : pandas.DataFrame
        Raw output table, one location per row.

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationID``, ``timepoint``, ``n_agents``, ``agent_id``,
        ``time_since_transmission``. Each location keeps its own 0..k index,
        so index labels repeat across locations.
    """
    columns = ["LocationID", "timepoint", "n_agents", "agent_id", "time_since_transmission"]

    per_location = []
    for i in range(len(output_df)):
        df_res_row = tidy_row(output_df.iloc[i])
        # Empty frames add no rows; leaving them out keeps concat from
        # degrading dtypes.
        if not df_res_row.empty:
            per_location.append(df_res_row)

    if not per_location:
        return pd.DataFrame(columns=columns)

    # Single concat instead of re-copying the accumulated frame per location.
    return pd.concat(per_location)

In [143]:
# takes ~3min
tidy_output = tidy_output_df(output_df)

### maximum timecourse value

In [147]:
tidy_output["time_since_transmission"].max()

107.0

In [148]:
tidy_output['time_since_transmission'].max()/24

4.458333333333333

In [149]:
tidy_output.sort_values(['agent_id','time_since_transmission'])

Unnamed: 0,LocationID,timepoint,n_agents,agent_id,time_since_transmission
107,0419,140.0,2,0.0,-1.0
121,0419,190.0,4,0.0,-1.0
172,0419,277.0,2,0.0,-1.0
105,0034,108.0,1,0.0,-1.0
106,0034,109.0,1,0.0,-1.0
...,...,...,...,...,...
984,00498,329.0,3,883.0,-1.0
992,00498,333.0,5,883.0,-1.0
997,00498,334.0,5,883.0,-1.0
1002,00498,335.0,5,883.0,-1.0


In [154]:
tidy_output.loc[(tidy_output["time_since_transmission"]>0),].head()

Unnamed: 0,LocationID,timepoint,n_agents,agent_id,time_since_transmission
3,306,21.0,4,841.0,21.0
10,306,22.0,6,841.0,22.0
387,307,96.0,5,550.0,96.0
392,307,97.0,5,550.0,97.0
397,307,98.0,5,550.0,98.0


In [160]:
agent_id = 841.0
df_sub = tidy_output[tidy_output["agent_id"]==agent_id].sort_values("timepoint")
max_time = df_sub["time_since_transmission"].max()

In [162]:
max_time

98.0

In [163]:
df_sub.iloc[90:105]

Unnamed: 0,LocationID,timepoint,n_agents,agent_id,time_since_transmission
266,482,90.0,4,841.0,90.0
270,482,91.0,4,841.0,91.0
274,482,92.0,4,841.0,92.0
278,482,93.0,4,841.0,93.0
282,482,94.0,3,841.0,94.0
285,482,95.0,4,841.0,95.0
289,482,96.0,4,841.0,96.0
293,482,97.0,4,841.0,97.0
297,482,98.0,4,841.0,98.0
301,482,99.0,4,841.0,-1.0
