In [2]:
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd

In [3]:
py.init_notebook_mode(connected=True)

In [4]:
plotly.__version__

'4.5.2'

## Using files from MoriaOutputCSV dir

In [5]:
household_locations_df = pd.read_csv('MoriaOutputCSV/households_location.txt', names=['x', 'y'])
household_locations_df.head()

Unnamed: 0,x,y
0,0.447482,0.512702
1,0.850074,0.218818
2,0.755039,0.165309
3,0.542193,0.346503
4,0.364121,0.555194


In [6]:
population_df = pd.read_csv('MoriaOutputCSV/population.txt', names=['household', 'disease', 'dsymptom', 'daycount', 'new_asymp', 'age', 'gender', 'chronic', 'wanderer'])
population_df.head()

Unnamed: 0,household,disease,dsymptom,daycount,new_asymp,age,gender,chronic,wanderer
0,0.0,0.0,8.235834,0.0,0.0,0.8,1.0,0.0,0.0
1,0.0,0.0,3.450011,0.0,1.0,32.7,0.0,0.0,0.0
2,0.0,0.0,6.129842,0.0,0.0,4.7,1.0,0.0,0.0
3,0.0,0.0,7.256631,0.0,0.0,4.7,0.0,0.0,0.0
4,0.0,0.0,5.106579,0.0,0.0,2.1,1.0,0.0,0.0


In [7]:
track_states_df = pd.read_csv('MoriaOutputCSV/track_states.txt', names=['susceptible', 'exposed', 'presymptomatic', 'symptomatic', 'mild', 'severe', 'recovered', 'qua_susceptible', 'qua_exposed', 'qua_presymptomatic', 'qua_symptomatic', 'qua_mild', 'qua_severe', 'qua_recovered'])
track_states_df.head()

Unnamed: 0,susceptible,exposed,presymptomatic,symptomatic,mild,severe,recovered,qua_susceptible,qua_exposed,qua_presymptomatic,qua_symptomatic,qua_mild,qua_severe,qua_recovered
0,18699.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,18699.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,18699.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,18694.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,18694.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
scatter_plot = go.Figure(data=go.Scatter(x=household_locations_df['x'], y=household_locations_df['y'], mode='markers'))
scatter_plot.layout.title.text = 'Household Locations'
scatter_plot.layout.xaxis.title = 'Latitude'
scatter_plot.layout.yaxis.title = 'Longitude'
scatter_plot.show()

In [9]:
# getting number of unique households in population data
len(set(population_df['household']))

3423

In [10]:
# getting number of households from household locations data
len(household_locations_df)

3423

clearly, the two data sets can be joined based on the household column

In [11]:
# computing mean age per household
population_df.groupby('household').mean()['age']

household
0.0        7.616667
1.0       26.700000
2.0       32.786667
3.0       27.354545
4.0       16.750000
            ...    
3418.0    21.850000
3419.0    68.900000
3420.0    23.800000
3421.0    11.700000
3422.0    17.633333
Name: age, Length: 3423, dtype: float64

In [12]:
population_df.groupby(['daycount', 'household']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,disease,dsymptom,new_asymp,age,gender,chronic,wanderer
daycount,household,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.0,0.0,0.0,36.836832,1.0,45.7,4.0,0.0,1.0
0.0,1.0,0.0,22.954173,1.0,106.8,1.0,0.0,1.0
0.0,2.0,0.0,109.943000,2.0,491.8,8.0,0.0,2.0
0.0,3.0,0.0,68.441448,1.0,300.9,5.0,0.0,0.0
0.0,4.0,0.0,46.778029,0.0,134.0,6.0,0.0,0.0
...,...,...,...,...,...,...,...,...
40.0,2501.0,9.0,10.348686,0.0,58.8,1.0,0.0,0.0
40.0,2734.0,9.0,10.214927,0.0,2.7,0.0,0.0,0.0
40.0,3417.0,9.0,9.104623,0.0,51.9,0.0,0.0,0.0
42.0,1164.0,22.0,11.942816,1.0,37.6,1.0,0.0,0.0


In [13]:
population_df_grouped_by_day = population_df.groupby(['daycount'], as_index=False).sum()
population_df_grouped_by_day.head()

Unnamed: 0,daycount,household,disease,dsymptom,new_asymp,age,gender,chronic,wanderer
0,0.0,25238279.0,49.0,118640.486571,3326.0,472803.3,9966.0,271.0,896.0
1,1.0,35132.0,51.0,102.523718,3.0,397.6,11.0,1.0,1.0
2,2.0,25346.0,49.0,112.273319,2.0,463.4,11.0,0.0,1.0
3,3.0,21715.0,22.0,44.765947,2.0,190.7,5.0,0.0,0.0
4,4.0,20114.0,65.0,87.607167,6.0,276.3,5.0,0.0,0.0


In [14]:
# interesting columns are disease, new_asymp, chronic
# new_asymp seems to need cumulative sum
new_asymp = list(population_df_grouped_by_day['new_asymp'])
for i in range(1, len(new_asymp)):
    new_asymp[i] = new_asymp[i] + new_asymp[i-1]
population_df_grouped_by_day['total_asymp'] = new_asymp

In [15]:
population_df_grouped_by_day.head()

Unnamed: 0,daycount,household,disease,dsymptom,new_asymp,age,gender,chronic,wanderer,total_asymp
0,0.0,25238279.0,49.0,118640.486571,3326.0,472803.3,9966.0,271.0,896.0,3326.0
1,1.0,35132.0,51.0,102.523718,3.0,397.6,11.0,1.0,1.0,3329.0
2,2.0,25346.0,49.0,112.273319,2.0,463.4,11.0,0.0,1.0,3331.0
3,3.0,21715.0,22.0,44.765947,2.0,190.7,5.0,0.0,0.0,3333.0
4,4.0,20114.0,65.0,87.607167,6.0,276.3,5.0,0.0,0.0,3339.0


In [16]:
day_counts = [int(d) for d in population_df_grouped_by_day['daycount']]

In [17]:
categories_to_plot = ['disease', 'total_asymp', 'chronic']
line_plot = go.Figure()
for category in categories_to_plot:
    line_plot.add_trace(go.Scatter(x=day_counts, y=population_df_grouped_by_day[category],
                    mode='lines+markers',
                    name=category,
                    hovertemplate='%{y}'))
line_plot.layout.xaxis.title = 'Days'
line_plot.layout.hovermode = 'x'
line_plot.layout.title = 'Disease Progress in Households'
line_plot.show()

## Using files from ABM_model_data_for_graphs

In [18]:
foodpoints_locations = pd.read_csv('ABM_model_data_for_graphs/foodpoints_location.csv')
foodpoints_locations.head()

Unnamed: 0.1,Unnamed: 0,foodpoint_x,foodpoint_y
0,0,0.166667,0.166667
1,1,0.333333,0.333333
2,2,0.5,0.5
3,3,0.666667,0.666667
4,4,0.833333,0.833333


In [19]:
households_locations = pd.read_csv('ABM_model_data_for_graphs/households_location.csv')
households_locations.head()

Unnamed: 0.1,Unnamed: 0,hh_id,hh_x,hh_y,contagious_count
0,0,0.0,0.795731,0.42246,0.0
1,1,1.0,0.333313,0.322147,0.0
2,2,2.0,0.383055,0.225983,0.0
3,3,3.0,0.558613,0.665344,0.0
4,4,4.0,0.230392,0.303943,0.0


In [20]:
set(households_locations['contagious_count'])

{0.0}

contagious_count is 0.0 for all households

In [21]:
toilets_locations = pd.read_csv('ABM_model_data_for_graphs/toilets_location.csv')
toilets_locations.head()

Unnamed: 0.1,Unnamed: 0,toilet_x,toilet_y
0,0,0.083333,0.083333
1,1,0.166667,0.166667
2,2,0.25,0.25
3,3,0.333333,0.333333
4,4,0.416667,0.416667


In [23]:
track_states = pd.read_csv('ABM_model_data_for_graphs/track_states.csv')
track_states.head()  # now has the day column

Unnamed: 0.1,Unnamed: 0,susceptible_tl,exposed_tl,presymptomatic_tl,symptomatic_tl,mild_tl,severe_tl,recovered_tl,qua_susceptible_tl,qua_exposed_tl,qua_presymptomatic_tl,qua_symptomatic_tl,qua_mild_tl,qua_severe_tl,qua_recovered_tl,day
0,0,18699.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,1,18690.0,9.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,2,18684.0,13.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3,3,18672.0,22.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
4,4,18668.0,16.0,2.0,0.0,0.0,0.0,0.0,0.0,5.0,7.0,2.0,0.0,0.0,0.0,4


In [30]:
population = pd.read_csv('ABM_model_data_for_graphs/population.csv')
population.head()

Unnamed: 0.1,Unnamed: 0,household,disease,dsymptom,daycount,new_asymp,age,gender,chronic,wanderer
0,0,0.0,0.0,3.846869,0.0,0.0,51.6,0.0,1.0,1.0
1,1,0.0,0.0,7.44184,0.0,1.0,50.0,1.0,0.0,0.0
2,2,0.0,0.0,7.628137,0.0,0.0,23.5,0.0,0.0,0.0
3,3,0.0,0.0,4.24301,0.0,0.0,18.2,1.0,0.0,0.0
4,4,0.0,0.0,9.921428,0.0,0.0,39.6,1.0,0.0,0.0


In [24]:
# re-plotting household locations with foodpoint locations and toilet locations
locations_scatter_plot = go.Figure()
location_names = ['household', 'food point', 'toilet']

households = [households_locations['hh_x'], households_locations['hh_y']]
foodpoints = [foodpoints_locations['foodpoint_x'], foodpoints_locations['foodpoint_y']]
toilets = [toilets_locations['toilet_x'], toilets_locations['toilet_y']]

for i, location_list in enumerate([households, foodpoints, toilets]):
    locations_scatter_plot.add_trace(go.Scatter(x=location_list[0], 
                                                y=location_list[1], 
                                                mode='markers', 
                                                name=location_names[i]))
locations_scatter_plot.layout.title = 'Locations Scatter Plot for Spatial Understanding'
locations_scatter_plot.layout.xaxis.title = 'x (latitude)'
locations_scatter_plot.layout.yaxis.title = 'y (longitude)'
locations_scatter_plot.show()

In [28]:
foodpoints[0]

0    0.166667
1    0.333333
2    0.500000
3    0.666667
4    0.833333
5    1.000000
Name: foodpoint_x, dtype: float64

In [29]:
toilets[0]

0     0.083333
1     0.166667
2     0.250000
3     0.333333
4     0.416667
5     0.500000
6     0.583333
7     0.666667
8     0.750000
9     0.833333
10    0.916667
11    1.000000
Name: toilet_x, dtype: float64

some toilets overlap with foodpoints, so foodpoints aren't visible on the plot

In [34]:
# tracking disease states progress (not tracking 'susceptible_tl' since range is very different)
disease_states = ['exposed_tl', 'presymptomatic_tl', 'symptomatic_tl', 'mild_tl', 'severe_tl', 'recovered_tl', 'qua_susceptible_tl', 'qua_exposed_tl', 'qua_presymptomatic_tl', 'qua_symptomatic_tl', 'qua_mild_tl', 'qua_severe_tl', 'qua_recovered_tl']
disease_state_tracker_plot = go.Figure()

for state in disease_states:
    disease_state_tracker_plot.add_trace(go.Scatter(x=track_states['day'],
                                                    y=track_states[state], 
                                                    name=state,
                                                    mode='lines+markers',
                                                    hovertemplate='%{y}'))
disease_state_tracker_plot.layout.xaxis.title = 'Day'
disease_state_tracker_plot.layout.yaxis.title = 'Population'
disease_state_tracker_plot.layout.hovermode = 'x'
disease_state_tracker_plot.layout.title = 'Disease States Progress'
disease_state_tracker_plot.show()