In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import time
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context('notebook')

In [3]:
from june import World 
from june.demography.geography import Geography
from june.demography import Demography
from june.interaction import Interaction
from june.infection import Infection, HealthIndexGenerator, InfectionSelector
from june.infection.transmission import TransmissionConstant
from june.groups import Hospitals, Schools, Companies, Households, CareHomes, Cemeteries, Universities
from june.groups.leisure import generate_leisure_for_config, Cinemas, Pubs, Groceries
from june.simulator import Simulator
from june.infection_seed import InfectionSeed
from june.policy import Policy, Policies
from june import paths
from june.hdf5_savers import load_geography_from_hdf5
from june.logger.read_logger import ReadLogger
from june.world import generate_world_from_geography
from june.hdf5_savers import generate_world_from_hdf5

No --data argument given - defaulting to:
/home/arnau/code/JUNE/data
No --configs argument given - defaulting to:
/home/arnau/code/JUNE/configs


# Initialize world

To initialize a world, we need to add the different components we want to have in it. First we specify which super areas (MSOAs) we want to create. We have included these ones because they are known to contain hospitals, schools, care homes, and companies.

After creating the geography, we create the different components the world needs to have, such as care homes, companies, ...

In [4]:
# Simulation config file; reused below for leisure generation and for building the Simulator.
CONFIG_PATH = paths.configs_path / "config_example.yaml"

In [5]:
%%time 

# Build a geography restricted to six super areas (MSOA codes).
geography = Geography.from_file(
        {
            "super_area": [
                "E02003282",
                "E02003283",
                "E02001720",
                "E02002560",
                "E02002559",
                "E02004314",
            ]
        }
)

# Attach each kind of venue to the geography before the world is generated from it.
geography.hospitals = Hospitals.for_geography(geography)
geography.schools = Schools.for_geography(geography)
geography.companies = Companies.for_geography(geography)
geography.care_homes = CareHomes.for_geography(geography)
# Universities are built from the super areas rather than from the geography object itself.
geography.universities = Universities.for_super_areas(geography.super_areas)
# Generate the world from the geography, with households and commuting explicitly enabled.
world = generate_world_from_geography(geography, include_households=True, include_commute=True)

2020-08-28 15:42:04,089 - june.demography.geography - INFO - There are 162 areas and 6 super_areas in the world.
2020-08-28 15:42:04,093 - june.groups.hospital - INFO - There are 1 hospitals in this geography.
2020-08-28 15:42:04,133 - june.groups.school - INFO - There are 26 schools in this geography.
2020-08-28 15:42:04,160 - june.groups.school - INFO - No school for the age 0 in this world.
2020-08-28 15:42:04,163 - june.groups.school - INFO - No school for the age 1 in this world.
2020-08-28 15:42:04,166 - june.groups.school - INFO - No school for the age 2 in this world.
2020-08-28 15:42:04,380 - june.groups.carehome - INFO - There are 11 care_homes in this geography.
2020-08-28 15:42:16,674 - june.distributors.worker_distributor - INFO - There are 0 who had to be told to stay real
2020-08-28 15:42:35,143 - june.groups.commute.commutehub_distributor - INFO - Distributing people to commute hubs in 11 commute cities.
2020-08-28 15:42:35,144 - june.groups.commute.commutehub_distribut

## Commute, travel and leisure

In [6]:
# Leisure venues are attached to the world (not the geography), after the world has been generated.
world.pubs = Pubs.for_geography(geography)
world.cinemas = Cinemas.for_geography(geography)
world.groceries = Groceries.for_geography(geography)
# Build the leisure object from the same config file that later drives the simulator.
leisure = generate_leisure_for_config(world, config_filename=CONFIG_PATH)
leisure.distribute_social_venues_to_households(world.households, super_areas=world.super_areas) # assigns the candidate social venues to the households (and so to the people living in them)

2020-08-28 15:42:36,352 - june.groups.leisure.leisure - INFO - Distributing social venues to households
2020-08-28 15:42:36,353 - june.groups.leisure.leisure - INFO - Distributed in 0 of 20498 households.


We are also going to need some cemeteries...


In [7]:
# Cemeteries are created with default arguments and attached directly to the world.
world.cemeteries = Cemeteries()

In [8]:
len(world.people)

57259

### If it took a long time to run the previous commands, it might be a good idea to save the world to reuse it later.

In [9]:
# Save the world to "world.hdf5" (relative path, so it lands in the current working directory).
world.to_hdf5("world.hdf5")

2020-08-28 15:42:43,670 - june.hdf5_savers.world_saver - INFO - saving world to HDF5
2020-08-28 15:42:44,535 - june.hdf5_savers.world_saver - INFO - saving hospitals...
2020-08-28 15:42:44,537 - june.hdf5_savers.world_saver - INFO - saving schools...
2020-08-28 15:42:44,543 - june.hdf5_savers.world_saver - INFO - saving companies...
2020-08-28 15:42:44,551 - june.hdf5_savers.world_saver - INFO - saving households...
2020-08-28 15:42:45,115 - june.hdf5_savers.world_saver - INFO - saving care homes...
2020-08-28 15:42:45,118 - june.hdf5_savers.world_saver - INFO - saving commute cities...
2020-08-28 15:42:45,122 - june.hdf5_savers.world_saver - INFO - saving commute hubs...
2020-08-28 15:42:45,125 - june.hdf5_savers.world_saver - INFO - saving universities...
2020-08-28 15:42:45,128 - june.hdf5_savers.world_saver - INFO - saving social venues...


If we would like to load the world we saved, we just do

In [10]:
# Rebind `world` to the copy loaded from disk; every later cell works on this reloaded world.
world = generate_world_from_hdf5("world.hdf5")

2020-08-28 15:42:45,161 - june.hdf5_savers.world_saver - INFO - loading world from HDF5
2020-08-28 15:42:45,167 - june.hdf5_savers.world_saver - INFO - loading hospitals...
2020-08-28 15:42:45,170 - june.hdf5_savers.world_saver - INFO - loading schools...
2020-08-28 15:42:45,177 - june.hdf5_savers.world_saver - INFO - loading companies...
2020-08-28 15:42:45,206 - june.hdf5_savers.world_saver - INFO - loading care homes...
2020-08-28 15:42:45,210 - june.hdf5_savers.world_saver - INFO - loading universities...
2020-08-28 15:42:45,214 - june.hdf5_savers.world_saver - INFO - loading commute cities...
2020-08-28 15:42:45,235 - june.hdf5_savers.world_saver - INFO - loading commute hubs...
2020-08-28 15:42:45,332 - june.hdf5_savers.world_saver - INFO - loading households...
2020-08-28 15:42:45,745 - june.hdf5_savers.world_saver - INFO - loading population...
2020-08-28 15:42:46,093 - june.hdf5_savers.world_saver - INFO - loading social venues...
2020-08-28 15:42:46,101 - june.hdf5_savers.wor

In [11]:
# Rebuild the leisure object so that it refers to the reloaded world rather than the one built in memory.
# NOTE(review): the config is passed positionally here but as `config_filename=` in the first call — consider the keyword form for consistency.
leisure = generate_leisure_for_config(world, CONFIG_PATH)

You now have a beautiful pre-pandemic world.

# Adding the infection

The module in charge of infecting people is called the ``InfectionSelector``, which gives people a transmission time profile and a symptoms trajectory based on their age and sex (through the health index generator)

In [12]:
# Health index generator created with an asymptomatic ratio of 0.2.
health_index_generator = HealthIndexGenerator.from_file(asymptomatic_ratio=0.2)
# The selector combines that generator with the XNExp transmission config shipped under configs/defaults.
selector = InfectionSelector.from_file(
        health_index_generator=health_index_generator,
        transmission_config_path=paths.configs_path / 'defaults/transmission/XNExp.yaml'
)

In [13]:
selector.health_index_generator.asymptomatic_ratio

0.2

# Adding the interaction

In [14]:
# Load the interaction module with no arguments, i.e. with whatever defaults `from_file` uses.
interaction = Interaction.from_file()

The ``beta`` values are the intensities of the interactions taking place in the different groups.

In [15]:
interaction.beta

{'box': 1,
 'pub': 0.1,
 'grocery': 0.1,
 'cinema': 0.1,
 'commute_unit': 1.0,
 'commute_city_unit': 1.0,
 'hospital': 1.0,
 'care_home': 1.0,
 'company': 0.1,
 'school': 0.1,
 'household': 0.2,
 'university': 0.01}

To modify these, simply do:

In [16]:
# Scale every group's interaction intensity by 10 %, updating the existing dict in place.
# Note that re-running this cell compounds the factor.
interaction.beta.update(
    {group_name: intensity * 1.1 for group_name, intensity in interaction.beta.items()}
)

In [17]:
interaction.beta

{'box': 1.1,
 'pub': 0.11000000000000001,
 'grocery': 0.11000000000000001,
 'cinema': 0.11000000000000001,
 'commute_unit': 1.1,
 'commute_city_unit': 1.1,
 'hospital': 1.1,
 'care_home': 1.1,
 'company': 0.11000000000000001,
 'school': 0.11000000000000001,
 'household': 0.22000000000000003,
 'university': 0.011000000000000001}

Moreover, this interaction module uses contact matrices that are different for different groups. These contact matrices shouldn't be modified for now. They are a combination of conversational contact matrices and physical contact matrices (see the BBC Pandemic paper, from which these matrices are extracted: https://www.medrxiv.org/content/10.1101/2020.02.16.20023754v2).

There is a parameter, ``alpha`` ($\alpha$), that combines these two matrices in the following way,


$\beta M \left(1 + (\alpha - 1) P\right)$

where $\beta$ is the intensity of the interaction, $M$ the (conversational) contact matrix of the group, and $P$ the physical contact matrix. A larger $\alpha$ produces more physical contacts. It is a single global number, independent of the particular group.


In [18]:
interaction.alpha_physical

2.0

# Seed the disease

There are two options implemented in the seed at the moment, either you specify the number of cases and these are then homogeneously distributed by population to the different areas, or you use UK data on cases per region. For now use the first case.

In [19]:
# The seed is built from the world's super areas and the infection selector defined earlier.
infection_seed = InfectionSeed(
    world.super_areas, selector,
)

In [20]:
# Number of initial infections to seed into the world.
# NOTE(review): no random seed is set in this notebook, so who gets infected presumably differs between runs — confirm.
n_cases = 5
infection_seed.unleash_virus(n_cases) # try different initial numbers of cases here

# Set policies

In [21]:
# Load the policy set with no arguments, i.e. with whatever defaults `Policies.from_file` uses.
policies = Policies.from_file()

In [22]:
policies

<june.policy.policy.Policies at 0x7f4759c56ca0>

# Run the simulation

The simulator is the main module in charge of running the simulation. It coordinates the ``ActivityManager``, which is responsible for allocating people to the right groups given the current timestep; it updates the health status of the population; and it runs the interaction over the different groups. All of these modules can be modified by policies at any given time.

Since the timer configuration is a bit cumbersome, it is read from the config file at ``configs/config_example.yaml``

In [23]:
# Assemble the simulator from the pieces built in the previous sections;
# the timer settings are read from the config file at CONFIG_PATH.
simulator = Simulator.from_file(
    world=world,
    interaction=interaction,
    infection_selector=selector,
    policies=policies,
    leisure=leisure,
    config_filename=CONFIG_PATH,
)

In [None]:
%%time
# Run the configured simulation; the simulator logs date, deaths and infected count at each time step.
simulator.run()

2020-08-28 15:42:55,883 - june.simulator - INFO - Starting group_dynamics for 210 days at day 0
2020-08-28 15:42:55,884 - june.simulator - INFO - starting the loop ..., at 0 days, to run for 210 days
2020-08-28 15:42:57,114 - june.simulator - INFO - Date = 2020-03-01 00:00:00, number of deaths =  0, number of infected = 5
2020-08-28 15:42:57,778 - june.simulator - INFO - Date = 2020-03-01 04:00:00, number of deaths =  0, number of infected = 5
2020-08-28 15:42:58,580 - june.simulator - INFO - Date = 2020-03-01 08:00:00, number of deaths =  0, number of infected = 5
2020-08-28 15:42:58,899 - june.simulator - INFO - Date = 2020-03-01 12:00:00, number of deaths =  0, number of infected = 5
2020-08-28 15:42:59,212 - june.simulator - INFO - Date = 2020-03-02 00:00:00, number of deaths =  0, number of infected = 5
2020-08-28 15:42:59,628 - june.simulator - INFO - Date = 2020-03-02 01:00:00, number of deaths =  0, number of infected = 5
2020-08-28 15:42:59,886 - june.simulator - INFO - Date =

While the simulation runs (and afterwards) we can launch the visualization webpage by running

```
python june/visualizer.py path/to/results
```

# Getting the results

All results are stored in a json file specified in the ``save_path`` variable in the config file. We can also access it from ``world.logger`` directly.

In [None]:
import pandas as pd

In [None]:
# Reader for the logged simulation results, constructed with default arguments.
# NOTE(review): presumably it reads from a default results location — confirm it matches `save_path` in the config.
read = ReadLogger()

## Hospital data and how it changed over time

In [None]:
# Hospital capacity records as returned by the log reader.
hospitals_df = read.load_hospital_capacity()

In [None]:
hospitals_df.head(3)

In [None]:
# Per-hospital characteristics as returned by the log reader.
hospitals_characteristics_df = read.load_hospital_characteristics()

In [None]:
hospitals_characteristics_df

## Where did infections happen?

In [None]:
# Infection-location summary; its `percentage_infections` column is plotted further down.
loc_df = read.get_locations_infections()

In [None]:
# Aggregate the location records into one row per calendar day.
# NOTE(review): `read.locations_df` is presumably populated by the earlier get_locations_infections() call — confirm.
locations_per_day = read.locations_df.groupby(pd.Grouper(freq='D')).sum()

In [None]:
# Distinct infection places seen on any day.
# NOTE(review): relies on summing the `location` column yielding one iterable of place labels (presumably list concatenation) — confirm.
all_infection_places = set(locations_per_day.location.sum())

In [None]:
def n_infections(row, infection_place):
    """Total the counts recorded for ``infection_place`` in one row.

    ``row.location`` and ``row.counts`` are read as parallel sequences: every
    position whose location equals ``infection_place`` contributes the count
    stored at the same position. Returns 0 when the place does not occur.
    """
    total = 0
    for position, place in enumerate(row.location):
        if place == infection_place:
            total += row.counts[position]
    return total


In [None]:
# Add one column per infection place holding that place's daily infection count.
# `args` forwards the place as the second positional argument of n_infections.
for infection_place in all_infection_places:
    locations_per_day[str(infection_place)] = locations_per_day.apply(
        n_infections, axis=1, args=(infection_place,)
    )

In [None]:
# Keep only the per-place count columns; the raw `location` / `counts` columns are no longer needed.
locations_per_day = locations_per_day.drop(['location', 'counts'], axis=1)

In [None]:
# Turn daily counts into shares: divide each row by its own total so every day sums to 1.
daily_totals = locations_per_day.sum(axis=1)
locations_per_day = locations_per_day.div(daily_totals, axis=0)


In [None]:
import matplotlib as mpl

# Render all following figures at 100 dots per inch.
mpl.rcParams.update({'figure.dpi': 100})

In [None]:
# Stacked area chart of where infections happened on each day.
locations_per_day.plot.area(alpha=0.5)
plt.legend(bbox_to_anchor=(1,1))
# The rows of locations_per_day were normalised to sum to 1, so the axis shows
# fractions in [0, 1]; the previous 'Percent' label was off by a factor of 100.
plt.ylabel('Fraction of infections')

In [None]:
# A dangling `world.` (leftover tab-completion stub) was a SyntaxError that aborted
# Restart & Run All; evaluate the object itself so the cell is valid and still
# lets the reader inspect the world.
world

In [None]:
from matplotlib import cm  # kept so any other cell relying on `cm` keeps working

# `cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` returns the same colormap on both old and new versions.
cmap = plt.get_cmap('Spectral') # Colour map (there are many others)

# Same daily shares as the area chart, as lines on a log-scaled y axis.
locations_per_day.plot(figsize=(30,8), logy=True, ylim=(1e-3, 1), cmap=cmap)

In [None]:
import matplotlib.ticker as mtick

# Bar chart of the share of infections per location, smallest share first.
infection_share = loc_df['percentage_infections'].sort_values()
ax = infection_share.plot.bar()
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.set_ylabel('Percentage of infections at location')
ax.set_xlabel('location')
# Optional log-scaled y axis: plt.yscale('log')

## Rate of infection

In [None]:
# R values over time as computed by the log reader (plotted in the next cell).
r_df = read.get_r()

In [None]:
from june.policy import SocialDistancing

r_df.plot()
# Shade every social-distancing period. Colours are chosen by index and wrap around:
# the previous colors.pop() raised IndexError as soon as a third SocialDistancing
# policy was configured. The list order reproduces what pop() handed out
# (orange first, then yellow).
colors = ["orange", "yellow"]
distancing_policies = [p for p in policies.policies if isinstance(p, SocialDistancing)]
for i, policy in enumerate(distancing_policies):
    plt.axvspan(policy.start_time,
                policy.end_time,
                facecolor=colors[i % len(colors)], alpha=0.2)
plt.axhline(y=1, linestyle='dashed', color='gray')  # reference line at R = 1
plt.xlabel('Date')
plt.ylim(0,3)
plt.ylabel('R')

## World infection curves, and by super area

In [None]:
# World-level summary time series; columns used below include new_infections,
# hospital_admissions, intensive_care_admissions and infected.
world_df = read.world_summary()

In [None]:
fig, ax = plt.subplots()
# Daily total of new infections.
world_df['new_infections'].resample('D').sum().plot(ax=ax)
# Shade every social-distancing period (SocialDistancing is imported in the R-plot cell).
# Colours are chosen by index and wrap around: the previous colors.pop() raised
# IndexError once a third SocialDistancing policy was configured. The list order
# reproduces what pop() handed out (orange first, then yellow).
colors = ["orange", "yellow"]
distancing_policies = [p for p in policies.policies if isinstance(p, SocialDistancing)]
for i, policy in enumerate(distancing_policies):
    ax.axvspan(policy.start_time,
               policy.end_time,
               facecolor=colors[i % len(colors)], alpha=0.2)
# Marker date — NOTE(review): presumably the start of the UK lockdown; confirm.
plt.axvline('2020-03-23',
           linestyle='dashed',
           color='indianred')
ax.set_ylabel('# new infections')
ax.set_xlabel('Date')

In [None]:
fig, ax = plt.subplots()
# Daily total of hospital plus intensive-care admissions.
(world_df['hospital_admissions']+world_df['intensive_care_admissions']).resample('D').sum().plot(ax=ax)
# Shade every social-distancing period (SocialDistancing is imported in the R-plot cell).
# Colours are chosen by index and wrap around: the previous colors.pop() raised
# IndexError once a third SocialDistancing policy was configured. The list order
# reproduces what pop() handed out (orange first, then yellow).
colors = ["orange", "yellow"]
distancing_policies = [p for p in policies.policies if isinstance(p, SocialDistancing)]
for i, policy in enumerate(distancing_policies):
    ax.axvspan(policy.start_time,
               policy.end_time,
               facecolor=colors[i % len(colors)], alpha=0.2)
# Marker date — NOTE(review): meaning of 2020-04-01 is not stated in the notebook; confirm.
plt.axvline('2020-04-01',
           linestyle='dashed',
           color='indianred')
ax.set_ylabel('# hospital admissions')
ax.set_xlabel('Date')

In [None]:
fig, ax = plt.subplots()
# Number of currently infected people over time.
world_df['infected'].plot(ax=ax)
# Shade every social-distancing period (SocialDistancing is imported in the R-plot cell).
# Colours are chosen by index and wrap around: the previous colors.pop() raised
# IndexError once a third SocialDistancing policy was configured. The list order
# reproduces what pop() handed out (orange first, then yellow).
colors = ["orange", "yellow"]
distancing_policies = [p for p in policies.policies if isinstance(p, SocialDistancing)]
for i, policy in enumerate(distancing_policies):
    ax.axvspan(policy.start_time,
               policy.end_time,
               facecolor=colors[i % len(colors)], alpha=0.2)
ax.set_ylabel('# Infected')
ax.set_xlabel('Date')

In [None]:
# All world-level summary columns on one set of axes.
world_df.plot()
# Shade every social-distancing period (SocialDistancing is imported in the R-plot cell).
# Colours are chosen by index and wrap around: the previous colors.pop() raised
# IndexError once a third SocialDistancing policy was configured. The list order
# reproduces what pop() handed out (orange first, then yellow).
colors = ["orange", "yellow"]
distancing_policies = [p for p in policies.policies if isinstance(p, SocialDistancing)]
for i, policy in enumerate(distancing_policies):
    plt.axvspan(policy.start_time,
                policy.end_time,
                facecolor=colors[i % len(colors)], alpha=0.2)
plt.legend(bbox_to_anchor=(1,1))


In [None]:
# Summary per super area; the `super_area` column is used below to select a single area.
area_df = read.super_area_summary()

In [None]:
# Summary curves for a single super area (E02003282).
in_super_area = area_df["super_area"] == "E02003282"
area_df.loc[in_super_area].plot()
plt.legend(bbox_to_anchor=(1,1))

## World infection curves per age group

In [None]:
# Age-band edges every 10 years, from 0 up to and including 100.
ages_df = read.age_summary(list(range(0, 101, 10)))

In [None]:
# Number of infected people over time, one line per age range.
for name, group in ages_df.groupby('age_range'):
    group['infected'].plot(label=name)
# Shade every social-distancing period (SocialDistancing is imported in the R-plot cell).
# Colours are chosen by index and wrap around: the previous colors.pop() raised
# IndexError once a third SocialDistancing policy was configured. The list order
# reproduces what pop() handed out (orange first, then yellow).
colors = ["orange", "yellow"]
distancing_policies = [p for p in policies.policies if isinstance(p, SocialDistancing)]
for i, policy in enumerate(distancing_policies):
    plt.axvspan(policy.start_time,
                policy.end_time,
                facecolor=colors[i % len(colors)], alpha=0.2)
plt.legend(bbox_to_anchor=(1,1))


In [None]:
# Cumulative deaths over time, one line per age range.
for age_range, age_range_df in ages_df.groupby('age_range'):
    cumulative_deaths = age_range_df['dead'].cumsum()
    cumulative_deaths.plot(label=age_range)
plt.legend()

In [None]:
# Susceptible population over time, one line per age range.
for age_range, age_range_df in ages_df.groupby('age_range'):
    susceptible = age_range_df['susceptible']
    susceptible.plot(label=age_range)
plt.legend()

In [None]:
# Cumulative hospital admissions over time, one line per age range.
for age_range, age_range_df in ages_df.groupby('age_range'):
    cumulative_admissions = age_range_df['hospital_admissions'].cumsum()
    cumulative_admissions.plot(label=age_range)
plt.legend()

In [None]:
# Cumulative intensive-care admissions over time, one line per age range.
for age_range, age_range_df in ages_df.groupby('age_range'):
    cumulative_icu = age_range_df['intensive_care_admissions'].cumsum()
    cumulative_icu.plot(label=age_range)
plt.legend()

## Draw some of the symptoms trajectories

In [None]:
# Draw symptom trajectories for 10 people; the result is iterated below as a collection of
# per-person frames with `symptoms` and `n_secondary_infections` columns.
# NOTE(review): the unit of window_length=600 is not visible here — confirm against ReadLogger.
random_trajectories = read.draw_symptom_trajectories(window_length=600,
                                        n_people=10)

In [None]:
from june.infection import SymptomTag

In [None]:
# Parallel lists of every SymptomTag's numeric value and name (used as y-axis ticks below).
symptoms_values, symptoms_names = [], []
for tag in SymptomTag:
    symptoms_values.append(tag.value)
    symptoms_names.append(tag.name)


In [None]:
# One line per sampled person; the y axis is labelled with the symptom tag names.
for trajectory in random_trajectories:
    symptoms = trajectory['symptoms']
    symptoms.plot()
_ = plt.yticks(symptoms_values, symptoms_names)
plt.ylabel('Symptoms Trajectory')
plt.xlabel('Date')

In [None]:
# Secondary infections over time for each sampled person.
for trajectory in random_trajectories:
    secondary_infections = trajectory['n_secondary_infections']
    secondary_infections.plot()
plt.ylabel('Number of secondary infections')
plt.xlabel('Date')

In [None]:
# `person` was never assigned anywhere in this notebook, so this cell raised NameError on a
# fresh kernel. Pick the first person that has a primary activity and show the spec of the
# group behind it.
# NOTE(review): assumes world.people is iterable and that primary_activity is None for
# people without one — confirm.
person = next(p for p in world.people if p.primary_activity is not None)
person.primary_activity.group.spec