In [1]:
# Enable IPython's autoreload extension in mode 2, so edited modules are
# re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import time
from datetime import datetime
import matplotlib.pyplot as plt
 
# Refresh matplotlib's style library first, then activate the style sheets.
# Reloading only after `use` (the previous order) could not help `use` find a
# style sheet that had not been registered yet.
plt.style.reload_library()
plt.style.use(['science','notebook'])

In [3]:
from june import World 
from june.demography.geography import Geography
from june.demography import Demography
from june.interaction import Interaction
from june.infection import Infection, HealthIndexGenerator, InfectionSelector
from june.infection.transmission import TransmissionConstant
from june.groups import Hospitals, Schools, Companies, Households, CareHomes, Cemeteries, Universities
from june.groups.leisure import generate_leisure_for_config, Cinemas, Pubs, Groceries
from june.simulator import Simulator
from june.infection_seed import InfectionSeed
from june.policy import Policy, Policies
from june import paths
from june.hdf5_savers import load_geography_from_hdf5
from june.logger.read_logger import ReadLogger
from june.world import generate_world_from_geography
from june.hdf5_savers import generate_world_from_hdf5

No --data argument given - defaulting to:
/home/florpi/JUNE/data
No --configs argument given - defaulting to:
/home/florpi/JUNE/configs


# Initialize world

To initialize a certain world, we need to add the different components we want to have in it. First we specify what super areas (msoa) we want to create. We have included these ones, because they are known to contain hospitals, schools, care homes, and companies.

After creating the geography, we create the different components the world needs to have, such as care homes, companies, etc.

In [4]:
# Example configuration file inside the configs directory; it is passed below
# to the leisure generator and to the simulator.
CONFIG_PATH = paths.configs_path / "config_example.yaml"

In [5]:
%%time 

# Restrict the geography to six super areas, identified by their codes.
geography = Geography.from_file(
        {
            "super_area": [
                "E02003282",
                "E02003283",
                "E02001720",
                "E02002560",
                "E02002559",
                "E02004314",
            ]
        }
)

# Attach each kind of venue to the geography before the world is generated
# from it.
geography.hospitals = Hospitals.for_geography(geography)
geography.schools = Schools.for_geography(geography)
geography.companies = Companies.for_geography(geography)
geography.care_homes = CareHomes.for_geography(geography)
# Universities are created from the super areas rather than from the geography object.
geography.universities = Universities.for_super_areas(geography.super_areas)
# Generate the world from the populated geography, with households and commute enabled.
world = generate_world_from_geography(geography, include_households=True, include_commute=True)


2020-08-30 17:38:30,592 - june.demography.geography - INFO - There are 162 areas and 6 super_areas in the world.
2020-08-30 17:38:30,602 - june.groups.hospital - INFO - There are 1 hospitals in this geography.
2020-08-30 17:38:30,684 - june.groups.school - INFO - There are 26 schools in this geography.
2020-08-30 17:38:30,724 - june.groups.school - INFO - No school for the age 0 in this world.
2020-08-30 17:38:30,731 - june.groups.school - INFO - No school for the age 1 in this world.
2020-08-30 17:38:30,738 - june.groups.school - INFO - No school for the age 2 in this world.
2020-08-30 17:38:31,047 - june.groups.carehome - INFO - There are 11 care_homes in this geography.
2020-08-30 17:38:41,591 - numexpr.utils - INFO - NumExpr defaulting to 4 threads.
2020-08-30 17:38:46,042 - june.distributors.worker_distributor - INFO - There are 0 who had to be told to stay real
2020-08-30 17:39:08,210 - june.groups.commute.commutehub_distributor - INFO - Distributing people to commute hubs in 11 

## Commute, travel and leisure

In [6]:
# Create the leisure venues for the selected geography and attach them to the world.
world.pubs = Pubs.for_geography(geography)
world.cinemas = Cinemas.for_geography(geography)
world.groceries = Groceries.for_geography(geography)
# Build the leisure object from the world using the example config file.
leisure = generate_leisure_for_config(world, config_filename=CONFIG_PATH)
leisure.distribute_social_venues_to_households(world.households, super_areas=world.super_areas) # this assigns possible social venues to people.

2020-08-30 17:39:09,871 - june.groups.leisure.leisure - INFO - Distributing social venues to households
2020-08-30 17:39:09,872 - june.groups.leisure.leisure - INFO - Distributed in 0 of 20498 households.


We are also going to need some cemeteries, which are added to the world with `world.cemeteries = Cemeteries()`:


In [7]:
# Cemeteries are constructed without arguments and attached directly to the world.
world.cemeteries = Cemeteries()

In [8]:
# Display the number of people in the generated world.
len(world.people)

57259

### If it took a long time to run the previous commands, it might be a good idea to save the world to reuse it later.

In [9]:
# Write the world to "world.hdf5" in the current working directory; the same
# file name is used to load it again in the next cell.
world.to_hdf5("world.hdf5")

2020-08-30 17:39:18,899 - june.hdf5_savers.world_saver - INFO - saving world to HDF5
2020-08-30 17:39:19,763 - june.hdf5_savers.world_saver - INFO - saving hospitals...
2020-08-30 17:39:19,767 - june.hdf5_savers.world_saver - INFO - saving schools...
2020-08-30 17:39:19,771 - june.hdf5_savers.world_saver - INFO - saving companies...
2020-08-30 17:39:19,779 - june.hdf5_savers.world_saver - INFO - saving households...
2020-08-30 17:39:20,604 - june.hdf5_savers.world_saver - INFO - saving care homes...
2020-08-30 17:39:20,607 - june.hdf5_savers.world_saver - INFO - saving commute cities...
2020-08-30 17:39:20,612 - june.hdf5_savers.world_saver - INFO - saving commute hubs...
2020-08-30 17:39:20,617 - june.hdf5_savers.world_saver - INFO - saving universities...
2020-08-30 17:39:20,621 - june.hdf5_savers.world_saver - INFO - saving social venues...


If we would like to load the world we saved, we just do

In [10]:
# Rebind `world` to a world loaded from the file saved above.
world = generate_world_from_hdf5("world.hdf5")

2020-08-30 17:39:20,669 - june.hdf5_savers.world_saver - INFO - loading world from HDF5
2020-08-30 17:39:20,682 - june.hdf5_savers.world_saver - INFO - loading hospitals...
2020-08-30 17:39:20,686 - june.hdf5_savers.world_saver - INFO - loading schools...
2020-08-30 17:39:20,704 - june.hdf5_savers.world_saver - INFO - loading companies...
2020-08-30 17:39:20,764 - june.hdf5_savers.world_saver - INFO - loading care homes...
2020-08-30 17:39:20,769 - june.hdf5_savers.world_saver - INFO - loading universities...
2020-08-30 17:39:20,774 - june.hdf5_savers.world_saver - INFO - loading commute cities...
2020-08-30 17:39:20,818 - june.hdf5_savers.world_saver - INFO - loading commute hubs...
2020-08-30 17:39:20,966 - june.hdf5_savers.world_saver - INFO - loading households...
2020-08-30 17:39:21,453 - june.hdf5_savers.world_saver - INFO - loading population...
2020-08-30 17:39:22,068 - june.hdf5_savers.world_saver - INFO - loading social venues...
2020-08-30 17:39:22,073 - june.hdf5_savers.wor

In [11]:
# `world` was just re-created from the HDF5 file, so build leisure again for
# the new world object.
# NOTE(review): unlike the first build, distribute_social_venues_to_households
# is not called here -- confirm whether it is needed after loading from disk.
leisure = generate_leisure_for_config(world, CONFIG_PATH)

You now have a beautiful pre-pandemic world.

# Adding the infection

The module in charge of infecting people is called the ``InfectionSelector``, which gives people a transmission time profile and a symptoms trajectory based on their age and sex (through the health index generator)

In [12]:
# Health index generator built from its default file with an asymptomatic ratio of 0.2.
health_index_generator = HealthIndexGenerator.from_file(asymptomatic_ratio=0.2)
# Infection selector using that generator and the XNExp transmission
# configuration from the default configs.
selector = InfectionSelector.from_file(
        health_index_generator=health_index_generator,
        transmission_config_path=paths.configs_path / 'defaults/transmission/XNExp.yaml'
)

# Adding the interaction

In [13]:
# Interaction built by `from_file` with no arguments (its default settings).
interaction = Interaction.from_file()

Beta are the intensities of the interaction taking place at the different groups

In [14]:
# Scale every group's interaction intensity to 0.2 times its current value,
# updating the existing beta dictionary in place. Re-running this cell scales
# the values again.
interaction.beta.update(
    {group: intensity * 0.2 for group, intensity in interaction.beta.items()}
)

In [15]:
# Display the per-group intensities after the scaling applied above.
interaction.beta

{'box': 0.2,
 'pub': 0.1,
 'grocery': 0.05,
 'cinema': 0.05,
 'commute_unit': 0.1,
 'commute_city_unit': 0.1,
 'hospital': 0.010000000000000002,
 'care_home': 0.24,
 'company': 0.16000000000000003,
 'school': 0.16000000000000003,
 'household': 0.2,
 'university': 0.1}

Moreover, this interaction module uses contact matrices, which are different for different groups. These contact matrices shouldn't be modified for now. They are a combination of conversational contact matrices and physical contact matrices (see the BBC pandemic paper, from which these matrices are extracted: https://www.medrxiv.org/content/10.1101/2020.02.16.20023754v2).

There is a parameter, ``alpha`` ($\alpha$), that combines these two matrices in the following way,


$\beta M \left(1 + (\alpha - 1) P \right)$

where $\beta$ is the intensity of the interaction, and $P$ the physical contact matrix. A larger $\alpha$ produces more physical contacts. It is a single overall number, independent of the particular group.


In [16]:
# Display the current value of the alpha parameter described above.
interaction.alpha_physical

2.0

# Seed the disease

There are two options implemented in the seed at the moment, either you specify the number of cases and these are then homogeneously distributed by population to the different areas, or you use UK data on cases per region. For now use the first case.

In [17]:
# Seed object built from the world's super areas and the infection selector.
infection_seed = InfectionSeed(
    world.super_areas, selector,
)

In [18]:
# Number of initial cases handed to the seed.
n_cases = 50
infection_seed.unleash_virus(n_cases) # play around with the initial number of cases

# Set policies

In [19]:
# Policies loaded by `from_file` with no arguments (its default source).
policies = Policies.from_file()

In [20]:
# Display the policies object (the recorded output is just its default repr).
policies

<june.policy.policy.Policies at 0x7fc1c309ca60>

# Run the simulation

The simulator is the main module in charge of running the simulation. It coordinates the ``ActivityManager``, which is responsible for allocating people to the right groups given the current timestep; it updates the health status of the population; and it runs the interaction over the different groups. All of these modules can be modified by policies at any given time.

Since the timer configuration is a bit cumbersome, it is read from the config file at ``configs/config_example.yaml``

In [25]:
# Assemble the simulator from the pieces built above: world, infection
# selector, interaction, leisure and policies, plus the example config file.
simulator = Simulator.from_file(
    world=world,
    infection_selector=selector,
    interaction=interaction, 
    config_filename = CONFIG_PATH,
    leisure = leisure,
    policies = policies
)

In [None]:
%%time
# Run the simulation; the recorded log reports a 40-day run starting at day 0.
simulator.run()

2020-08-30 17:40:14,286 - june.simulator - INFO - Starting group_dynamics for 40 days at day 0
2020-08-30 17:40:14,288 - june.simulator - INFO - starting the loop ..., at 0 days, to run for 40 days
2020-08-30 17:40:15,814 - june.simulator - INFO - Date = 2020-03-01 00:00:00, number of deaths =  0, number of infected = 50
2020-08-30 17:40:16,622 - june.simulator - INFO - Date = 2020-03-01 04:00:00, number of deaths =  0, number of infected = 50
2020-08-30 17:40:17,335 - june.simulator - INFO - Date = 2020-03-01 08:00:00, number of deaths =  0, number of infected = 50
2020-08-30 17:40:17,598 - june.simulator - INFO - Date = 2020-03-01 12:00:00, number of deaths =  0, number of infected = 50
2020-08-30 17:40:17,886 - june.simulator - INFO - Date = 2020-03-02 00:00:00, number of deaths =  0, number of infected = 50
2020-08-30 17:40:18,245 - june.simulator - INFO - Date = 2020-03-02 01:00:00, number of deaths =  0, number of infected = 50
2020-08-30 17:40:18,507 - june.simulator - INFO - Da

2020-08-30 17:40:47,540 - june.simulator - INFO - Date = 2020-03-14 08:00:00, number of deaths =  1, number of infected = 461
2020-08-30 17:40:47,822 - june.simulator - INFO - Date = 2020-03-14 12:00:00, number of deaths =  1, number of infected = 467
2020-08-30 17:40:48,395 - june.simulator - INFO - Date = 2020-03-15 00:00:00, number of deaths =  1, number of infected = 485
2020-08-30 17:40:48,980 - june.simulator - INFO - Date = 2020-03-15 04:00:00, number of deaths =  1, number of infected = 495
2020-08-30 17:40:49,567 - june.simulator - INFO - Date = 2020-03-15 08:00:00, number of deaths =  1, number of infected = 500
2020-08-30 17:40:49,980 - june.simulator - INFO - Date = 2020-03-15 12:00:00, number of deaths =  1, number of infected = 506
2020-08-30 17:40:50,694 - june.simulator - INFO - Date = 2020-03-16 00:00:00, number of deaths =  1, number of infected = 532
2020-08-30 17:40:51,311 - june.simulator - INFO - Date = 2020-03-16 01:00:00, number of deaths =  1, number of infecte

2020-08-30 17:41:36,192 - june.simulator - INFO - Date = 2020-03-28 04:00:00, number of deaths =  1, number of infected = 1706
2020-08-30 17:41:37,071 - june.simulator - INFO - Date = 2020-03-28 08:00:00, number of deaths =  1, number of infected = 1716
2020-08-30 17:41:37,701 - june.simulator - INFO - Date = 2020-03-28 12:00:00, number of deaths =  1, number of infected = 1728
2020-08-30 17:41:38,591 - june.simulator - INFO - Date = 2020-03-29 00:00:00, number of deaths =  1, number of infected = 1758
2020-08-30 17:41:39,879 - june.simulator - INFO - Date = 2020-03-29 04:00:00, number of deaths =  1, number of infected = 1762
2020-08-30 17:41:41,124 - june.simulator - INFO - Date = 2020-03-29 08:00:00, number of deaths =  1, number of infected = 1772
2020-08-30 17:41:42,022 - june.simulator - INFO - Date = 2020-03-29 12:00:00, number of deaths =  1, number of infected = 1780
2020-08-30 17:41:42,928 - june.simulator - INFO - Date = 2020-03-30 00:00:00, number of deaths =  1, number of 

While the simulation runs (and afterwards) we can launch the visualization webpage by running
```python june/visualizer.py path/to/results``` 

# Getting the results

All results are stored in a json file specified in the ``save_path`` variable in the config file. We can also access it from ``world.logger`` directly.

In [23]:
import pandas as pd

In [24]:
# Reader for the simulation results, constructed with its default arguments.
# NOTE(review): the recorded output of this cell is a KeyError about a missing
# 'locations' object -- presumably the results were not fully written when it
# ran; confirm the simulation has finished before creating the reader.
read = ReadLogger()

KeyError: "Unable to open object (object 'locations' doesn't exist)"

## Hospital data and how it changed over time

In [None]:
# Hospital capacity data as returned by the results reader.
hospitals_df = read.load_hospital_capacity()

In [None]:
# Preview the first three rows only.
hospitals_df.head(3)

In [None]:
# Hospital characteristics as returned by the results reader.
hospitals_characteristics_df = read.load_hospital_characteristics()

In [None]:
# Display the whole characteristics table.
hospitals_characteristics_df

## Where did infections happen?

In [None]:
# Locations of infections as returned by the results reader; the next cell
# converts this to an array and counts the distinct values.
infection_locations = read.get_locations_infections()

In [None]:
# Tally how often each distinct location occurs among the recorded infections.
unique_locations, counts_locations = np.unique(
    np.array(infection_locations), return_counts=True
)

# One row per location, indexed by the location, with its number of infections.
location_counts_df = pd.DataFrame(
    {"locations": unique_locations, "counts": counts_locations}
).set_index("locations")

In [None]:
# Share of all infections recorded at each location, in percent. The
# denominator is the 'counts' column only: summing every value in the frame
# would also add 'percent_infections' itself if this cell is run a second
# time, silently shrinking all percentages.
location_counts_df['percent_infections'] = (
    100 * location_counts_df['counts'] / location_counts_df['counts'].sum()
)

In [None]:
import matplotlib.ticker as mtick

# Bar chart of the infection share per location, sorted in ascending order,
# with the y axis formatted as percentages.
ax = (
    location_counts_df['percent_infections']
    .sort_values()
    .plot.bar()
)
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.set_ylabel('Percentage of infections at location')
ax.set_xlabel('location')


In [None]:
# Find outbreaks

In [None]:
# Count how often each location id occurs over the whole run.
# Presumably every row of 'location_id' holds a list of ids, so `.sum()`
# concatenates them into one long list -- TODO confirm against ReadLogger.
unique_id, count_id =  np.unique(
        np.array(read.locations_df['location_id'].sum()),
        return_counts=True
)

In [None]:
# Ids with the five largest counts, in ascending order of count.
unique_id[np.argsort(count_id)[-5:]]

In [None]:
# The five largest counts themselves, in the same order as the ids above.
count_id[np.argsort(count_id)[-5:]]

In [None]:
# Aggregate the locations table into weekly bins by summing each column.
weekly_infections=read.locations_df.resample('W').sum()

In [None]:
# Add one column per location holding how many times that location's name
# occurs in each week's `location` entry.
for location in unique_locations:
    location_name = str(location)
    weekly_infections[location_name] = weekly_infections.apply(
        lambda week: week.location.count(location_name), axis=1
    )

In [None]:
# Plot every column from the fourth onwards -- presumably the per-location
# count columns added above; confirm what the first three columns are.
weekly_infections.iloc[:,3:].plot()

In [None]:
# Same per-location counting as for the weekly table, applied to every row of
# the unaggregated locations table. This adds columns to `read.locations_df`.
for location in unique_locations:
    location_name = str(location)
    read.locations_df[location_name] = read.locations_df.apply(
        lambda record: record.location.count(location_name), axis=1
    )

In [None]:
# Convert the index to datetimes.
read.locations_df.index = pd.to_datetime(read.locations_df.index)


In [None]:
# Plot every column from the fourth onwards -- presumably the per-location
# count columns added above; confirm what the first three columns are.
read.locations_df.iloc[:,3:].plot()

## Rate of infection

In [None]:
# R values as returned by the results reader; plotted against date below.
r_df = read.get_r()

In [None]:
from june.policy import SocialDistancing

# Plot R over time and shade the period of every social-distancing policy.
r_df.plot()
colors = ["yellow", "orange", 'blue']
social_distancing_policies = [
    policy for policy in policies.policies if isinstance(policy, SocialDistancing)
]
for index, policy in enumerate(social_distancing_policies):
    # Walk the palette from its last entry backwards (blue, orange, yellow) and
    # wrap around, so more policies than colours no longer raises IndexError
    # and the palette is left intact when the cell is re-run.
    shade = colors[-1 - index % len(colors)]
    plt.axvspan(policy.start_time,
                policy.end_time,
                facecolor=shade,
                alpha=0.2)
# Dashed horizontal reference line at R = 1.
plt.axhline(y=1, linestyle='dashed', color='gray')
plt.xlabel('Date')
plt.ylim(0,3)
plt.ylabel('R')

## World infection curves, and by super area

In [None]:
%%time
# World-level summary table; the cells below plot several of its columns.
world_df = read.world_summary()

In [None]:
# Daily infection totals, with a faint band for every policy period and a
# dashed marker line at 2020-03-23.
fig, ax = plt.subplots()
world_df['daily_infections'].resample('D').sum().plot(ax=ax)
for policy in policies.policies:
    ax.axvspan(policy.start_time, policy.end_time, alpha=0.01)
ax.axvline('2020-03-23', linestyle='dashed', color='indianred')
ax.set_ylabel('# daily infections')
ax.set_xlabel('Date')

In [None]:
# Daily hospital admission totals, with a dashed marker line at 2020-04-01.
fig, ax = plt.subplots()
world_df['daily_hospital_admissions'].resample('D').sum().plot(ax=ax)
ax.axvline('2020-04-01', linestyle='dashed', color='indianred')
ax.set_ylabel('# daily hospital admissions')
ax.set_xlabel('Date')

In [None]:
# Plot the 'current_infected' column of the world summary on its own axes.
fig, ax = plt.subplots()
world_df['current_infected'].plot(ax=ax)

ax.set_ylabel('# Infected')
ax.set_xlabel('Date')

In [None]:
# Plot every column of the world summary in one figure.
world_df.plot()

# Anchor the legend at the axes' (1, 1) corner.
plt.legend(bbox_to_anchor=(1,1))


In [None]:
# Re-create the results reader; `read` was already assigned in an earlier cell,
# so this rebinds the name to a fresh instance.
read = ReadLogger()

In [None]:
%%time

# Summary table with a `super_area` column, which the next cell filters on.
area_df = read.super_area_summary()

In [None]:
# Plot the summary rows of one super area: E02003282, the first code listed
# when the geography was created.
area_df[area_df.super_area == "E02003282"].plot()
plt.legend(bbox_to_anchor=(1,1))

## World infection curves per age group

In [None]:
# Summarise by age using the boundaries 0, 10, ..., 100 (presumably the edges
# of ten-year age bins -- confirm against ReadLogger.age_summary).
ages_df = read.age_summary(list(range(0, 101, 10)))

In [None]:
# One 'current_infected' curve per age range, labelled with the age range.
for name, group in ages_df.groupby('age_range'):
    group['current_infected'].plot(label=name)

# Shade the period of every social-distancing policy. This relies on
# `SocialDistancing` having been imported in the R-plot cell earlier in the
# notebook.
colors = ["yellow", "orange","blue"]
social_distancing_policies = [
    policy for policy in policies.policies if isinstance(policy, SocialDistancing)
]
for index, policy in enumerate(social_distancing_policies):
    # Walk the palette from its last entry backwards (blue, orange, yellow) and
    # wrap around, so more policies than colours no longer raises IndexError
    # and the palette is left intact when the cell is re-run.
    shade = colors[-1 - index % len(colors)]
    plt.axvspan(policy.start_time,
                policy.end_time,
                facecolor=shade, alpha=0.2)
plt.legend(bbox_to_anchor=(1,1))


## Draw some of the symptoms trajectories

In [None]:
# Request symptom trajectories for 10 people with a window length of 600
# (units are not visible here -- check ReadLogger.draw_symptom_trajectories).
# The cells below iterate over the result as per-person data frames.
random_trajectories = read.draw_symptom_trajectories(window_length=600,
                                        n_people=10)

In [None]:
from june.infection import SymptomTag

In [None]:
# Collect, in enum iteration order, the value and the name of each SymptomTag
# member; the two lists stay aligned index by index.
symptoms_values = []
symptoms_names = []
for tag in SymptomTag:
    symptoms_values.append(tag.value)
    symptoms_names.append(tag.name)


In [None]:
# Overlay the 'symptoms' series of every sampled person on the same axes.
for df_person in random_trajectories:
    df_person['symptoms'].plot()
plt.ylabel('Symptoms Trajectory')
# Label the y ticks with the SymptomTag names at their numeric values (both
# lists were built from SymptomTag in the previous cell).
_ = plt.yticks(symptoms_values, symptoms_names)
plt.xlabel('Date')

In [None]:
# Overlay the 'n_secondary_infections' series of every sampled person on the
# same axes.
for df_person in random_trajectories:
    df_person['n_secondary_infections'].plot()
plt.ylabel('Number of secondary infections')

plt.xlabel('Date')