In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and source edits are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import time
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context('notebook')

In [3]:
from june import World 
from june.demography.geography import Geography
from june.demography import Demography
from june.interaction import Interaction
from june.infection import Infection, HealthIndexGenerator, InfectionSelector
from june.infection.transmission import TransmissionConstant
from june.groups import Hospitals, Schools, Companies, Households, CareHomes, Cemeteries, Universities
from june.groups.leisure import generate_leisure_for_config, Cinemas, Pubs, Groceries
from june.simulator import Simulator
from june.infection_seed import InfectionSeed
from june.policy import Policy, Policies
from june import paths
from june.hdf5_savers import load_geography_from_hdf5
from june.logger.read_logger import ReadLogger
from june.world import generate_world_from_geography
from june.hdf5_savers import generate_world_from_hdf5

No --data argument given - defaulting to:
/home/arnau/code/JUNE/data
No --configs argument given - defaulting to:
/home/arnau/code/JUNE/configs


# Initialize world

To initialize a world, we need to add the different components we want it to contain. First we specify which super areas (MSOAs) we want to create. We have chosen these ones because they are known to contain hospitals, schools, care homes, and companies.

After creating the geography, we create the different components the world needs, such as hospitals, schools, companies, care homes and universities.

In [4]:
# Config file shared by the leisure generation and the simulator cells below.
# The packaged example config is kept as a commented-out alternative.
#CONFIG_PATH = paths.configs_path / "config_example.yaml"
CONFIG_PATH = "../scripts/config_nocommute.yaml"

In [5]:
%%time 

# Super area codes that make up this small demo world.
SUPER_AREA_CODES = [
    "E02003282",
    "E02003283",
    "E02001720",
    "E02002560",
    "E02002559",
    "E02004314",
]
geography = Geography.from_file({"super_area": SUPER_AREA_CODES})

# Attach every kind of venue to the geography before the world is generated from it.
geography.hospitals = Hospitals.for_geography(geography)
geography.schools = Schools.for_geography(geography)
geography.companies = Companies.for_geography(geography)
geography.care_homes = CareHomes.for_geography(geography)
geography.universities = Universities.for_super_areas(geography.super_areas)

# Households are included, commute is switched off.
world = generate_world_from_geography(
    geography,
    include_households=True,
    include_commute=False,
)


2020-08-31 16:04:58,458 - june.demography.geography - INFO - There are 162 areas and 6 super_areas in the world.
2020-08-31 16:04:58,468 - june.groups.hospital - INFO - There are 1 hospitals in this geography.
2020-08-31 16:04:58,547 - june.groups.school - INFO - There are 26 schools in this geography.
2020-08-31 16:04:58,595 - june.groups.school - INFO - No school for the age 0 in this world.
2020-08-31 16:04:58,599 - june.groups.school - INFO - No school for the age 1 in this world.
2020-08-31 16:04:58,603 - june.groups.school - INFO - No school for the age 2 in this world.
2020-08-31 16:04:59,017 - june.groups.carehome - INFO - There are 11 care_homes in this geography.
2020-08-31 16:05:17,798 - june.distributors.worker_distributor - INFO - There are 0 who had to be told to stay real
CPU times: user 24.2 s, sys: 811 ms, total: 25 s
Wall time: 26.2 s


## Commute, travel and leisure

In [6]:
# Create one venue collection per leisure type and attach it to the world.
for venue_attr, venue_cls in (("pubs", Pubs), ("cinemas", Cinemas), ("groceries", Groceries)):
    setattr(world, venue_attr, venue_cls.for_geography(geography))

# Build the leisure module from the config, then spread the venues over the areas.
leisure = generate_leisure_for_config(world, config_filename=CONFIG_PATH)
leisure.distribute_social_venues_to_areas(
    areas=world.areas,
    super_areas=world.super_areas,
)

2020-08-31 16:05:24,315 - june.groups.leisure.leisure - INFO - Linking households for visits
2020-08-31 16:05:24,885 - june.groups.leisure.leisure - INFO - Linking households with care homes for visits
2020-08-31 16:05:25,016 - june.groups.leisure.leisure - INFO - Distributing social venues to areas
2020-08-31 16:05:25,018 - june.groups.leisure.leisure - INFO - Distributed in 0 of 162 areas.
2020-08-31 16:05:25,259 - june.groups.leisure.leisure - INFO - Distributed in 162 of 162 areas.


We are also going to need some cemeteries, which we attach to the world with `world.cemeteries = Cemeteries()`.


In [7]:
# Attach a Cemeteries collection (constructed with no arguments) to the world.
world.cemeteries = Cemeteries()

In [8]:
# Number of people in the generated world.
len(world.people)

57259

### If it took a long time to run the previous commands, it might be a good idea to save the world to reuse it later.

In [9]:
# Disabled by default: uncomment to save the generated world to "world.hdf5".
#world.to_hdf5("world.hdf5")

If we would like to load the world we saved, we just do

In [10]:
# Disabled by default: uncomment to replace `world` with the one stored in "world.hdf5".
#world = generate_world_from_hdf5("world.hdf5")

In [11]:
# and regenerate leisure in case we load it externally
# NOTE(review): this rebinds `leisure` to a new object, discarding the one on which
# `distribute_social_venues_to_areas` was called earlier. Presumably that call stored
# its results on the world's areas rather than on the leisure object — confirm.
leisure = generate_leisure_for_config(world, CONFIG_PATH)

You now have a beautiful pre-pandemic world.

# Adding the infection

The module in charge of infecting people is called the ``InfectionSelector``, which gives people a transmission time profile and a symptoms trajectory based on their age and sex (through the health index generator)

In [12]:
# Transmission profile definition shipped with the default configs.
transmission_config = paths.configs_path / 'defaults/transmission/XNExp.yaml'

# The health index generator is built with an asymptomatic ratio of 0.2
# and handed to the selector together with the transmission config.
health_index_generator = HealthIndexGenerator.from_file(asymptomatic_ratio=0.2)
selector = InfectionSelector.from_file(
    health_index_generator=health_index_generator,
    transmission_config_path=transmission_config,
)

# Adding the interaction

In [13]:
# Build the interaction module from its default file (no arguments given).
interaction = Interaction.from_file()

The betas are the intensities of the interactions taking place in the different groups.

In [14]:
# Global multiplier applied to every group's default interaction intensity.
BETA_SCALE = 1.85

# Scale from freshly loaded defaults instead of multiplying in place with `*=`,
# so that re-running this cell does not compound the factor.
# NOTE(review): assumes `Interaction.from_file()` with no arguments yields the same
# defaults that `interaction` was created with — confirm if that cell changes.
default_betas = Interaction.from_file().beta
for key, default_beta in default_betas.items():
    interaction.beta[key] = default_beta * BETA_SCALE

In [15]:
# Display the scaled intensities, keyed by group type.
interaction.beta

{'box': 1.85,
 'pub': 0.18500000000000003,
 'grocery': 0.18500000000000003,
 'cinema': 0.18500000000000003,
 'commute_unit': 1.85,
 'commute_city_unit': 1.85,
 'hospital': 1.85,
 'care_home': 1.85,
 'company': 0.18500000000000003,
 'school': 0.18500000000000003,
 'household': 0.37000000000000005,
 'university': 0.018500000000000003}

Moreover, this interaction module uses contact matrices, which are different for different groups. These contact matrices shouldn't be modified for now. They are a combination of conversational contact matrices and physical contact matrices (see the BBC Pandemic paper, from which these matrices are extracted: https://www.medrxiv.org/content/10.1101/2020.02.16.20023754v2).

There is a parameter, ``alpha`` ($\alpha$), that combines these two matrices in the following way,


$\beta M \left(1 + (\alpha - 1) P \right)$

where $\beta$ is the intensity of the interaction, and $P$ the physical contact matrix. A larger $\alpha$ produces more physical contacts. It is a global parameter, independent of the particular group.


In [16]:
# Display the current value of the physical-contact parameter alpha.
interaction.alpha_physical

2.0

# Seed the disease

There are two options implemented in the seed at the moment: either you specify the number of cases, which are then distributed homogeneously by population across the different areas, or you use UK data on cases per region. For now, we use the first option.

In [17]:
# The seed is given the world's super areas and the infection selector.
infection_seed = InfectionSeed(world.super_areas, selector)

In [18]:
# Number of initial cases handed to the seed.
n_cases = 30
infection_seed.unleash_virus(n_cases) # play around with the initial number of cases

# Set policies

In [19]:
# Load the policy set from its default file (no arguments given).
policies = Policies.from_file()

In [20]:
# Echo the object; the default repr only shows its type and memory address.
policies

<june.policy.policy.Policies at 0x7fc7c5e57c10>

# Run the simulation

The simulator is the main module in charge of running the simulation. It coordinates the ``ActivityManager``, which is responsible for allocating people to the right groups given the current timestep; it updates the health status of the population, and it runs the interaction over the different groups. All of these modules can be modified by policies at any given time.

Since the timer configuration is a bit cumbersome, it is read from the config file given by ``CONFIG_PATH`` (here ``../scripts/config_nocommute.yaml``).

In [21]:
# Collect every component built above and hand them to the simulator in one call.
simulator_components = dict(
    world=world,
    infection_selector=selector,
    interaction=interaction,
    config_filename=CONFIG_PATH,
    leisure=leisure,
    policies=policies,
)
simulator = Simulator.from_file(**simulator_components)

In [22]:
%%time
# Run the simulation with the components configured above.
# NOTE(review): the saved output of this cell is a SimulatorError (74083 active
# people vs. 57259 people in the world); the result cells below need a successful run.
simulator.run()

2020-08-31 16:05:27,093 - june.simulator - INFO - Starting group_dynamics for 50 days at day 0
2020-08-31 16:05:27,094 - june.simulator - INFO - starting the loop ..., at 0 days, to run for 50 days
2020-08-31 16:05:29,382 - june.simulator - INFO - Date = 2020-03-01 00:00:00, number of deaths =  0, number of infected = 30


SimulatorError: Number of people active 74083 does not match the total people number 57259.
People in the world 57259
People going abroad 0
People coming from abroad 0
Current rank 0


In [23]:
# Debug: re-check the world's population size after the failed run.
len(world.people)

57259

In [38]:
# Debug leftover. NOTE(review): on a fresh kernel `grocery` is not defined by any cell
# above; it is only bound as the loop variable of the debugging cell that follows,
# so this cell fails under Restart & Run All.
grocery.max_size

200

In [72]:
# Debug check: for every person listed in each hospital, look through that person's
# subgroups for one that belongs to a *different* group and also lists the person.
# `asd` ends up as 1 if at least one such double registration is found.
# NOTE: the loop variable is named `grocery` although it iterates `world.hospitals`.
# It is left unchanged here because `grocery` and `person` leak into the notebook
# namespace and other cells read them.
asd = 0
diff = 0  # only referenced by the disabled check below
for grocery in world.hospitals:
    for person in grocery.people:
        for subgroup in person.subgroups.iter():
            if subgroup is not None and subgroup.group is not grocery:
                if person in subgroup.people:
                    asd=1
                    break  # exits only the innermost (subgroup) loop; outer loops continue
# Disabled earlier variant: flag groups with duplicate person ids or size above max_size.
#    people_ids = [person.id for person in grocery.people]
#    if len(set(people_ids)) != len(people_ids) or grocery.size > grocery.max_size:
#        diff += grocery.max_size - grocery.size
#        asd=1
#        break

In [75]:
# Debug: population size, for comparison with the membership total computed below.
len(world.people)

57259

In [76]:
# Names of the `world` attributes whose groups are counted in the next cell.
groups = ["households", "care_homes", "companies", "pubs", "groceries", "cinemas", "universities", "hospitals", "schools"]

In [81]:
# Total number of memberships: people currently listed in any group of any listed type.
total_size = sum(
    len(member_group.people)
    for group_attr in groups
    for member_group in getattr(world, group_attr)
)

In [82]:
# Display the membership total; compare with len(world.people).
total_size

57259

While the simulation runs (and afterwards) we can launch the visualization webpage by running
```python june/visualizer.py path/to/results``` 

# Getting the results

All results are stored in a json file specified in the ``save_path`` variable in the config file. We can also access it from ``world.logger`` directly.

In [26]:
import pandas as pd

In [27]:
# Open the simulation results through the read logger (default arguments).
# NOTE(review): the saved output is a KeyError about a missing 'locations' object,
# presumably because the simulation above did not finish — confirm after a clean run.
read = ReadLogger()

KeyError: "Unable to open object (object 'locations' doesn't exist)"

## Hospital data and how it changed over time

In [None]:
# Load the hospital capacity records from the logger.
hospitals_df = read.load_hospital_capacity()

In [None]:
# Preview the first three rows.
hospitals_df.head(3)

In [None]:
# Load the hospital characteristics from the logger.
hospitals_characteristics_df = read.load_hospital_characteristics()

In [None]:
# Display the full characteristics table.
hospitals_characteristics_df

## where did infections happen?

In [None]:
# Infection locations summary; a later cell plots its 'percentage_infections' column.
loc_df = read.get_locations_infections()

In [None]:
# Distinct values found in the logger's `super_areas` array.
unique_super_areas = np.unique(read.super_areas)

In [None]:
def _super_areas_of_new_infections(row):
    """Look up `read.super_areas` at the row's `new_infected_ids`."""
    return read.super_areas[row.new_infected_ids]


# Adds the column in place on the logger's own frame.
read.locations_df["super_areas"] = read.locations_df.apply(
    _super_areas_of_new_infections, axis=1
)

In [None]:
# Display the locations frame, now including the 'super_areas' column.
read.locations_df

In [None]:
# Daily aggregation of the locations frame (display only; the result is not stored).
read.locations_df.resample('D').sum()

In [None]:
# Display the infection locations summary.
loc_df

In [None]:
# Sum the locations frame per calendar day.
# NOTE(review): presumably 'location' and 'counts' hold per-row lists, so the daily
# sum concatenates them — confirm against the logger's schema.
locations_per_day = read.locations_df.groupby(pd.Grouper(freq='D')).sum()

In [None]:
# Every distinct infection place seen on any day.
# NOTE(review): relies on `.sum()` over the 'location' column producing one iterable of places.
all_infection_places = set(locations_per_day.location.sum())

In [None]:
def n_infections(row, infection_place):
    """Add up the counts of `row` that belong to `infection_place`.

    `row.location` and `row.counts` are read as parallel sequences: the count at
    position ``i`` is included when ``row.location[i] == infection_place``.
    Returns 0 when the place does not occur in the row.
    """
    total = 0
    for position, place in enumerate(row.location):
        if place == infection_place:
            total += row.counts[position]
    return total


In [None]:
# Add one column per infection place holding that day's number of infections there.
# Iterate in sorted order: a raw set of strings is walked in an order that can change
# between interpreter runs (hash randomisation), which would shuffle the column order
# and therefore the legend and colours of the plots below. Sorting by `str` matches
# how the column names are built.
for infection_place in sorted(all_infection_places, key=str):
    locations_per_day[str(infection_place)] = locations_per_day.apply(
        lambda x: n_infections(x, infection_place),
        axis=1
    )

In [None]:
# Keep only the per-place count columns; the raw columns are no longer needed.
raw_columns = ['location', 'counts']
locations_per_day = locations_per_day.drop(columns=raw_columns)

In [None]:
# Turn daily counts into daily shares: divide every row by its own total.
daily_totals = locations_per_day.sum(axis=1)
locations_per_day = locations_per_day.div(daily_totals, axis=0)


In [None]:
import matplotlib as mpl
# Set the figure resolution to 100 dpi for the plots that follow.
mpl.rcParams['figure.dpi'] = 100

In [None]:
# Stacked-area view of where infections happened each day.
# The frame holds per-day shares (each row was divided by its row sum), so the
# y-axis is a fraction between 0 and 1, not a percentage; the label says so.
locations_per_day.plot.area(alpha=0.5)
plt.legend(bbox_to_anchor=(1,1))
plt.ylabel('Fraction of infections')

In [None]:
from matplotlib import cm
# Colour map for the per-location lines (there are many others).
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` is the supported way to look a colormap up by name.
cmap = plt.get_cmap('Spectral')

# Same daily shares on a log scale, so rarely used locations stay visible.
locations_per_day.plot(figsize=(30,8), logy=True, ylim=(1e-3, 1), cmap=cmap)

In [None]:
import matplotlib.ticker as mtick

# Bar chart of the infection percentage per location, smallest first.
sorted_percentages = loc_df['percentage_infections'].sort_values()
ax = sorted_percentages.plot.bar()
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.set_ylabel('Percentage of infections at location')
ax.set_xlabel('location')
#plt.yscale('log')

## rate of infection

In [None]:
# R estimate from the logger; plotted against the date in the next cell.
r_df = read.get_r()

In [None]:
from june.policy import SocialDistancing
from itertools import cycle

r_df.plot()
# Shade every SocialDistancing policy period. Colours are taken from the end of
# the list first (as `pop()` did) and then repeat, so having more policies than
# colours no longer raises IndexError.
colors = ["yellow", "orange", 'blue']
span_colors = cycle(reversed(colors))
for policy in policies.policies:
    if isinstance(policy, SocialDistancing):
        plt.axvspan(policy.start_time,
                policy.end_time,
                facecolor=next(span_colors),
                alpha=0.2)
# R = 1 reference line.
plt.axhline(y=1, linestyle='dashed', color='gray')
plt.xlabel('Date')
plt.ylim(0,3)
plt.ylabel('R')

## World infection curves, and by super area

In [None]:
# World-level summary time series from the logger.
world_df = read.world_summary()

In [None]:
# Total new infections over the run, as a fraction of the population.
world_df['new_infections'].sum()/len(world.people)

In [None]:
# Population size used as the denominator above.
len(world.people)

In [None]:
fig, ax = plt.subplots()

# Daily number of new infections.
daily_new_infections = world_df['new_infections'].resample('D').sum()
daily_new_infections.plot(ax=ax)

# Very faint shading for every policy's active period.
for policy in policies.policies:
    ax.axvspan(policy.start_time, policy.end_time, alpha=0.01)

# Dashed reference line on 2020-03-23.
ax.axvline('2020-03-23', linestyle='dashed', color='indianred')
ax.set_ylabel('# new infections')
ax.set_xlabel('Date')

In [None]:
fig, ax = plt.subplots()

# Daily admissions: hospital and intensive care added together.
all_admissions = world_df['hospital_admissions'] + world_df['intensive_care_admissions']
all_admissions.resample('D').sum().plot(ax=ax)

# Dashed reference line on 2020-04-01.
ax.axvline('2020-04-01', linestyle='dashed', color='indianred')
ax.set_ylabel('# hospital admissions')
ax.set_xlabel('Date')

In [None]:
# Number of currently infected people over time.
fig, ax = plt.subplots()
infected_series = world_df['infected']
infected_series.plot(ax=ax)
ax.set_ylabel('# Infected')
ax.set_xlabel('Date')

In [None]:
# All world summary columns in one figure, with the legend anchored at the axes' top-right corner.
summary_ax = world_df.plot()
summary_ax.legend(bbox_to_anchor=(1,1))


In [None]:
# Per-super-area summary; the next cell filters it on its `super_area` column.
area_df = read.super_area_summary()

In [None]:
# Summary curves for a single super area.
in_selected_area = area_df.super_area == "E02003282"
area_df[in_selected_area].plot()
plt.legend(bbox_to_anchor=(1,1))

## World infection curves per age group

In [None]:
# Age bin edges every ten years: 0, 10, ..., 100.
age_bin_edges = list(range(0, 101, 10))
ages_df = read.age_summary(age_bin_edges)

In [None]:
from itertools import cycle

# One infected curve per age range.
for name, group in ages_df.groupby('age_range'):
    group['infected'].plot(label=name)

# Shade every SocialDistancing policy period. Colours are taken from the end of
# the list first (as `pop()` did) and then repeat, so having more policies than
# colours no longer raises IndexError.
colors = ["yellow", "orange"]
span_colors = cycle(reversed(colors))
for policy in policies.policies:
    if isinstance(policy, SocialDistancing):
        plt.axvspan(policy.start_time,
                policy.end_time,
                facecolor=next(span_colors), alpha=0.2)
plt.legend(bbox_to_anchor=(1,1))


In [None]:
# Cumulative deaths, one curve per age range.
for age_range, age_range_df in ages_df.groupby('age_range'):
    cumulative_deaths = age_range_df['dead'].cumsum()
    cumulative_deaths.plot(label=age_range)
plt.legend()

In [None]:
# Susceptible people over time, one curve per age range.
for age_range, age_range_df in ages_df.groupby('age_range'):
    susceptible = age_range_df['susceptible']
    susceptible.plot(label=age_range)
plt.legend()

In [None]:
# Cumulative hospital admissions, one curve per age range.
for age_range, age_range_df in ages_df.groupby('age_range'):
    cumulative_admissions = age_range_df['hospital_admissions'].cumsum()
    cumulative_admissions.plot(label=age_range)
plt.legend()

In [None]:
# Cumulative intensive care admissions, one curve per age range.
for age_range, age_range_df in ages_df.groupby('age_range'):
    cumulative_icu = age_range_df['intensive_care_admissions'].cumsum()
    cumulative_icu.plot(label=age_range)
plt.legend()

## Draw some of the symptoms trajectories

In [None]:
# Draw symptom trajectories for 10 people through the read logger.
# NOTE(review): the meaning and units of window_length=600 are not visible here — confirm.
random_trajectories = read.draw_symptom_trajectories(window_length=600,
                                        n_people=10)

In [None]:
from june.infection import SymptomTag

In [None]:
# Parallel lists of SymptomTag values and names, used as y-tick positions and labels below.
symptoms_values = [tag.value for tag in SymptomTag]
symptoms_names = [tag.name for tag in SymptomTag]


In [None]:
# One symptoms curve per sampled person; the y ticks carry the symptom tag names.
for person_trajectory in random_trajectories:
    symptoms_series = person_trajectory['symptoms']
    symptoms_series.plot()
plt.ylabel('Symptoms Trajectory')
_ = plt.yticks(symptoms_values, symptoms_names)
plt.xlabel('Date')

In [None]:
# Number of secondary infections over time for each sampled person.
for person_trajectory in random_trajectories:
    secondary_infections = person_trajectory['n_secondary_infections']
    secondary_infections.plot()
plt.ylabel('Number of secondary infections')
plt.xlabel('Date')

In [None]:
# Debug leftover. NOTE(review): `person` is not defined in the results section; it is
# whatever the hospital debugging loop earlier in the notebook last bound it to.
person.primary_activity.group.spec