In [1]:
%pylab inline
import os
import sys
import glob

import pandas as pd
import datetime

from selectfile import FileBrowser
import ipywidgets
from ipywidgets import interact, interactive, fixed, interact_manual,\
                       FloatSlider, FloatRangeSlider, Dropdown, Button,\
                       Layout, IntSlider, IntRangeSlider
from subprocess import Popen, STDOUT, PIPE

from IPython.display import Image, display_pdf, Markdown
import glob
from wand.image import Image as WImage

from synpopGenerateEntities import generateEntities
from synpopClusterizeEntities import clusterizeEntities
from synpopCheckPopulation import checkPopulation

Populating the interactive namespace from numpy and matplotlib


This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 1065, in start
    handler_func(fd_obj, events)
  

In [2]:
def getConfigFiles():
    """Return the configuration files found in the working directory.

    Every file matching ``*_config*.py`` is mapped to itself, giving the
    ``{label: value}`` dictionary that the configuration dropdowns take as
    their ``options``.
    """
    matches = glob.glob("*_config*.py")
    return dict(zip(matches, matches))
def updateConfig(dropMenu):
    """Reload the available configuration files into ``dropMenu``.

    ``dropMenu`` is any object exposing an ``options`` attribute (here the
    configuration dropdowns); it receives the mapping returned by
    ``getConfigFiles()``.
    """
    dropMenu.options = getConfigFiles()

In [3]:
# Building the generation interface...
# Shared look of the form fields: fixed-width labels and fixed-width widgets.
label_style = {'description_width': '150px'}
layout_cfg = Layout(width="450px")

configFiles = getConfigFiles()
# Pre-select the first configuration file, or nothing when no `*_config*.py`
# file is available (indexing an empty list of values would raise IndexError).
cfgSelection = Dropdown(options=configFiles,
                        value=next(iter(configFiles.values()), None),
                        description="Configuration file:",
                        layout=layout_cfg, style=label_style)

selNUTSselection = ipywidgets.Text(
        value='',
        placeholder='Comma separated list of NUTS to create...',
        description='Selected NUTS:',
        disabled=False, layout=layout_cfg, style=label_style,
    )

popNameSelection = ipywidgets.Text(
        value='resources/Italy/',
        placeholder='Output file for the population...',
        description='Population output name:',
        disabled=False, layout=layout_cfg, style=label_style,
    )

popScaleSelection = ipywidgets.FloatSlider(value=.1, min=.0, max=1., step=.01,
                        description="Population scale:", continuous_update=True,
                        layout=layout_cfg, style=label_style)

refNameSelection = ipywidgets.Text(
        value='',
        placeholder='Type the mnemonic name of your pop...',
        description='Reference name:',
        disabled=False, layout=layout_cfg, style=label_style,
    )

popLevelsSelection = ipywidgets.Text(
        value='',
        placeholder='Comma separated list of local levels size...',
        description='Local level sizes:',
        disabled=False, layout=layout_cfg, style=label_style,
    )

# Two columns of three fields each.
simBox = ipywidgets.HBox(
        [ipywidgets.VBox([cfgSelection, selNUTSselection, popLevelsSelection]),
         ipywidgets.VBox([popScaleSelection, refNameSelection, popNameSelection])]
    )

boundariesSelection = FileBrowser(path="resources", showHidden=False, title="Select boundaries:")

# `positioning` is not a Button trait, so it is not passed here.
createButton = ipywidgets.Button(description="Create")

# Refresh the list of configuration files whenever the dropdown changes.
# The widget is bound as a default argument so the handler keeps refreshing
# *this* dropdown even if the global name `cfgSelection` is re-bound later.
cfgSelection.observe(lambda change, menu=cfgSelection: updateConfig(menu))

from subprocess import Popen, STDOUT, PIPE
def onCreateButtonClicked(ev=None):
    """Generate and clusterize the population described by the form.

    Reads the form widgets, builds the override dictionary passed as
    ``additionalArgs`` to ``generateEntities``, then feeds its outputs to
    ``clusterizeEntities`` and prints the elapsed time.

    Args:
        ev: the object passed by ``Button.on_click`` (unused).

    Raises:
        ValueError: if the "Local level sizes" field holds no sizes or holds
            an entry that is not an integer.
    """
    tic = datetime.datetime.now()
    print("Creating population for `%s`!" % (selNUTSselection.value,))

    # The two lookup tables are expected next to the selected boundaries file,
    # sharing its name up to the `gdf.pkl.gz` suffix.
    # NOTE(review): assumes `boundariesSelection.path` points to a `*gdf.pkl.gz`
    # file; otherwise all three paths are identical -- confirm with FileBrowser.
    geoDFfile = boundariesSelection.path
    geoid2nuts = geoDFfile.replace("gdf.pkl.gz", "id2NUTS.pkl")
    geonuts2id = geoDFfile.replace("gdf.pkl.gz", "NUTS2id.pkl")

    # Ignore empty entries (e.g. a trailing comma) instead of failing on int('').
    levelTokens = [s.replace(" ", "") for s in popLevelsSelection.value.split(",")]
    levelTokens = [s for s in levelTokens if s]
    if not levelTokens:
        raise ValueError("`Local level sizes` must be a comma separated list "
                         "of integers, e.g. `5000, 800, 90`.")
    try:
        levelsTargetSize = [int(s) for s in levelTokens]
    except ValueError:
        raise ValueError("`Local level sizes` must be a comma separated list "
                         "of integers, got `%s`." % (popLevelsSelection.value,))

    localConf = {"geoDataFrame_file": geoDFfile,
                 "geoDFid2nuts_file": geoid2nuts,
                 "geoDFnuts2id_file": geonuts2id,
                 "referenceName": refNameSelection.value,
                 "populationFileName": popNameSelection.value,
                 "popScale": popScaleSelection.value,
                 "levelsTargetSize": levelsTargetSize,
                 "selectedNUTS": set([s.replace(" ", "") for s in selNUTSselection.value.split(",")]),
                }
    # The module name is the configuration file name without its `.py` suffix.
    fout, cfg = generateEntities(cfgSelection.value.split(".py")[0], additionalArgs=localConf)
    clusterizeEntities(fout.split(".py")[0], additionalArgs=cfg)
    toc = datetime.datetime.now()
    dt = toc - tic
    print("\n Everything done, the generation procedure took %d seconds." % (dt.total_seconds(),))
    
createButton.on_click(onCreateButtonClicked)

loadBox = ipywidgets.HBox([boundariesSelection.widget(),])

# Right-align the button with the flexbox `justify_content` property:
# `positioning` is not a trait of HBox or Layout and has no effect.
btnBox = ipywidgets.HBox([createButton], layout=Layout(justify_content="flex-end"))

# Full generation form: fields on top, boundaries browser, then the button row.
inputBox = ipywidgets.VBox([simBox, loadBox, btnBox])

In [4]:
# Building the check interface...
label_style = {'description_width': 'initial'}
# The check form owns a separately named dropdown: re-binding `cfgSelection`
# here would make the *Create* callback, which looks that name up at click
# time, read this dropdown instead of the one shown in the generation form.
checkCfgSelection = Dropdown(options=configFiles,
                             value=next(iter(configFiles.values()), None),
                             description="Configuration file:",
                             layout=layout_cfg, style=label_style)

# Refresh the list of configuration files whenever the dropdown changes.
checkCfgSelection.observe(lambda change, menu=checkCfgSelection: updateConfig(menu))
checkButton = ipywidgets.Button(description="Check")

checkBox = ipywidgets.HBox([checkCfgSelection, checkButton])

import time
import importlib
import tempfile


def _loadFigure(img, width, resolution):
    """Open `img` for display: PDFs through wand, everything else through IPython."""
    if os.path.splitext(img)[1].lower() == ".pdf":
        return WImage(filename=img, resolution=resolution, width=width)
    return Image(img, width=width)


def _showFigure(img, width, resolution, maxAttempts, settleTime=8, retryDelay=2):
    """Display `img`, retrying a bounded number of times.

    Each attempt first waits `settleTime` seconds (the checker may still be
    writing the file) and waits `retryDelay` more seconds after a failed load.

    Returns:
        True once the figure has been displayed, False if all `maxAttempts`
        attempts failed.
    """
    for _ in range(maxAttempts):
        time.sleep(settleTime)
        try:
            figure = _loadFigure(img, width, resolution)
        except Exception:
            time.sleep(retryDelay)
            continue
        try:
            display(figure)
        except Exception:
            continue
        return True
    return False


def onCheckButtonClicked(ev=None, width=400, resolution=110, maxAttempts=5):
    """Run the population checker and show its figures as they appear.

    Launches `synpopCheckPopulation.py` on the selected configuration in a
    subprocess and, while it runs, displays every file matching
    `figures/*_<referenceName>_*` that has not been shown yet. One extra scan
    is done after the process exits to pick up the last figures.

    Args:
        ev: the object passed by ``Button.on_click`` (unused).
        width: display width passed to the image constructors.
        resolution: resolution passed to wand when opening PDF figures.
        maxAttempts: how many times a single figure is tried before giving up.
    """
    # Keep the button disabled for the whole run so that it cannot be
    # triggered again while a check is in progress.
    checkButton.disabled = True
    try:
        if not checkCfgSelection.value:
            print("No configuration file selected, nothing to check.")
            return
        print("Checking the %s configuration file..." % (checkCfgSelection.value,))
        cfgFile = checkCfgSelection.value.split(".py")[0]
        cfg_mod = importlib.import_module(cfgFile)
        cfg = cfg_mod.cfg
        # Argument list built directly: splitting a command string on spaces
        # would break configuration names that contain a space.
        args = ["python2", "synpopCheckPopulation.py", cfgFile]
        figurePattern = "figures/*_%s_*" % cfg["referenceName"]

        # The checker output goes to a temporary file rather than a pipe: a
        # pipe that is never read fills up and blocks the child process, which
        # would keep the polling loop below waiting forever.
        log = tempfile.TemporaryFile()
        try:
            proc = Popen(args, stdout=log, stderr=STDOUT)
            seen_IMGs = set()
            proc_done = False
            all_done = False
            while (not all_done) or (not proc_done):
                for img in sorted(glob.glob(figurePattern)):
                    if img in seen_IMGs:
                        continue
                    title = "## %s" % os.path.splitext(os.path.basename(img))[0]
                    display(Markdown(title))
                    if not _showFigure(img, width, resolution, maxAttempts):
                        display(Markdown("*Could not render `%s`.*" % img))
                    seen_IMGs.add(img)
                if not proc_done:
                    if proc.poll() is not None:
                        proc_done = True
                elif not all_done:
                    # One full scan has run since the process ended.
                    all_done = True
                time.sleep(.5)
            returnCode = proc.wait()
            if returnCode != 0:
                log.seek(0)
                output = log.read().decode("utf-8", "replace")
                display(Markdown("**Check failed** (exit code %d). "
                                 "Last output of the checker:" % returnCode))
                print(output[-4000:])
            else:
                display(Markdown("**Check complete!**"))
        finally:
            log.close()
    finally:
        checkButton.disabled = False

checkButton.on_click(onCheckButtonClicked)


# Synpop generation demo

## Create the synthetic population

We will create the synthetic population of a small province of Piedmont (because of time constraints).

We will then see how to check the generation procedure with a previously prepared population.


In [5]:
# Render the generation form: input fields, boundaries browser and Create button.
display(inputBox)

## Check the generated population

In [6]:
# Render the check form: configuration dropdown and Check button.
display(checkBox)

# Synthetic network construction

## Data pre-processing and synthetic population creation


**Enrico Ubaldi** 

*Health habits pilot* - ISI Foundation 

# Motivations and goal

Agent-based models (ABMs) need a detailed description of the population:

<div style="float: left; width: 50%;">
<ul>
<li> demographic traits (age, sex, health status...)
<li> spatial information (households and workplaces location)
<li> interaction patterns between agents (network of contacts)
</ul>
</div>
<div style="float: right; width: 50%;">
<font size=23 color="#ff2222"> X</font>
<img align="right" src="figures/synpop.jpg">
</div>

$\to$ key ingredient: **synthetic population**



# Motivations and goal

We aim at a *synthetic population*:

<div style="float: left; width: 50%;">
<ul>
<li> <b>realistic</b>:
    <ul>
        <li> reproducing real world data
        <li>  encoding real traits of agents
    </ul>
</ul>
</div>
<div style="float: right; width: 50%;">
<img align="right" src="figures/synPop_2011_DE1_agePerRole.png">
</div>

# Motivations and goal

We aim at a *synthetic population*:

<div style="float: left; width: 50%;">
<ul>
<li> <b>realistic</b>:
    <ul>
        <li> reproducing real world data
        <li>  encoding real traits of agents
    </ul>
<li> <b>general</b>:
    <ul>
        <li> relies on globally available data
        <li> quick and easy generation of any area
    </ul>
</ul>
</div>
<div style="float: right; width: 50%;">
    <img align="right" src="figures/synpop_hhSpatialDistribution.png?id=2334">
</div>

# Motivations and goal

We aim at a *synthetic population*:

<div style="float: left; width: 50%;">
<ul>
<li> <b>realistic</b>:
    <ul>
        <li> reproducing real world data
        <li>  encoding real traits of agents
    </ul>
<li> <b>general</b>:
    <ul>
        <li> relies on globally available data
        <li> quick and easy generation of any area
    </ul>
<li> <b>HPC-compliant</b>:
    <ul>
        <li> suitable for multi-node computing
        <li> appropriate file format for parallel I/O
    </ul>
</ul>
</div>
<div style="float: right; width: 50%;">
<img align="right" src="figures/synpop_hierarchy.png">
</div>

# Design: data sources

Make use of open data from:

<div style="float: left; width: 35%;">
<ul>
<li> <b>Eurostat</b>:
    <ul>
        <li> population/household structure, demography
        <li> employment and education levels
    </ul>
<li> <b>National statistics offices</b>:
    <ul>
        <li> commuting data
        <li> number and size of schools/workplaces
        <li> health stats
    </ul>
<li> <b>Online resources</b>:
    <ul>
        <li> <em>PISA</em> and <em>PIRLS</em> surveys (school and class size distributions)
        <li> <b>O</b>pen<b>S</b>treet<b>M</b>ap API (local boundaries)
        <li> <em>SEDAC</em> rasters (population density)
    </ul>
</ul>
</div>
<div style="float: right; width: 65%;">
<img align="right" src="figures/slides_figures.png" width="400%">
</div>

# Design: data pre-processing and geoDB

The data are organized in a *geographical database*:

<div style="float: left; width: 40%;">
<ul>
<li> <b>provides</b>:
    <ul>
        <li> fast and easy access to data
        <li> homogeneous representation of statistics
        <li> hierarchical representation of the population (more later)
    </ul>
<li> <b>unlocks</b>:
    <ul>
        <li> general generation procedure
        <li> easy check of generated population
    </ul>
</ul>
</div>
<div style="float: right; width: 60%;">
<img align="right" src="figures/db_query.png?id=12">
</div>

# Design: population structure

Agents (people) are arranged in:

<div style="float: left; width: 50%;">
<ul>
<li> households
<li> workplaces/schools
</ul>

These locations are spatially located and organized:
<ul>
<li> <b>NUTS</b> codes (country, region, province)
<li> <b>LAU</b> codes (local administrative units, e.g., city, district)
<li> <b>LE</b> codes (local entities, user defined):
    <ul>
        <li> neighborhoods (groups of ~5000 people)
        <li> communities (~800 people)
        <li> household clusters (~90 people)
    </ul>
</ul>
$\to$ Enables a natural multi-node representation of the population
</div>
<div style="float: right; width: 50%;">
<img align="right" src="figures/synpop_hierarchy.png">
</div>

# Results: generation procedure

<div style="float: left; width: 40%;">
<ul>
<li> Select <b>NUTS</b> codes
<li> Retrieve statistics on    
    <ul>
        <li> sex, age, household structure
        <li> school/employment rates per age
        <li> commuting
    </ul>
<li> For each subcode
    <ul>
        <li> generate households
        <li> compute number of workers/students (local + commuters)
        <li> assign worker/student to workplace/school
    </ul>
<li> Cluster households and workplaces in <b>LE</b>
</ul>
</div>
<div style="float: right; width: 60%;">
<font size=22 color="#ff2222"> X</font>
Image with the algorithm of the creation process
<img align="right" src="figures/synpop_diagram.jpg">
</div>

# Results: population structure


The generated population:
<div style="float: left; width: 40%;">
<ul>
<li> <b>reproduces</b>
    <ul>
        <li> overall population age structure
        <li> in-household sex and age structure
        <li> workplace sizes
        <li> distribution of commuting distance
    </ul>
<li> <b>features</b>
    <ul>
        <li> fine-grained spatial representation
        <li> local clustering of agent groups
        <li> hierarchical structure
    </ul>
</ul>
</div>
<div style="float: right; width: 60%;">
<img align="right" src="figures/synPop_ITC11_agePopulationPerSex.png">
</div>

# Results: HPC-compliance and model implementation

The generated file is a standard *hdf5* file that provides:

<div style="float: left; width: 40%;">
<ul>
<li> fast and performant I/O operations on the synthetic population
<li> suitable input for the parallel version of the developed model
</ul>
</div>
<div style="float: right; width: 60%;">
<img align="right" src="figures/ag_hh_wp_tables.png">
</div>