# Extract the base graph for the network of contiguous districts in West Africa (Guinea, Liberia, Sierra Leone)

In [1]:
from matplotlib import pyplot as plt
%matplotlib inline

import json
import os

import numpy as np
import pandas as pd
import networkx as nx

from lib.settings import DATA_DIR, EBOLA_BASE_GRAPH_FILE

## 1. Build the graph

Load graph from the adjacency matrix of districts located at

```
data/ebola/district_adjacency.txt
```

The file is formatted as follows. The first line gives the graph name; after that, each node is described by a pair of lines:
* The name of a node in the first line, followed by its degree (space-separated)
* The space-separated list of neighbors in the second line

The following cell parses this file into a `networkx` graph.

In [2]:
# Build a directed graph from the adjacency file; symmetry is verified in a later cell.
graph = nx.DiGraph()

with open(os.path.join(DATA_DIR, 'ebola', 'district_adjacency.txt'), 'r') as f:
    f.readline()  # Skip the first line (graph name)
    while True:
        header = f.readline()
        if not header:
            break  # End of file
        # Split on any run of whitespace so trailing/double spaces do not
        # produce empty tokens; tolerate blank lines between records.
        fields = header.split()
        if not fields:
            continue
        assert len(fields) == 2, f"Malformed node line (expected '<name> <degree>'): {header!r}"
        node, degree = fields[0], int(fields[1])
        # The line following the node header lists its neighbors (empty when degree is 0)
        neighbors = f.readline().split()
        assert len(neighbors) == degree, (
            f"Node {node!r} declares degree {degree} but lists {len(neighbors)} neighbor(s)")
        if degree == 0:
            continue  # Districts without neighbors are left out of the graph
        graph.add_node(node)
        graph.add_edges_from((node, neigh) for neigh in neighbors)

Check that the graph is symmetric (i.e., every edge exists in both directions), then convert it to an undirected graph.

In [3]:
# If every edge is present in both directions, the adjacency matrix equals its transpose
adjacency = nx.adjacency_matrix(graph).toarray()
assert np.allclose(adjacency, adjacency.T)
# Safe to drop edge directions now
graph = graph.to_undirected()

### Manually fix graph


Add the edges that are missing because of disconnected boundaries in the shapefiles.

In [4]:
# Hand-added edges that the adjacency file does not contain
graph.add_edges_from([
    ('MALI', 'TOUGUE'), ('MALI', 'PITA'),          # Connect MALI
    ('DALABA', 'DABOLA'), ('DALABA', 'FARANAH'),   # Connect missing edges to DALABA
])

# Show the resulting graph size as (nodes, edges)
graph.number_of_nodes(), graph.number_of_edges()

(55, 127)

Make the Guinea capital districts interchangeable. *(Decided not to do this in the end)*

In [5]:
# Disabled experiment, kept for reference only: this cell performs no work.
# NOTE(review): the code below refers to `graph2` and `graph3`, which are not
# defined in the visible cells; they would need to be mapped to an existing
# graph before this could be re-enabled.
# capital_districts = ['CONAKRY', 'COYAH', 'DUBREKA']
# capital_neighbors = set()
# for u in capital_districts:
#     capital_neighbors.update(list(graph2.neighbors(u)))
# for u in capital_districts:
#     graph3.add_edges_from([(u, v) for v in capital_neighbors])
# graph3.remove_edges_from(graph3.selfloop_edges())  # Remove self loops in capital districts

## 2. Add meta-data to the graph

Define a dict that maps each `(district, country)` tuple to the node position, stored as `[longitude, latitude]`.

In [6]:
# Maps each (district, country) key to the position used to draw that district.
# Positions are stored as [longitude, latitude]; the folium cell reverses them
# with [::-1] to obtain [latitude, longitude] before plotting.
district_country_pos_dict = {
    ('BEYLA', 'Guinea'): [-8.633333, 8.683333],
    ('BO', 'Sierra Leone'): [-11.471, 7.9552],
    ('BOFFA', 'Guinea'): [-14.039161, 10.180825],
    ('BOKE', 'Guinea'): [-14.100133, 11.186467],
    ('BOMBALI', 'Sierra Leone'): [-12.163272, 9.247584],
    ('BOMI', 'Liberia'): [-10.845147, 6.756293],
    ('BONG', 'Liberia'): [-9.367308, 6.829502],
    ('BONTHE', 'Sierra Leone'): [-12.503992, 7.525703],
    ('CONAKRY', 'Guinea'): [-13.578401, 9.641185],
    ('COYAH', 'Guinea'): [-13.387612, 9.708636],
    ('DABOLA', 'Guinea'): [-11.110785, 10.729781],
    ('DALABA', 'Guinea'): [-12.249070, 10.686818],
    ('DINGUIRAYE', 'Guinea'): [-10.715423, 11.289951],
    ('DUBREKA', 'Guinea'): [-13.514774, 9.790735],
    ('FARANAH', 'Guinea'): [-10.749247, 10.045102],
    ('FORECARIAH', 'Guinea'): [-13.090435, 9.434471],
    ('FRIA', 'Guinea'): [-13.584187, 10.367454],
    ('GBARPOLU', 'Liberia'): [-10.080730, 7.495264],
    ('GRAND_BASSA', 'Liberia'): [-9.812493, 6.230845],
    ('GRAND_CAPE_MOUNT', 'Liberia'): [-11.071176, 7.046776],
    ('GRAND_GEDEH', 'Liberia'): [-8.221298, 5.922208],
    ('GRAND_KRU', 'Liberia'): [-8.221298, 4.761386],
    ('GUECKEDOU', 'Guinea'): [-10.131116, 8.564969],
    ('KAILAHUN', 'Sierra Leone'): [-10.571809, 8.280220],
    ('KAMBIA', 'Sierra Leone'): [-12.917652, 9.126166],
    ('KANKAN', 'Guinea'): [-9.311828, 10.382789],
    ('KENEMA', 'Sierra Leone'): [-11.195717, 7.863215],
    ('KEROUANE', 'Guinea'): [-9.007367, 9.270260],
    ('KINDIA', 'Guinea'): [-12.862989, 10.040672],
    ('KISSIDOUGO', 'Guinea'): [-10.114318, 9.191454],
    ('KOINADUGU', 'Sierra Leone'): [-11.524805, 9.530862],
    ('KONO', 'Sierra Leone'): [-10.890310, 8.766329],
    ('KOUROUSSA', 'Guinea'): [-9.885059, 10.648923],
    ('LOFA', 'Liberia'): [-9.723267, 8.191118],
    ('LOLA', 'Guinea'): [-8.533653, 7.802235],
    ('MACENTA', 'Guinea'): [-9.472824, 8.538294],
    ('MALI', 'Guinea'): [-12.297718, 12.074294],
    ('MARGIBI', 'Liberia'): [-10.304890, 6.515187],
    ('MARYLAND', 'Liberia'): [-7.741670, 4.725888],
    ('MONTSERRADO', 'Liberia'): [-10.529611, 6.552581],
    ('MOYAMBA', 'Sierra Leone'): [-12.435192, 8.162051],
    ("NZEREKORE", 'Guinea'): [-8.825250, 7.747836],
    ('NIMBA', 'Liberia'): [-8.660059, 6.842761],
    ('PITA', 'Guinea'): [-12.397943, 11.057462],
    ('PORT_LOKO', 'Sierra Leone'): [-12.785352, 8.768689],
    ('PUJEHUN', 'Sierra Leone'): [-11.721064, 7.356299],
    ('RIVER_GEE', 'Liberia'): [-7.872160, 5.260489],
    ('RIVERCESS', 'Liberia'): [-9.456155, 5.902533],
    ('SIGUIRI', 'Guinea'): [-9.178830, 11.414811],
    ('SINOE', 'Liberia'): [-8.660059, 5.498710],
    ('TELIMELE', 'Guinea'): [-13.029933, 10.908936],
    ('TONKOLILI', 'Sierra Leone'): [-11.797961, 8.738942],
    ('TOUGUE', 'Guinea'): [-11.664139, 11.446422],
    ('WESTERN', 'Sierra Leone'): [-13.035694, 8.311498],
    ('YOMOU', 'Guinea'): [-9.259157, 7.569628],
}

# district name -> country, taken from the (district, country) keys
country_dict = {district: country for district, country in district_country_pos_dict}

# district name -> drawing position, dropping the country part of the key
pos_dict = {
    district: position
    for (district, _country), position in district_country_pos_dict.items()
}

### 2.1. Add the population weight

We first add the district "size" as a normalized weight based on population density extracted from WorldPop.

In [7]:
# Load the WorldPop district population estimates
# Read the WorldPop per-district population estimates, indexed by district name
pop_df = pd.read_csv(
    os.path.join(DATA_DIR, 'ebola', 'ebola_populations.csv'),
    index_col='district',
)
# Overall population across all districts
total_pop = pop_df['population'].sum()
# Share of the overall population that lives in each district
pop_df['normed_pop'] = pop_df['population'] / total_pop

print(pop_df.shape)
display(pop_df.head())

(55, 2)


Unnamed: 0_level_0,population,normed_pop
district,Unnamed: 1_level_1,Unnamed: 2_level_1
BEYLA,248143,0.011869
BOFFA,217743,0.010415
BOKE,526569,0.025187
CONAKRY,1729239,0.082712
COYAH,407975,0.019514


In [8]:
# Check that both graph and population densities have the same districts
# The population table and the graph must cover exactly the same districts
assert set(graph.nodes()) == set(pop_df.index)
# Store each district's normalized population weight as the node attribute "size"
size_by_district = pop_df['normed_pop'].to_dict()
nx.set_node_attributes(graph, values=size_by_district, name='size')

### 2.2. Add the country of each district

In [9]:
# Check that both graph and population densities have the same districts
# The country mapping and the graph must cover exactly the same districts
assert set(graph.nodes()) == set(country_dict)
# Store each district's country as the node attribute "country"
nx.set_node_attributes(graph, country_dict, name='country')

### 2.3. Add a position for drawing the network

In [10]:
# Check that both graph and population densities have the same districts
# The position mapping and the graph must cover exactly the same districts
assert set(graph.nodes()) == set(pos_dict)
# Store each district's drawing position as the node attribute "pos"
nx.set_node_attributes(graph, pos_dict, name='pos')

---

## 3. Save the resulting graph

In [11]:
# Convert and serialize the graph BEFORE opening the output file: opening with
# 'w' truncates it immediately, so a failure during conversion/serialization
# would otherwise leave an empty or partially written file behind.
base_graph_data = nx.readwrite.node_link_data(graph)
base_graph_json = json.dumps(base_graph_data)

with open(EBOLA_BASE_GRAPH_FILE, 'w') as f:
    f.write(base_graph_json)

---

## 4. Graph visualization

Visualize graph on map

In [12]:
import folium

m = folium.Map(location=[9.442303, -10.843602], tiles='openstreetmap', zoom_start=7)

# Draw every edge as a thin black segment; stored positions are reversed with
# [::-1] before being handed to folium
for src, dst in graph.edges():
    folium.PolyLine(
        locations=[pos_dict[dst][::-1], pos_dict[src][::-1]],
        weight=2,
        color='black',
    ).add_to(m)

# Place a text label (capitalized district name) at each node position
labels = {u: u.capitalize() for u in graph.nodes()}
for n, label in labels.items():
    text_icon = folium.features.DivIcon(
        icon_size=(30,15),
        icon_anchor=(0,0),
        html=f'<div style="font-size: 10pt">{label}</div>',
    )
    folium.map.Marker(pos_dict[n][::-1], icon=text_icon).add_to(m)

# Write a standalone HTML copy of the map, then display it inline
m.save(os.path.join(DATA_DIR, 'ebola_map.html'))

m