In [1]:
from matplotlib import pyplot as plt
%matplotlib inline

import json

import numpy as np
import pandas as pd
import networkx as nx

Read data file

In [2]:
# Load the source CSV into `df`, then show its (rows, columns) shape and the
# first five rows as a sanity check.
df = pd.read_csv(filepath_or_buffer='../data/ebola/rstb20160308_si_001.csv')
print(df.shape)
df.head(n=5)

(33338, 16)


Unnamed: 0,Country,EpiCaseDef,FinalStatus,DateOnset,DateOnsetInferred,DateReport,DateOutcomeComp,DateHospitalCurrentAdmit,Age,CL_DistrictRes,HospitalizedEver,TreatmentCendreType,TreatmentCentre,delay.onset.hospitalisation,QuarterOnsetInferred,retrospective
0,Guinea,confirmed,Dead,2015-02-06,2015-02-06,2015-02-06,2015-02-06,,45.0,FORECARIAH,No,Not Hospitalised,,,Jan - Mar 2015,True
1,Guinea,confirmed,Dead,2015-03-02,2015-03-02,2015-03-07,2015-03-07,,40.0,CONAKRY,No,Not Hospitalised,,,Jan - Mar 2015,True
2,Guinea,confirmed,Dead,2015-03-15,2015-03-15,2015-03-17,2015-03-18,2015-03-18,0.0,FORECARIAH,Yes,ETU,Conakry 2,3.0,Jan - Mar 2015,False
3,Guinea,confirmed,Dead,2014-06-06,2014-06-06,2014-06-09,2014-06-16,2014-06-09,30.0,GUECKEDOU,Yes,ETU,Gueckedou 1,3.0,Apr - Jun 2014,False
4,Guinea,probable,Dead,2014-02-27,2014-02-27,2014-02-27,2014-03-03,,46.0,GUECKEDOU,Yes,Unknown TC Type,,,Jan - Mar 2014,False


Count the number of rows that have no district

In [3]:
# Number of rows whose district of residence is missing (NaN).
df['CL_DistrictRes'].isna().sum()

984

Extract the number of distinct districts (keep (district, country) pairs to avoid district name duplicates in different countries).

In [4]:
# Collect one (district, country) pair per row that has a district name.
# Rows whose district is not a string (e.g. NaN for missing values) are skipped.
# This works column-wise instead of via DataFrame.iterrows(), which builds a
# Series for every row and is needlessly slow on a frame of this size.
has_district = df['CL_DistrictRes'].map(lambda name: isinstance(name, str)).astype(bool)
rows_with_district = df.loc[has_district, ['CL_DistrictRes', 'Country']]
district_list = list(zip(rows_with_district['CL_DistrictRes'], rows_with_district['Country']))

# Deduplicate and sort so the result is deterministic across runs.
district_set = sorted(set(district_list))
len(district_set)

55

Manually extracted connections

In [52]:
# Hand-entered adjacency lists for the districts of Guinea.
# Each key is a (district, country) pair and maps to the list of
# (district, country) pairs it is connected to. Neighbors may lie in another
# country (Sierra Leone or Liberia entries appear in several lists).
# Every connection is expected to be listed in both directions; a later cell
# raises if a reverse entry is missing.
guinea_adj_dict = {
    ('BEYLA', 'Guinea'): [
        ('LOLA', 'Guinea'),
        ('MACENTA', 'Guinea'),
        ('KEROUANE', 'Guinea'),
        ("N'ZEREKORE", 'Guinea'),
    ],
    ('BOFFA', 'Guinea'): [
        ('FRIA', 'Guinea'),
        ('BOKE', 'Guinea'),
        ('CONAKRY', 'Guinea'), 
        ('COYAH', 'Guinea'), 
        ('DUBREKA', 'Guinea'),
    ],
    ('BOKE', 'Guinea'): [
        ('BOFFA', 'Guinea')
    ],
    ('CONAKRY', 'Guinea'): [
        ('COYAH', 'Guinea'), 
        ('DUBREKA', 'Guinea'),
        ('FRIA', 'Guinea'),
        ('KINDIA', 'Guinea'),
        ('FORECARIAH', 'Guinea'),
        ('BOFFA', 'Guinea'),
    ],
    ('COYAH', 'Guinea'): [
        ('CONAKRY', 'Guinea'),
        ('DUBREKA', 'Guinea'),
        ('FRIA', 'Guinea'),
        ('KINDIA', 'Guinea'),
        ('FORECARIAH', 'Guinea'),
        ('BOFFA', 'Guinea'),
    ],
    ('DABOLA', 'Guinea'): [
        ('DINGUIRAYE', 'Guinea'),
        ('KOUROUSSA', 'Guinea'),
        ('FARANAH', 'Guinea'),
        ('DALABA', 'Guinea'),
    ],
    ('DALABA', 'Guinea'): [
        ('PITA', 'Guinea'),
        ('TOUGUE', 'Guinea'),
        ('DABOLA', 'Guinea'),
        ('FARANAH', 'Guinea'),
        ('KINDIA', 'Guinea'),
    ],
    ('DINGUIRAYE', 'Guinea'): [
        ('SIGUIRI', 'Guinea'),
        ('KOUROUSSA', 'Guinea'),
        ('DABOLA', 'Guinea'),
        ('TOUGUE', 'Guinea'),
    ],
    ('DUBREKA', 'Guinea'): [
        ('CONAKRY', 'Guinea'),
        ('COYAH', 'Guinea'), 
        ('FRIA', 'Guinea'),
        ('KINDIA', 'Guinea'),
        ('FORECARIAH', 'Guinea'),
        ('BOFFA', 'Guinea'),
    ],
    ('FARANAH', 'Guinea'): [
        ('DABOLA', 'Guinea'),
        ('KISSIDOUGO', 'Guinea'),
        ('DALABA', 'Guinea'),
        ('KOINADUGU', 'Sierra Leone'),
    ],
    ('FORECARIAH', 'Guinea'): [
        ('KINDIA', 'Guinea'),
        ('CONAKRY', 'Guinea'), 
        ('COYAH', 'Guinea'), 
        ('DUBREKA', 'Guinea'),
        ('KAMBIA', 'Sierra Leone')
    ],
    ('FRIA', 'Guinea'): [
        ('CONAKRY', 'Guinea'), 
        ('COYAH', 'Guinea'), 
        ('DUBREKA', 'Guinea'),
        ('TELIMELE', 'Guinea'),
        ('BOFFA', 'Guinea')
    ],
    ('GUECKEDOU', 'Guinea'): [
        ('MACENTA', 'Guinea'),
        ('KISSIDOUGO', 'Guinea'),
        ('LOFA', 'Liberia'),
        ('KAILAHUN', 'Sierra Leone'),
        ('KONO', 'Sierra Leone'),
    ],
    ('KANKAN', 'Guinea'): [
        ('KEROUANE', 'Guinea'),
        ('KISSIDOUGO', 'Guinea'),
        ('SIGUIRI', 'Guinea'),
        ('KOUROUSSA', 'Guinea'),
    ],
    ('KEROUANE', 'Guinea'): [
        ('BEYLA', 'Guinea'),
        ('KISSIDOUGO', 'Guinea'),
        ('KANKAN', 'Guinea'),
        ('MACENTA', 'Guinea'),
    ],
    ('KINDIA', 'Guinea'): [
        ('DALABA', 'Guinea'),
        ('TELIMELE', 'Guinea'),
        ('FORECARIAH', 'Guinea'),
        ('CONAKRY', 'Guinea'), 
        ('COYAH', 'Guinea'), 
        ('DUBREKA', 'Guinea'),
        ('BOMBALI', 'Sierra Leone'),
    ],
    ('KISSIDOUGO', 'Guinea'): [
        ('GUECKEDOU', 'Guinea'),
        ('KEROUANE', 'Guinea'),
        ('KANKAN', 'Guinea'),
        ('KOUROUSSA', 'Guinea'),
        ('FARANAH', 'Guinea'),
        
    ],
    ('KOUROUSSA', 'Guinea'): [
        ('SIGUIRI', 'Guinea'),
        ('KANKAN', 'Guinea'),
        ('DINGUIRAYE', 'Guinea'),
        ('DABOLA', 'Guinea'),
        ('KISSIDOUGO', 'Guinea'),
    ],
    ('LOLA', 'Guinea'): [
        ('BEYLA', 'Guinea'),
        ("N'ZEREKORE", 'Guinea'),
        ('NIMBA', 'Liberia'),
    ],
    ('MACENTA', 'Guinea'): [
        ("N'ZEREKORE", 'Guinea'),
        ('BEYLA', 'Guinea'),
        ('GUECKEDOU', 'Guinea'),
        ('LOFA', 'Liberia'),
        ('KEROUANE', 'Guinea'),
    ],
    ('MALI', 'Guinea'): [
        ('PITA', 'Guinea'),
        ('TOUGUE', 'Guinea'),
    ],
    ("N'ZEREKORE", 'Guinea'): [
        ('YOMOU', 'Guinea'),
        ('LOLA', 'Guinea'),
        ('MACENTA', 'Guinea'),
        ('NIMBA', 'Liberia'),
        ('BEYLA', 'Guinea'),
    ],
    ('PITA', 'Guinea'): [
        ('TELIMELE', 'Guinea'),
        ('MALI', 'Guinea'),
        ('TOUGUE', 'Guinea'),
        ('DALABA', 'Guinea'),
    ],
    ('SIGUIRI', 'Guinea'): [
        ('KANKAN', 'Guinea'),
        ('DINGUIRAYE', 'Guinea'),
        ('KOUROUSSA', 'Guinea'),
    ],
    ('TELIMELE', 'Guinea'): [
        ('FRIA', 'Guinea'),
        ('KINDIA', 'Guinea'),
        ('PITA', 'Guinea'),
    ],
    ('TOUGUE', 'Guinea'): [
        ('DINGUIRAYE', 'Guinea'),
        ('PITA', 'Guinea'),
        ('MALI', 'Guinea'),
        ('DALABA', 'Guinea'),
    ],
    ('YOMOU', 'Guinea'): [
        ("N'ZEREKORE", 'Guinea'),
        ('LOFA', 'Liberia'),
        ('BONG', 'Liberia'),
        ('NIMBA', 'Liberia'),
    ]
}

In [53]:
# Hand-entered adjacency lists for the districts of Sierra Leone.
# Each key is a (district, country) pair and maps to the list of
# (district, country) pairs it is connected to. Neighbors may lie in another
# country (Guinea or Liberia entries appear in several lists).
# Every connection is expected to be listed in both directions; a later cell
# raises if a reverse entry is missing.
sierraleone_adj_dict = {
    ('BO', 'Sierra Leone'): [
        ('MOYAMBA', 'Sierra Leone'),
        ('BONTHE', 'Sierra Leone'),
        ('PUJEHUN', 'Sierra Leone'),
        ('KENEMA', 'Sierra Leone'),
        ('TONKOLILI', 'Sierra Leone')
    ],
    ('BOMBALI', 'Sierra Leone'): [
        ('KAMBIA', 'Sierra Leone'),
        ('PORT LOKO', 'Sierra Leone'),
        ('TONKOLILI', 'Sierra Leone'),
        ('KOINADUGU', 'Sierra Leone'),
        ('KINDIA', 'Guinea'),
    ],
    ('BONTHE', 'Sierra Leone'): [
        ('MOYAMBA', 'Sierra Leone'),
        ('BO', 'Sierra Leone'),
        ('PUJEHUN', 'Sierra Leone')
    ],
    ('KAILAHUN', 'Sierra Leone'): [
        ('KONO', 'Sierra Leone'),
        ('KENEMA', 'Sierra Leone'),
        ('LOFA', 'Liberia'),
        ('GUECKEDOU', 'Guinea'),
    ],
    ('KAMBIA', 'Sierra Leone'): [
        ('BOMBALI', 'Sierra Leone'),
        ('PORT LOKO', 'Sierra Leone'),
        ('FORECARIAH', 'Guinea'),
    ],
    ('KENEMA', 'Sierra Leone'): [
        ('KONO', 'Sierra Leone'),
        ('KAILAHUN', 'Sierra Leone'),
        ('PUJEHUN', 'Sierra Leone'),
        ('BO', 'Sierra Leone')
    ],
    ('KOINADUGU', 'Sierra Leone'): [
        ('BOMBALI', 'Sierra Leone'),
        ('TONKOLILI', 'Sierra Leone'),
        ('KONO', 'Sierra Leone'),
        ('FARANAH', 'Guinea')
    ],
    ('KONO', 'Sierra Leone'): [
        ('KOINADUGU', 'Sierra Leone'),
        ('TONKOLILI', 'Sierra Leone'),
        ('KENEMA', 'Sierra Leone'),
        ('KAILAHUN', 'Sierra Leone'),
        ('GUECKEDOU', 'Guinea'),
    ],
    ('MOYAMBA', 'Sierra Leone'): [
        ('WESTERN', 'Sierra Leone'),
        ('TONKOLILI', 'Sierra Leone'),
        ('BO', 'Sierra Leone'),
        ('BONTHE', 'Sierra Leone')
    ],
    ('PORT LOKO', 'Sierra Leone'): [
        ('KAMBIA', 'Sierra Leone'),
        ('BOMBALI', 'Sierra Leone'),
        ('WESTERN', 'Sierra Leone'),
        ('TONKOLILI', 'Sierra Leone'),
        
    ],
    ('PUJEHUN', 'Sierra Leone'): [
        ('KENEMA', 'Sierra Leone'),
        ('BO', 'Sierra Leone'),
        ('BONTHE', 'Sierra Leone'),
        ('GRAND CAPE MOUNT', 'Liberia')
    ],
    ('TONKOLILI', 'Sierra Leone'): [
        ('KOINADUGU', 'Sierra Leone'),
        ('BOMBALI', 'Sierra Leone'),
        ('PORT LOKO', 'Sierra Leone'),
        ('MOYAMBA', 'Sierra Leone'),
        ('BO', 'Sierra Leone'),
        ('KONO', 'Sierra Leone'),
    ],
    ('WESTERN', 'Sierra Leone'): [
        ('PORT LOKO', 'Sierra Leone'),
        ('MOYAMBA', 'Sierra Leone')
    ]
}

In [54]:
# Hand-entered adjacency lists for the counties of Liberia.
# Each key is a (district, country) pair and maps to the list of
# (district, country) pairs it is connected to. Neighbors may lie in another
# country (Guinea or Sierra Leone entries appear in several lists).
# Every connection is expected to be listed in both directions; a later cell
# raises if a reverse entry is missing.
liberia_adj_dict = {
    ('BOMI', 'Liberia'): [
        ('GRAND CAPE MOUNT', 'Liberia'),
        ('GBARPOLU', 'Liberia'),
        ('MONTSERRADO', 'Liberia'),
    ],
    ('BONG', 'Liberia'): [
        ('LOFA', 'Liberia'),
        ('GBARPOLU', 'Liberia'),
        ('MONTSERRADO', 'Liberia'),
        ('MARGIBI', 'Liberia'),
        ('GRAND BASSA', 'Liberia'),
        ('NIMBA', 'Liberia'),
        ('YOMOU', 'Guinea'),
    ],
    ('GBARPOLU', 'Liberia'): [
        ('BOMI', 'Liberia'),
        ('GRAND CAPE MOUNT', 'Liberia'),
        ('LOFA', 'Liberia'),
        ('BONG', 'Liberia'),
    ],
    ('GRAND BASSA', 'Liberia'): [
        ('MARGIBI', 'Liberia'),
        ('BONG', 'Liberia'),
        ('RIVERCESS', 'Liberia'),
        ('NIMBA', 'Liberia'),
    ],
    ('GRAND CAPE MOUNT', 'Liberia'): [
        ('BOMI', 'Liberia'),
        ('GBARPOLU', 'Liberia'),
        ('PUJEHUN', 'Sierra Leone'),
    ],
    ('GRAND GEDEH', 'Liberia'): [
        ('NIMBA', 'Liberia'),
        ('SINOE', 'Liberia'),
        ('RIVER GEE', 'Liberia'),
    ],
    ('GRAND KRU', 'Liberia'): [
        ('MARYLAND', 'Liberia'),
        ('RIVER GEE', 'Liberia'),
        ('SINOE', 'Liberia')
    ],
    ('LOFA', 'Liberia'): [
        ('GBARPOLU', 'Liberia'),
        ('BONG', 'Liberia'),
        ('KAILAHUN', 'Sierra Leone'),
        ('YOMOU', 'Guinea'),
        ('MACENTA', 'Guinea'),
        ('GUECKEDOU', 'Guinea')
    ],
    ('MARGIBI', 'Liberia'): [
        ('MONTSERRADO', 'Liberia'),
        ('BONG', 'Liberia'),
        ('GRAND BASSA', 'Liberia'),
    ],
    ('MARYLAND', 'Liberia'): [
        ('RIVER GEE', 'Liberia'),
        ('GRAND KRU', 'Liberia')
    ],
    ('MONTSERRADO', 'Liberia'): [
        ('BOMI', 'Liberia'),
        ('BONG', 'Liberia'),
        ('MARGIBI', 'Liberia'),
    ],
    ('NIMBA', 'Liberia'): [
        ('BONG', 'Liberia'),
        ('GRAND BASSA', 'Liberia'),
        ('RIVERCESS', 'Liberia'),
        ('GRAND GEDEH', 'Liberia'),
        ('YOMOU', 'Guinea'),
        ("N'ZEREKORE", 'Guinea'),
        ('LOLA', 'Guinea')
    ],
    ('RIVER GEE', 'Liberia'): [
        ('GRAND GEDEH', 'Liberia'),
        ('SINOE', 'Liberia'),
        ('GRAND KRU', 'Liberia'),
        ('MARYLAND', 'Liberia')
    ],
    ('RIVERCESS', 'Liberia'): [
        ('GRAND BASSA', 'Liberia'),
        ('NIMBA', 'Liberia'),
        ('SINOE', 'Liberia'),
    ],
    ('SINOE', 'Liberia'): [
        ('RIVERCESS', 'Liberia'),
        ('GRAND GEDEH', 'Liberia'),
        ('RIVER GEE', 'Liberia'),
        ('GRAND KRU', 'Liberia')
    ]
}

Manually extracted GPS positions of each district.

In [55]:
# Hand-entered position of every district, keyed by (district, country).
# NOTE: each value is ordered [longitude, latitude]. The CSV export below reads
# index 0 as longitude and index 1 as latitude, and the map-drawing cell
# reverses each pair with [::-1] before passing it to folium.
pos_dict = {
    ('BEYLA', 'Guinea'): [-8.633333, 8.683333],
    ('BO', 'Sierra Leone'): [-11.471, 7.9552],
    ('BOFFA', 'Guinea'): [-14.039161, 10.180825],
    ('BOKE', 'Guinea'): [-14.100133, 11.186467],
    ('BOMBALI', 'Sierra Leone'): [-12.163272, 9.247584],
    ('BOMI', 'Liberia'): [-10.845147, 6.756293],
    ('BONG', 'Liberia'): [-9.367308, 6.829502],
    ('BONTHE', 'Sierra Leone'): [-12.503992, 7.525703],
    ('CONAKRY', 'Guinea'): [-13.578401, 9.641185],
    ('COYAH', 'Guinea'): [-13.387612, 9.708636],
    ('DABOLA', 'Guinea'): [-11.110785, 10.729781],
    ('DALABA', 'Guinea'): [-12.249070, 10.686818],
    ('DINGUIRAYE', 'Guinea'): [-10.715423, 11.289951],
    ('DUBREKA', 'Guinea'): [-13.514774, 9.790735],
    ('FARANAH', 'Guinea'): [-10.749247, 10.045102],
    ('FORECARIAH', 'Guinea'): [-13.090435, 9.434471],
    ('FRIA', 'Guinea'): [-13.584187, 10.367454],
    ('GBARPOLU', 'Liberia'): [-10.080730, 7.495264],
    ('GRAND BASSA', 'Liberia'): [-9.812493, 6.230845],
    ('GRAND CAPE MOUNT', 'Liberia'): [-11.071176, 7.046776],
    ('GRAND GEDEH', 'Liberia'): [-8.221298, 5.922208],
    ('GRAND KRU', 'Liberia'): [-8.221298, 4.761386],
    ('GUECKEDOU', 'Guinea'): [-10.131116, 8.564969],
    ('KAILAHUN', 'Sierra Leone'): [-10.571809, 8.280220],
    ('KAMBIA', 'Sierra Leone'): [-12.917652, 9.126166],
    ('KANKAN', 'Guinea'): [-9.311828, 10.382789],
    ('KENEMA', 'Sierra Leone'): [-11.195717, 7.863215],
    ('KEROUANE', 'Guinea'): [-9.007367, 9.270260],
    ('KINDIA', 'Guinea'): [-12.862989, 10.040672],
    ('KISSIDOUGO', 'Guinea'): [-10.114318, 9.191454],
    ('KOINADUGU', 'Sierra Leone'): [-11.524805, 9.530862],
    ('KONO', 'Sierra Leone'): [-10.890310, 8.766329],
    ('KOUROUSSA', 'Guinea'): [-9.885059, 10.648923],
    ('LOFA', 'Liberia'): [-9.723267, 8.191118],
    ('LOLA', 'Guinea'): [-8.533653, 7.802235],
    ('MACENTA', 'Guinea'): [-9.472824, 8.538294],
    ('MALI', 'Guinea'): [-12.297718, 12.074294],
    ('MARGIBI', 'Liberia'): [-10.304890, 6.515187],
    ('MARYLAND', 'Liberia'): [-7.741670, 4.725888],
    ('MONTSERRADO', 'Liberia'): [-10.529611, 6.552581],
    ('MOYAMBA', 'Sierra Leone'): [-12.435192, 8.162051],
    ("N'ZEREKORE", 'Guinea'): [-8.825250, 7.747836],
    ('NIMBA', 'Liberia'): [-8.660059, 6.842761],
    ('PITA', 'Guinea'): [-12.397943, 11.057462],
    ('PORT LOKO', 'Sierra Leone'): [-12.785352, 8.768689],
    ('PUJEHUN', 'Sierra Leone'): [-11.721064, 7.356299],
    ('RIVER GEE', 'Liberia'): [-7.872160, 5.260489],
    ('RIVERCESS', 'Liberia'): [-9.456155, 5.902533],
    ('SIGUIRI', 'Guinea'): [-9.178830, 11.414811],
    ('SINOE', 'Liberia'): [-8.660059, 5.498710],
    ('TELIMELE', 'Guinea'): [-13.029933, 10.908936],
    ('TONKOLILI', 'Sierra Leone'): [-11.797961, 8.738942],
    ('TOUGUE', 'Guinea'): [-11.664139, 11.446422],
    ('WESTERN', 'Sierra Leone'): [-13.035694, 8.311498],
    ('YOMOU', 'Guinea'): [-9.259157, 7.569628],
}

Save positions to csv

In [56]:
# Flatten pos_dict into one record per district and write the table to CSV.
# pos_dict values are ordered [longitude, latitude], hence the unpacking order.
pos_list = [
    {
        'district': district,
        'country': country,
        'longitude': lon,
        'latitude': lat,
    }
    for (district, country), (lon, lat) in pos_dict.items()
]
pos_list_df = pd.DataFrame(pos_list)
pos_list_df.to_csv('../data/ebola_pos_list.csv', index=False)
pos_list_df.head()

Unnamed: 0,country,district,latitude,longitude
0,Guinea,BEYLA,8.683333,-8.633333
1,Sierra Leone,BO,7.9552,-11.471
2,Guinea,BOFFA,10.180825,-14.039161
3,Guinea,BOKE,11.186467,-14.100133
4,Sierra Leone,BOMBALI,9.247584,-12.163272


Check that the resulting graph is correct (i.e., all connections exist in both directions)

In [57]:
# Combine the three per-country adjacency dicts into a single new mapping
# (Liberia first, then Sierra Leone, then Guinea). The per-country dicts are
# left untouched; on a duplicate key the later dict would win.
adj_dict = {
    **liberia_adj_dict,
    **sierraleone_adj_dict,
    **guinea_adj_dict,
}

In [58]:
# Build a directed graph from the adjacency lists: every key becomes a node and
# every (key, neighbor) pair becomes a directed edge. Keeping it directed for
# now lets the following cells detect connections listed in one direction only.
graph = nx.DiGraph()
graph.add_nodes_from(adj_dict)
graph.add_edges_from(
    (district, neighbor)
    for district, neighbors in adj_dict.items()
    for neighbor in neighbors
)

In [59]:
# Validate the hand-entered adjacency data on the directed graph:
#   * every edge u -> v must have its reverse v -> u, and
#   * no district may be listed as its own neighbor.
# All offending edges are gathered first so one run reports every problem,
# and the message names the edge that is actually missing (the reverse one).
one_way_edges = [(u, v) for u, v in graph.edges() if not graph.has_edge(v, u)]
self_loops = [(u, v) for u, v in graph.edges() if u == v]

if one_way_edges:
    raise ValueError(
        f'Missing reverse edge(s): each listed (u, v) exists but (v, u) does not: {one_way_edges}'
    )
if self_loops:
    raise ValueError(f'Self-loop(s): {self_loops}')

In [60]:
# Converting to an undirected graph must not change the adjacency matrix;
# only then is the directed graph replaced by its undirected version.
undirected = graph.to_undirected()
assert np.allclose(
    nx.adjacency_matrix(graph).toarray(),
    nx.adjacency_matrix(undirected).toarray(),
)
graph = undirected

In [61]:
# (number of nodes, number of edges) of the undirected district graph.
graph.order(), graph.size()

(55, 115)

Save edge list

In [62]:
# One record per undirected edge, with both endpoints split into their
# district and country parts, written to CSV.
edge_list = [
    {
        'from_district': from_district,
        'from_country': from_country,
        'to_district': to_district,
        'to_country': to_country,
    }
    for (from_district, from_country), (to_district, to_country) in graph.edges()
]
edge_list_df = pd.DataFrame(edge_list)
edge_list_df.to_csv('../data/ebola_edge_list.csv', index=False)
edge_list_df.head()

Unnamed: 0,from_country,from_district,to_country,to_district
0,Liberia,BOMI,Liberia,GRAND CAPE MOUNT
1,Liberia,BOMI,Liberia,GBARPOLU
2,Liberia,BOMI,Liberia,MONTSERRADO
3,Liberia,BONG,Liberia,LOFA
4,Liberia,BONG,Liberia,GBARPOLU


In [63]:
# Also export the edges as a tab-separated edge list without edge attributes.
nx.write_edgelist(
    graph,
    '../data/ebola/ebola_edge_list_simple.tsv',
    delimiter='\t',
    data=False,
)

Draw graph

In [16]:
import folium

In [19]:
# Draw the district graph on an interactive map and save it as HTML.
#
# 'CartoDB positron' is used as the base layer: the 'Mapbox Bright' tileset
# used previously is no longer shipped with folium, and an unknown tile name
# without an attribution makes folium.Map raise.
m = folium.Map(location=[9.442303, -10.843602], tiles='CartoDB positron', zoom_start=7)

# pos_dict stores [longitude, latitude] while folium expects (latitude,
# longitude), hence the [::-1] reversal on every coordinate pair.
for x, y in graph.edges():
    edge = folium.PolyLine(
        locations=[pos_dict[y][::-1], pos_dict[x][::-1]],
        weight=2,
        color='black',
    )
    edge.add_to(m)

# Title-case the district names so multi-word names read naturally
# ('GRAND CAPE MOUNT' -> 'Grand Cape Mount' rather than 'Grand cape mount').
labels = {u: u[0].title() for u in graph.nodes()}
for n, label in labels.items():
    # A DivIcon renders the district name as plain text instead of a pin.
    folium.map.Marker(
        pos_dict[n][::-1],
        icon=folium.features.DivIcon(
            icon_size=(30, 15),
            icon_anchor=(0, 0),
            html=f'<div style="font-size: 10pt">{label}</div>',
        ),
    ).add_to(m)

m.save('../data/ebola_map.html')

# Display the map inline as the cell output.
m