In [1]:
%run resources/library.py

In [2]:
style_notebook()

### Welcome to Notebook 3 of the HELINA Workshop

This notebook will help us explore the Place building block of Botswana's HIV epidemic. Using the pickle file of Table 6 of the BAIS 2013 report, we'll create a choropleth map.

### First, we need to read in the pickle file

In [3]:
import os
import pandas as pd
import matplotlib as plt
import geopandas as gpd

In [4]:
table6 = pd.read_pickle('outputs/table6pickle.pkl')
table6

Unnamed: 0,District,Male,Female,TotalPop
3,Gaborone,13.4,19.8,17.0
4,Francistown,20.7,27.5,24.3
5,Lobatse,13.4,20.0,17.2
6,Selebi-Phikwe,25.4,29.3,27.5
7,Orapa,9.9,20.2,15.6
8,Jwaneng,8.5,16.7,12.8
9,Sowa,13.3,26.5,19.8
10,Southern,10.6,12.8,11.8
11,Barolong,8.9,25.7,20.3
12,Ngwaketse West,10.9,24.5,18.8


In [5]:
# Rename district names to match with the shapefile. 
table6 = table6.replace({"Barolong": "Goodhope", "Central-Bobonong": "Bobirwa", "Central-Boteti": "Boteti", 
                        "Central-Mahalapye": "Mahalapye", "Central-Serowe": "Serowe", "Central-Tutume": "Tutume",
                        "Ngamiland North": "Okavango", "Ngamiland South": "Ngamiland", "Ngwaketse West": "Mabutsane",
                        "Northeast": "North East", "Southeast": "South East"})
table6.head()

Unnamed: 0,District,Male,Female,TotalPop
3,Gaborone,13.4,19.8,17.0
4,Francistown,20.7,27.5,24.3
5,Lobatse,13.4,20.0,17.2
6,Selebi-Phikwe,25.4,29.3,27.5
7,Orapa,9.9,20.2,15.6


In [6]:
# check data types.
table6.dtypes

District    object
Male        object
Female      object
TotalPop    object
dtype: object

In [7]:
# load in a shapefile

fp = "resources/BW_27Health_Districts.shp"
map_df = gpd.read_file(fp)
# check data type so we can see that this is not a normal dataframe, but a GEOdataframe
map_df

Unnamed: 0,Dname,uid,psnuuid,snulist,class,geometry
0,Gaborone,VB7am4futjm,Y6TnOG79VvP,Greater Gabarone Cluster,2.0,"POLYGON ((25.91833 -24.73833, 25.87139 -24.744..."
1,Lobatse,m9qc7MnDafM,m9qc7MnDafM,Lobatse,,"POLYGON ((25.71922 -25.26861, 25.71179 -25.277..."
2,Selebi-Phikwe,qUdeJw9Q0Vk,qUdeJw9Q0Vk,Selebi-Phikwe,,"POLYGON ((27.86585 -22.02339, 27.86016 -22.013..."
3,Francistown,h1CepDrLWib,ZmWGSvpahPk,Greater Francistown Cluster,1.0,"POLYGON ((27.44500 -21.22735, 27.43540 -21.186..."
4,Boteti,p5JLtcOw9iv,p5JLtcOw9iv,Boteti,,"POLYGON ((24.80555 -22.16306, 24.56528 -21.860..."
5,Chobe,e9Ze74PEe99,e9Ze74PEe99,Chobe,,"POLYGON ((24.99055 -19.00167, 24.71111 -19.000..."
6,Ghanzi,saOJtBcENzM,saOJtBcENzM,Ghanzi,,"POLYGON ((23.88083 -20.99007, 24.19639 -21.393..."
7,Kgalagadi South,KrgFtpUJ20E,KrgFtpUJ20E,Kgalagadi South District,,"POLYGON ((23.04889 -24.83722, 23.04972 -24.851..."
8,Kgatleng,yNcvm7JYBfi,Y6TnOG79VvP,Greater Gaborone Cluster,2.0,"POLYGON ((26.16783 -24.66397, 26.14139 -24.622..."
9,Kweneng East,Uz8LWtC0vYF,Y6TnOG79VvP,Greater Gaborone Cluster,2.0,"POLYGON ((25.81139 -23.34084, 25.82694 -23.344..."


In [8]:
map_df.dtypes

Dname         object
uid           object
psnuuid       object
snulist       object
class        float64
geometry    geometry
dtype: object

In [9]:
# rename "NAME_2" columnt to "District" to match with the csv.
map_df.rename(columns={'Dname':'District'}, inplace=True)
map_df

Unnamed: 0,District,uid,psnuuid,snulist,class,geometry
0,Gaborone,VB7am4futjm,Y6TnOG79VvP,Greater Gabarone Cluster,2.0,"POLYGON ((25.91833 -24.73833, 25.87139 -24.744..."
1,Lobatse,m9qc7MnDafM,m9qc7MnDafM,Lobatse,,"POLYGON ((25.71922 -25.26861, 25.71179 -25.277..."
2,Selebi-Phikwe,qUdeJw9Q0Vk,qUdeJw9Q0Vk,Selebi-Phikwe,,"POLYGON ((27.86585 -22.02339, 27.86016 -22.013..."
3,Francistown,h1CepDrLWib,ZmWGSvpahPk,Greater Francistown Cluster,1.0,"POLYGON ((27.44500 -21.22735, 27.43540 -21.186..."
4,Boteti,p5JLtcOw9iv,p5JLtcOw9iv,Boteti,,"POLYGON ((24.80555 -22.16306, 24.56528 -21.860..."
5,Chobe,e9Ze74PEe99,e9Ze74PEe99,Chobe,,"POLYGON ((24.99055 -19.00167, 24.71111 -19.000..."
6,Ghanzi,saOJtBcENzM,saOJtBcENzM,Ghanzi,,"POLYGON ((23.88083 -20.99007, 24.19639 -21.393..."
7,Kgalagadi South,KrgFtpUJ20E,KrgFtpUJ20E,Kgalagadi South District,,"POLYGON ((23.04889 -24.83722, 23.04972 -24.851..."
8,Kgatleng,yNcvm7JYBfi,Y6TnOG79VvP,Greater Gaborone Cluster,2.0,"POLYGON ((26.16783 -24.66397, 26.14139 -24.622..."
9,Kweneng East,Uz8LWtC0vYF,Y6TnOG79VvP,Greater Gaborone Cluster,2.0,"POLYGON ((25.81139 -23.34084, 25.82694 -23.344..."


In [10]:
map_df.dtypes

District      object
uid           object
psnuuid       object
snulist       object
class        float64
geometry    geometry
dtype: object

In [11]:
# save shapefile as GeoJson.
map_df.to_file('resources/polygons.json', driver="GeoJSON")

In [12]:
%matplotlib inline
map_df.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7f532075ab70>

In [13]:
# Check if districts in csv and shapefile match. 
df_merge = pd.merge(table6, map_df, on='District', how='outer')
df_merge

Unnamed: 0,District,Male,Female,TotalPop,uid,psnuuid,snulist,class,geometry
0,Gaborone,13.4,19.8,17.0,VB7am4futjm,Y6TnOG79VvP,Greater Gabarone Cluster,2.0,"POLYGON ((25.91833 -24.73833, 25.87139 -24.744..."
1,Francistown,20.7,27.5,24.3,h1CepDrLWib,ZmWGSvpahPk,Greater Francistown Cluster,1.0,"POLYGON ((27.44500 -21.22735, 27.43540 -21.186..."
2,Lobatse,13.4,20.0,17.2,m9qc7MnDafM,m9qc7MnDafM,Lobatse,,"POLYGON ((25.71922 -25.26861, 25.71179 -25.277..."
3,Selebi-Phikwe,25.4,29.3,27.5,qUdeJw9Q0Vk,qUdeJw9Q0Vk,Selebi-Phikwe,,"POLYGON ((27.86585 -22.02339, 27.86016 -22.013..."
4,Orapa,9.9,20.2,15.6,,,,,
5,Jwaneng,8.5,16.7,12.8,dEkUfWWZQ61,dEkUfWWZQ61,Jwaneng,,"POLYGON ((25.43238 -24.68462, 25.38345 -24.738..."
6,Sowa,13.3,26.5,19.8,,,,,
7,Southern,10.6,12.8,11.8,LEUjALXInGD,LEUjALXInGD,Southern,2.0,"POLYGON ((25.60806 -25.19139, 25.56341 -25.333..."
8,Goodhope,8.9,25.7,20.3,y8A4MNQse4l,y8A4MNQse4l,Goodhope,2.0,"POLYGON ((25.54722 -25.38528, 25.53500 -25.426..."
9,Mabutsane,10.9,24.5,18.8,GgzMAsHUgHC,GgzMAsHUgHC,Mabutsane,,"POLYGON ((23.90694 -24.84584, 23.48139 -24.848..."


In [14]:
# save as a csv.
table6.to_csv('resources/popdata_matched.csv')

In [15]:
import folium
import json

In [16]:
state_geo = r'resources/polygons.json'
table = r'resources/popdata_matched.csv'
csvtable = pd.read_csv(table)

In [17]:
m = folium.Map(location=[-22.212136, 23.717579], zoom_start=6)

folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=csvtable,
    columns=['District', 'TotalPop'],
    key_on='feature.properties.District',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="HIV Prevalence Total Population",
).add_to(m)

m.save(os.path.join('resources', 'map_total.html'))

HIV Prevalence Total Population (5 disticts have no data and display in dark grey). 

In [18]:
from IPython.display import IFrame

IFrame(src='resources/map_total.html', width=950, height=600)

In [19]:
m = folium.Map(location=[-22.212136, 23.717579], zoom_start=6)

folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=csvtable,
    columns=['District', 'Male'],
    key_on='feature.properties.District',
    fill_color='GnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="HIV Prevalence Male",
).add_to(m)

m.save(os.path.join('resources', 'map_males.html'))

In [20]:
from IPython.display import IFrame

IFrame(src='resources/map_males.html', width=950, height=600)

In [21]:
m = folium.Map(location=[-22.212136, 23.717579], zoom_start=6)

folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=csvtable,
    columns=['District', 'Female'],
    key_on='feature.properties.District',
    fill_color='BuPu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="HIV Prevalence Female",
).add_to(m)

m.save(os.path.join('resources', 'map_females.html'))

In [22]:
from IPython.display import IFrame

IFrame(src='resources/map_females.html', width=950, height=600)

But, wait! Why are some areas of the map grayed out?

There are 26 records in BAIS 2013 table vs 27 records in the current Botswana Shapefile. Three districts on the map have no data and display as dark grey. Orapa and Sowa from BAIS report are towns so they could not be matched to district shapefile. They could be counted as part of districts they are in. Palapye, Charleshill, Moshupa areas in the shapefile do not have matches with BAIS, maybe because they were not surveyed by BAIS or surveyed as part of other districts from which they separated later.

## Congratulations !  You have completed the "Place" lego block.

![PLACE LEGO](resources/Place_Lego_block.png)