# Setting up the file 

In [22]:
import geopandas as gpd
import pandas as pd
import rasterio as rio
import numpy as np
from pyproj import CRS
from shapely.geometry import Point
from sklearn import preprocessing

# Getting the population raster data

In [23]:
pop_data = '../Data/Demographics/WorldPop/UG_2020_population.tif'
shape_file = '../Data/Mapping layout/Admin3/uga_admbnda_adm3_UBOS_v5_cleaned [CB].shp'

In [24]:
# Read band 1 of the population raster as a masked array.
# The context manager closes the file handle as soon as the data is in
# memory; the original left the dataset open for the whole kernel session.
with rio.open(pop_data) as raster:
    dem_data = raster.read(1, masked=True)

In [25]:
dem_data

masked_array(
  data=[[--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        ...,
        [--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --]],
  mask=[[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]],
  fill_value=-99999.0,
  dtype=float32)

# Aggregating the population raster to sub-county boundaries

In [26]:
uganda = gpd.read_file(shape_file)

In [27]:
def mask_raster(raster_path, shape, indexes=1, crop=True, nodata=-9999):
    """Clip the raster stored at ``raster_path`` to a single geometry.

    Parameters
    ----------
    raster_path : path of the raster file to open.
    shape : one geometry; it is wrapped in a list and handed to
        ``rasterio.mask.mask``.
    indexes, crop, nodata : forwarded unchanged to ``rasterio.mask.mask``.

    Returns
    -------
    tuple
        ``(out_data, out_transform, out_meta)``: the array and transform
        returned by ``rasterio.mask.mask``, followed by the metadata of the
        source raster (it is NOT adjusted to the clipped extent).
    """
    import rasterio as rio
    from rasterio.mask import mask

    with rio.open(raster_path) as src:
        clipped, clipped_transform = mask(
            src, [shape], indexes=indexes, crop=crop, nodata=nodata
        )
        source_meta = src.meta
    return (clipped, clipped_transform, source_meta)

In [28]:
# Sum the strictly positive raster cells that fall inside each sub-county
total = []
for shape in uganda['geometry']:
    clipped, _, _ = mask_raster(pop_data, shape)
    # Boolean indexing already returns a flat array of the selected cells
    populated = clipped[clipped > 0]
    # A polygon with no positive cell is recorded as NaN rather than 0
    total.append(np.sum(populated if populated.size else np.nan))

In [29]:
uganda['total_pop'] = total

In [30]:
uganda.head(n=3)

Unnamed: 0,ADM0_EN,ADM0_PCODE,ADM1_EN,ADM1_PCODE,ADM2_EN,ADM2_PCODE,ADM3_EN,ADM3_PCODE,geometry,total_pop
0,Uganda,UG,ABIM,UG314,LABWOR,UG3141,ABIM,UG314101,"POLYGON ((33.53202 2.70592, 33.53191 2.70617, ...",17620.712891
1,Uganda,UG,ABIM,UG314,LABWOR,UG3141,ABIM TOWN COUNCIL,UG314102,"POLYGON ((33.62482 2.68035, 33.62481 2.68037, ...",25303.634766
2,Uganda,UG,ABIM,UG314,LABWOR,UG3141,ALEREK,UG314103,"POLYGON ((33.57362 2.91031, 33.57362 2.91031, ...",26072.271484


# Load the health facility data into a GeoDataFrame

In [44]:
hosp_path = '../Data/Health_facilities/hospital_map.csv'
hospitals_data = pd.read_csv(hosp_path).set_index('Unnamed: 0')

In [45]:
hospitals_data

Unnamed: 0_level_0,Health Facility,Level,Ownership,lat,lon
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,Buikwe HC III,Health Centre III,Government,0.343480,33.027320
1,Buikwe St. Charles Lwanga HOSPITAL,Hospital,Private Not For Profit,0.339440,33.031810
2,Kasaku HC II,Health Centre II,NGO,0.363300,32.891120
3,Busabaga HC III,Health Centre III,Government,0.296940,32.906390
4,ENG BD Military Lugazi HC III,Health Centre III,Government,0.371400,32.912230
...,...,...,...,...,...
6234,Rhema Medical Clinic,Clinic,PFP,3.473783,31.249925
6235,Umoja Clinic,Clinic,PFP,3.473783,31.249925
6236,Yumbe Medical Clinic,Clinic,PFP,3.473783,31.249925
6237,Paidha Medical Centre Clinic,Clinic,PFP,2.421925,30.986945


In [46]:
# Turn the facility table into a GeoDataFrame of points

# Coordinate reference system: WGS84
crs = CRS('epsg:4326')

# One shapely Point per facility row, built from (lon, lat) pairs
geometry = list(map(Point, zip(hospitals_data['lon'], hospitals_data['lat'])))

# Attach the points, keep only the descriptive columns, and give the name
# column an identifier-friendly label
h_df = gpd.GeoDataFrame(hospitals_data, crs=crs, geometry=geometry)
h_df = (
    h_df[['Health Facility', 'Level', 'Ownership', 'geometry']]
    .rename(columns={'Health Facility': 'Health_Facility'})
)

h_df.head()

Unnamed: 0_level_0,Health_Facility,Level,Ownership,geometry
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Buikwe HC III,Health Centre III,Government,POINT (33.02732 0.34348)
1,Buikwe St. Charles Lwanga HOSPITAL,Hospital,Private Not For Profit,POINT (33.03181 0.33944)
2,Kasaku HC II,Health Centre II,NGO,POINT (32.89112 0.36330)
3,Busabaga HC III,Health Centre III,Government,POINT (32.90639 0.29694)
4,ENG BD Military Lugazi HC III,Health Centre III,Government,POINT (32.91223 0.37140)


In [47]:
h_df['Level'].unique()

array(['Health Centre III', 'Hospital', 'Health Centre II', 'Clinic',
       'Health Centre IV', 'National Referral Hospital',
       'Regional Referral Hospital'], dtype=object)

# Calculate hospital coverage 

In [48]:
# Nominal population served by one facility of each level

pop_served = {
    'Clinic': 1000,
    'Health Centre II': 5000,
    'Health Centre III': 20000,
    'Health Centre IV': 100000,
    'Hospital': 500000,
    'Regional Referral Hospital': 2000000,
    'National Referral Hospital': 10000000,
}

# Same capacities, except that levels below Health Centre IV count for nothing

pop_served_hc4 = {
    level: 0 if level in ('Clinic', 'Health Centre II', 'Health Centre III') else capacity
    for level, capacity in pop_served.items()
}

# Admin-code column associated with each facility level

significance = {
    **dict.fromkeys(['Clinic', 'Health Centre II', 'Health Centre III'], 'ADM3_PCODE'),
    **dict.fromkeys(['Health Centre IV', 'Hospital'], 'ADM2_PCODE'),
    **dict.fromkeys(['Regional Referral Hospital', 'National Referral Hospital'], 'ADM1_PCODE'),
}

In [49]:
# Collect the distinct facility names (one name may cover several rows) and
# attach to every facility the nominal population its level serves

hospitals = h_df['Health_Facility'].unique().tolist()
h_df['served'] = h_df['Level'].map(pop_served.get)

In [50]:
# Zero-initialise the accumulator columns filled in by the facility loop:
# overall contribution, HC-IV-and-above contribution, one counter per
# facility level, then the private-for-profit contribution

for new_column in ['contribution', 'contribution_hc4', *pop_served, 'contribution_pfp']:
    uganda[new_column] = 0

In [51]:
# Define the tools I'll combine to extract which point is in which polygon

# Tests every point against the polygons and merges the per-point results
def get_complex_mask(point_list, df):
    """Flag the polygons of ``df`` that contain any point of ``point_list``.

    One ``df.geometry.contains(point)`` mask is computed per point; the masks
    are then merged by ``combine_masks`` and its result is returned.
    """
    per_point_masks = [df.geometry.contains(pt) for pt in point_list]
    return combine_masks(per_point_masks)

# Return a single mask saying whether each polygon contains at least one of
# the points passed to get_complex_mask
def combine_masks(masks):
    """Merge several equal-length boolean masks with an element-wise OR.

    Parameters
    ----------
    masks : list of iterables of truthy/falsy values (lists, or the boolean
        Series produced by ``geometry.contains``).

    Returns
    -------
    list of bool
        Position ``i`` is True when at least one mask is true at position
        ``i``. An empty ``masks`` list yields an empty list.

    Notes
    -----
    The inputs are only read. The previous version accumulated into
    ``masks[0]`` itself, and addressed elements with ``[m]``, which is a
    label lookup on a pandas Series and so relied on a default integer index.
    Iterating with ``zip`` is purely positional.
    """
    return [any(flags) for flags in zip(*masks)]

In [52]:
for hospital in hospitals:  # slice with [:1] to try a single example
    # Every row sharing this facility name is handled together
    df = h_df[h_df.Health_Facility == hospital].reset_index()

    # Level and ownership are read from the FIRST matching row only, so
    # same-named facilities are all treated as that row's level/ownership
    h_type = df.loc[0, 'Level']
    owner = int(df.loc[0, 'Ownership'] == 'PFP')  # 1 when private-for-profit, else 0
    sig = significance.get(h_type)  # admin-code column used for this level

    # One flag per sub-county polygon: does it contain any of the points?
    mask = get_complex_mask(df.geometry.tolist(), uganda)
    if sum(mask) < 1:  # no polygon contains the facility (e.g. no location data)
        continue

    # Admin codes (at the level given by `sig`) of the polygons that were hit,
    # then every sub-county carrying one of those codes
    area_name = uganda[mask][sig].tolist()
    m = uganda[sig].isin(area_name)

    # Split the level's nominal capacity evenly (integer division) across
    # the selected sub-counties
    n_areas = len(uganda.loc[m])
    share = pop_served.get(h_type) // n_areas
    share_hc4 = pop_served_hc4.get(h_type) // n_areas

    uganda.loc[m, 'contribution'] += share
    uganda.loc[m, 'contribution_hc4'] += share_hc4
    uganda.loc[m, 'contribution_pfp'] += share * owner
    # Count the facility once in each selected sub-county
    uganda.loc[m, h_type] += 1


In [53]:
uganda.loc[m, 'contribution']

1434    72000
Name: contribution, dtype: int64

In [54]:
h_df.loc[3000, 'Ownership']

'Private Not For Profit'

In [55]:
uganda.head(n=6)

Unnamed: 0,ADM0_EN,ADM0_PCODE,ADM1_EN,ADM1_PCODE,ADM2_EN,ADM2_PCODE,ADM3_EN,ADM3_PCODE,geometry,total_pop,contribution,contribution_hc4,Clinic,Health Centre II,Health Centre III,Health Centre IV,Hospital,Regional Referral Hospital,National Referral Hospital,contribution_pfp
0,Uganda,UG,ABIM,UG314,LABWOR,UG3141,ABIM,UG314101,"POLYGON ((33.53202 2.70592, 33.53191 2.70617, ...",17620.712891,98333,83333,0,3,0,0,1,0,0,5000
1,Uganda,UG,ABIM,UG314,LABWOR,UG3141,ABIM TOWN COUNCIL,UG314102,"POLYGON ((33.62482 2.68035, 33.62481 2.68037, ...",25303.634766,108333,83333,0,1,1,0,1,0,0,0
2,Uganda,UG,ABIM,UG314,LABWOR,UG3141,ALEREK,UG314103,"POLYGON ((33.57362 2.91031, 33.57362 2.91031, ...",26072.271484,113333,83333,0,2,1,0,1,0,0,0
3,Uganda,UG,ABIM,UG314,LABWOR,UG3141,LOTUKEI,UG314104,"POLYGON ((33.52357 2.66188, 33.52343 2.66202, ...",33525.804688,113333,83333,0,2,1,0,1,0,0,0
4,Uganda,UG,ABIM,UG314,LABWOR,UG3141,MORULEM,UG314105,"POLYGON ((33.69505 2.62098, 33.69504 2.62099, ...",32999.273438,98333,83333,0,3,0,0,1,0,0,0
5,Uganda,UG,ABIM,UG314,LABWOR,UG3141,NYAKWAE,UG314106,"POLYGON ((33.70457 2.46390, 33.70466 2.46399, ...",22161.353516,123333,83333,0,4,1,0,1,0,0,0


In [56]:
print('Regarding coverage: This',
      uganda.contribution.sum(),
      'should be around the same as this',
      h_df[~pd.isna(h_df.geometry.x)].served.sum(),
      '\n While this should be lower than both',
      uganda.contribution_hc4.sum(),
      '\n Regarding facility type counts: This',
      uganda['Health Centre II'].sum(),
      'should be around the same as this',
      len(h_df[h_df['Level']=='Health Centre II']),
     '\n Regarding facility type counts: This',
      uganda['Clinic'].sum(),
      'should be around the same as this',
      len(h_df[h_df['Level']=='Clinic']))

Regarding coverage: This 193474007 should be around the same as this 193910000 
 While this should be lower than both 145399007 
 Regarding facility type counts: This 3952 should be around the same as this 3958 
 Regarding facility type counts: This 505 should be around the same as this 500


In [57]:
uganda['ratio']=uganda['contribution']/uganda['total_pop']
uganda['ratio_hc4']=uganda['contribution_hc4']/uganda['total_pop']
uganda['ratio_pfp']=uganda['contribution_pfp']/uganda['total_pop']
uganda['ratio_gvt_nfp']=uganda['ratio']-uganda['ratio_pfp']

In [58]:
print(uganda['ratio'].mean(),
      uganda['ratio_hc4'].mean(),
      uganda['ratio_pfp'].mean(),
      uganda['ratio_gvt_nfp'].mean())

4.726994905314556 3.5896174775267844 0.4933647176102018 4.233630187704354


In [59]:
# Percentile rank (ascending) of every coverage ratio
for ratio_col in ['ratio', 'ratio_hc4', 'ratio_pfp', 'ratio_gvt_nfp']:
    uganda[ratio_col + '_rank'] = uganda[ratio_col].rank(ascending=True, pct=True)


In [60]:
# Min-max normalise the ratios; the single scaler is refitted on each
# column in turn, so every column is scaled independently

min_max_scaler = preprocessing.MinMaxScaler()

for ratio_col in ['ratio', 'ratio_hc4', 'ratio_pfp']:
    # The scaler expects a 2-D float array, hence the double brackets
    column_values = uganda[[ratio_col]].values.astype(float)
    uganda[ratio_col + '_norm'] = min_max_scaler.fit_transform(column_values)



In [61]:
uganda_noshapes=uganda.copy().drop(['geometry'],axis=1)

In [62]:
uganda_noshapes.to_csv('../Data/Results/health_map.csv')


In [63]:
uganda_noshapes[['total_pop','contribution','contribution_hc4']].describe()

Unnamed: 0,total_pop,contribution,contribution_hc4
count,1438.0,1438.0,1438.0
mean,29521.510896,134543.8,101112.0
std,30225.581511,478386.4,395238.8
min,1299.603027,0.0,0.0
25%,15367.226807,36666.0,12500.0
50%,23132.753906,70000.0,46153.0
75%,36520.177734,118020.0,85713.0
max,449522.3125,8511000.0,6480000.0
