In [95]:
import geopandas as gpd
import pandas as pd
import fiona
import numpy as np
from pyproj import CRS
from shapely.geometry import Point
from shapely.geometry import shape
pd.set_option('display.max_columns', None)

In [96]:
# First the hospital ranked scoree and that admin code attached to it

hospital = pd.read_csv('../Data/Results/health_map.csv')
summary = hospital[['ADM1_PCODE','ADM1_EN','ADM3_EN','ADM3_PCODE']].copy()
summary['hospital_score'] = hospital['ratio_rank']
summary.set_index('ADM3_PCODE',inplace=True,drop=True)

In [102]:
# Then the poverty data 

poverty = pd.read_csv('../Data/Results/poverty_map.csv')
poverty.set_index('ADM3_PCODE',inplace=True,drop=True)

poverty_target = 1-poverty['HH_Poverty'].copy()
poverty_target.rename('not_poverty_ratio',inplace=True)
poverty_target.head()

ADM3_PCODE
UG426101    0.936635
UG426104    0.936229
UG426105    0.947381
UG426102    0.942532
UG426107    0.944297
Name: not_poverty_ratio, dtype: float64

In [100]:
# The age data, where I need to recalculate the ratio

age = pd.read_csv('../Data/Results/Total_Age_gender.csv')
age.set_index('ADM3_PCODE',drop=True,inplace=True)

age['sum']=age.sum(axis=1)

age['elderly']=age['uga_f_65']+age['uga_f_70']+age['uga_f_75']+age['uga_f_80']+age['uga_m_65']+age['uga_m_70']+age['uga_m_75']+age['uga_m_80']
age['not_elderly_ratio']=1-age['elderly']/age['sum']

age['fertile']=age['uga_f_15']+age['uga_f_20']+age['uga_f_25']+age['uga_f_30']+age['uga_f_35']+age['uga_f_40']
age['not_fertile_ratio']=1-age['fertile']/age['sum']

age_target = age[['not_elderly_ratio','not_fertile_ratio']].copy()

age_target.head()

Unnamed: 0_level_0,not_elderly_ratio,not_fertile_ratio
ADM3_PCODE,Unnamed: 1_level_1,Unnamed: 2_level_1
UG314101,0.982067,0.776275
UG314102,0.982066,0.776278
UG314103,0.982074,0.776267
UG314104,0.982025,0.776196
UG314105,0.982068,0.77626


## DHS data

In [66]:
ref_path= '../Data/Mapping layout/Admin1/uga_admbnda_adm1_UBOS_v2.shp'
wash_path = '../Data/Mapping layout/districts/Districts_2014_v3_UBOS.shp'

In [67]:
ref_shapes = gpd.read_file(ref_path)
wash_shapes = gpd.read_file(wash_path)

In [68]:
# Checking CRS

print(ref_shapes.crs)
print(wash_shapes.crs)

# Converting CRS

wash_shapes.to_crs(crs='EPSG:4326',inplace=True)
print(wash_shapes.crs)

epsg:4326
epsg:21096
EPSG:4326


In [69]:
# Change indexes 
ref_shapes.set_index('ADM1_PCODE',inplace=True, drop=True)
wash_shapes.set_index('District_2',inplace=True, drop=True)

# Extract only polygons
ref_geom = ref_shapes["geometry"].centroid
wash_geom = wash_shapes["geometry"].centroid

In [70]:
wash_geom['abim'].distance(ref_geom['UG314'])

0.0030020858982970286

In [71]:
%%time

# sparse matrix so not great in term of efficency probably but does the trick I guess

convert=pd.DataFrame()

for i in list(wash_shapes.index):
    for j in list(ref_shapes.index):
        x = ref_geom.loc[j].distance(wash_geom.loc[i])
        convert.loc[j,i]=x

Wall time: 3.89 s


In [72]:
dict_wash_to_ref={}
for x in list(convert.columns):
    y = ref_shapes.index[np.argmin(convert[x])]
    dict_wash_to_ref[x]=y

In [73]:
#Now the DHS data 

dhs = pd.read_csv('../Data/Results/DHS_data.csv')
dhs['ADM1_PCODE']=dhs['districts'].replace(dict_wash_to_ref)
dhs.set_index('ADM1_PCODE',inplace=True,drop=True)
dhs_target=dhs[['Percentage with soap and detergents','Percentage with personal toilets','Percentage with water at handwashing place',
                'has_income','has_electricity', 'has_telephone', 'has_television', 'has_computer']].copy()
col_names ={'has_income':'income','Percentage with soap and detergents':'soap','Percentage with personal toilets':'toilet',
            'Percentage with water at handwashing place':'handwash','has_electricity':'elec',
            'has_telephone':'telephone','has_television':'tv','has_computer':'pc'}
dhs_target.rename(col_names,axis=1,inplace=True)
dhs_target.head()

Unnamed: 0_level_0,soap,toilet,handwash,income,elec,telephone,tv,pc
ADM1_PCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
UG102,71.85261,27.942354,93.654043,100.0,90.727418,97.202238,72.581934,20.223821
UG105,63.218391,62.015504,90.804598,71.910112,31.460674,91.011236,52.808989,8.988764
UG122,68.888889,77.586207,84.444444,43.478261,30.434783,84.782609,10.869565,2.173913
UG118,50.0,77.777778,63.157895,29.72973,18.918919,89.189189,13.513514,5.405405
UG111,40.384615,75.531915,76.923077,66.25,43.75,86.25,7.5,0.0


In [74]:
dhs_target.loc['UG314',:]

soap         34.482759
toilet       32.258065
handwash     51.724138
income       44.155844
elec         12.987013
telephone    53.246753
tv            3.896104
pc            0.000000
Name: UG314, dtype: float64

In [103]:
summary=pd.merge(summary,age_target,left_index=True, right_index=True)
summary=pd.merge(summary,poverty_target,left_index=True, right_index=True)
summary=pd.merge(summary,dhs_target,left_on='ADM1_PCODE', right_index=True)

In [106]:
len(dhs_target)

112

In [76]:
summary.head()

Unnamed: 0_level_0,ADM1_PCODE,ADM1_EN,ADM3_EN,hospital_score,not_poverty_ratio,not_elderly_ratio,not_fertile_ratio,soap,toilet,handwash,income,elec,telephone,tv,pc
ADM3_PCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
UG314101,UG314,ABIM,ABIM,0.757997,0.429099,0.982067,0.776275,34.482759,32.258065,51.724138,44.155844,12.987013,53.246753,3.896104,0.0
UG314102,UG314,ABIM,ABIM TOWN COUNCIL,0.650209,0.541591,0.982066,0.776278,34.482759,32.258065,51.724138,44.155844,12.987013,53.246753,3.896104,0.0
UG314103,UG314,ABIM,ALEREK,0.656467,0.368176,0.982074,0.776267,34.482759,32.258065,51.724138,44.155844,12.987013,53.246753,3.896104,0.0
UG314104,UG314,ABIM,LOTUKEI,0.547288,0.429381,0.982025,0.776196,34.482759,32.258065,51.724138,44.155844,12.987013,53.246753,3.896104,0.0
UG314105,UG314,ABIM,MORULEM,0.493741,0.492189,0.982068,0.77626,34.482759,32.258065,51.724138,44.155844,12.987013,53.246753,3.896104,0.0


In [89]:
summary[summary['ADM1_PCODE']=='UG207']

Unnamed: 0_level_0,ADM1_PCODE,ADM1_EN,ADM3_EN,hospital_score,not_poverty_ratio,not_elderly_ratio,not_fertile_ratio,soap,toilet,handwash,income,elec,telephone,tv,pc,poverty_summary_av,demographics_summary_av,wash_summary_av,connect_summary_av,poverty_score,demographics_score,wash_score,connect_score
ADM3_PCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
UG207101,UG207,KATAKWI,KAPUJAN,0.525035,0.733985,0.969797,0.783838,6.896552,44.117647,13.793103,58.108108,4.054054,44.594595,0.0,0.0,29.421047,0.876817,21.602434,12.162162,0.156437,0.153443,0.011976,0.046407
UG207102,UG207,KATAKWI,MAGORO,0.384562,0.733023,0.969796,0.783827,6.896552,44.117647,13.793103,58.108108,4.054054,44.594595,0.0,0.0,29.420566,0.876812,21.602434,12.162162,0.155689,0.151946,0.011976,0.046407
UG207103,UG207,KATAKWI,OMODOI,0.38943,0.720318,0.969792,0.78383,6.896552,44.117647,13.793103,58.108108,4.054054,44.594595,0.0,0.0,29.414213,0.876811,21.602434,12.162162,0.15494,0.151198,0.011976,0.046407
UG207104,UG207,KATAKWI,TOROMA,0.332406,0.747291,0.969792,0.78383,6.896552,44.117647,13.793103,58.108108,4.054054,44.594595,0.0,0.0,29.4277,0.876811,21.602434,12.162162,0.157186,0.150449,0.011976,0.046407
UG207201,UG207,KATAKWI,KATAKWI,0.358136,0.709273,0.969798,0.783838,6.896552,44.117647,13.793103,58.108108,4.054054,44.594595,0.0,0.0,29.408691,0.876818,21.602434,12.162162,0.153443,0.154192,0.011976,0.046407
UG207202,UG207,KATAKWI,KATAKWI TOWN COUNCIL,0.920028,0.849043,0.969792,0.78383,6.896552,44.117647,13.793103,58.108108,4.054054,44.594595,0.0,0.0,29.478575,0.876811,21.602434,12.162162,0.157934,0.149701,0.011976,0.046407
UG207203,UG207,KATAKWI,NGARIAM,0.913769,0.693031,0.969804,0.783811,6.896552,44.117647,13.793103,58.108108,4.054054,44.594595,0.0,0.0,29.40057,0.876808,21.602434,12.162162,0.151946,0.148204,0.011976,0.046407
UG207204,UG207,KATAKWI,ONGONGOJA,0.770515,0.694444,0.9698,0.783822,6.896552,44.117647,13.793103,58.108108,4.054054,44.594595,0.0,0.0,29.401276,0.876811,21.602434,12.162162,0.152695,0.148952,0.011976,0.046407
UG207205,UG207,KATAKWI,PALAM,0.833797,0.719742,0.969826,0.783758,6.896552,44.117647,13.793103,58.108108,4.054054,44.594595,0.0,0.0,29.413925,0.876792,21.602434,12.162162,0.154192,0.147455,0.011976,0.046407
UG207206,UG207,KATAKWI,USUK,0.742698,0.688533,0.969796,0.78383,6.896552,44.117647,13.793103,58.108108,4.054054,44.594595,0.0,0.0,29.398321,0.876813,21.602434,12.162162,0.151198,0.152695,0.011976,0.046407


In [91]:
summary.describe()

Unnamed: 0,hospital_score,not_poverty_ratio,not_elderly_ratio,not_fertile_ratio,soap,toilet,handwash,income,elec,telephone,tv,pc,poverty_summary_av,demographics_summary_av,wash_summary_av,connect_summary_av,poverty_score,demographics_score,wash_score,connect_score
count,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0,1336.0
mean,0.504964,0.787318,0.977434,0.785393,38.770541,59.331244,67.735968,79.404345,21.07088,70.363688,9.617508,2.045693,40.095832,0.881414,55.279251,25.774442,0.500374,0.500374,0.50526,0.50526
std,0.292591,0.174229,0.00591,0.016668,20.968186,16.779618,21.226787,18.750794,17.233756,15.095015,11.98072,3.159344,9.400498,0.006561,13.942592,9.695945,0.288783,0.288783,0.289318,0.28921
min,0.002434,0.114456,0.965167,0.701176,0.0,6.451613,5.0,23.699422,0.0,24.855491,0.0,0.0,11.925064,0.846485,18.112391,7.843137,0.000749,0.000749,0.004491,0.008982
25%,0.249478,0.696998,0.97249,0.776718,24.444444,48.236953,55.214724,64.285714,7.2,61.538462,1.960784,0.0,32.468218,0.878116,46.273032,18.303571,0.250561,0.250561,0.251497,0.257485
50%,0.510779,0.849196,0.977345,0.787099,37.5,59.813084,73.831776,84.234234,19.2,74.025974,6.493506,1.117318,42.535127,0.88215,54.719356,25.681818,0.500374,0.500374,0.505988,0.501497
75%,0.761648,0.916912,0.981108,0.796747,53.488372,74.509804,84.444444,96.618357,30.0,79.792746,12.5,2.48307,48.708442,0.885964,66.543817,30.454545,0.750187,0.750187,0.766467,0.753743
max,1.0,0.995764,0.994782,0.818705,84.431138,88.888889,100.0,100.0,90.727418,97.202238,72.581934,20.223821,50.497882,0.896315,84.976301,70.183853,1.0,1.0,1.0,1.0


In [107]:
summary['poverty_summary_av']=(summary['not_poverty_ratio']+summary['income'])/2
summary['demographics_summary_av']=(summary['not_elderly_ratio']+summary['not_fertile_ratio'])/2
summary['wash_summary_av']=(summary['soap']+summary['toilet']+summary['handwash'])/3
summary['connect_summary_av']=(summary['elec']+summary['telephone']+summary['tv']+summary['pc'])/4

summary['poverty_score']=summary['poverty_summary_av'].rank(ascending=True,pct=True,method='max')
summary['demographics_score']=summary['demographics_summary_av'].rank(ascending=True,pct=True,method='max')
summary['wash_score']=summary['wash_summary_av'].rank(ascending=True,pct=True,method='max')
summary['connect_score']=summary['connect_summary_av'].rank(ascending=True,pct=True,method='max')


summary_target=summary[['ADM1_PCODE', 'ADM3_EN','ADM1_EN','hospital_score','poverty_score', 'demographics_score', 'wash_score', 'connect_score']]

In [108]:
summary_target.describe()

Unnamed: 0,hospital_score,poverty_score,demographics_score,wash_score,connect_score
count,1406.0,1406.0,1406.0,1406.0,1406.0
mean,0.50114,0.500404,0.500404,0.505307,0.505307
std,0.29091,0.288773,0.288776,0.289282,0.289131
min,0.002434,0.000711,0.000711,0.004267,0.009246
25%,0.245306,0.250533,0.251067,0.253201,0.253912
50%,0.504868,0.500356,0.500711,0.503556,0.500711
75%,0.75226,0.750178,0.750178,0.766714,0.757646
max,1.0,1.0,1.0,1.0,1.0


In [109]:
summary_target.reset_index(drop=False,inplace=True)

summary_stacked = summary_target.melt(id_vars=['ADM3_PCODE','ADM3_EN','ADM1_PCODE','ADM1_EN'],
                               value_vars=['hospital_score','poverty_score', 'demographics_score', 'wash_score', 'connect_score'])

In [110]:
summary_stacked.head()

Unnamed: 0,ADM3_PCODE,ADM3_EN,ADM1_PCODE,ADM1_EN,variable,value
0,UG101131,KALANGALA TOWN COUNCIL,UG101,KALANGALA,hospital_score,0.809458
1,UG101131,KALANGALA TOWN COUNCIL,UG101,KALANGALA,hospital_score,0.809458
2,UG101132,MUGOYE,UG101,KALANGALA,hospital_score,0.603616
3,UG101201,BUBEKE,UG101,KALANGALA,hospital_score,0.705146
4,UG101202,BUFUMIRA,UG101,KALANGALA,hospital_score,0.726704


In [111]:
metrics_order = {'hospital_score' : 1, 
                 'poverty_score' : 2, 
                 'demographics_score' : 3,
                 'wash_score' : 4, 
                 'connect_score' : 5}

In [112]:
summary_stacked['increment']=summary_stacked['variable'].replace(metrics_order)

In [113]:
summary_stacked['increment'].unique()

array([1, 2, 3, 4, 5], dtype=int64)

In [114]:
summary.to_csv('../Data/Results/radar_chart.csv')
summary_stacked.to_csv('../Data/Results/radar_chart_stacked.csv')