# Test the sensitivity of investment recommendations to weighting assumptions

Visualize access improvements per entity (base case: road) using a weighting schema that assigns equal weights to sub-categories within service categories and equal weights to different service categories.</br></br>Compare these results to results from a sensitivity analysis varying weighting schemes N times in a normal distribution around the mean value for an equal weighting schema (i.e. 0.3333 for 3 variables).

This notebook is meant to provide a template for quickly conducting sensitivity analyses in use case requiring similar combinations of inputs into master index values. For this reason the custom functions implemented herein have been asbracted into a small separate code file.

In [127]:
import os, sys
import re

import pandas as pd
import geopandas as gpd
import numpy as np
import numba

import sklearn
from scipy import stats
from datetime import date
import sensitivity_testing as st

## Setup

In [128]:
data_dir = r'../../data'
tab_dir = r'tabular'
geo_dir = r'P:\PAK\GEO'
acc_dir = r'access'

Projections

In [129]:
# change this to whatever the desired output projection is
DEST_CRS = 'EPSG:32642'

dcrs_int = int(re.findall('[0-9]+',DEST_CRS)[0])
dcrs_int

32642

Dates

In [130]:
today = date.today().strftime("%y%m%d")

In [131]:
data_date = '211215'

### Functions reference

These functions will run the actual sensitivity analysis. You can expand them below for reference purposes.

In [132]:
# st.rank_by_weight??

In [133]:
# st.create_weight_array??

In [134]:
# st.Sensitivity_weighting??

In [135]:
# st.Sensitivity_inbudget??

### Load in tabular data
The data should contain the raw values used to evaluate the cost-efficiency (or other investment criteria) of each potential entity (investment). Note these values should already have been transformed unto equivalent scales for comparative purposes, e.g. normalized.

In [136]:
# # replace with the input CSV you're using
# feat_type = 'adm3'
# df_pth = os.path.join(data_dir,tab_dir,f"final//adm3_idx_access_stats_{data_date}.csv")

In [137]:
# or if using roads
feat_type = 'roads'
df_pth = os.path.join(data_dir,tab_dir,f"final//rds_idx_access_stats_{data_date}.csv")

Feature type

In [138]:
df = pd.read_csv(df_pth)

## Sensitivity Analysis

In [139]:
eq_wts = [0.3333333, 0.3333333, 0.3333333]
educ_priority_wts = [0.5, 0.25, 0.25]

In [140]:
master_cols = ['educ_altwt_idx','health_idx','markets_idx']

In [141]:
priority_wt_ranking = st.rank_by_weight(df,master_cols,educ_priority_wts)
priority_wt_ranking

array([ 1,  2,  3,  5,  6,  8,  4,  7, 12, 11, 18, 13, 16, 23, 17, 20, 14,
       19, 22, 10, 15, 21, 28, 26, 25,  9, 24, 30, 31, 29, 27, 35, 36, 34,
       33, 42, 44, 40, 38, 43, 39, 37, 32, 41, 45, 47, 46, 50, 52, 48, 49,
       51, 53, 56, 58, 59, 55, 54, 57, 60, 62, 61, 64, 69, 66, 65, 63, 70,
       68, 71, 67, 73, 72, 77, 75, 76, 74, 78, 79])

### Rank by seasonal mean, weighted by service

Define cols to test

In [142]:
wt_samp_arr, vals_arr, ranks_arr = st.Sensitivity_weighting(df,master_cols,iterations=50000)

In [143]:
wt_samp_arr

array([[3.18612350e-01, 4.72310336e-01, 2.09077314e-01],
       [3.65493966e-01, 2.96159853e-01, 3.38346181e-01],
       [1.55593164e-01, 3.58817782e-01, 4.85589054e-01],
       ...,
       [5.82413063e-01, 2.42658119e-01, 1.74928819e-01],
       [9.56316520e-01, 5.99537270e-04, 4.30839424e-02],
       [1.15556231e-01, 4.87759614e-01, 3.96684155e-01]])

In [144]:
vals_arr[1][::10]

array([[0.26016572, 0.29615985, 0.33834618],
       [0.01368543, 0.01784615, 0.16535843],
       [0.06700192, 0.0210866 , 0.08967722],
       [0.04378749, 0.04360668, 0.03607118],
       [0.0276426 , 0.02855132, 0.01431913],
       [0.01130783, 0.00782697, 0.02641545],
       [0.00231166, 0.0103465 , 0.01681778],
       [0.00851253, 0.00098211, 0.00563059]])

In [145]:
ranks_arr

array([[ 1,  2,  3, ..., 76, 78, 79],
       [ 1,  2,  3, ..., 77, 78, 79],
       [ 1,  2,  4, ..., 77, 78, 79],
       ...,
       [ 1,  2,  4, ..., 74, 78, 79],
       [ 2,  1, 11, ..., 62, 77, 79],
       [ 1,  2,  3, ..., 77, 78, 79]])

**Optional**</br>Calculate whether each entity (road) falls within the allocated budget for each ranking

In [146]:
if feat_type == 'rds':
    
    # budget maximum
    budget = 280000000

    # Populate a new array with the cost of construction for each entity (road), in the order these entities are ranked, for each set of rankings
    cost_arr = np.array(df['Cost_USD'])

    # # run the function which calculates what percent of entities fall within budget
    within_budget_arr, within_budget_pct = st.Sensitivity_inbudget(budget,cost_arr,ranks_arr)

    # display results
    print(np.sum(df['Cost_USD']))
    print(within_budget_arr)
    print(within_budget_pct)
    
else:
    None
    

## Append results to entities

Calculate descriptive stats based on the sensitivity test results

In [147]:
# show mean of weights
print(f'{master_cols[0]}: ',np.mean(wt_samp_arr[:,:1]))
print(f'{master_cols[1]}: ',np.mean(wt_samp_arr[:,:2]))
print(f'{master_cols[2]}: ',np.mean(wt_samp_arr[:,:3]))

educ_altwt_idx:  0.33215377265413437
health_idx:  0.3320648642461542
markets_idx:  0.33333333333333337


In [148]:
# show mean of vals
print(f'{master_cols[0]}: ',np.mean(vals_arr[:,:1]))
print(f'{master_cols[1]}: ',np.mean(vals_arr[:,:2]))
print(f'{master_cols[2]}: ',np.mean(vals_arr[:,:3]))

educ_altwt_idx:  0.3014265826810966
health_idx:  0.2642795736127337
markets_idx:  0.2274337963393143


In [149]:
# calculate standard deviation of sensitivity tested values, per category
df[['educ_altwt_idx_sens_std','health_idx_sens_std','markets_idx_sens_std']] = np.std(vals_arr,axis=0)

In [150]:
df[['educ_altwt_idx_sens_std','health_idx_sens_std','markets_idx_sens_std']]

Unnamed: 0,educ_altwt_idx_sens_std,health_idx_sens_std,markets_idx_sens_std
0,0.138759,0.194821,0.195466
1,0.194936,0.133058,0.071299
2,0.045613,0.130596,0.093679
3,0.024928,0.037409,0.192967
4,0.055189,0.089053,0.073948
...,...,...,...
74,0.001019,0.000730,0.000916
75,0.000766,0.001590,0.000521
76,0.002269,0.000021,0.000176
77,0.000272,0.000194,0.000228


In [151]:
# compute basic stats for each entity's ranking
# axis=0 for operating by column
ranks_mode = stats.mode(ranks_arr,axis=0)
ranks_mean = np.mean(ranks_arr,axis=0)
ranks_std = np.std(ranks_arr,axis=0)

In [152]:
ranks_std

array([ 0.284686  ,  0.69339858,  1.43709535,  4.15960639,  1.13154684,
        4.42375333,  1.60518161,  1.613145  ,  2.63060345,  5.26728541,
       10.91180492,  2.99142448,  2.99973538, 15.84086123,  2.43819167,
        4.23773313, 13.35055066,  3.4079144 ,  5.31890177,  7.82439955,
        6.07222058,  3.65009029,  5.2254408 ,  2.48896383,  5.44289515,
       16.22298546,  3.58439707,  2.95779014,  5.38334197,  6.172697  ,
        1.70151866,  7.44299414,  3.41538144,  2.43273448,  3.35053503,
        5.50883209, 14.29037857,  9.14789919,  2.69879709,  5.22642279,
        4.79481315,  4.58099125,  7.10407436,  7.26369521,  6.47367806,
        3.61281487,  1.38830802,  4.36360457,  9.9933697 ,  6.29832701,
        2.43479905,  4.19765929,  4.5603392 ,  7.68725675,  2.24555022,
        6.4894788 ,  1.48514854,  4.30477553,  2.90715558,  2.14661963,
        2.95159469,  2.99642501,  6.53437417,  3.46510278,  3.76044497,
        6.57251252,  5.26591946,  4.31145594,  3.54232487,  2.00

Calculate descriptive stats per entity

In [153]:
# join the mean ranking to the corresponding roads
df['sens_test_rank_mean'] = ranks_mean
df['sens_test_rank_mode'] = ranks_mode[0][0]
df['sens_test_rank_std'] = ranks_std

if feat_type == 'rds':
    # calculate budget numbers
    df['sens_test_in_budget_pct'] = within_budget_pct # enable if testing budget
else:
    None

Create master file

In [154]:
# define sets of columns to filter by

id_col = 'Adm3_Code'
id_cols = ['ADM2_EN','ADM3_EN','Adm2_Code','Adm3_Code','Adm2_Focus']

ranking_cols = ['overall_eq_wt_idx','overall_eq_wt_rank','overall_educ_altwt_idx','overall_educ_altwt_rank','overall_educ_priority_wt_idx','overall_educ_priority_wt_rank',\
                'sens_test_rank_mean','sens_test_rank_mode','sens_test_rank_std',\
                'educ_altwt_idx_sens_std','health_idx_sens_std','markets_idx_sens_std',
                'educ_ewqt_pctile','educ_altwt_pctile','health_pctile','markets_pctile','admin_pctile','overall_pctile','deprivation_20pct']

if feat_type == 'rds':
    # append budget info
    ranking_cols.append('sens_test_in_budget_pct')
else:
    None

In [155]:
# merge all the relevant results + ID columns into a master DF
df_sens_test = pd.concat([df[id_cols],df[master_cols],df[ranking_cols]],axis=1)

In [156]:
# take a peek
df[['sens_test_rank_mode','overall_educ_priority_wt_rank','sens_test_rank_std']].sort_values('sens_test_rank_mode')[20:35]

Unnamed: 0,sens_test_rank_mode,overall_educ_priority_wt_rank,sens_test_rank_std
18,20,22,5.318902
21,23,21,3.65009
17,23,19,3.407914
26,24,24,3.584397
22,25,28,5.225441
23,25,26,2.488964
30,28,27,1.701519
27,29,30,2.95779
24,30,25,5.442895
28,31,31,5.383342


In [157]:
df.columns

Index(['Road_ID', 'District', 'Current_Road_Cond', 'Current_Surface',
       'Current_Road_Class', 'Upgrade_Road_Cond', 'Upgrade_Surface',
       'Upgrade_Road_Class', 'Terrain', 'Cost_PKR',
       ...
       'overall_pctile', 'overall_educ_priority_pctile', 'deprivation_20pct',
       'Adm2_Focus', 'educ_altwt_idx_sens_std', 'health_idx_sens_std',
       'markets_idx_sens_std', 'sens_test_rank_mean', 'sens_test_rank_mode',
       'sens_test_rank_std'],
      dtype='object', length=278)

Prepare long data

In [158]:
df_sbar = df[['ADM3_EN','Adm3_Code','ADM2_EN','Adm2_Code','overall_educ_priority_wt_rank','educ_altwt_idx','health_idx','markets_idx']]

df_sbar = df_sbar.melt(id_vars=['ADM3_EN','Adm3_Code','ADM2_EN','Adm2_Code','overall_educ_priority_wt_rank'],
             var_name='Access_indicators',
             value_name='Index_values').sort_values('overall_educ_priority_wt_rank')

# Rename to more human-friendly labels
df_sbar['Access_indicators'].replace({
                       'markets_idx' : 'Markets',
                       'educ_altwt_idx' : 'Education',
                       'health_idx' : 'Health'}, inplace=True)

In [159]:
df_sbar = pd.merge(df_sbar,df_sens_test.drop(['ADM2_EN','ADM3_EN','Adm2_Code','overall_educ_priority_wt_rank'],axis=1),how='left',on='Adm3_Code')

In [160]:
df_sbar

Unnamed: 0,ADM3_EN,Adm3_Code,ADM2_EN,Adm2_Code,overall_educ_priority_wt_rank,Access_indicators,Index_values,Adm2_Focus,educ_altwt_idx,health_idx,...,educ_altwt_idx_sens_std,health_idx_sens_std,markets_idx_sens_std,educ_ewqt_pctile,educ_altwt_pctile,health_pctile,markets_pctile,admin_pctile,overall_pctile,deprivation_20pct
0,Chitral,PK20601,Chitral,PK206,1,Education,0.711819,Yes,0.711819,1.000000,...,0.138759,0.194821,0.195466,98.717949,98.717949,100.000000,100.000000,100.000000,100.000000,0.25
1,Chitral,PK20601,Chitral,PK206,1,Education,0.711819,Yes,0.127877,0.192016,...,0.024928,0.037409,0.192967,79.487179,75.641026,78.205128,98.717949,98.717949,96.153846,0.50
2,Chitral,PK20601,Chitral,PK206,1,Education,0.711819,Yes,0.283116,0.457102,...,0.055189,0.089053,0.073948,89.743590,88.461538,93.589744,94.871795,93.589744,94.871795,1.00
3,Chitral,PK20601,Chitral,PK206,1,Education,0.711819,Yes,0.496074,0.281518,...,0.096703,0.054846,0.046197,94.871795,96.153846,91.025641,89.743590,74.358974,92.307692,0.75
4,Chitral,PK20601,Chitral,PK206,1,Education,0.711819,Yes,0.009153,0.220778,...,0.001784,0.043012,0.019148,21.794872,20.512821,82.051282,64.102564,23.076923,60.256410,0.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
928,Lakki Marwat,PK21301,Lakki Marwat,PK213,79,Health,0.000674,Yes,0.000373,0.000674,...,0.000073,0.000131,0.000173,1.282051,1.282051,2.564103,1.282051,21.794872,0.000000,0.00
929,Lakki Marwat,PK21301,Lakki Marwat,PK213,79,Markets,0.000886,Yes,0.151255,0.150663,...,0.029485,0.029352,0.043858,82.051282,82.051282,70.512821,85.897436,94.871795,82.051282,0.75
930,Lakki Marwat,PK21301,Lakki Marwat,PK213,79,Markets,0.000886,Yes,0.133161,0.111164,...,0.025958,0.021657,0.039199,75.641026,76.923077,64.102564,84.615385,92.307692,73.076923,0.50
931,Lakki Marwat,PK21301,Lakki Marwat,PK213,79,Markets,0.000886,Yes,0.090194,0.073824,...,0.017582,0.014382,0.009243,65.384615,64.102564,57.692308,30.769231,75.641026,47.435897,0.00


**Export**

In [161]:
df.to_csv(os.path.join(data_dir,tab_dir,f"final//{feat_type}_access_all_w_sensitivity_{today}.csv"),index=False)
df_sens_test.to_csv(os.path.join(data_dir,tab_dir,f"final//{feat_type}_access_sensitivity_tested_rankings_{today}.csv"),index=False)
df_sbar.to_csv(os.path.join(data_dir,tab_dir,f"final//{feat_type}_idx_sens_test_long_{today}.csv"),index=False)