In [2]:
import pandas as pd
import numpy as np
import scipy.stats as stats

### Load ground truth  
Load the ground truth data from `TableS5_S6_S7_S8_ref/NK_gt_stats.csv`.  
This file is the 'grid_year' sheet of `TableS4_ref/TableS4_evaluation.xlsx` with district information added.  

In [3]:
# Read the NK ground-truth statistics (one row per grid cell, keyed by 'y_x').
ground_truth = pd.read_csv('TableS5_S6_S7_S8_ref/NK_gt_stats.csv')
# Bare last expression so the notebook renders the loaded frame.
ground_truth

Unnamed: 0,district,y_x,year2016,year2017,year2018,year2019,4years_mean,FA2014,lnFA2014
0,0,6259_13990,0.105597,0.081331,0.109646,0.075321,0.092974,28240.915870,10.24852712
1,0,6259_13991,0.093337,0.102167,0.163974,0.098552,0.114507,32860.812480,10.40003612
2,0,6259_13992,0.101376,0.081884,0.119916,0.086924,0.097525,43246.583070,10.67467351
3,0,6260_13989,0.083021,0.031730,0.068891,0.018144,0.050447,3103.045177,8.040139223
4,0,6260_13990,0.088743,0.101351,0.124538,0.081862,0.099124,16225.334260,9.694329143
...,...,...,...,...,...,...,...,...,...
32573,209,6267_13903,0.103661,0.126874,0.127428,0.131098,0.122265,173559.077800,12.06427333
32574,209,6267_13904,0.112280,0.107932,0.104552,0.125163,0.112482,279545.043700,12.54091872
32575,209,6268_13901,0.100399,0.096662,0.125510,0.108526,0.107774,85932.149600,11.36131331
32576,209,6268_13902,0.076248,0.066684,0.110193,0.084869,0.084499,77966.322260,11.26403225


In [4]:
# Natural log of (FA2014 + 1) for every row of the ground truth.
ln_fa = np.log(ground_truth['FA2014'].astype(float) + 1)
# Attach it as 'lnFA_pos' and keep only the rows where it is strictly positive.
ground_truth = ground_truth.assign(lnFA_pos=ln_fa).loc[ln_fa > 0]
# Key + target table that is passed to spearman_corr in the cells below.
gt_summary = ground_truth[['y_x', 'lnFA_pos']]
gt_summary

Unnamed: 0,y_x,lnFA_pos
0,6259_13990,10.248563
1,6259_13991,10.400067
2,6259_13992,10.674697
3,6260_13989,8.040461
4,6260_13990,9.694391
...,...,...
32573,6267_13903,12.064279
32574,6267_13904,12.540922
32575,6268_13901,11.361325
32576,6268_13902,11.264045


In [5]:
def spearman_corr(gt_df, pred_df, score_col='score', target_col='lnFA_pos'):
    """Print the absolute Spearman rank correlation between scores and ground truth.

    Predicted scores are averaged per ``y_x`` key, left-joined onto the
    ground-truth table, and rows lacking either the target or the score are
    dropped before the correlation is computed.

    Parameters
    ----------
    gt_df : pandas.DataFrame
        Ground truth containing a ``y_x`` key column and ``target_col``.
    pred_df : pandas.DataFrame
        Predictions containing a ``y_x`` key column and ``score_col``;
        several rows may share the same ``y_x`` (they are averaged).
    score_col : str, default 'score'
        Name of the prediction column in ``pred_df``.
    target_col : str, default 'lnFA_pos'
        Name of the ground-truth column in ``gt_df``.

    Returns
    -------
    None
        The result is printed as ``spearman <abs(rho)>``.
    """
    # One mean score per y_x key, as a plain two-column frame for the join.
    cell_scores = pred_df.groupby('y_x')[score_col].mean().reset_index()
    joined = pd.merge(left=gt_df, right=cell_scores, how='left', on='y_x', sort=False)

    # Correlate only the two numeric columns. Calling DataFrame.corr on the
    # whole joined frame also tries to convert the string key 'y_x' to float,
    # which raises ValueError on pandas >= 2.0 (numeric_only defaults to False).
    paired = joined[[target_col, score_col]].dropna()

    method = 'spearman'
    rho = paired.corr(method=method).loc[target_col, score_col]
    # The absolute value is reported, so the sign of the association is discarded.
    print(method, abs(rho), end='\n')
    

## *siScore* performance  

Our model produces its scoring model at the third stage. All of its results for NK are saved in the `Stage3` directory.  
Materials path : `../../Stage3/nk_NK_scores.csv`, or simply `TableS5_S6_S7_S8_ref/nk_NK_scores.csv`

In [6]:
# Scores over NK saved in the Stage3 directory.
NK_df = pd.read_csv('../../Stage3/nk_NK_scores.csv')
spearman_corr(gt_df=gt_summary, pred_df=NK_df)

spearman 0.7739659532727418


## TableS5  
Training a CNN on satellite imagery from other countries (Nepal, Bangladesh, South Korea) to predict FA.  
Materials path : `TableS5_S6_S7_S8_ref/Nepal_to_NK.csv`, `TableS5_S6_S7_S8_ref/Bangladesh_to_NK.csv`, `TableS5_S6_S7_S8_ref/SK_to_NK.csv`

In [7]:
# Load the three single-country *_to_NK.csv tables in one pass.
Nepal_df, Bangladesh_df, SouthKorea_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'TableS5_S6_S7_S8_ref/Nepal_to_NK.csv',
        'TableS5_S6_S7_S8_ref/Bangladesh_to_NK.csv',
        'TableS5_S6_S7_S8_ref/SK_to_NK.csv',
    )
)

### Nepal to NK  

In [8]:
# Nepal -> NK: prints the absolute Spearman correlation against lnFA_pos.
spearman_corr(gt_df=gt_summary, pred_df=Nepal_df)

spearman 0.48052687284651696


### Bangladesh to NK  

In [9]:
# Bangladesh -> NK: prints the absolute Spearman correlation against lnFA_pos.
spearman_corr(gt_df=gt_summary, pred_df=Bangladesh_df)

spearman 0.5029620760610586


### South Korea to NK  

In [10]:
# South Korea -> NK: prints the absolute Spearman correlation against lnFA_pos.
spearman_corr(gt_df=gt_summary, pred_df=SouthKorea_df)

spearman 0.32895834659334444


As a result, we obtain TableS5 as follows:  
&nbsp;  
  
<div>  <img src="../Supplementary/Table/TableS5.png" width=500 /> </div>  


## TableS6

In [28]:
# Load the four multi-country train/validation result tables for TableS6.
t_BN_v_S_df, t_BS_v_N_df, t_NS_v_B_df, t_BNS_v_BNS_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'TableS5_S6_S7_S8_ref/train_Bangladesh_Nepal-val_SK_to_NK.csv',
        'TableS5_S6_S7_S8_ref/train_Bangladesh_SK-val_Nepal_to_NK.csv',
        'TableS5_S6_S7_S8_ref/train_Nepal_SK-val_Bangladesh_to_NK.csv',
        'TableS5_S6_S7_S8_ref/train_val_split_8020_Bangladesh_Nepal_SK_to_NK.csv',
    )
)

In [27]:
# Needs t_BN_v_S_df from the loading cell directly above; run that cell first.
spearman_corr(gt_df=gt_summary, pred_df=t_BN_v_S_df)

spearman 0.3620079770914801


In [29]:
# Evaluate the table read from train_Bangladesh_SK-val_Nepal_to_NK.csv.
spearman_corr(gt_df=gt_summary, pred_df=t_BS_v_N_df)

spearman 0.23273674628426652


In [30]:
# Evaluate the table read from train_Nepal_SK-val_Bangladesh_to_NK.csv.
spearman_corr(gt_df=gt_summary, pred_df=t_NS_v_B_df)

spearman 0.1010350643326365


In [31]:
# Evaluate the table read from train_val_split_8020_Bangladesh_Nepal_SK_to_NK.csv.
spearman_corr(gt_df=gt_summary, pred_df=t_BNS_v_BNS_df)

spearman 0.09243654409607122


As a result, we obtain TableS6 as follows:  
&nbsp;  
  
<div>  <img src="../Supplementary/Table/TableS6.png" width=800 /> </div>  

## TableS7


Comparing our model to a model that utilizes data-driven POG in Stage 2, and to the regression model.  
Materials path : `TableS5_S6_S7_S8_ref/NK_NL_guided.csv`, `TableS5_S6_S7_S8_ref/NK_LC_guided.csv`

### Regression  

For the regression models, we computed the Spearman correlation directly from each material using Excel.  
We obtained Spearman correlations of _0.4291 / 0.7050_ with the _nightlight / land cover_ based regression models, respectively.  

### Data-guided POG  

In [32]:
# Read NK_NL_guided.csv (presumably the nightlight-guided POG scores, going by the variable name — confirm against the file).
Nightlight_guided_df = pd.read_csv('TableS5_S6_S7_S8_ref/NK_NL_guided.csv')

In [33]:
# Evaluate the table read from NK_NL_guided.csv.
spearman_corr(gt_df=gt_summary, pred_df=Nightlight_guided_df)

spearman 0.5681678431045201


In [34]:
# Read NK_LC_guided.csv (presumably the land-cover-guided POG scores, going by the variable name — confirm against the file).
Landcover_guided_df = pd.read_csv('TableS5_S6_S7_S8_ref/NK_LC_guided.csv')

In [35]:
# Evaluate the table read from NK_LC_guided.csv.
spearman_corr(gt_df=gt_summary, pred_df=Landcover_guided_df)

spearman 0.7338402314263139


As a result, we obtain TableS7 as follows:  

&nbsp;  
  
<div>  <img src="../Supplementary/Table/TableS7.png" width=700 /> </div> 

## TableS8  


Report the performance of our model in ablation studies with other loss functions.  
We test the model using a pairwise loss / triplet loss (margin = 0.5).  

In [36]:
# Read NK_score_pairwise_05.csv (presumably the pairwise-loss ablation with margin 0.5, per the file name — confirm).
Pairwise_loss_df = pd.read_csv('TableS5_S6_S7_S8_ref/NK_score_pairwise_05.csv')

In [37]:
# Evaluate the table read from NK_score_pairwise_05.csv.
spearman_corr(gt_df=gt_summary, pred_df=Pairwise_loss_df)

spearman 0.652654068054562


In [38]:
# Read NK_score_triplet_05.csv (presumably the triplet-loss ablation with margin 0.5, per the file name — confirm).
Triplet_loss_df = pd.read_csv('TableS5_S6_S7_S8_ref/NK_score_triplet_05.csv')

In [39]:
# Evaluate the table read from NK_score_triplet_05.csv.
spearman_corr(gt_df=gt_summary, pred_df=Triplet_loss_df)

spearman 0.31920391866887526


As a result, we obtain TableS8 as follows:  

&nbsp;  
  
<div>  <img src="../Supplementary/Table/TableS8.png" width=700 /> </div> 