### Notebook to test the Wine Contest with results from Peter Guth

Carlos H Grohmann
2022

In [1]:
# imports
import sys,os
import pandas as pd
import numpy as np
from scipy.special import ndtri
import seaborn as sns

import demix_wine_functions as dw
from IPython.display import display

import qgrid

# Data locations -- CHANGE `base` TO MATCH YOUR MACHINE!
# `datadir` holds the input CSV files; `tables_dir` holds the Friedman tables
# that are handed to dw.friedman_stats() further down.
base = '/mnt/d'
# base = '/home/guano'
datadir = f'{base}/Dropbox/USP/projetosPesquisa/Global_DEMs/DEMIX/SG_2/wine_jupyter/csv_files/files_2022_05_11'
tables_dir = f'{base}/Dropbox/USP/projetosPesquisa/Global_DEMs/DEMIX/SG_2/wine_jupyter/Friedmans_tables'

# List the CSV files found in `datadir`.
# os.listdir() returns *every* entry, in arbitrary order, so keep only the
# names ending in .csv and sort them: the listing is then reproducible and
# `csv_files` is safe to use directly as the list of files to open.
csv_files = sorted(
    name for name in os.listdir(datadir)
    if name.lower().endswith('.csv')
)
print('Available CSV files: \n')
for f in csv_files:
    print(f)

Available CSV files: 

Elev_diff_stats.csv
Roughness_diff_stats.csv
Slope_diff_stats.csv


In [2]:
# Files to load. To open ALL the files listed above, use instead:
# selected_csv_files = csv_files
selected_csv_files = [
    'Elev_diff_stats.csv',
    'Roughness_diff_stats.csv',
    'Slope_diff_stats.csv',
]

# build the dataframe of criteria (one criterion per row) from the selected files
df_criteria = dw.make_criteria_df(selected_csv_files, datadir)

# the DEM names are all the columns to the right of 'METRIC';
# `dem_list` is reused by the ranking and statistics calls later on
criteria_columns = list(df_criteria.columns)
mtrc_idx = criteria_columns.index('METRIC')
dem_list = criteria_columns[mtrc_idx + 1:]

# optional: inspect/edit the dataframe interactively with qgrid
# grid = qgrid.QGridWidget(df=df_criteria)
# display(grid)
df_criteria

Unnamed: 0,AREA,DEMIX_TILE,REF_TYPE,METRIC,COP,SRTM,ALOS,NASA,ASTER,FABDEM
0,la_palma,N28UW018B,DSM,GRID_FULL,87.18,87.18,87.18,87.18,87.18,87.18
1,la_palma,N28UW018B,DSM,ELD_MIN,-41.90,-108.01,-49.62,-100.11,-78.62,-47.53
2,la_palma,N28UW018B,DSM,ELD_Max,120.57,190.56,139.80,137.56,125.01,120.16
3,la_palma,N28UW018B,DSM,ELD_Mean,1.36,7.34,3.48,5.15,8.76,-1.75
4,la_palma,N28UW018B,DSM,ELD_AVG,5.27,5.54,3.33,5.50,6.36,6.22
...,...,...,...,...,...,...,...,...,...,...
1477,brazil,S08QW038D,DTM,SMD_MAE,1.50,2.11,1.14,2.10,3.77,1.55
1478,brazil,S08QW038D,DTM,SMD_AB_MD,1.09,1.70,0.80,1.70,3.00,1.13
1479,brazil,S08QW038D,DTM,SMD_AB_MN,0.03,0.08,0.09,0.10,2.15,0.73
1480,brazil,S08QW038D,DTM,SMD_N,127449.00,127449.00,127449.00,127449.00,127449.00,127449.00


In [3]:
# Hand-picked selection, used to compare the results with the CL spreadsheet.

# DEMIX tiles to keep
tiles_cl = [
    'N28UW018B',
    'N28VW018B',
    'N28XW018A',
    'N28XW018B',
    'N43PW002B',
    'N43PW002C',
    'N59QE009G',
    'N59RE009G',
    'N59TE009G',
    'S08PW038C',
    'S08PW038D',
    'S08QW038C',
    'S08QW038D',
]

# error metrics (criteria) to keep
metrics_cl = [
    'ELD_RMSE',
    'ELD_MAE',
    'ELD_LE90',
    'SMD_RMSE',
    'SMD_MAE',
    'SMD_LE90',
    'RUFD_RMSE',
    'RUFD_MAE',
    'RUFD_LE90',
]

# test areas to keep
areas_cl = [
    'la_palma',
    'pyrenees',
    'norway',
    'brazil',
]

In [5]:
# Rows that enter the ranking.
# To use a selection edited interactively in qgrid instead:
# df_for_ranking = grid.get_changed_df()

# Keep only the tiles, metrics and areas chosen for the spreadsheet comparison.
# The three conditions are combined into ONE boolean mask built on df_criteria,
# so the mask and the frame it filters always share the same index (no chained
# .loc calls that depend on pandas re-aligning a mask from the unfiltered frame).
selection_mask = (
    df_criteria['DEMIX_TILE'].isin(tiles_cl)
    & df_criteria['METRIC'].isin(metrics_cl)
    & df_criteria['AREA'].isin(areas_cl)
)
df_for_ranking = df_criteria.loc[selection_mask]

# calculate ranks for criteria (error metrics) in dataframes
df_ranks = dw.make_rank_df(df_for_ranking, dem_list)

# Friedman stats (prints its report)
dw.friedman_stats(df_ranks, dem_list, tables_dir, cl=0.05)
print()

# DEMs ranked
dw.print_dems_ranked(df_ranks, dem_list)
print()

# apply Bonferroni-Dunn test
# NOTE(review): alpha=0.95 here while friedman_stats above uses cl=0.05 --
# confirm against demix_wine_functions that both refer to the intended level.
df_bd = dw.bonferroni_dunn_test(df_ranks, dem_list, alpha=0.95)
# make cells whose value is 0 fully transparent, so only the other entries show
df_bd.style.applymap(lambda v: 'opacity: 0%;' if (v==0) else None)

n = 234 (number of criteria)
k = 6 (number of DEMs)
cf = 17199.0
sum of ranks = 4901.0
sum of (ranks squared) = 4743791.0
sum of (squared ranks) = 21199.0
chi_r = 899.031
For k=6, CL=0.05, and N=234, the critical value to compare is chi_crit=11.038
Yay!! We can reject the null hipothesis and go to the Post-Hoc analysis!!

             rank_sum  rank
ALOS_rank       340.0   1.0
COP_rank        484.0   2.0
FABDEM_rank     679.0   3.0
NASA_rank       967.0   4.0
SRTM_rank      1069.0   5.0
ASTER_rank     1362.0   6.0



Unnamed: 0,DEM,COP,SRTM,ALOS,NASA,ASTER,FABDEM
0,COP,0,Y,Y,Y,Y,Y
1,SRTM,0,0,Y,Y,Y,Y
2,ALOS,0,0,0,Y,Y,Y
3,NASA,0,0,0,0,Y,Y
4,ASTER,0,0,0,0,0,Y
5,FABDEM,0,0,0,0,0,0
