### Notebook to test the Wine Contest with results from Peter Guth

Carlos H Grohmann
2022

In [1]:
# imports
import sys,os
import pandas as pd
import numpy as np
from scipy.special import ndtri
import seaborn as sns

import demix_wine_functions as dw
from IPython.display import display

import qgrid

In [4]:
# Data locations -- CHANGE `base` AND `date` TO MATCH YOUR SETUP!
base = '.'
date = 'files_2022_06_01'
datadir = f'{base}/csv_files/{date}'
tables_dir = f'{base}/Friedmans_tables'

# List the CSV files found in the data dir.
# os.listdir() returns every entry of the directory, in arbitrary order, so
# keep only names ending in ".csv" and sort them: the list is then
# reproducible and safe to use as-is when opening "all the files".
csv_files = sorted(
    entry for entry in os.listdir(datadir)
    if entry.lower().endswith('.csv')
)
print('Available CSV files: \n')
for csv_name in csv_files:
    print(csv_name)

Available CSV files: 

Elev_diff_stats.csv
Roughness_diff_stats.csv
Slope_diff_stats.csv


In [5]:
# Criteria files to load. To open every available file instead, use:
# selected_csv_files = csv_files
selected_csv_files = [
    'Elev_diff_stats.csv',
    'Roughness_diff_stats.csv',
    'Slope_diff_stats.csv',
]

# build the criteria dataframe (one criterion per row) from the selected files
df_criteria = dw.make_criteria_df(selected_csv_files, datadir)

df_criteria

Unnamed: 0,AREA,DEMIX_TILE,REF_TYPE,REF_SLOPE,METRIC,COP,SRTM,ALOS,NASA,ASTER,FABDEM
0,us_3dep,N38VW124H,DTM,ALL,GRID_FULL,93.85,93.85,93.85,93.85,93.85,
1,us_3dep,N38VW124H,DTM,ALL,ELD_MIN,-35.41,-34.70,-29.11,-31.70,-43.26,
2,us_3dep,N38VW124H,DTM,ALL,ELD_Max,72.00,66.08,64.17,72.08,84.97,
3,us_3dep,N38VW124H,DTM,ALL,ELD_Mean,17.88,14.70,13.60,17.93,14.69,
4,us_3dep,N38VW124H,DTM,ALL,ELD_AVG,10.17,9.74,8.91,9.99,11.99,
...,...,...,...,...,...,...,...,...,...,...,...
4867,brazil,S24TW047G,DTM,STEEP,SMD_AB_MD,0.43,0.53,2.81,0.50,0.81,0.51
4868,brazil,S24TW047G,DTM,STEEP,SMD_AB_MN,4.73,6.97,26.69,7.50,5.55,9.85
4869,brazil,S24TW047G,DTM,STEEP,SMD_N,21884.00,21884.00,21954.00,21884.00,21884.00,21884.00
4870,brazil,S24TW047G,DTM,STEEP,SMD_LE90,17.97,18.66,38.36,18.79,20.37,20.94


In [6]:
# DEM names are all the columns to the right of 'METRIC'; the list is reused
# by the ranking and statistics calls further down
criteria_columns = list(df_criteria.columns)
mtrc_idx = criteria_columns.index('METRIC')
dem_list = criteria_columns[mtrc_idx + 1:]

# show the criteria dataframe in an interactive qgrid widget
grid = qgrid.QGridWidget(df=df_criteria)
display(grid)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [9]:
def _hide_zero_cells(value):
    """Return CSS making a cell fully transparent when it equals 0, else None."""
    if value == 0:
        return 'opacity: 0%;'
    return None


# dataframe as currently held by the qgrid widget (edited or not)
df_for_ranking = grid.get_changed_df()

# ranks of the DEMs for each criterion (error metric)
df_ranks = dw.make_rank_df(df_for_ranking, dem_list)

# Friedman statistics
dw.friedman_stats(df_ranks, dem_list, tables_dir, cl=0.05)
print()

# DEMs ranked
dw.print_dems_ranked(df_ranks, dem_list)
print()

# Bonferroni-Dunn test; cells equal to 0 are rendered transparent in the table
df_bd = dw.bonferroni_dunn_test(df_ranks, dem_list, alpha=0.95)
df_bd.style.applymap(_hide_zero_cells)

n = 1596 (number of criteria)
k = 6 (number of DEMs)
cf = 117306.0
sum of ranks = 28027.0
sum of (ranks squared) = 135633089.0
sum of (squared ranks) = 113815.0
chi_r = 73886.117
For k=6, CL=0.05, and N=1596, the critical value to compare is chi_crit=11.038
Yay!! We can reject the null hipothesis and go to the Post-Hoc analysis!!

             rank_sum  rank
FABDEM_rank    3707.0   1.0
COP_rank       3875.0   2.0
ALOS_rank      4042.0   3.0
NASA_rank      4999.0   4.0
SRTM_rank      5191.0   5.0
ASTER_rank     6213.0   6.0



Unnamed: 0,DEM,COP,SRTM,ALOS,NASA,ASTER,FABDEM
0,COP,0,Y,N,Y,Y,N
1,SRTM,0,0,Y,N,Y,Y
2,ALOS,0,0,0,Y,Y,Y
3,NASA,0,0,0,0,Y,Y
4,ASTER,0,0,0,0,0,Y
5,FABDEM,0,0,0,0,0,0
