### Notebook to show the Wine Contest with results from MicroDEM

Notebook by Carlos H Grohmann (IEE-USP, Brazil)  
last update 2022-07-04

### 1 - Import python libraries

In [1]:
# imports
import sys,os
import pandas as pd
import numpy as np
from scipy.special import ndtri
import seaborn as sns

from ipywidgets import Button
from tkinter import Tk, filedialog
from IPython.display import clear_output, display

import demix_wine_functions as dw

import qgrid

### 2 - Set data directories and define how to select CSV files  

You can choose the CSV files in two ways:  

1 - run this cell to create a "Select CSV file(s)" button, then choose a local file (re-run the cell to re-create the button and choose a different file)  

2 - uncomment the last lines to just print a list of CSV files in the "root_dir"
directory. In the next cell, you can then add the wanted files to a list manually.

In [2]:
# Set data directories.
# root_dir: base directory, also used by the optional CSV listing below
# tables_dir: handed to dw.friedman_stats further down in the notebook
root_dir = '.'
tables_dir = f'{root_dir}/Friedmans_tables'

# --------------------------------------------------------
# Option 1: choose local CSV file(s) with a button.
# Running this cell creates the button; dw.select_files is registered
# as its click handler.
open_csv_files = Button(description="Select CSV file(s)")
# start with an empty selection; a later cell reads open_csv_files.files
# (presumably dw.select_files stores the chosen paths here - confirm in dw)
open_csv_files.files = ()
open_csv_files.on_click(dw.select_files)
# bare expression: while the listing code below stays commented out, this
# is the last statement of the cell, so the notebook shows the button
open_csv_files

# --------------------------------------------------------
# Option 2: list all CSV files found in "root_dir", so they can be
# added manually to the list of files to be opened (next code cell).
# Uncomment the lines below to print the listing.

# all_files = os.listdir(root_dir)
# csv_files = [f for f in all_files if f.endswith('.csv')]
# print('Available CSV files: \n')
# for f in csv_files:
#     print(f)

Selected file(s):'demix_merged_transposed-13june2022.csv'


### 3 - Define which files will be opened: those selected using the button/file chooser dialog, or those defined manually in a list

In [4]:
# --------------------------------------------------------
# Default: use the file(s) chosen through the button / file chooser dialog.
# The button is created with an empty tuple in `.files`, so click it and
# pick the file(s) BEFORE running this cell.
selected_csv_files = open_csv_files.files

# --------------------------------------------------------
# Alternative: uncomment the lines below to use a manually written list of
# file names (looked up inside root_dir) instead of the button selection
# files_list = ['file1.csv','file2.csv','file3.csv']
# selected_csv_files = [f'{root_dir}/{f}' for f in files_list]

### 4 - Read CSV and create dataframe

In [5]:
# Build the criteria dataframe (one criterion per row) from the chosen CSVs
df_criteria = dw.make_criteria_df(selected_csv_files)

# The DEM names are the column labels that come after the 'CRITERION'
# column; keep them in a list, it is used again in later cells
criteria_columns = list(df_criteria.columns)
crit_idx = criteria_columns.index('CRITERION')
dem_list = criteria_columns[crit_idx + 1:]

### 5 - Display the dataframe using qgrid  

You can use the filter controls next to each column name to further select which data you want to be considered

In [6]:
# Per-column options for the qgrid widget: mark every DEM column as
# not editable.
# A dict comprehension is used so that each column gets its OWN options
# dict. dict.fromkeys(dem_list, {...}) would make all keys point to one
# shared dict object, so changing the options of a single column later
# would silently change them for every column.
column_defs = {dem: {'editable': False} for dem in dem_list}
column_defs

{'FABDEM': {'editable': False},
 'COP': {'editable': False},
 'ALOS': {'editable': False},
 'NASA': {'editable': False},
 'SRTM': {'editable': False},
 'ASTER': {'editable': False}}

In [7]:
# Interactive grid view of the criteria table, configured with the
# per-column options stored in column_defs
grid = qgrid.show_grid(
    data_frame=df_criteria,
    column_definitions=column_defs,
)
display(grid)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

### 6 - Run the statistical analysis and check results

Run the next cell every time you change the selection of the dataframe using the filter controls to get new results based on the filtered data

In [8]:
# Take the dataframe as it currently stands in the qgrid widget
# (changed through the filter controls, or not)
df_for_ranking = grid.get_changed_df()

# Compute the ranks for the criteria (error metrics)
df_ranks = dw.make_rank_df(df_for_ranking, dem_list)

# Friedman statistics
# NOTE(review): 0.05 reads like a significance level rather than a
# confidence level - confirm how dw.friedman_stats interprets `cl`
CL = 0.05
dw.friedman_stats(df_ranks, dem_list, tables_dir, cl=CL)
print()

# DEMs ordered by rank
dw.print_dems_ranked(df_ranks, dem_list)
print()

# Bonferroni-Dunn post-hoc test
# NOTE(review): alpha=0.95 is passed here while CL=0.05 is used above -
# confirm which convention dw.bonferroni_dunn_test expects
df_bd = dw.bonferroni_dunn_test(df_ranks, dem_list, alpha=0.95)

# Show the result with the zero-valued cells made fully transparent
df_bd.style.applymap(
    lambda cell: None if cell != 0 else 'opacity: 0%;'
)

N = 4440 (number of criteria)
k = 6 (number of DEMs)
CF = 326340.0
sum of ranks (vector) = [12874.0, 10596.0, 10359.0, 16638.0, 17092.0, 25258.0]
sum of (ranks squared) = [165739876.0, 112275216.0, 107308881.0, 276823044.0, 292136464.0, 637966564.0]
sum of squared ranks = 401053.0
sum of ranks squared (total) = 1592250045.0
chi_r = 9590.061
For k=6, CL=0.05, and N=4440, the critical value to compare is chi_crit=11.038
Yay!! We can reject the null hipothesis and go to the Post-Hoc analysis!!

             rank_sum  rank
ALOS_rank     10359.0   1.0
COP_rank      10596.0   2.0
FABDEM_rank   12874.0   3.0
NASA_rank     16638.0   4.0
SRTM_rank     17092.0   5.0
ASTER_rank    25258.0   6.0



Unnamed: 0,DEM,FABDEM,COP,ALOS,NASA,SRTM,ASTER
0,FABDEM,0,Y,Y,Y,Y,Y
1,COP,0,0,N,Y,Y,Y
2,ALOS,0,0,0,Y,Y,Y
3,NASA,0,0,0,0,Y,Y
4,SRTM,0,0,0,0,0,Y
5,ASTER,0,0,0,0,0,0
