In [1]:
import json
import pandas as pd
from pathlib import Path
from pandas import json_normalize
from tqdm import tqdm

In [2]:
assessment_data = Path(f'out/assessment_data')
assessment_data.exists()

True

In [3]:
assessment_dir_paths = list(assessment_data.glob('*/'))
assessment_dir_paths = list(filter(lambda x: '.DS_Store' not in x.name, assessment_dir_paths))
assessment_dir_paths

[PosixPath('out/assessment_data/100-trials_conf-classes-none_sample-from-val')]

In [4]:
def get_dswx_ids(assessment_dir) -> list:
    dswx_verification_paths = list(assessment_dir.glob('*/'))
    dswx_ids = [path.name for path in dswx_verification_paths]
    # Remove DSWx paths
    dswx_ids = list(filter(lambda dswx_id: '.' != dswx_id[0], dswx_ids))
    return dswx_ids


def combine_requirement_verification_for_one_assessment(assessment_dir_path):
    out_dir = Path(f'out/verification_stats_agg/{assessment_dir_path.name}')
    out_dir.mkdir(exist_ok=True, parents=True)
    
    dswx_ids = get_dswx_ids(assessment_dir_path)
    
    def read_one_requirements_json(dswx_id):
        path = assessment_dir_path / dswx_id / f'requirement_verification_{dswx_id}.json'
        data = json.load(open(path))
        return data
    
    data = list(map(read_one_requirements_json, dswx_ids))
    df = pd.DataFrame(data)
    cols = ['surface_water','partial_surface_water']
    df[cols + ['dswx_id']].groupby(cols).count()
    
    out_path = out_dir / 'verification_results.csv'
    df.to_csv(out_path, index=False)
    
    return df, out_path

In [5]:
dfs_req, out_paths = zip(*list(map(combine_requirement_verification_for_one_assessment, tqdm(assessment_dir_paths))))

100%|██████| 1/1 [00:00<00:00, 35.23it/s]


In [6]:
dfs_req[0].head()

Unnamed: 0,dswx_id,surface_water,partial_surface_water,dswx-WTR,dswx-BWTR,dswx-CONF,dswx-DIAG,dswx-WTR-1,dswx-WTR-2,dswx-LAND,...,hls_url_B04,hls_url_B05,hls_url_B06,hls_url_B07,hls_url_B08,hls_url_B09,hls_url_B10,hls_url_B11,hls_url_B12,hls_url_Fmask
0,OPERA_L3_DSWx-HLS_T54JTN_20210911T004659Z_2023...,True,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
1,OPERA_L3_DSWx-HLS_T54KUG_20210924T005709Z_2023...,False,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
2,OPERA_L3_DSWx-HLS_T11ULP_20211019T190421Z_2023...,False,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
3,OPERA_L3_DSWx-HLS_T17TNM_20210916T161829Z_2023...,True,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
4,OPERA_L3_DSWx-HLS_T32PQA_20210911T094031Z_2023...,True,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...


In [7]:
out_paths[0]

PosixPath('out/verification_stats_agg/100-trials_conf-classes-none_sample-from-val/verification_results.csv')

# Additional Metrics

In [8]:
def combine_metrics_for_one_assessment(assessment_dir_path):
    out_dir = Path(f'out/verification_stats_agg/{assessment_dir_path.name}')
    
    def read_one_assessment(dswx_id):
        path = assessment_dir_path / dswx_id / f'stats_{dswx_id}.json'
        data = pd.DataFrame(pd.json_normalize(json.load(open(path))))
        return data
    
    dswx_ids = get_dswx_ids(assessment_dir_path)
    
    data_assessments = list(map(read_one_assessment, dswx_ids))
    df_metrics = pd.concat(data_assessments, axis=0).reset_index(drop=True)
    df_metrics.to_csv(out_dir / 'metrics.csv', index=False)
    out_path = out_dir / 'metrics.csv'
    df_metrics.to_csv(out_path, index=False)
    return df_metrics, out_path

In [9]:
dfs_metrics, out_paths = zip(*list(map(combine_metrics_for_one_assessment, tqdm(assessment_dir_paths))))

100%|██████| 1/1 [00:00<00:00, 11.03it/s]


In [10]:
dfs_metrics[0].head()

Unnamed: 0,total_accuracy.mean,total_accuracy.std,binary_water_acc.mean,binary_water_acc.std,acc_per_class.Not_Water.mean,acc_per_class.Not_Water.std,acc_per_class.Open_Surface_Water.mean,acc_per_class.Open_Surface_Water.std,acc_per_class.Partial_Surface_Water.mean,acc_per_class.Partial_Surface_Water.std,...,confusion_matrix.Open_Surface_Water_OPERA_DSWx.Open_Surface_Water_OPERA_Validation.mean,confusion_matrix.Open_Surface_Water_OPERA_DSWx.Open_Surface_Water_OPERA_Validation.std,confusion_matrix.Open_Surface_Water_OPERA_DSWx.Partial_Surface_Water_OPERA_Validation.mean,confusion_matrix.Open_Surface_Water_OPERA_DSWx.Partial_Surface_Water_OPERA_Validation.std,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Not_Water_OPERA_Validation.mean,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Not_Water_OPERA_Validation.std,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Open_Surface_Water_OPERA_Validation.mean,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Open_Surface_Water_OPERA_Validation.std,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Partial_Surface_Water_OPERA_Validation.mean,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Partial_Surface_Water_OPERA_Validation.std
0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,...,,,,,,,,,,
1,0.769888,0.002634,0.976629,0.002634,0.976629,0.002634,0.792921,0.000806,0.770225,0.002461,...,167.0,0.0,92.0,0.0,1.25,1.095215,0.0,0.0,10.0,0.0
2,0.71986,0.008536,0.982794,0.005041,0.982794,0.005041,0.736886,0.008887,0.72004,0.008594,...,165.81,1.011999,130.54,4.253151,1.77,1.316983,1.19,1.011999,29.7,3.909326
3,0.693713,0.008475,0.858663,0.005826,0.858663,0.005826,0.826547,0.009518,0.702216,0.008008,...,140.28,3.695698,60.14,2.16034,2.19,1.244747,22.5,3.385948,42.5,1.760969
4,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,...,,,,,,,,,,


In [11]:
out_paths[0]

PosixPath('out/verification_stats_agg/100-trials_conf-classes-none_sample-from-val/metrics.csv')

# For presentations

Aggregate all validation statistics for a particular set of validation runs

In [12]:
index = [k for k, out_path in enumerate(out_paths) if '100-trials_conf-classes-none_sample-from-val' in str(out_path)][0]
index

0

In [13]:
df_final = dfs_metrics[index]
out_path_final = out_paths[index]

In [14]:
COLUMN_RENAME = {'total_accuracy.mean': 'Total Accuracy (All Classes)',
                 'binary_water_acc.mean': 'Binary Accuarcy (OSW + PSW)',
                 'precision.Open_Surface_Water.mean': 'Precision (OSW)',
                 'acc_per_class.Open_Surface_Water.mean': 'Binary Accuracy (OSW)',
                 'acc_per_class.Partial_Surface_Water.mean': 'Binary Accuracy (PSW)',
                 'precision.Partial_Surface_Water.mean': 'Precision (PSW)',
                 'recall.Open_Surface_Water.mean': 'Recall (OSW)',
                 'recall.Partial_Surface_Water.mean': 'Recall (PSW)',
                 #'supp_per_class.Open_Surface_Water.mean': 'Support (Val) (OSW)',
                 #'supp_per_class.Partial_Surface_Water.mean': 'Support (Val) (PSW)',
                 'f1_per_class.Open_Surface_Water.mean': 'F1 (OSW)',
                 'f1_per_class.Partial_Surface_Water.mean': 'F1 (PSW)'}
COLUMNS = list(COLUMN_RENAME.keys())

In [15]:
df_temp = df_final[COLUMNS].agg(['mean', 'median', 'std'])
df_temp.rename(columns=COLUMN_RENAME, inplace=True)

cols_not_supp = [col for col in df_temp.columns if 'Support' not in col]
df_temp[cols_not_supp] = df_temp[cols_not_supp] * 100

df_f = df_temp.T
df_f = df_f.round(2).astype(str)

df_f = df_f.reset_index(drop=False)
df_f = df_f.rename(columns={'index': 'Metric'})
def class_labeler(metric):
    if '(OSW + PSW)' in metric:
        return 'OSW + PSW'
    if 'OSW' in metric:
        return 'OSW'
    if 'PSW' in metric:
        return 'PSW'
    else:
        return 'All'
df_f['Class'] = df_f.Metric.map(class_labeler)

lookup_order = {'All': 0, 'OSW + PSW': 1, 'OSW': 2, 'PSW': 3}
df_f['Class_sort'] = df_f.Class.map(lambda c: lookup_order[c])

df_f['Metric'] = df_f.Metric.map(lambda m: m.split('(')[0])
df_f['Metric'] = df_f.Metric.map(lambda m: m + ' ($\%$)' if 'Support' not in m else m)
df_f.sort_values(by=['Class_sort', 'Metric'], inplace=True)
df_f = df_f.set_index(['Class', 'Metric'])
df_f.drop(columns=['Class_sort'], inplace=True)
df_f.rename(columns={'mean': 'Mean', 
                     'std': 'St. Dev.', 
                     'median': 'Median'}, inplace=True)
df_f

Unnamed: 0_level_0,Unnamed: 1_level_0,Mean,Median,St. Dev.
Class,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
All,Total Accuracy ($\%$),84.6,84.72,14.24
OSW + PSW,Binary Accuarcy ($\%$),94.11,96.43,7.65
OSW,Binary Accuracy ($\%$),90.12,96.48,11.26
OSW,F1 ($\%$),85.03,90.04,17.57
OSW,Precision ($\%$),81.25,93.79,21.49
OSW,Recall ($\%$),95.08,100.0,14.81
PSW,Binary Accuracy ($\%$),84.97,84.73,13.74
PSW,F1 ($\%$),51.24,44.46,40.34
PSW,Precision ($\%$),76.18,90.6,32.08
PSW,Recall ($\%$),57.36,56.34,42.47


In [16]:
presentation_dir = Path('presentation_images') / out_path_final.parent.name 
presentation_dir.mkdir(exist_ok=True, parents=True)
presentation_dir

PosixPath('presentation_images/100-trials_conf-classes-none_sample-from-val')

In [17]:
latex = df_f.style.to_latex(multirow_align='t', hrules=True)
with open(presentation_dir / 'total_accuracy_for_all_validation.tex', 'w') as f:
    f.write(latex)

In [18]:
df_req = dfs_req[index]

In [19]:
n_osw_passes = df_req.surface_water.sum()
n_pws_passes = df_req.partial_surface_water.sum()
n_both_pass = (df_req.surface_water & df_req.partial_surface_water).sum()
n_pws_passes, n_osw_passes, n_both_pass

(41, 40, 37)

In [20]:
n_osw_fails = (~df_req.surface_water).sum()
n_pws_fails = (~df_req.partial_surface_water).sum()
n_both_fail = (~df_req.surface_water | ~df_req.partial_surface_water).sum()
n_osw_fails, n_pws_fails, n_both_pass

(12, 11, 37)

In [21]:
df_passes = pd.DataFrame([{'Class': 'Open Surface Water (OSW)',
                          'Pass': n_osw_passes,
                          'Not Pass': n_osw_fails},
                         {'Class': 'Partial Surface Water (PSW)',
                          'Pass': n_pws_passes,
                          'Not Pass': n_pws_fails},
                         {'Class': 'Both (OSW + PSW)',
                          'Pass': n_both_pass,
                          'Not Pass': n_both_fail}])
df_passes = df_passes.set_index('Class')
df_passes

Unnamed: 0_level_0,Pass,Not Pass
Class,Unnamed: 1_level_1,Unnamed: 2_level_1
Open Surface Water (OSW),40,12
Partial Surface Water (PSW),41,11
Both (OSW + PSW),37,15


In [22]:
latex = df_passes.style.to_latex(multirow_align='t', hrules=True)
with open(presentation_dir / 'total_passes.tex', 'w') as f:
    f.write(latex)