In [7]:
import json
import pandas as pd
from pathlib import Path
from pandas import json_normalize
from tqdm import tqdm

In [8]:
assessment_data = Path(f'out/assessment_data')
assessment_data.exists()

True

In [9]:
assessment_dir_paths = list(assessment_data.glob('*/'))
assessment_dir_paths = list(filter(lambda x: '.DS_Store' not in x.name, assessment_dir_paths))
assessment_dir_paths

[PosixPath('out/assessment_data/100-trials_conf-geq-0_sample-from-val')]

In [10]:
def get_dswx_ids(assessment_dir) -> list:
    dswx_verification_paths = list(assessment_dir.glob('*/'))
    dswx_ids = [path.name for path in dswx_verification_paths]
    # Remove DSWx paths
    dswx_ids = list(filter(lambda dswx_id: '.' != dswx_id[0], dswx_ids))
    return dswx_ids


def combine_requirement_verification_for_one_assessment(assessment_dir_path):
    out_dir = Path(f'out/verification_stats_agg/{assessment_dir_path.name}')
    out_dir.mkdir(exist_ok=True, parents=True)
    
    dswx_ids = get_dswx_ids(assessment_dir_path)
    
    def read_one_requirements_json(dswx_id):
        path = assessment_dir_path / dswx_id / f'requirement_verification_{dswx_id}.json'
        data = json.load(open(path))
        return data
    
    data = list(map(read_one_requirements_json, dswx_ids))
    df = pd.DataFrame(data)
    cols = ['surface_water','partial_surface_water']
    df[cols + ['dswx_id']].groupby(cols).count()
    
    out_path = out_dir / 'verification_results.csv'
    df.to_csv(out_path, index=False)
    
    return df, out_path

In [11]:
dfs_req, out_paths = zip(*list(map(combine_requirement_verification_for_one_assessment, tqdm(assessment_dir_paths))))

100%|██████████████| 1/1 [00:00<00:00, 80.23it/s]


In [12]:
dfs_req[0].head()

Unnamed: 0,dswx_id,surface_water,partial_surface_water,dswx-WTR,dswx-BWTR,dswx-CONF,dswx-DIAG,dswx-WTR-1,dswx-WTR-2,dswx-LAND,...,hls_url_B04,hls_url_B05,hls_url_B06,hls_url_B07,hls_url_B08,hls_url_B09,hls_url_B10,hls_url_B11,hls_url_B12,hls_url_Fmask
0,OPERA_L3_DSWx-HLS_T54JTN_20210911T004659Z_2023...,True,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
1,OPERA_L3_DSWx-HLS_T52VFP_20210910T023549Z_2023...,True,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
2,OPERA_L3_DSWx-HLS_T47ULQ_20210911T043701Z_2023...,False,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
3,OPERA_L3_DSWx-HLS_T18UXG_20210902T154154Z_2023...,True,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,,https://data.lpdaac.earthdatacloud.nasa.gov/lp...
4,OPERA_L3_DSWx-HLS_T30TYN_20210905T105621Z_2023...,True,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...


In [13]:
out_paths[0]

PosixPath('out/verification_stats_agg/100-trials_conf-geq-0_sample-from-val/verification_results.csv')

# Additional Metrics

In [14]:
def combine_metrics_for_one_assessment(assessment_dir_path):
    out_dir = Path(f'out/verification_stats_agg/{assessment_dir_path.name}')
    
    def read_one_assessment(dswx_id):
        path = assessment_dir_path / dswx_id / f'stats_{dswx_id}.json'
        data = pd.DataFrame(pd.json_normalize(json.load(open(path))))
        return data
    
    dswx_ids = get_dswx_ids(assessment_dir_path)
    
    data_assessments = list(map(read_one_assessment, dswx_ids))
    df_metrics = pd.concat(data_assessments, axis=0).reset_index(drop=True)
    df_metrics.to_csv(out_dir / 'metrics.csv', index=False)
    out_path = out_dir / 'metrics.csv'
    df_metrics.to_csv(out_path, index=False)
    return df_metrics, out_path

In [15]:
dfs_metrics, out_paths = zip(*list(map(combine_metrics_for_one_assessment, tqdm(assessment_dir_paths))))

100%|██████████████| 1/1 [00:00<00:00, 33.44it/s]


In [16]:
dfs_metrics[0].head()

Unnamed: 0,total_accuracy.mean,total_accuracy.std,binary_water_acc.mean,binary_water_acc.std,acc_per_class.Not_Water.mean,acc_per_class.Not_Water.std,acc_per_class.Open_Surface_Water.mean,acc_per_class.Open_Surface_Water.std,acc_per_class.Partial_Surface_Water.mean,acc_per_class.Partial_Surface_Water.std,...,confusion_matrix.Open_Surface_Water_OPERA_DSWx.Open_Surface_Water_OPERA_Validation.mean,confusion_matrix.Open_Surface_Water_OPERA_DSWx.Open_Surface_Water_OPERA_Validation.std,confusion_matrix.Open_Surface_Water_OPERA_DSWx.Partial_Surface_Water_OPERA_Validation.mean,confusion_matrix.Open_Surface_Water_OPERA_DSWx.Partial_Surface_Water_OPERA_Validation.std,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Not_Water_OPERA_Validation.mean,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Not_Water_OPERA_Validation.std,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Open_Surface_Water_OPERA_Validation.mean,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Open_Surface_Water_OPERA_Validation.std,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Partial_Surface_Water_OPERA_Validation.mean,confusion_matrix.Partial_Surface_Water_OPERA_DSWx.Partial_Surface_Water_OPERA_Validation.std
0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,...,,,,,,,,,,
1,0.788244,0.012123,0.894371,0.009694,0.894371,0.009694,0.880679,0.010629,0.801437,0.011261,...,125.76,4.62562,18.5,2.496462,1.91,1.39331,34.67,4.410857,104.1,4.066369
2,0.721588,0.004837,0.985773,0.004775,0.985773,0.004775,0.733794,0.00207,0.723608,0.004619,...,166.2,0.876172,128.0,0.0,4.92,2.205045,0.13,0.337998,22.0,0.0
3,0.802475,0.016537,0.882495,0.012885,0.882495,0.012885,0.918623,0.010991,0.803832,0.016661,...,163.25,1.849788,36.41,4.946369,57.51,6.422019,3.68,1.836114,129.91,4.997161
4,0.791876,0.014094,0.842156,0.011631,0.842156,0.011631,0.947665,0.006906,0.793932,0.014026,...,140.78,3.459959,0.0,0.0,0.37,0.661419,25.19,3.460294,89.32,5.672688


In [17]:
out_paths[0]

PosixPath('out/verification_stats_agg/100-trials_conf-geq-0_sample-from-val/metrics.csv')

# For presentations

Aggregate all validation statistics for a particular set of validation runs

In [19]:
index = [k for k, out_path in enumerate(out_paths) if '100-trials_conf-geq-0_sample-from-val' in str(out_path)][0]
index

0

In [20]:
df_final = dfs_metrics[index]
out_path_final = out_paths[index]

In [21]:
COLUMN_RENAME = {'total_accuracy.mean': 'Total Accuracy (All Classes)',
                 'binary_water_acc.mean': 'Binary Accuarcy (OSW + PSW)',
                 'precision.Open_Surface_Water.mean': 'Precision (OSW)',
                 'acc_per_class.Open_Surface_Water.mean': 'Binary Accuracy (OSW)',
                 'acc_per_class.Partial_Surface_Water.mean': 'Binary Accuracy (PSW)',
                 'precision.Partial_Surface_Water.mean': 'Precision (PSW)',
                 'recall.Open_Surface_Water.mean': 'Recall (OSW)',
                 'recall.Partial_Surface_Water.mean': 'Recall (PSW)',
                 #'supp_per_class.Open_Surface_Water.mean': 'Support (Val) (OSW)',
                 #'supp_per_class.Partial_Surface_Water.mean': 'Support (Val) (PSW)',
                 'f1_per_class.Open_Surface_Water.mean': 'F1 (OSW)',
                 'f1_per_class.Partial_Surface_Water.mean': 'F1 (PSW)'}
COLUMNS = list(COLUMN_RENAME.keys())

In [22]:
df_temp = df_final[COLUMNS].agg(['mean', 'median', 'std'])
df_temp.rename(columns=COLUMN_RENAME, inplace=True)

cols_not_supp = [col for col in df_temp.columns if 'Support' not in col]
df_temp[cols_not_supp] = df_temp[cols_not_supp] * 100

df_f = df_temp.T
df_f = df_f.round(2).astype(str)

df_f = df_f.reset_index(drop=False)
df_f = df_f.rename(columns={'index': 'Metric'})
def class_labeler(metric):
    if '(OSW + PSW)' in metric:
        return 'OSW + PSW'
    if 'OSW' in metric:
        return 'OSW'
    if 'PSW' in metric:
        return 'PSW'
    else:
        return 'All'
df_f['Class'] = df_f.Metric.map(class_labeler)

lookup_order = {'All': 0, 'OSW + PSW': 1, 'OSW': 2, 'PSW': 3}
df_f['Class_sort'] = df_f.Class.map(lambda c: lookup_order[c])

df_f['Metric'] = df_f.Metric.map(lambda m: m.split('(')[0])
df_f['Metric'] = df_f.Metric.map(lambda m: m + ' ($\%$)' if 'Support' not in m else m)
df_f.sort_values(by=['Class_sort', 'Metric'], inplace=True)
df_f = df_f.set_index(['Class', 'Metric'])
df_f.drop(columns=['Class_sort'], inplace=True)
df_f.rename(columns={'mean': 'Mean', 
                     'std': 'St. Dev.', 
                     'median': 'Median'}, inplace=True)
df_f

Unnamed: 0_level_0,Unnamed: 1_level_0,Mean,Median,St. Dev.
Class,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
All,Total Accuracy ($\%$),85.77,80.76,15.31
OSW + PSW,Binary Accuarcy ($\%$),93.08,98.58,8.34
OSW,Binary Accuracy ($\%$),92.17,98.22,10.17
OSW,F1 ($\%$),86.09,97.28,21.72
OSW,Precision ($\%$),92.47,100.0,13.2
OSW,Recall ($\%$),87.07,99.91,25.85
PSW,Binary Accuracy ($\%$),86.29,82.76,14.67
PSW,F1 ($\%$),75.34,72.56,25.17
PSW,Precision ($\%$),84.24,88.0,19.18
PSW,Recall ($\%$),73.65,77.79,29.15


In [23]:
presentation_dir = Path('presentation_images') / out_path_final.parent.name 
presentation_dir.mkdir(exist_ok=True, parents=True)
presentation_dir

PosixPath('presentation_images/100-trials_conf-geq-0_sample-from-val')

In [24]:
latex = df_f.style.to_latex(multirow_align='t', hrules=True)
with open(presentation_dir / 'total_accuracy_for_all_validation.tex', 'w') as f:
    f.write(latex)

In [25]:
df_req = dfs_req[index]

In [26]:
n_osw_passes = df_req.surface_water.sum()
n_pws_passes = df_req.partial_surface_water.sum()
n_both_pass = (df_req.surface_water & df_req.partial_surface_water).sum()
n_pws_passes, n_osw_passes, n_both_pass

(15, 14, 14)

In [27]:
n_osw_fails = (~df_req.surface_water).sum()
n_pws_fails = (~df_req.partial_surface_water).sum()
n_both_fail = (~df_req.surface_water | ~df_req.partial_surface_water).sum()
n_osw_fails, n_pws_fails, n_both_pass

(3, 2, 14)

In [28]:
df_passes = pd.DataFrame([{'Class': 'Open Surface Water (OSW)',
                          'Pass': n_osw_passes,
                          'Not Pass': n_osw_fails},
                         {'Class': 'Partial Surface Water (PSW)',
                          'Pass': n_pws_passes,
                          'Not Pass': n_pws_fails},
                         {'Class': 'Both (OSW + PSW)',
                          'Pass': n_both_pass,
                          'Not Pass': n_both_fail}])
df_passes = df_passes.set_index('Class')
df_passes

Unnamed: 0_level_0,Pass,Not Pass
Class,Unnamed: 1_level_1,Unnamed: 2_level_1
Open Surface Water (OSW),14,3
Partial Surface Water (PSW),15,2
Both (OSW + PSW),14,3


In [30]:
latex = df_passes.style.to_latex(multirow_align='t', hrules=True)
with open(presentation_dir / 'total_passes.tex', 'w') as f:
    f.write(latex)