In [1]:
import json
import pandas as pd
from pathlib import Path
from pandas import json_normalize

In [2]:
# Run label interpolated into both the input and the output directory names.
SUFFIX = '100-trials'

In [3]:
# Directory that receives the CSV files written by this notebook.
out_dir = Path(f'verification_stats_agg__{SUFFIX}')
# Create it when missing; an already existing directory is not an error.
out_dir.mkdir(exist_ok=True)
# Displayed as the cell output to confirm the directory is present.
out_dir.exists()

True

In [4]:
verification_dir = Path(f'verification_assessment_data__{SUFFIX}/')
# Keep sub-directories only. On older Python versions pathlib ignores the
# trailing slash of a glob pattern, so '*/' could also return plain files,
# which would then be mistaken for product ids. Sorting makes the order (and
# therefore the row order of the CSVs written later) reproducible.
dswx_verification_paths = sorted(
    path for path in verification_dir.glob('*') if path.is_dir()
)
# Drop hidden entries (names starting with '.'), which are not DSWx products
dswx_ids = [
    path.name
    for path in dswx_verification_paths
    if not path.name.startswith('.')
]
dswx_ids[:4]

['OPERA_L3_DSWx_HLS_T18VVN_20211002T164029Z_20221026T054210Z_S2B_30_v0.0']

In [5]:
def read_one_requirements_json(dswx_id):
    """Load the requirement-verification JSON of one DSWx product.

    Parameters
    ----------
    dswx_id : str
        Name of the product sub-directory inside ``verification_dir``; it is
        also embedded in the file name
        ``requirement_verification_<dswx_id>.json``.

    Returns
    -------
    object
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises
    ------
    FileNotFoundError
        If the expected JSON file does not exist.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    path = verification_dir / dswx_id / f'requirement_verification_{dswx_id}.json'
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(path) as f:
        return json.load(f)

In [6]:
# Load the requirement-verification record of every product, in dswx_ids order
data = [read_one_requirements_json(dswx_id) for dswx_id in dswx_ids]

In [7]:
# Build one table from the loaded records.
# NOTE(review): assumes each record is a flat JSON object whose keys become
# the columns (one row per product) — confirm against the JSON schema.
df = pd.DataFrame(data)
df.head()

Unnamed: 0,dswx_id,surface_water,partial_surface_water,dswx-WTR,dswx-BWTR,dswx-CONF,dswx-DIAG,dswx-WTR-1,dswx-WTR-2,dswx-LAND,...,hls_url_B04,hls_url_B05,hls_url_B06,hls_url_B07,hls_url_B08,hls_url_B09,hls_url_B10,hls_url_B11,hls_url_B12,hls_url_Fmask
0,OPERA_L3_DSWx_HLS_T18VVN_20211002T164029Z_2022...,True,True,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...


In [8]:
# Count products for each combination of the two requirement flags
cols = ['surface_water', 'partial_surface_water']
df.groupby(cols)[['dswx_id']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,dswx_id
surface_water,partial_surface_water,Unnamed: 2_level_1
True,True,1


In [9]:
# Save the verification table into out_dir; the row index is not written.
df.to_csv(out_dir / 'verification_results.csv', index=False)

# Additional Metrics

In [10]:
def read_one_assessment(dswx_id):
    """Load the assessment statistics of one DSWx product as a DataFrame.

    Parameters
    ----------
    dswx_id : str
        Name of the product sub-directory inside ``verification_dir``; it is
        also embedded in the file name ``stats_<dswx_id>.json``.

    Returns
    -------
    pandas.DataFrame
        The JSON document flattened by ``pd.json_normalize``; nested keys are
        joined with ``.`` to form the column names.

    Raises
    ------
    FileNotFoundError
        If the expected JSON file does not exist.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    path = verification_dir / dswx_id / f'stats_{dswx_id}.json'
    # Use a context manager so the file handle is closed deterministically.
    with open(path) as f:
        stats = json.load(f)
    # json_normalize already returns a DataFrame; no extra wrapping needed.
    return pd.json_normalize(stats)

In [11]:
# One metrics DataFrame per product id, in dswx_ids order
data_assessments = [read_one_assessment(dswx_id) for dswx_id in dswx_ids]
data_assessments[0]

Unnamed: 0,total_accuracy.mean,total_accuracy.std,acc_per_class.Not_Water.mean,acc_per_class.Not_Water.std,acc_per_class.Open_Surface_Water.mean,acc_per_class.Open_Surface_Water.std,acc_per_class.Partial_Surface_Water.mean,acc_per_class.Partial_Surface_Water.std,confusion_matrix.Not_Water_OPERA_DSWx.Not_Water_OPERA_Validation.mean,confusion_matrix.Not_Water_OPERA_DSWx.Not_Water_OPERA_Validation.std,...,f1_per_class.Partial_Surface_Water.mean,f1_per_class.Partial_Surface_Water.std,supp_per_class.Not_Water.mean,supp_per_class.Not_Water.std,supp_per_class.Open_Surface_Water.mean,supp_per_class.Open_Surface_Water.std,supp_per_class.Partial_Surface_Water.mean,supp_per_class.Partial_Surface_Water.std,dswx_id,planet_id
0,0.776866,0.017039,0.931657,0.010166,0.842415,0.015794,0.779661,0.016714,160.54,2.626285,...,0.567975,0.040138,188.32,5.519259,223.67,7.456344,89.01,7.527049,OPERA_L3_DSWx_HLS_T18VVN_20211002T164029Z_2022...,20211002_155415_1009


In [12]:
# Stack the per-product frames row-wise. Every input frame carries its own
# index, so without renumbering the combined frame would repeat index labels
# and label-based selection (e.g. .loc[0]) would match several rows.
df_assessments = pd.concat(data_assessments, axis=0, ignore_index=True)
df_assessments.head()

Unnamed: 0,total_accuracy.mean,total_accuracy.std,acc_per_class.Not_Water.mean,acc_per_class.Not_Water.std,acc_per_class.Open_Surface_Water.mean,acc_per_class.Open_Surface_Water.std,acc_per_class.Partial_Surface_Water.mean,acc_per_class.Partial_Surface_Water.std,confusion_matrix.Not_Water_OPERA_DSWx.Not_Water_OPERA_Validation.mean,confusion_matrix.Not_Water_OPERA_DSWx.Not_Water_OPERA_Validation.std,...,f1_per_class.Partial_Surface_Water.mean,f1_per_class.Partial_Surface_Water.std,supp_per_class.Not_Water.mean,supp_per_class.Not_Water.std,supp_per_class.Open_Surface_Water.mean,supp_per_class.Open_Surface_Water.std,supp_per_class.Partial_Surface_Water.mean,supp_per_class.Partial_Surface_Water.std,dswx_id,planet_id
0,0.776866,0.017039,0.931657,0.010166,0.842415,0.015794,0.779661,0.016714,160.54,2.626285,...,0.567975,0.040138,188.32,5.519259,223.67,7.456344,89.01,7.527049,OPERA_L3_DSWx_HLS_T18VVN_20211002T164029Z_2022...,20211002_155415_1009


In [14]:
# Save the combined metrics table into out_dir (row index not written), then
# display the destination path as the cell output.
df_assessments.to_csv(out_dir / 'metrics.csv', index=False)
out_dir / 'metrics.csv'

PosixPath('verification_stats_agg__100-trials/metrics.csv')