In [1]:
from pathlib import Path
import pandas as pd
import sklearn.metrics
import numpy as np
from tqdm import tqdm
import geopandas as gpd

# Output Directory

In [2]:
# Folder that receives the tables aggregated across all sites; safe to re-run.
across_sites_dir = Path('out/_all_sites_aggregated')
across_sites_dir.mkdir(parents=True, exist_ok=True)

# Sites

In [3]:
# Validation-site table read from GeoJSON. The preview shows that a site can
# appear on several rows, once per burst id.
df_site_burst = gpd.read_file('data/val_sites_subset.geojson')
df_site_burst.head()

Unnamed: 0,site_id,change_label,change_type,change_time,last_observation_time,mgrs_tile_id,jpl_burst_id,geometry
0,4,VLmaj,Fire,2021-11-07,2021-11-02,34NCL,T036-076228-IW3,POINT (19.33289 4.57490)
1,13,VLmaj,Crop cycle change,2021-10-03,NaT,39STR,T108-230715-IW2,POINT (48.77698 31.75490)
2,13,VLmaj,Crop cycle change,2021-10-03,NaT,39STR,T101-214979-IW1,POINT (48.77698 31.75490)
3,22,VLmin,Natural browning,2021-12-19,2021-12-12,37SBV,T014-028130-IW2,POINT (36.61178 35.52891)
4,22,VLmin,Natural browning,2021-12-19,2021-12-12,37SBV,T021-043823-IW2,POINT (36.61178 35.52891)


In [4]:
# One row per site (first occurrence kept), reduced to the site-level labels.
df_site = (
    df_site_burst
    .drop_duplicates(subset=['site_id'], keep='first')
    [['site_id', 'change_label', 'change_type']]
)
df_site.head()

Unnamed: 0,site_id,change_label,change_type
0,4,VLmaj,Fire
1,13,VLmaj,Crop cycle change
3,22,VLmin,Natural browning
6,30,noChange,No Change
7,34,VLmin,Wetland


# Get All Tables

In [5]:
# All metric tables, laid out as out/<site dir>/<burst dir>/metrics_ts/<metric>.parquet
metric_paths = sorted(Path('out').glob('*/*/metrics_ts/*.parquet'))
metric_paths[:3]

[PosixPath('out/site_104/T144-308010-IW2/metrics_ts/cusum_prob_max.parquet'),
 PosixPath('out/site_104/T144-308010-IW2/metrics_ts/cusum_vh.parquet'),
 PosixPath('out/site_104/T144-308010-IW2/metrics_ts/log_ratio_vh.parquet')]

In [6]:
# The metric name is the parquet file name without its extension.
metric_names = [parquet_path.stem for parquet_path in metric_paths]
metric_names[:3]

['cusum_prob_max', 'cusum_vh', 'log_ratio_vh']

In [7]:
# The burst id is the directory two levels above each parquet file.
burst_ids = [parquet_path.parent.parent.stem for parquet_path in metric_paths]
burst_ids[:2]

['T144-308010-IW2', 'T144-308010-IW2']

In [8]:
# Site directories are named like "site_104"; keep the integer after the underscore.
site_ids = [
    int(parquet_path.parents[2].stem.split('_')[1])
    for parquet_path in metric_paths
]
site_ids[:2]

[104, 104]

In [9]:
# One DataFrame per metric table, in the same order as metric_paths.
dfs = list(map(pd.read_parquet, metric_paths))
dfs[0].head()

Unnamed: 0,site_id,burst_id,acq_date_post,metric,metric_90m,val_obs,rtc_changes_aggressive,rtc_changes_aggressive_confirmed,rtc_changes_conservative,rtc_changes_conservative_confirmed,val_change_aggressive,val_change_aggressive_confirmed,val_change_conservative,val_change_conservative_confirmed
0,104,T144-308010-IW2,2021-10-20 13:59:16,0.614765,0.504232,noChange,0,0,0,0,0,0,0,0
1,104,T144-308010-IW2,2021-10-26 14:00:16,1.0,1.0,noChange,1,1,1,1,0,0,0,0
2,104,T144-308010-IW2,2021-11-01 13:59:16,0.967204,0.954869,noChange,1,1,1,1,0,0,0,0
3,104,T144-308010-IW2,2021-11-07 14:00:16,0.976512,0.861402,noChange,1,1,1,1,0,0,0,0
4,104,T144-308010-IW2,2021-11-13 13:59:16,0.729506,0.6458,noChange,0,0,0,0,0,0,0,0


In [10]:
def update_obs_labels(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise the ``val_obs`` labels of one metric table.

    ``'OCtot'`` is folded into ``'OCmaj'`` and the placeholders ``0`` / ``'0'``
    become ``'noObs'``; every other value is kept as is. The column is
    overwritten on ``df`` itself and that same frame is returned.
    """
    def _relabel(obs):
        if obs == 'OCtot':
            return 'OCmaj'
        if obs in [0, '0']:
            return 'noObs'
        return obs

    df['val_obs'] = df['val_obs'].map(_relabel)
    return df

# Relabel every table (each frame is modified in place and kept in the list).
dfs = [update_obs_labels(table) for table in dfs]

In [11]:
# Human-readable names for the two binary labels used in the accuracy metrics.
class_dict = {
    0: 'undisturbed',
    1: 'disturbed',
}

In [12]:
def get_commission_error(y_val, y_pred, label) -> float:
    """Commission error of ``label``: the share of ``label`` predictions that are wrong.

    Computed as ``FP / (TP + FP)``, where a false positive is a sample predicted
    as ``label`` whose validation value differs from ``label``.

    Args:
        y_val: validation labels (array-like supporting element-wise comparison).
        y_pred: predicted labels, aligned with ``y_val``.
        label: the class whose commission error is wanted.

    Returns:
        The error as a built-in ``float``; ``0.0`` when ``label`` is never
        predicted (``TP + FP == 0``).
    """
    predicted_as_label = (y_pred == label)
    # Plain integer counts: the earlier float32 cast truncated the ratio to
    # about 7 significant digits and returned np.float32 instead of float.
    fp = int(np.sum((y_val != label) & predicted_as_label))
    tp = int(np.sum((y_val == label) & predicted_as_label))

    denom = tp + fp
    if denom == 0:
        return 0.
    return fp / denom


def get_ommission_error(y_val, y_pred, label) -> float:
    """Omission error of ``label``: the share of true ``label`` samples that were missed.

    Computed as ``FN / (TP + FN)``, where a false negative is a sample whose
    validation value is ``label`` but whose prediction is not.

    Args:
        y_val: validation labels (array-like supporting element-wise comparison).
        y_pred: predicted labels, aligned with ``y_val``.
        label: the class whose omission error is wanted.

    Returns:
        The error as a built-in ``float``; ``0.0`` when ``label`` never occurs
        in ``y_val`` (``TP + FN == 0``).
    """
    truly_label = (y_val == label)
    # Plain integer counts: the earlier float32 cast truncated the ratio to
    # about 7 significant digits and returned np.float32 instead of float.
    fn = int(np.sum(truly_label & (y_pred != label)))
    tp = int(np.sum(truly_label & (y_pred == label)))

    denom = tp + fn
    if denom == 0:
        return 0.
    return fn / denom

def get_total_accuracy(y_val, y_pred) -> float:
    """Fraction of samples on which ``y_val`` and ``y_pred`` agree.

    An empty ``y_val`` is scored as ``1.0``.
    """
    n_samples = y_val.size
    if n_samples == 0:
        return 1.
    n_matching = (y_val == y_pred).sum()
    return n_matching / n_samples

def get_accuracy_metrics_for_one_pair(y_val, y_rtc):
    """Score one series of RTC detections against one series of validation labels.

    Samples where either input equals 255 are dropped before scoring
    (presumably a no-data marker -- TODO confirm). The remaining values are
    scored as binary labels 0/1, named through ``class_dict``.

    Returns:
        A dict with per-class dicts for precision, recall, F1, validation
        support, RTC support (absolute and fractional), commission error and
        omission error, plus the scalar Jaccard score of class 1, the total
        sample count and the overall accuracy.
    """
    labels = [0, 1]

    valid = ~((y_rtc == 255) | (y_val == 255))
    rtc_valid = y_rtc[valid]
    val_valid = y_val[valid]

    # zero_division=1: a class with nothing to score is reported as "perfect".
    precision, recall, f1, support = sklearn.metrics.precision_recall_fscore_support(
        val_valid,
        rtc_valid,
        labels=labels,
        zero_division=1,
    )

    def _named(per_label_values):
        # Re-key a per-label array by the class names in class_dict.
        return {class_dict[label]: per_label_values[label] for label in labels}

    supp_val = {name: int(count) for name, count in _named(support).items()}

    # Summing the kept detections counts class 1, assuming they are 0/1 valued.
    n_kept = len(rtc_valid)
    n_disturbed = rtc_valid.sum()
    frac_disturbed = n_disturbed / n_kept

    commission = {
        'disturbed': get_commission_error(val_valid, rtc_valid, 1),
        'undisturbed': get_commission_error(val_valid, rtc_valid, 0),
    }
    omission = {
        'disturbed': get_ommission_error(val_valid, rtc_valid, 1),
        'undisturbed': get_ommission_error(val_valid, rtc_valid, 0),
    }
    jaccard = sklearn.metrics.jaccard_score(val_valid, rtc_valid, pos_label=1, zero_division=1)

    return {
        'precision': _named(precision),
        'recall': _named(recall),
        'f1_score': _named(f1),
        'supp_val': supp_val,
        'supp_rtc': {'disturbed': n_disturbed, 'undisturbed': n_kept - n_disturbed},
        'supp_rtc_percent': {'disturbed': frac_disturbed,
                             'undisturbed': 1 - frac_disturbed},
        'commission_error': commission,
        'ommission_error': omission,
        'jaccard_score': jaccard,
        'total_samples': sum(supp_val.values()),
        'total_accuracy': get_total_accuracy(val_valid, rtc_valid),
    }
    

In [13]:
# Spot check on the first table: validation "aggressive confirmed" labels
# against RTC "conservative confirmed" detections.
get_accuracy_metrics_for_one_pair(dfs[0].val_change_aggressive_confirmed, dfs[0].rtc_changes_conservative_confirmed)

{'precision': {'undisturbed': 1.0, 'disturbed': 0.0},
 'recall': {'undisturbed': 0.9090909090909091, 'disturbed': 1.0},
 'f1_score': {'undisturbed': 0.9523809523809523, 'disturbed': 0.0},
 'supp_val': {'undisturbed': 33, 'disturbed': 0},
 'supp_rtc': {'disturbed': 3, 'undisturbed': 30},
 'supp_rtc_percent': {'disturbed': 0.09090909090909091,
  'undisturbed': 0.9090909090909091},
 'commission_error': {'disturbed': 1.0, 'undisturbed': 0.0},
 'ommission_error': {'disturbed': 0.0, 'undisturbed': 0.09090909},
 'jaccard_score': 0.0,
 'total_samples': 33,
 'total_accuracy': 0.9090909090909091}

In [14]:
def get_accuracy_metrics_for_all_pairs(df: pd.DataFrame):
    """Compute accuracy metrics for every validation/RTC strategy pairing of one table.

    The four combinations of ``conservative`` / ``aggressive`` are scored using
    the ``val_change_<strategy>_confirmed`` and ``rtc_changes_<strategy>_confirmed``
    columns of ``df``.

    Args:
        df: one metric table; must be non-empty and hold a ``site_id`` column
            plus the four ``*_confirmed`` columns named above.

    Returns:
        A dict keyed ``'val-<strategy>-confirmed__rtc-<strategy>-confirmed'``.
        Each value is the dict returned by ``get_accuracy_metrics_for_one_pair``
        with a ``'site_id'`` entry added.
    """
    out = {}
    # Take the first row by position. The label-based ``df.site_id[0]`` raises
    # KeyError whenever the index has no label 0 (e.g. after filtering rows).
    site_id = df['site_id'].iloc[0]
    for val_token in ['conservative', 'aggressive']:
        for rtc_token in ['conservative', 'aggressive']:
            y_val = df[f'val_change_{val_token}_confirmed']
            y_rtc = df[f'rtc_changes_{rtc_token}_confirmed']
            acc_dict = get_accuracy_metrics_for_one_pair(y_val, y_rtc)
            acc_dict['site_id'] = site_id
            out[f'val-{val_token}-confirmed__rtc-{rtc_token}-confirmed'] = acc_dict
    return out

In [15]:
# Spot check: all four strategy pairings for the first table.
get_accuracy_metrics_for_all_pairs(dfs[0])

{'val-conservative-confirmed__rtc-conservative-confirmed': {'precision': {'undisturbed': 1.0,
   'disturbed': 0.0},
  'recall': {'undisturbed': 0.9090909090909091, 'disturbed': 1.0},
  'f1_score': {'undisturbed': 0.9523809523809523, 'disturbed': 0.0},
  'supp_val': {'undisturbed': 33, 'disturbed': 0},
  'supp_rtc': {'disturbed': 3, 'undisturbed': 30},
  'supp_rtc_percent': {'disturbed': 0.09090909090909091,
   'undisturbed': 0.9090909090909091},
  'commission_error': {'disturbed': 1.0, 'undisturbed': 0.0},
  'ommission_error': {'disturbed': 0.0, 'undisturbed': 0.09090909},
  'jaccard_score': 0.0,
  'total_samples': 33,
  'total_accuracy': 0.9090909090909091,
  'site_id': 104},
 'val-conservative-confirmed__rtc-aggressive-confirmed': {'precision': {'undisturbed': 1.0,
   'disturbed': 0.0},
  'recall': {'undisturbed': 0.9090909090909091, 'disturbed': 1.0},
  'f1_score': {'undisturbed': 0.9523809523809523, 'disturbed': 0.0},
  'supp_val': {'undisturbed': 33, 'disturbed': 0},
  'supp_rtc':

In [16]:
# Accuracy metrics for every table, in the same order as dfs / metric_paths.
acc_data = [get_accuracy_metrics_for_all_pairs(table) for table in tqdm(dfs)]

100%|█████████████| 742/742 [00:05<00:00, 125.41it/s]


# Aggregate Over All Sites

Metrics are aggregated over all sites for a single pair of validation and RTC strategies (selected by `STRAT_KEY` below).

## All Disturbance Types

In [17]:
# Strategy pairing reported in the tables below.
STRAT_KEY = 'val-aggressive-confirmed__rtc-aggressive-confirmed'
# Per-class metrics; only the 'disturbed' entry of each is kept.
DISTURBED_METRIC_KEYS = [
    'precision', 'recall', 'f1_score', 'commission_error', 'ommission_error',
    'supp_val', 'supp_rtc', 'supp_rtc_percent',
]
# Scalar metrics copied through unchanged.
OTHER_KEYS = ['jaccard_score', 'total_samples', 'total_accuracy']

# One flat record per metric table; site and burst ids come from the file paths.
disturbed_acc_data = [
    {
        'metric': metric,
        'site_id': site_id,
        'burst_id': burst_id,
        **{key: item[STRAT_KEY][key] for key in OTHER_KEYS},
        **{key: item[STRAT_KEY][key]['disturbed'] for key in DISTURBED_METRIC_KEYS},
    }
    for item, metric, burst_id, site_id in zip(acc_data, metric_names, burst_ids, site_ids)
]

In [18]:
# Flat table of the 'disturbed'-class metrics: one row per (metric, site, burst).
df_dist = pd.DataFrame(disturbed_acc_data)
df_dist.head()

Unnamed: 0,metric,site_id,burst_id,jaccard_score,total_samples,total_accuracy,precision,recall,f1_score,commission_error,ommission_error,supp_val,supp_rtc,supp_rtc_percent
0,cusum_prob_max,104,T144-308010-IW2,0.0,33,0.909091,0.0,1.0,0.0,1.0,0.0,0,3,0.090909
1,cusum_vh,104,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
2,log_ratio_vh,104,T144-308010-IW2,0.0,33,0.848485,0.0,1.0,0.0,1.0,0.0,0,5,0.151515
3,mahalanobis_1d_max,104,T144-308010-IW2,0.0,33,0.0,0.0,1.0,0.0,1.0,0.0,0,33,1.0
4,mahalanobis_2d,104,T144-308010-IW2,0.0,33,0.606061,0.0,1.0,0.0,1.0,0.0,0,13,0.393939


In [19]:
# Numeric columns summarised (mean / std) in every aggregation below.
AGG_COLUMNS = [*OTHER_KEYS, *DISTURBED_METRIC_KEYS]

In [20]:
# Mean and standard deviation of each column per metric, pooled over all rows.
df_dist_agg_all = (
    df_dist[['metric'] + AGG_COLUMNS]
    .groupby('metric')
    .agg(['mean', 'std'])
)
df_dist_agg_all

Unnamed: 0_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_rtc,supp_rtc,supp_rtc_percent,supp_rtc_percent
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
cusum_prob_max,0.519567,0.485321,28.877358,7.055836,0.846402,0.230319,0.848607,0.351562,0.610964,0.475458,...,0.151393,0.351562,0.389036,0.475458,3.858491,6.448977,0.943396,2.224668,0.04085,0.120176
cusum_vh,0.525056,0.497555,28.849057,7.054607,0.848068,0.225593,0.884615,0.318359,0.572512,0.492729,...,0.115385,0.318359,0.427488,0.492729,3.858491,6.448977,0.575472,1.876918,0.023012,0.07044
log_ratio_vh,0.333473,0.429881,28.858491,7.058178,0.766902,0.226323,0.509313,0.469937,0.660689,0.432284,...,0.490687,0.469937,0.339311,0.432284,3.858491,6.448977,4.981132,5.690395,0.17526,0.206123
mahalanobis_1d_max,0.139559,0.229328,28.849057,7.054607,0.139559,0.229328,0.139559,0.229328,1.0,0.0,...,0.860441,0.229328,0.0,0.0,3.858491,6.448977,28.849057,7.054607,1.0,0.0
mahalanobis_2d,0.367558,0.426295,28.877358,7.055836,0.7821,0.219156,0.497877,0.457772,0.755876,0.380737,...,0.502123,0.457772,0.244124,0.380737,3.858491,6.448977,5.773585,6.673734,0.207858,0.242091
mahalanobis_vh,0.139559,0.229328,28.849057,7.054607,0.139559,0.229328,0.139559,0.229328,1.0,0.0,...,0.860441,0.229328,0.0,0.0,3.858491,6.448977,28.849057,7.054607,1.0,0.0
transformer,0.479464,0.474605,28.754717,7.165807,0.832133,0.21899,0.734333,0.426063,0.640932,0.456183,...,0.265667,0.426063,0.359068,0.456183,3.858491,6.448977,2.018868,3.540194,0.077761,0.152553


In [21]:
# Number of rows behind each per-metric summary, attached as a 'count' column.
df_dist_agg_all_count = (
    df_dist[['metric'] + AGG_COLUMNS]
    .groupby(['metric'])
    .size()
    .rename(index="count")
)
df_dist_agg_all['count'] = df_dist_agg_all_count
df_dist_agg_all

Unnamed: 0_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_rtc,supp_rtc,supp_rtc_percent,supp_rtc_percent,count
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 21_level_1
metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
cusum_prob_max,0.519567,0.485321,28.877358,7.055836,0.846402,0.230319,0.848607,0.351562,0.610964,0.475458,...,0.351562,0.389036,0.475458,3.858491,6.448977,0.943396,2.224668,0.04085,0.120176,106
cusum_vh,0.525056,0.497555,28.849057,7.054607,0.848068,0.225593,0.884615,0.318359,0.572512,0.492729,...,0.318359,0.427488,0.492729,3.858491,6.448977,0.575472,1.876918,0.023012,0.07044,106
log_ratio_vh,0.333473,0.429881,28.858491,7.058178,0.766902,0.226323,0.509313,0.469937,0.660689,0.432284,...,0.469937,0.339311,0.432284,3.858491,6.448977,4.981132,5.690395,0.17526,0.206123,106
mahalanobis_1d_max,0.139559,0.229328,28.849057,7.054607,0.139559,0.229328,0.139559,0.229328,1.0,0.0,...,0.229328,0.0,0.0,3.858491,6.448977,28.849057,7.054607,1.0,0.0,106
mahalanobis_2d,0.367558,0.426295,28.877358,7.055836,0.7821,0.219156,0.497877,0.457772,0.755876,0.380737,...,0.457772,0.244124,0.380737,3.858491,6.448977,5.773585,6.673734,0.207858,0.242091,106
mahalanobis_vh,0.139559,0.229328,28.849057,7.054607,0.139559,0.229328,0.139559,0.229328,1.0,0.0,...,0.229328,0.0,0.0,3.858491,6.448977,28.849057,7.054607,1.0,0.0,106
transformer,0.479464,0.474605,28.754717,7.165807,0.832133,0.21899,0.734333,0.426063,0.640932,0.456183,...,0.426063,0.359068,0.456183,3.858491,6.448977,2.018868,3.540194,0.077761,0.152553,106


In [22]:
# Write the all-sites summary to an Excel file in the aggregated-output folder.
df_dist_agg_all.to_excel(across_sites_dir / 'dist_all_sites.xlsx')

## By Disturbance type

In [23]:
# Attach the site-level labels; the right join keeps every row of df_dist.
df_dist_m = pd.merge(
    df_site,
    df_dist,
    on='site_id',
    how='right',
)
df_dist_m.head()

Unnamed: 0,site_id,change_label,change_type,metric,burst_id,jaccard_score,total_samples,total_accuracy,precision,recall,f1_score,commission_error,ommission_error,supp_val,supp_rtc,supp_rtc_percent
0,104,noChange,No Change,cusum_prob_max,T144-308010-IW2,0.0,33,0.909091,0.0,1.0,0.0,1.0,0.0,0,3,0.090909
1,104,noChange,No Change,cusum_vh,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
2,104,noChange,No Change,log_ratio_vh,T144-308010-IW2,0.0,33,0.848485,0.0,1.0,0.0,1.0,0.0,0,5,0.151515
3,104,noChange,No Change,mahalanobis_1d_max,T144-308010-IW2,0.0,33,0.0,0.0,1.0,0.0,1.0,0.0,0,33,1.0
4,104,noChange,No Change,mahalanobis_2d,T144-308010-IW2,0.0,33,0.606061,0.0,1.0,0.0,1.0,0.0,0,13,0.393939


In [24]:
# Mean and standard deviation per (change type, metric).
# The std is NaN for groups that hold a single row.
df_dist_agg_by_type = (
    df_dist_m[['change_type', 'metric'] + AGG_COLUMNS]
    .groupby(['change_type', 'metric'])
    .agg(['mean', 'std'])
)
df_dist_agg_by_type

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_rtc,supp_rtc,supp_rtc_percent,supp_rtc_percent
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
change_type,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Clear cut,cusum_prob_max,0.411765,,23.0,,0.565217,,1.000000,,0.411765,,...,0.000000,,0.588235,,17.0,,7.0,,0.304348,
Clear cut,cusum_vh,0.352941,,23.0,,0.521739,,1.000000,,0.352941,,...,0.000000,,0.647059,,17.0,,6.0,,0.260870,
Clear cut,log_ratio_vh,0.391304,,23.0,,0.391304,,0.600000,,0.529412,,...,0.400000,,0.470588,,17.0,,15.0,,0.652174,
Clear cut,mahalanobis_1d_max,0.739130,,23.0,,0.739130,,0.739130,,1.000000,,...,0.260870,,0.000000,,17.0,,23.0,,1.000000,
Clear cut,mahalanobis_2d,0.409091,,23.0,,0.434783,,0.642857,,0.529412,,...,0.357143,,0.470588,,17.0,,14.0,,0.608696,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wetland,log_ratio_vh,0.285714,,32.0,,0.687500,,0.571429,,0.363636,,...,0.428571,,0.636364,,11.0,,7.0,,0.218750,
Wetland,mahalanobis_1d_max,0.343750,,32.0,,0.343750,,0.343750,,1.000000,,...,0.656250,,0.000000,,11.0,,32.0,,1.000000,
Wetland,mahalanobis_2d,0.000000,,32.0,,0.656250,,1.000000,,0.000000,,...,0.000000,,1.000000,,11.0,,0.0,,0.000000,
Wetland,mahalanobis_vh,0.343750,,32.0,,0.343750,,0.343750,,1.000000,,...,0.656250,,0.000000,,11.0,,32.0,,1.000000,


In [25]:
# Number of rows in each (change type, metric) group.
df_dist_agg_by_type_count = (
    df_dist_m[['change_type', 'metric'] + AGG_COLUMNS]
    .groupby(['change_type', 'metric'])
    .size()
    .rename(index="count")
)
df_dist_agg_by_type_count.head()

change_type  metric            
Clear cut    cusum_prob_max        1
             cusum_vh              1
             log_ratio_vh          1
             mahalanobis_1d_max    1
             mahalanobis_2d        1
Name: count, dtype: int64

In [26]:
# Attach the group sizes as a 'count' column (modifies the summary frame in place).
df_dist_agg_by_type['count'] = df_dist_agg_by_type_count
df_dist_agg_by_type.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_rtc,supp_rtc,supp_rtc_percent,supp_rtc_percent,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 22_level_1
change_type,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Clear cut,cusum_prob_max,0.411765,,23.0,,0.565217,,1.0,,0.411765,,...,,0.588235,,17.0,,7.0,,0.304348,,1
Clear cut,cusum_vh,0.352941,,23.0,,0.521739,,1.0,,0.352941,,...,,0.647059,,17.0,,6.0,,0.26087,,1
Clear cut,log_ratio_vh,0.391304,,23.0,,0.391304,,0.6,,0.529412,,...,,0.470588,,17.0,,15.0,,0.652174,,1
Clear cut,mahalanobis_1d_max,0.73913,,23.0,,0.73913,,0.73913,,1.0,,...,,0.0,,17.0,,23.0,,1.0,,1
Clear cut,mahalanobis_2d,0.409091,,23.0,,0.434783,,0.642857,,0.529412,,...,,0.470588,,17.0,,14.0,,0.608696,,1


In [27]:
 # Write the per-change-type summary to an Excel file.
 # NOTE(review): this statement has a stray leading space; it is invalid as
 # plain Python if the notebook is exported to a script -- consider removing it.
 df_dist_agg_by_type.to_excel(across_sites_dir / 'dist_all_sites_by_type.xlsx')

## By Change Label

In [28]:
# Mean and standard deviation per (change label, metric).
df_dist_agg_by_label = (
    df_dist_m[['change_label', 'metric'] + AGG_COLUMNS]
    .groupby(['change_label', 'metric'])
    .agg(['mean', 'std'])
)
df_dist_agg_by_label.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_rtc,supp_rtc,supp_rtc_percent,supp_rtc_percent
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
change_label,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
OCmaj,cusum_prob_max,0.0,0.0,21.833333,13.482087,0.621875,0.337859,0.333333,0.516398,0.166667,0.408248,...,0.666667,0.516398,0.833333,0.408248,2.5,2.073644,3.833333,3.763863,0.274306,0.374633
OCmaj,cusum_vh,0.0,0.0,21.833333,13.482087,0.743056,0.140675,0.333333,0.516398,0.166667,0.408248,...,0.666667,0.516398,0.833333,0.408248,2.5,2.073644,3.166667,3.544949,0.153125,0.140989
OCmaj,log_ratio_vh,0.008333,0.020412,21.833333,13.482087,0.451042,0.268728,0.177083,0.40392,0.2,0.4,...,0.822917,0.40392,0.8,0.4,2.5,2.073644,9.333333,6.439462,0.455556,0.322949
OCmaj,mahalanobis_1d_max,0.103819,0.069552,21.833333,13.482087,0.103819,0.069552,0.103819,0.069552,1.0,0.0,...,0.896181,0.069552,0.0,0.0,2.5,2.073644,21.833333,13.482087,1.0,0.0
OCmaj,mahalanobis_2d,0.089592,0.098185,21.833333,13.482087,0.422917,0.267262,0.257701,0.374466,0.633333,0.496655,...,0.742299,0.374466,0.366667,0.496655,2.5,2.073644,13.833333,10.107753,0.603819,0.342465


In [29]:
# Number of rows in each (change label, metric) group.
df_dist_agg_by_label_count = (
    df_dist_m[['change_label', 'metric'] + AGG_COLUMNS]
    .groupby(['change_label', 'metric'])
    .size()
    .rename(index="count")
)
df_dist_agg_by_label_count.head()

change_label  metric            
OCmaj         cusum_prob_max        6
              cusum_vh              6
              log_ratio_vh          6
              mahalanobis_1d_max    6
              mahalanobis_2d        6
Name: count, dtype: int64

In [30]:
# Attach the group sizes as a 'count' column (modifies the summary frame in place).
df_dist_agg_by_label['count'] = df_dist_agg_by_label_count
df_dist_agg_by_label

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_rtc,supp_rtc,supp_rtc_percent,supp_rtc_percent,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 22_level_1
change_label,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
OCmaj,cusum_prob_max,0.0,0.0,21.833333,13.482087,0.621875,0.337859,0.333333,0.516398,0.166667,0.408248,...,0.516398,0.833333,0.408248,2.5,2.073644,3.833333,3.763863,0.274306,0.374633,6
OCmaj,cusum_vh,0.0,0.0,21.833333,13.482087,0.743056,0.140675,0.333333,0.516398,0.166667,0.408248,...,0.516398,0.833333,0.408248,2.5,2.073644,3.166667,3.544949,0.153125,0.140989,6
OCmaj,log_ratio_vh,0.008333,0.020412,21.833333,13.482087,0.451042,0.268728,0.177083,0.40392,0.2,0.4,...,0.40392,0.8,0.4,2.5,2.073644,9.333333,6.439462,0.455556,0.322949,6
OCmaj,mahalanobis_1d_max,0.103819,0.069552,21.833333,13.482087,0.103819,0.069552,0.103819,0.069552,1.0,0.0,...,0.069552,0.0,0.0,2.5,2.073644,21.833333,13.482087,1.0,0.0,6
OCmaj,mahalanobis_2d,0.089592,0.098185,21.833333,13.482087,0.422917,0.267262,0.257701,0.374466,0.633333,0.496655,...,0.374466,0.366667,0.496655,2.5,2.073644,13.833333,10.107753,0.603819,0.342465,6
OCmaj,mahalanobis_vh,0.103819,0.069552,21.833333,13.482087,0.103819,0.069552,0.103819,0.069552,1.0,0.0,...,0.069552,0.0,0.0,2.5,2.073644,21.833333,13.482087,1.0,0.0,6
OCmaj,transformer,0.0,0.0,21.833333,13.482087,0.560764,0.30512,0.166667,0.408248,0.166667,0.408248,...,0.408248,0.833333,0.408248,2.5,2.073644,5.666667,3.932768,0.335417,0.346848,6
OCmin,cusum_prob_max,0.533333,0.516398,28.666667,7.705904,0.913843,0.123539,1.0,0.0,0.533333,0.516398,...,0.0,0.466667,0.516398,2.533333,3.62268,0.0,0.0,0.0,0.0,15
OCmin,cusum_vh,0.466667,0.516398,28.666667,7.705904,0.907391,0.121223,0.933333,0.258199,0.533333,0.516398,...,0.258199,0.466667,0.516398,2.533333,3.62268,0.2,0.774597,0.006452,0.024987,15
OCmin,log_ratio_vh,0.449459,0.477748,28.666667,7.705904,0.887943,0.114562,0.745,0.418778,0.59537,0.465488,...,0.418778,0.40463,0.465488,2.533333,3.62268,1.933333,2.789436,0.062321,0.09102,15


In [31]:
 # Write the per-change-label summary to an Excel file.
 # NOTE(review): this statement has a stray leading space; it is invalid as
 # plain Python if the notebook is exported to a script -- consider removing it.
 df_dist_agg_by_label.to_excel(across_sites_dir / 'dist_all_sites_by_label.xlsx')