In [1]:
from pathlib import Path
import pandas as pd
import sklearn.metrics
import numpy as np
from tqdm import tqdm
import geopandas as gpd
import seaborn as sns

# Out Directory

In [2]:
across_sites_dir = Path('out/_all_sites_aggregated')
across_sites_dir.mkdir(exist_ok=True, parents=True)

# Sites

In [3]:
df_site_burst = gpd.read_file('data/val_sites_subset.geojson')
df_site_burst.head()

Unnamed: 0,site_id,change_label,change_type,change_time,last_observation_time,mgrs_tile_id,jpl_burst_id,geometry
0,4,VLmaj,Fire,2021-11-07,2021-11-02,34NCL,T036-076228-IW3,POINT (19.33289 4.57490)
1,13,VLmaj,Crop cycle change,2021-10-03,NaT,39STR,T108-230715-IW2,POINT (48.77698 31.75490)
2,13,VLmaj,Crop cycle change,2021-10-03,NaT,39STR,T101-214979-IW1,POINT (48.77698 31.75490)
3,22,VLmin,Natural browning,2021-12-19,2021-12-12,37SBV,T014-028130-IW2,POINT (36.61178 35.52891)
4,22,VLmin,Natural browning,2021-12-19,2021-12-12,37SBV,T021-043823-IW2,POINT (36.61178 35.52891)


In [4]:
df_site = df_site_burst.drop_duplicates(subset=['site_id'], keep='first')[['site_id', 'change_label', 'change_type']]
df_site.head()

Unnamed: 0,site_id,change_label,change_type
0,4,VLmaj,Fire
1,13,VLmaj,Crop cycle change
3,22,VLmin,Natural browning
6,30,noChange,No Change
7,34,VLmin,Wetland


# Get All Tables

In [5]:
metric_paths = sorted(list(Path('out').glob('*/*/metrics_ts/*.parquet')))
metric_paths[:3]

[PosixPath('out/site_104/T144-308010-IW2/metrics_ts/cusum_prob_max.parquet'),
 PosixPath('out/site_104/T144-308010-IW2/metrics_ts/cusum_vh.parquet'),
 PosixPath('out/site_104/T144-308010-IW2/metrics_ts/log_ratio_vh.parquet')]

In [6]:
metric_names = [p.stem for p in metric_paths]
metric_names[:3]

['cusum_prob_max', 'cusum_vh', 'log_ratio_vh']

In [7]:
burst_ids = [p.parents[1].stem for p in metric_paths]
burst_ids[:2]

['T144-308010-IW2', 'T144-308010-IW2']

In [8]:
site_ids_rtc = [int(p.parents[2].stem.split('_')[1]) for p in metric_paths]
site_ids_rtc[:2]

[104, 104]

In [9]:
dfs_rtc = [pd.read_parquet(p) for p in metric_paths]
dfs_rtc[0].head()

Unnamed: 0,site_id,burst_id,acq_date_post,metric,metric_90m,val_obs,rtc_changes_aggressive,rtc_changes_aggressive_confirmed,rtc_changes_conservative,rtc_changes_conservative_confirmed,val_change_aggressive,val_change_aggressive_confirmed,val_change_conservative,val_change_conservative_confirmed
0,104,T144-308010-IW2,2021-10-20 13:59:16,0.614765,0.504232,noChange,0,0,0,0,0,0,0,0
1,104,T144-308010-IW2,2021-10-26 14:00:16,1.0,1.0,noChange,1,1,1,1,0,0,0,0
2,104,T144-308010-IW2,2021-11-01 13:59:16,0.967204,0.954869,noChange,1,1,1,1,0,0,0,0
3,104,T144-308010-IW2,2021-11-07 14:00:16,0.976512,0.861402,noChange,1,1,1,1,0,0,0,0
4,104,T144-308010-IW2,2021-11-13 13:59:16,0.729506,0.6458,noChange,0,0,0,0,0,0,0,0


In [10]:
hls_paths = [Path(f'data/dist_hls_timeseries_by_site/site_{site_id}.parquet') for site_id in set(site_ids_rtc)]
assert all([p.exists() for p in hls_paths])
assert len(hls_paths) == 59

In [12]:
dfs_hls = [pd.read_parquet(p) for p in hls_paths]
dfs_hls[0].head()

Unnamed: 0,site_id,granule_id,sensing_time,veg_status,gen_status,date,val_obs,val_change_conservative,val_change_aggressive,dist-hls-veg_change_conservative,dist-hls-veg_change_aggressive,dist-hls-gen_change_conservative,dist-hls-gen_change_aggressive,val_change_conservative_confirmed,val_change_aggressive_confirmed,dist-hls-veg_change_conservative_confirmed,dist-hls-veg_change_aggressive_confirmed,dist-hls-gen_change_conservative_confirmed,dist-hls-gen_change_aggressive_confirmed
0,256,OPERA_L3_DIST-ALERT-HLS_T30UUA_20211001T112121...,2021-10-01,0,0,2021-10-01,noObs,255,255,0,0,0,0,255,255,0,0,0,0
1,256,OPERA_L3_DIST-ALERT-HLS_T30UUA_20211006T112119...,2021-10-06,255,255,2021-10-06,noChange,0,0,0,0,0,0,0,0,0,0,0,0
2,256,OPERA_L3_DIST-ALERT-HLS_T30UUA_20211009T110533...,2021-10-09,255,255,2021-10-09,noChange,0,0,0,0,0,0,0,0,0,0,0,0
3,256,OPERA_L3_DIST-ALERT-HLS_T30UUA_20211011T112121...,2021-10-11,255,255,2021-10-11,noChange,0,0,0,0,0,0,0,0,0,0,0,0
4,256,OPERA_L3_DIST-ALERT-HLS_T30UUA_20211014T113321...,2021-10-14,0,0,2021-10-14,noChange,0,0,0,0,0,0,0,0,0,0,0,0


In [13]:
lll = [df['dist-hls-veg_change_conservative_confirmed'].sum() for df in dfs_hls]
np.mean(lll)

0.05084745762711865

In [14]:
def update_obs_labels(df: pd.DataFrame) -> pd.DataFrame:
    df['val_obs'] = df['val_obs'].map(lambda obs: obs if obs != 'OCtot' else 'OCmaj')
    df['val_obs'] = df['val_obs'].map(lambda obs: obs if obs not in [0, '0'] else 'noObs')
    return df

dfs_rtc = list(map(update_obs_labels, dfs_rtc))
dfs_hls = list(map(update_obs_labels, dfs_hls))

In [15]:
class_dict = {0: 'undisturbed',
              1: 'disturbed'}

In [16]:
def get_commission_error(y_val, y_pred, label) -> float:
    fp = np.sum((y_val != label) & (y_pred == label)).astype('float32')
    tp = np.sum((y_val == label) & (y_pred == label)).astype('float32')

    denom = tp + fp
    if denom == 0:
        return 0.
    ce = fp / denom
    return ce


def get_ommission_error(y_val, y_pred, label) -> float:
    fn = np.sum((y_val == label) & (y_pred != label)).astype('float32')
    tp = np.sum((y_val == label) & (y_pred == label)).astype('float32')

    denom = tp + fn
    if denom == 0:
        return 0.
    oe = fn / denom
    return oe

def get_total_accuracy(y_val, y_pred) -> float:
    if y_val.size == 0:
        return 1.
    acc = (y_val == y_pred).sum() / (y_val.size)
    return acc

def get_accuracy_metrics_for_one_pair(y_val, y_rtc):
    mask = (y_rtc == 255) | (y_val == 255)
    y_rtc_ = y_rtc[~mask]
    y_val_ = y_val[~mask]
    prec, recall, f1, supp = sklearn.metrics.precision_recall_fscore_support(y_val_,
                                                                             y_rtc_,
                                                                             labels=[0, 1],
                                                                             # if there are no classes
                                                                             # Assume "perfect"
                                                                             zero_division=1
                                                                             )
    recall_per_class = {class_dict[label]: recall[label] for label in [0, 1]}
    prec_per_class = {class_dict[label]: prec[label] for label in [0, 1]}
    f1_per_class = {class_dict[label]: f1[label] for label in [0, 1]}
    supp_per_class = {class_dict[label]: int(supp[label]) for label in [0, 1]}

    n = len(y_rtc_)
    d = y_rtc_.sum()
    supp_prod_change_per_class = {'disturbed': d, 'undisturbed': n - d}
    supp_prod_change_per_class_perc = {'disturbed': d / n,
                                       'undisturbed': 1 - d/n}
    
    co_err = {'disturbed': get_commission_error(y_val_, y_rtc_, 1),
                'undisturbed': get_commission_error(y_val_, y_rtc_, 0)}
    om_err = {'disturbed': get_ommission_error(y_val_, y_rtc_, 1),
               'undisturbed': get_ommission_error(y_val_, y_rtc_, 0)}
    jacc = sklearn.metrics.jaccard_score(y_val_, y_rtc_, pos_label=1, zero_division=1)
    return {
            'precision': prec_per_class,
            'recall': recall_per_class,
            'f1_score': f1_per_class,
            'supp_val': supp_per_class,
            'supp_prod_change': supp_prod_change_per_class,
            'supp_prod_change_perc': supp_prod_change_per_class_perc,
            'commission_error': co_err,
            'ommission_error': om_err,
            'jaccard_score': jacc,
            'total_samples': sum(supp_per_class.values()),
            'total_accuracy': get_total_accuracy(y_val_, y_rtc_)}
    

In [17]:
get_accuracy_metrics_for_one_pair(dfs_hls[0].val_change_aggressive_confirmed, dfs_hls[0]['dist-hls-veg_change_conservative'])

{'precision': {'undisturbed': 0.40131578947368424, 'disturbed': 1.0},
 'recall': {'undisturbed': 1.0, 'disturbed': 0.061855670103092786},
 'f1_score': {'undisturbed': 0.5727699530516432,
  'disturbed': 0.11650485436893204},
 'supp_val': {'undisturbed': 61, 'disturbed': 97},
 'supp_prod_change': {'disturbed': 6, 'undisturbed': 152},
 'supp_prod_change_perc': {'disturbed': 0.0379746835443038,
  'undisturbed': 0.9620253164556962},
 'commission_error': {'disturbed': 0.0, 'undisturbed': 0.5986842},
 'ommission_error': {'disturbed': 0.9381443, 'undisturbed': 0.0},
 'jaccard_score': 0.061855670103092786,
 'total_samples': 158,
 'total_accuracy': 0.4240506329113924}

In [18]:
get_accuracy_metrics_for_one_pair(dfs_rtc[0].val_change_aggressive_confirmed, dfs_rtc[0].rtc_changes_conservative_confirmed)

{'precision': {'undisturbed': 1.0, 'disturbed': 0.0},
 'recall': {'undisturbed': 0.9090909090909091, 'disturbed': 1.0},
 'f1_score': {'undisturbed': 0.9523809523809523, 'disturbed': 0.0},
 'supp_val': {'undisturbed': 33, 'disturbed': 0},
 'supp_prod_change': {'disturbed': 3, 'undisturbed': 30},
 'supp_prod_change_perc': {'disturbed': 0.09090909090909091,
  'undisturbed': 0.9090909090909091},
 'commission_error': {'disturbed': 1.0, 'undisturbed': 0.0},
 'ommission_error': {'disturbed': 0.0, 'undisturbed': 0.09090909},
 'jaccard_score': 0.0,
 'total_samples': 33,
 'total_accuracy': 0.9090909090909091}

In [19]:
def get_accuracy_metrics_for_all_pairs(df: pd.DataFrame, sensor='sar'):
    if sensor not in ['optical', 'sar']:
        raise ValueError
    out = {}
    sensor_column_tokens = ['rtc_changes'] if sensor == 'sar' else ['dist-hls-veg_change', 'dist-hls-gen_change']
    sensor_serial_tokens = ['rtc'] if sensor == 'sar' else ['dist-hls-veg', 'dist-hls-gen']
    site_id = df.site_id[0]
    for sensor_column_token, serial_sensor_token in zip(sensor_column_tokens, sensor_serial_tokens):
        for val_token in ['conservative', 'aggressive']:
            for change_strategy_token in ['conservative', 'aggressive']:
                y_val = df[f'val_change_{val_token}_confirmed']
                y_rtc = df[f'{sensor_column_token}_{change_strategy_token}_confirmed']
                acc_dict = get_accuracy_metrics_for_one_pair(y_val, y_rtc)
                acc_dict['site_id'] = site_id
                acc_dict['change_category'] = serial_sensor_token
                
                out.update({f'val-{val_token}-confirmed__{serial_sensor_token}-{change_strategy_token}-confirmed': acc_dict})
    return out

In [20]:
get_accuracy_metrics_for_all_pairs(dfs_rtc[0], sensor='sar')

{'val-conservative-confirmed__rtc-conservative-confirmed': {'precision': {'undisturbed': 1.0,
   'disturbed': 0.0},
  'recall': {'undisturbed': 0.9090909090909091, 'disturbed': 1.0},
  'f1_score': {'undisturbed': 0.9523809523809523, 'disturbed': 0.0},
  'supp_val': {'undisturbed': 33, 'disturbed': 0},
  'supp_prod_change': {'disturbed': 3, 'undisturbed': 30},
  'supp_prod_change_perc': {'disturbed': 0.09090909090909091,
   'undisturbed': 0.9090909090909091},
  'commission_error': {'disturbed': 1.0, 'undisturbed': 0.0},
  'ommission_error': {'disturbed': 0.0, 'undisturbed': 0.09090909},
  'jaccard_score': 0.0,
  'total_samples': 33,
  'total_accuracy': 0.9090909090909091,
  'site_id': 104,
  'change_category': 'rtc'},
 'val-conservative-confirmed__rtc-aggressive-confirmed': {'precision': {'undisturbed': 1.0,
   'disturbed': 0.0},
  'recall': {'undisturbed': 0.9090909090909091, 'disturbed': 1.0},
  'f1_score': {'undisturbed': 0.9523809523809523, 'disturbed': 0.0},
  'supp_val': {'undistu

In [21]:
get_accuracy_metrics_for_all_pairs(dfs_hls[0], sensor='optical')

{'val-conservative-confirmed__dist-hls-veg-conservative-confirmed': {'precision': {'undisturbed': 0.42038216560509556,
   'disturbed': 1.0},
  'recall': {'undisturbed': 1.0, 'disturbed': 0.0},
  'f1_score': {'undisturbed': 0.5919282511210763, 'disturbed': 0.0},
  'supp_val': {'undisturbed': 66, 'disturbed': 91},
  'supp_prod_change': {'disturbed': 0, 'undisturbed': 157},
  'supp_prod_change_perc': {'disturbed': 0.0, 'undisturbed': 1.0},
  'commission_error': {'disturbed': 0.0, 'undisturbed': 0.57961786},
  'ommission_error': {'disturbed': 1.0, 'undisturbed': 0.0},
  'jaccard_score': 0.0,
  'total_samples': 157,
  'total_accuracy': 0.42038216560509556,
  'site_id': 256,
  'change_category': 'dist-hls-veg'},
 'val-conservative-confirmed__dist-hls-veg-aggressive-confirmed': {'precision': {'undisturbed': 0.44594594594594594,
   'disturbed': 1.0},
  'recall': {'undisturbed': 1.0, 'disturbed': 0.0989010989010989},
  'f1_score': {'undisturbed': 0.616822429906542, 'disturbed': 0.18},
  'supp_v

In [22]:
acc_data_rtc = list(map(get_accuracy_metrics_for_all_pairs, tqdm(dfs_rtc[:])))
acc_data_hls = [get_accuracy_metrics_for_all_pairs(df, sensor='optical') for df in tqdm(dfs_hls)]

100%|██████████████████████████████████████| 742/742 [00:05<00:00, 125.90it/s]
100%|█████████████████████████████████████████| 59/59 [00:00<00:00, 66.93it/s]


# Aggregate Over All Sites

And a single pair of strategies

## all disturbance types

In [36]:
STRAT_KEY = 'val-conservative-confirmed__rtc-conservative-confirmed'
DISTURBED_METRIC_KEYS = ['precision', 'recall', 'f1_score', 'commission_error', 'ommission_error',
                         'supp_val', 'supp_prod_change', 
                         'supp_prod_change_perc']
OTHER_KEYS = ['jaccard_score', 'total_samples', 'total_accuracy']

assert(all([len(acc_data_rtc) == len(l) for l in [burst_ids, site_ids_rtc, metric_names]]))
disturbed_acc_data_rtc = [{**{'metric': metric},
                           **{'site_id': site_id},
                           **{'burst_id': burst_id},
                           **{key: item[STRAT_KEY][key] for key in OTHER_KEYS}, 
                           **{key: item[STRAT_KEY][key]['disturbed'] for key in DISTURBED_METRIC_KEYS}}
                          for item, metric, burst_id, site_id in zip(acc_data_rtc, metric_names, burst_ids, site_ids_rtc)]

In [37]:
HLS_STRAT_KEY = 'val-conservative-confirmed__dist-hls-gen-conservative-confirmed'

disturbed_acc_data_hls_gen = [{**{'metric': item[HLS_STRAT_KEY]['change_category']},
                               **{'site_id': item[HLS_STRAT_KEY]['site_id']},
                               **{key: item[HLS_STRAT_KEY][key] for key in OTHER_KEYS}, 
                               **{key: item[HLS_STRAT_KEY][key]['disturbed'] for key in DISTURBED_METRIC_KEYS}}
                              for item in acc_data_hls]

In [38]:
HLS_STRAT_KEY = 'val-conservative-confirmed__dist-hls-veg-conservative-confirmed'

disturbed_acc_data_hls_veg = [{**{'metric': item[HLS_STRAT_KEY]['change_category']},
                               **{'site_id': item[HLS_STRAT_KEY]['site_id']},
                               **{key: item[HLS_STRAT_KEY][key] for key in OTHER_KEYS}, 
                               **{key: item[HLS_STRAT_KEY][key]['disturbed'] for key in DISTURBED_METRIC_KEYS}}
                              for item in acc_data_hls]

In [42]:
df_dist_rtc = pd.DataFrame(disturbed_acc_data_rtc)
df_dist_rtc.head()

Unnamed: 0,metric,site_id,burst_id,jaccard_score,total_samples,total_accuracy,precision,recall,f1_score,commission_error,ommission_error,supp_val,supp_prod_change,supp_prod_change_perc
0,cusum_prob_max,104,T144-308010-IW2,0.0,33,0.909091,0.0,1.0,0.0,1.0,0.0,0,3,0.090909
1,cusum_vh,104,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
2,log_ratio_vh,104,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
3,mahalanobis_1d_max,104,T144-308010-IW2,0.0,33,0.0,0.0,1.0,0.0,1.0,0.0,0,33,1.0
4,mahalanobis_2d,104,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0


In [43]:
df_dist_hls_gen = pd.DataFrame(disturbed_acc_data_hls_gen)
df_dist_hls_gen.head()

Unnamed: 0,metric,site_id,jaccard_score,total_samples,total_accuracy,precision,recall,f1_score,commission_error,ommission_error,supp_val,supp_prod_change,supp_prod_change_perc
0,dist-hls-gen,256,0.0,157,0.420382,1.0,0.0,0.0,0.0,1.0,91,0,0.0
1,dist-hls-gen,4,1.0,179,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
2,dist-hls-gen,261,0.0,168,0.547619,1.0,0.0,0.0,0.0,1.0,76,0,0.0
3,dist-hls-gen,263,1.0,208,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
4,dist-hls-gen,264,1.0,209,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0


In [44]:
df_dist_hls_veg = pd.DataFrame(disturbed_acc_data_hls_veg)
df_dist_hls_veg.head()

Unnamed: 0,metric,site_id,jaccard_score,total_samples,total_accuracy,precision,recall,f1_score,commission_error,ommission_error,supp_val,supp_prod_change,supp_prod_change_perc
0,dist-hls-veg,256,0.0,157,0.420382,1.0,0.0,0.0,0.0,1.0,91,0,0.0
1,dist-hls-veg,4,1.0,179,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
2,dist-hls-veg,261,0.0,168,0.547619,1.0,0.0,0.0,0.0,1.0,76,0,0.0
3,dist-hls-veg,263,1.0,208,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
4,dist-hls-veg,264,1.0,209,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0


In [45]:
AGG_COLUMNS = OTHER_KEYS + DISTURBED_METRIC_KEYS

In [46]:
df_dist = pd.concat([df_dist_rtc, df_dist_hls_gen, df_dist_hls_veg])
df_dist.head()

Unnamed: 0,metric,site_id,burst_id,jaccard_score,total_samples,total_accuracy,precision,recall,f1_score,commission_error,ommission_error,supp_val,supp_prod_change,supp_prod_change_perc
0,cusum_prob_max,104,T144-308010-IW2,0.0,33,0.909091,0.0,1.0,0.0,1.0,0.0,0,3,0.090909
1,cusum_vh,104,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
2,log_ratio_vh,104,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
3,mahalanobis_1d_max,104,T144-308010-IW2,0.0,33,0.0,0.0,1.0,0.0,1.0,0.0,0,33,1.0
4,mahalanobis_2d,104,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0


In [47]:
df_dist_agg_all = df_dist[['metric'] + AGG_COLUMNS].groupby('metric').agg(['mean', 'std'])
df_dist_agg_all

Unnamed: 0_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
cusum_prob_max,0.726558,0.438487,28.773585,7.030929,0.902705,0.212813,0.876101,0.321068,0.810091,0.389103,...,0.123899,0.321068,0.189909,0.389103,2.169811,5.483259,0.716981,1.916121,0.032859,0.114995
cusum_vh,0.773585,0.420499,28.754717,7.028935,0.920268,0.194978,1.0,0.0,0.773585,0.420499,...,0.0,0.0,0.226415,0.420499,2.169811,5.483259,0.0,0.0,0.0,0.0
dist-hls-gen,0.711864,0.456782,184.661017,38.038823,0.91116,0.207094,0.966102,0.182521,0.728814,0.448388,...,0.033898,0.182521,0.271186,0.448388,15.864407,38.694853,0.152542,0.86729,0.000751,0.004264
dist-hls-veg,0.728814,0.448388,184.661017,38.038823,0.911911,0.206077,1.0,0.0,0.728814,0.448388,...,0.0,0.0,0.271186,0.448388,15.864407,38.694853,0.0,0.0,0.0,0.0
log_ratio_vh,0.593173,0.474284,28.764151,7.032648,0.874014,0.202545,0.677401,0.459773,0.822947,0.363139,...,0.322599,0.459773,0.177053,0.363139,2.169811,5.483259,2.311321,4.216438,0.086937,0.16569
mahalanobis_1d_max,0.079732,0.194978,28.754717,7.028935,0.079732,0.194978,0.079732,0.194978,1.0,0.0,...,0.920268,0.194978,0.0,0.0,2.169811,5.483259,28.754717,7.028935,1.0,0.0
mahalanobis_2d,0.696281,0.441653,28.773585,7.030929,0.895327,0.205529,0.784119,0.404937,0.836111,0.354937,...,0.215881,0.404937,0.163889,0.354937,2.169811,5.483259,1.471698,3.420408,0.059346,0.147708
mahalanobis_vh,0.079732,0.194978,28.754717,7.028935,0.079732,0.194978,0.079732,0.194978,1.0,0.0,...,0.920268,0.194978,0.0,0.0,2.169811,5.483259,28.754717,7.028935,1.0,0.0
transformer,0.787703,0.39643,28.660377,7.139277,0.925396,0.176141,0.955882,0.196997,0.811688,0.380763,...,0.044118,0.196997,0.188312,0.380763,2.169811,5.483259,0.490566,2.029522,0.016873,0.066811


In [64]:
unique_metrics = sorted(list(set(metric_names)))
METRIC_ORDER = ['dist-hls-veg', 'dist-hls-gen'] + unique_metrics

In [65]:
df_dist_agg_all_count = df_dist[['metric'] + AGG_COLUMNS].groupby(['metric']).size().rename(index="count")#.reset_index(drop=False)
df_dist_agg_all['count'] = df_dist_agg_all_count
df_dist_agg_all = df_dist_agg_all.reindex(METRIC_ORDER)
df_dist_agg_all

Unnamed: 0_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc,count
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 21_level_1
metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
dist-hls-veg,0.728814,0.448388,184.661017,38.038823,0.911911,0.206077,1.0,0.0,0.728814,0.448388,...,0.0,0.271186,0.448388,15.864407,38.694853,0.0,0.0,0.0,0.0,59
dist-hls-gen,0.711864,0.456782,184.661017,38.038823,0.91116,0.207094,0.966102,0.182521,0.728814,0.448388,...,0.182521,0.271186,0.448388,15.864407,38.694853,0.152542,0.86729,0.000751,0.004264,59
cusum_prob_max,0.726558,0.438487,28.773585,7.030929,0.902705,0.212813,0.876101,0.321068,0.810091,0.389103,...,0.321068,0.189909,0.389103,2.169811,5.483259,0.716981,1.916121,0.032859,0.114995,106
cusum_vh,0.773585,0.420499,28.754717,7.028935,0.920268,0.194978,1.0,0.0,0.773585,0.420499,...,0.0,0.226415,0.420499,2.169811,5.483259,0.0,0.0,0.0,0.0,106
log_ratio_vh,0.593173,0.474284,28.764151,7.032648,0.874014,0.202545,0.677401,0.459773,0.822947,0.363139,...,0.459773,0.177053,0.363139,2.169811,5.483259,2.311321,4.216438,0.086937,0.16569,106
mahalanobis_1d_max,0.079732,0.194978,28.754717,7.028935,0.079732,0.194978,0.079732,0.194978,1.0,0.0,...,0.194978,0.0,0.0,2.169811,5.483259,28.754717,7.028935,1.0,0.0,106
mahalanobis_2d,0.696281,0.441653,28.773585,7.030929,0.895327,0.205529,0.784119,0.404937,0.836111,0.354937,...,0.404937,0.163889,0.354937,2.169811,5.483259,1.471698,3.420408,0.059346,0.147708,106
mahalanobis_vh,0.079732,0.194978,28.754717,7.028935,0.079732,0.194978,0.079732,0.194978,1.0,0.0,...,0.194978,0.0,0.0,2.169811,5.483259,28.754717,7.028935,1.0,0.0,106
transformer,0.787703,0.39643,28.660377,7.139277,0.925396,0.176141,0.955882,0.196997,0.811688,0.380763,...,0.196997,0.188312,0.380763,2.169811,5.483259,0.490566,2.029522,0.016873,0.066811,106


In [66]:
cm = sns.light_palette("blue", as_cmap=True)
df_dist_agg_all_styled = df_dist_agg_all.style.background_gradient(cmap=cm)
df_dist_agg_all_styled

Unnamed: 0_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,f1_score,f1_score,commission_error,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc,count
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 23_level_1
metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
dist-hls-veg,0.728814,0.448388,184.661017,38.038823,0.911911,0.206077,1.0,0.0,0.728814,0.448388,0.728814,0.448388,0.0,0.0,0.271186,0.448388,15.864407,38.694853,0.0,0.0,0.0,0.0,59
dist-hls-gen,0.711864,0.456782,184.661017,38.038823,0.91116,0.207094,0.966102,0.182521,0.728814,0.448388,0.711864,0.456782,0.033898,0.182521,0.271186,0.448388,15.864407,38.694853,0.152542,0.86729,0.000751,0.004264,59
cusum_prob_max,0.726558,0.438487,28.773585,7.030929,0.902705,0.212813,0.876101,0.321068,0.810091,0.389103,0.732023,0.436555,0.123899,0.321068,0.189909,0.389103,2.169811,5.483259,0.716981,1.916121,0.032859,0.114995,106
cusum_vh,0.773585,0.420499,28.754717,7.028935,0.920268,0.194978,1.0,0.0,0.773585,0.420499,0.773585,0.420499,0.0,0.0,0.226415,0.420499,2.169811,5.483259,0.0,0.0,0.0,0.0,106
log_ratio_vh,0.593173,0.474284,28.764151,7.032648,0.874014,0.202545,0.677401,0.459773,0.822947,0.363139,0.606442,0.470491,0.322599,0.459773,0.177053,0.363139,2.169811,5.483259,2.311321,4.216438,0.086937,0.16569,106
mahalanobis_1d_max,0.079732,0.194978,28.754717,7.028935,0.079732,0.194978,0.079732,0.194978,1.0,0.0,0.105607,0.237804,0.920268,0.194978,0.0,0.0,2.169811,5.483259,28.754717,7.028935,1.0,0.0,106
mahalanobis_2d,0.696281,0.441653,28.773585,7.030929,0.895327,0.205529,0.784119,0.404937,0.836111,0.354937,0.709325,0.4347,0.215881,0.404937,0.163889,0.354937,2.169811,5.483259,1.471698,3.420408,0.059346,0.147708,106
mahalanobis_vh,0.079732,0.194978,28.754717,7.028935,0.079732,0.194978,0.079732,0.194978,1.0,0.0,0.105607,0.237804,0.920268,0.194978,0.0,0.0,2.169811,5.483259,28.754717,7.028935,1.0,0.0,106
transformer,0.787703,0.39643,28.660377,7.139277,0.925396,0.176141,0.955882,0.196997,0.811688,0.380763,0.795617,0.391261,0.044118,0.196997,0.188312,0.380763,2.169811,5.483259,0.490566,2.029522,0.016873,0.066811,106


In [67]:
df_dist_agg_all_styled.to_excel(across_sites_dir / 'dist_all_sites.xlsx')

## By Disturbance type

In [52]:
df_dist_m = pd.merge(df_site, df_dist, on='site_id', how = 'right')
df_dist_m.head()

Unnamed: 0,site_id,change_label,change_type,metric,burst_id,jaccard_score,total_samples,total_accuracy,precision,recall,f1_score,commission_error,ommission_error,supp_val,supp_prod_change,supp_prod_change_perc
0,104,noChange,No Change,cusum_prob_max,T144-308010-IW2,0.0,33,0.909091,0.0,1.0,0.0,1.0,0.0,0,3,0.090909
1,104,noChange,No Change,cusum_vh,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
2,104,noChange,No Change,log_ratio_vh,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0
3,104,noChange,No Change,mahalanobis_1d_max,T144-308010-IW2,0.0,33,0.0,0.0,1.0,0.0,1.0,0.0,0,33,1.0
4,104,noChange,No Change,mahalanobis_2d,T144-308010-IW2,1.0,33,1.0,1.0,1.0,1.0,0.0,0.0,0,0,0.0


In [53]:
df_dist_agg_by_type = df_dist_m[['change_type', 'metric'] + AGG_COLUMNS].groupby(['change_type', 'metric']).agg(['mean', 'std'])
df_dist_agg_by_type

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
change_type,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Clear cut,cusum_prob_max,0.636364,,23.0,,0.826087,,1.0,,0.636364,,...,0.0,,0.363636,,11.0,,7.0,,0.304348,
Clear cut,cusum_vh,0.000000,,23.0,,0.521739,,1.0,,0.000000,,...,0.0,,1.000000,,11.0,,0.0,,0.000000,
Clear cut,dist-hls-gen,0.000000,,204.0,,0.441176,,0.0,,0.000000,,...,1.0,,1.000000,,108.0,,6.0,,0.029412,
Clear cut,dist-hls-veg,0.000000,,204.0,,0.470588,,1.0,,0.000000,,...,0.0,,1.000000,,108.0,,0.0,,0.000000,
Clear cut,log_ratio_vh,0.636364,,23.0,,0.826087,,1.0,,0.636364,,...,0.0,,0.363636,,11.0,,7.0,,0.304348,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wetland,log_ratio_vh,0.000000,,32.0,,0.843750,,0.0,,1.000000,,...,1.0,,0.000000,,0.0,,5.0,,0.156250,
Wetland,mahalanobis_1d_max,0.000000,,32.0,,0.000000,,0.0,,1.000000,,...,1.0,,0.000000,,0.0,,32.0,,1.000000,
Wetland,mahalanobis_2d,1.000000,,32.0,,1.000000,,1.0,,1.000000,,...,0.0,,0.000000,,0.0,,0.0,,0.000000,
Wetland,mahalanobis_vh,0.000000,,32.0,,0.000000,,0.0,,1.000000,,...,1.0,,0.000000,,0.0,,32.0,,1.000000,


In [54]:
df_dist_agg_by_type_count = df_dist_m[['change_type', 'metric'] + AGG_COLUMNS].groupby(['change_type', 'metric']).size().rename(index="count")#.reset_index(drop=False)
df_dist_agg_by_type_count.head()

change_type  metric        
Clear cut    cusum_prob_max    1
             cusum_vh          1
             dist-hls-gen      1
             dist-hls-veg      1
             log_ratio_vh      1
Name: count, dtype: int64

In [55]:
df_dist_agg_by_type['count'] = df_dist_agg_by_type_count
df_dist_agg_by_type.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 22_level_1
change_type,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Clear cut,cusum_prob_max,0.636364,,23.0,,0.826087,,1.0,,0.636364,,...,,0.363636,,11.0,,7.0,,0.304348,,1
Clear cut,cusum_vh,0.0,,23.0,,0.521739,,1.0,,0.0,,...,,1.0,,11.0,,0.0,,0.0,,1
Clear cut,dist-hls-gen,0.0,,204.0,,0.441176,,0.0,,0.0,,...,,1.0,,108.0,,6.0,,0.029412,,1
Clear cut,dist-hls-veg,0.0,,204.0,,0.470588,,1.0,,0.0,,...,,1.0,,108.0,,0.0,,0.0,,1
Clear cut,log_ratio_vh,0.636364,,23.0,,0.826087,,1.0,,0.636364,,...,,0.363636,,11.0,,7.0,,0.304348,,1


In [72]:
df_reindexed = df_dist_agg_by_type.reindex(pd.MultiIndex.from_product([df_dist_agg_by_type.index.levels[0], METRIC_ORDER], names=df_dist_agg_by_type.index.names))
df_reindexed

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 22_level_1
change_type,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Clear cut,dist-hls-veg,0.000000,,204.0,,0.470588,,1.0,,0.000000,,...,,1.000000,,108.0,,0.0,,0.000000,,1
Clear cut,dist-hls-gen,0.000000,,204.0,,0.441176,,0.0,,0.000000,,...,,1.000000,,108.0,,6.0,,0.029412,,1
Clear cut,cusum_prob_max,0.636364,,23.0,,0.826087,,1.0,,0.636364,,...,,0.363636,,11.0,,7.0,,0.304348,,1
Clear cut,cusum_vh,0.000000,,23.0,,0.521739,,1.0,,0.000000,,...,,1.000000,,11.0,,0.0,,0.000000,,1
Clear cut,log_ratio_vh,0.636364,,23.0,,0.826087,,1.0,,0.636364,,...,,0.363636,,11.0,,7.0,,0.304348,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wetland,log_ratio_vh,0.000000,,32.0,,0.843750,,0.0,,1.000000,,...,,0.000000,,0.0,,5.0,,0.156250,,1
Wetland,mahalanobis_1d_max,0.000000,,32.0,,0.000000,,0.0,,1.000000,,...,,0.000000,,0.0,,32.0,,1.000000,,1
Wetland,mahalanobis_2d,1.000000,,32.0,,1.000000,,1.0,,1.000000,,...,,0.000000,,0.0,,0.0,,0.000000,,1
Wetland,mahalanobis_vh,0.000000,,32.0,,0.000000,,0.0,,1.000000,,...,,0.000000,,0.0,,32.0,,1.000000,,1


In [73]:
cm = sns.light_palette("blue", as_cmap=True)
df_dist_agg_by_type_styled = df_reindexed.style.background_gradient(cmap=cm)
df_dist_agg_by_type_styled

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,f1_score,f1_score,commission_error,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 24_level_1
change_type,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
Clear cut,dist-hls-veg,0.0,,204.0,,0.470588,,1.0,,0.0,,0.0,,0.0,,1.0,,108.0,,0.0,,0.0,,1
Clear cut,dist-hls-gen,0.0,,204.0,,0.441176,,0.0,,0.0,,0.0,,1.0,,1.0,,108.0,,6.0,,0.029412,,1
Clear cut,cusum_prob_max,0.636364,,23.0,,0.826087,,1.0,,0.636364,,0.777778,,0.0,,0.363636,,11.0,,7.0,,0.304348,,1
Clear cut,cusum_vh,0.0,,23.0,,0.521739,,1.0,,0.0,,0.0,,0.0,,1.0,,11.0,,0.0,,0.0,,1
Clear cut,log_ratio_vh,0.636364,,23.0,,0.826087,,1.0,,0.636364,,0.777778,,0.0,,0.363636,,11.0,,7.0,,0.304348,,1
Clear cut,mahalanobis_1d_max,0.478261,,23.0,,0.478261,,0.478261,,1.0,,0.647059,,0.521739,,0.0,,11.0,,23.0,,1.0,,1
Clear cut,mahalanobis_2d,0.636364,,23.0,,0.826087,,1.0,,0.636364,,0.777778,,0.0,,0.363636,,11.0,,7.0,,0.304348,,1
Clear cut,mahalanobis_vh,0.478261,,23.0,,0.478261,,0.478261,,1.0,,0.647059,,0.521739,,0.0,,11.0,,23.0,,1.0,,1
Clear cut,transformer,0.545455,,23.0,,0.782609,,1.0,,0.545455,,0.705882,,0.0,,0.454545,,11.0,,6.0,,0.26087,,1
Clear cut/fire,dist-hls-veg,0.0,,233.0,,0.939914,,1.0,,0.0,,0.0,,0.0,,1.0,,14.0,,0.0,,0.0,,1


In [57]:
 df_dist_agg_by_type_styled.to_excel(across_sites_dir / 'dist_all_sites_by_type.xlsx')

# By Change Label

In [58]:
df_dist_agg_by_label = df_dist_m[['change_label', 'metric'] + AGG_COLUMNS].groupby(['change_label', 'metric']).agg(['mean', 'std'])
df_dist_agg_by_label.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
change_label,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
OCmaj,cusum_prob_max,0.166667,0.408248,21.833333,13.482087,0.632639,0.343354,0.333333,0.516398,0.333333,0.516398,...,0.666667,0.516398,0.666667,0.516398,2.166667,2.041241,3.833333,3.763863,0.274306,0.374633
OCmaj,cusum_vh,0.333333,0.516398,21.833333,13.482087,0.906944,0.073488,1.0,0.0,0.333333,0.516398,...,0.0,0.0,0.666667,0.516398,2.166667,2.041241,0.0,0.0,0.0,0.0
OCmaj,dist-hls-gen,0.333333,0.57735,167.666667,53.519467,0.927262,0.074375,1.0,0.0,0.333333,0.57735,...,0.0,0.0,0.666667,0.57735,13.666667,17.214335,0.0,0.0,0.0,0.0
OCmaj,dist-hls-veg,0.333333,0.57735,167.666667,53.519467,0.927262,0.074375,1.0,0.0,0.333333,0.57735,...,0.0,0.0,0.666667,0.57735,13.666667,17.214335,0.0,0.0,0.0,0.0
OCmaj,log_ratio_vh,0.0,0.0,21.833333,13.482087,0.526736,0.277309,0.166667,0.408248,0.333333,0.516398,...,0.833333,0.408248,0.666667,0.516398,2.166667,2.041241,7.0,4.516636,0.380208,0.33219


In [74]:
df_dist_agg_by_label_count = df_dist_m[['change_label', 'metric'] + AGG_COLUMNS].groupby(['change_label', 'metric']).size().rename(index="count")#.reset_index(drop=False)
df_dist_agg_by_label_count.head()

change_label  metric        
OCmaj         cusum_prob_max    6
              cusum_vh          6
              dist-hls-gen      3
              dist-hls-veg      3
              log_ratio_vh      6
Name: count, dtype: int64

In [75]:
df_dist_agg_by_label['count'] = df_dist_agg_by_label_count
df_dist_agg_by_label

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 22_level_1
change_label,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
OCmaj,cusum_prob_max,0.166667,0.408248,21.833333,13.482087,0.632639,0.343354,0.333333,0.516398,0.333333,0.516398,...,0.516398,0.666667,0.516398,2.166667,2.041241,3.833333,3.763863,0.274306,0.374633,6
OCmaj,cusum_vh,0.333333,0.516398,21.833333,13.482087,0.906944,0.073488,1.0,0.0,0.333333,0.516398,...,0.0,0.666667,0.516398,2.166667,2.041241,0.0,0.0,0.0,0.0,6
OCmaj,dist-hls-gen,0.333333,0.57735,167.666667,53.519467,0.927262,0.074375,1.0,0.0,0.333333,0.57735,...,0.0,0.666667,0.57735,13.666667,17.214335,0.0,0.0,0.0,0.0,3
OCmaj,dist-hls-veg,0.333333,0.57735,167.666667,53.519467,0.927262,0.074375,1.0,0.0,0.333333,0.57735,...,0.0,0.666667,0.57735,13.666667,17.214335,0.0,0.0,0.0,0.0,3
OCmaj,log_ratio_vh,0.0,0.0,21.833333,13.482087,0.526736,0.277309,0.166667,0.408248,0.333333,0.516398,...,0.408248,0.666667,0.516398,2.166667,2.041241,7.0,4.516636,0.380208,0.33219,6
OCmaj,mahalanobis_1d_max,0.093056,0.073488,21.833333,13.482087,0.093056,0.073488,0.093056,0.073488,1.0,0.0,...,0.073488,0.0,0.0,2.166667,2.041241,21.833333,13.482087,1.0,0.0,6
OCmaj,mahalanobis_2d,0.166667,0.408248,21.833333,13.482087,0.587847,0.340339,0.333333,0.516398,0.333333,0.516398,...,0.516398,0.666667,0.516398,2.166667,2.041241,5.166667,4.708149,0.319097,0.365862,6
OCmaj,mahalanobis_vh,0.093056,0.073488,21.833333,13.482087,0.093056,0.073488,0.093056,0.073488,1.0,0.0,...,0.073488,0.0,0.0,2.166667,2.041241,21.833333,13.482087,1.0,0.0,6
OCmaj,transformer,0.333333,0.516398,21.833333,13.482087,0.870486,0.127165,0.833333,0.408248,0.333333,0.516398,...,0.408248,0.666667,0.516398,2.166667,2.041241,1.166667,2.857738,0.036458,0.089304,6
OCmin,cusum_prob_max,1.0,0.0,28.4,7.595111,1.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15


In [76]:
df_reindexed = df_dist_agg_by_label.reindex(pd.MultiIndex.from_product([df_dist_agg_by_label.index.levels[0], METRIC_ORDER], names=df_dist_agg_by_label.index.names))
df_reindexed

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,...,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 22_level_1
change_label,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
OCmaj,dist-hls-veg,0.333333,0.57735,167.666667,53.519467,0.927262,0.074375,1.0,0.0,0.333333,0.57735,...,0.0,0.666667,0.57735,13.666667,17.214335,0.0,0.0,0.0,0.0,3
OCmaj,dist-hls-gen,0.333333,0.57735,167.666667,53.519467,0.927262,0.074375,1.0,0.0,0.333333,0.57735,...,0.0,0.666667,0.57735,13.666667,17.214335,0.0,0.0,0.0,0.0,3
OCmaj,cusum_prob_max,0.166667,0.408248,21.833333,13.482087,0.632639,0.343354,0.333333,0.516398,0.333333,0.516398,...,0.516398,0.666667,0.516398,2.166667,2.041241,3.833333,3.763863,0.274306,0.374633,6
OCmaj,cusum_vh,0.333333,0.516398,21.833333,13.482087,0.906944,0.073488,1.0,0.0,0.333333,0.516398,...,0.0,0.666667,0.516398,2.166667,2.041241,0.0,0.0,0.0,0.0,6
OCmaj,log_ratio_vh,0.0,0.0,21.833333,13.482087,0.526736,0.277309,0.166667,0.408248,0.333333,0.516398,...,0.408248,0.666667,0.516398,2.166667,2.041241,7.0,4.516636,0.380208,0.33219,6
OCmaj,mahalanobis_1d_max,0.093056,0.073488,21.833333,13.482087,0.093056,0.073488,0.093056,0.073488,1.0,0.0,...,0.073488,0.0,0.0,2.166667,2.041241,21.833333,13.482087,1.0,0.0,6
OCmaj,mahalanobis_2d,0.166667,0.408248,21.833333,13.482087,0.587847,0.340339,0.333333,0.516398,0.333333,0.516398,...,0.516398,0.666667,0.516398,2.166667,2.041241,5.166667,4.708149,0.319097,0.365862,6
OCmaj,mahalanobis_vh,0.093056,0.073488,21.833333,13.482087,0.093056,0.073488,0.093056,0.073488,1.0,0.0,...,0.073488,0.0,0.0,2.166667,2.041241,21.833333,13.482087,1.0,0.0,6
OCmaj,transformer,0.333333,0.516398,21.833333,13.482087,0.870486,0.127165,0.833333,0.408248,0.333333,0.516398,...,0.408248,0.666667,0.516398,2.166667,2.041241,1.166667,2.857738,0.036458,0.089304,6
OCmin,dist-hls-veg,1.0,0.0,182.666667,28.138053,1.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9


In [77]:
cm = sns.light_palette("blue", as_cmap=True)
df_dist_agg_by_label_styled = df_reindexed.style.background_gradient(cmap=cm)
df_dist_agg_by_type_styled

Unnamed: 0_level_0,Unnamed: 1_level_0,jaccard_score,jaccard_score,total_samples,total_samples,total_accuracy,total_accuracy,precision,precision,recall,recall,f1_score,f1_score,commission_error,commission_error,ommission_error,ommission_error,supp_val,supp_val,supp_prod_change,supp_prod_change,supp_prod_change_perc,supp_prod_change_perc,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,Unnamed: 24_level_1
change_type,metric,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
Clear cut,dist-hls-veg,0.0,,204.0,,0.470588,,1.0,,0.0,,0.0,,0.0,,1.0,,108.0,,0.0,,0.0,,1
Clear cut,dist-hls-gen,0.0,,204.0,,0.441176,,0.0,,0.0,,0.0,,1.0,,1.0,,108.0,,6.0,,0.029412,,1
Clear cut,cusum_prob_max,0.636364,,23.0,,0.826087,,1.0,,0.636364,,0.777778,,0.0,,0.363636,,11.0,,7.0,,0.304348,,1
Clear cut,cusum_vh,0.0,,23.0,,0.521739,,1.0,,0.0,,0.0,,0.0,,1.0,,11.0,,0.0,,0.0,,1
Clear cut,log_ratio_vh,0.636364,,23.0,,0.826087,,1.0,,0.636364,,0.777778,,0.0,,0.363636,,11.0,,7.0,,0.304348,,1
Clear cut,mahalanobis_1d_max,0.478261,,23.0,,0.478261,,0.478261,,1.0,,0.647059,,0.521739,,0.0,,11.0,,23.0,,1.0,,1
Clear cut,mahalanobis_2d,0.636364,,23.0,,0.826087,,1.0,,0.636364,,0.777778,,0.0,,0.363636,,11.0,,7.0,,0.304348,,1
Clear cut,mahalanobis_vh,0.478261,,23.0,,0.478261,,0.478261,,1.0,,0.647059,,0.521739,,0.0,,11.0,,23.0,,1.0,,1
Clear cut,transformer,0.545455,,23.0,,0.782609,,1.0,,0.545455,,0.705882,,0.0,,0.454545,,11.0,,6.0,,0.26087,,1
Clear cut/fire,dist-hls-veg,0.0,,233.0,,0.939914,,1.0,,0.0,,0.0,,0.0,,1.0,,14.0,,0.0,,0.0,,1


In [79]:
 df_dist_agg_by_type_styled.to_excel(across_sites_dir / 'dist_all_sites_by_label.xlsx')