# Report on Segmentation Results #

In [116]:
from pathlib import Path

# Remote Sensing DDPM
from remote_sensing_ddpm.train_downstream_tasks import create_wandb_run_name

# Lit-Diffusion
from lit_diffusion.diffusion_base.constants import LOGGING_TRAIN_PREFIX, LOGGING_VAL_PREFIX
from lit_diffusion.constants import (
    PL_WANDB_LOGGER_CONFIG_KEY,
)

# FWTP
from from_wandb_to_paper.data.wandb_data import get_wandb_run_histories

## Get Data ##

In [117]:
# Directory that is globbed below for backbone (feature extractor) `*.yaml` configs.
FEATURE_EXTRACTOR_FILES = Path("../../config/model_configs/downstream_tasks/feature_extractors")
# NOTE(review): the constant says "CLASSIFICATION" but the path points at the
# segmentation config -- confirm the name; it is kept unchanged for compatibility.
EWC_CLASSIFICATION_CONFIG_PATH = Path("../../config/model_configs/downstream_tasks/tier_1/ewc-segmentation.yaml")
# One run name per backbone config, built from the backbone and downstream-head
# file names. `Path.glob` yields entries in arbitrary (filesystem-dependent)
# order, so the paths are sorted to keep this list reproducible across machines.
EXPERIMENT_NAMES = [
    create_wandb_run_name(backbone_name=backbone_path.name, downstream_head_name=EWC_CLASSIFICATION_CONFIG_PATH.name)
    for backbone_path in sorted(FEATURE_EXTRACTOR_FILES.glob("*.yaml"))
]
# NOTE(review): "WAND" looks like a typo for "WANDB"; the name is kept because
# the data-loading cell below references it.
WAND_PROJECT_ID = "ssl-diffusion/rs-ddpm-ms-segmentation"
# Restricts the query to five runs selected by their "name" field
# (presumably the W&B run identifiers -- TODO confirm).
RUN_FILTER = {"name": {"$in": ["2p2n4tv2", "aa2yxpin", "ues188te", "pws0vl7w", "exffot4s"]}}

In [118]:
# Fetch the logged histories of the runs matched by RUN_FILTER in the project.
# The result is consumed below as a mapping whose values carry a "history" entry.
run_histories = get_wandb_run_histories(run_filter=RUN_FILTER, project_id=WAND_PROJECT_ID)

Loading history: 38835it [00:22, 1717.80it/s]
Loading history: 38835it [00:20, 1940.61it/s]
Loading history: 38835it [00:21, 1775.16it/s]
Loading history: 38835it [00:32, 1201.29it/s]
Loading history: 38835it [00:21, 1768.75it/s]


## Report Metrics ##

In [7]:
# TODO: Graph showing the epoch validation trajectory of the mIoU/Accuracy for each modality

In [119]:
# TODO: Table showing metrics for the mIoU/accuracy of each modality with standard deviation
# pandas is imported here because no other cell of the notebook imports it;
# without this a fresh "Restart & Run All" fails with a NameError on `pd`.
import pandas as pd

# Replace each run's raw "history" entry with a DataFrame, in place, so the
# aggregation cell below can filter and group the logged rows.
for run_data in run_histories.values():
    run_data["history"] = pd.DataFrame(run_data["history"])

In [120]:
import numpy as np

HISTORY_KEY = "history"
EPOCH_KEY = "epoch"

metrics_of_interest = ["train/mIoU", "val/mIoU"]
# Per-metric results: metric name -> {"mean": {epoch: value}, "std": {epoch: value}}.
# Every metric is stored here, so results for earlier metrics are no longer
# lost when the per-metric dicts below are rebuilt on the next iteration.
aggregate = {}
for metric in metrics_of_interest:
    # One {epoch: mean metric value within that epoch} mapping per run.
    run_metrics = []
    for run_data in run_histories.values():
        current_history_df = run_data[HISTORY_KEY]
        # Keep only the rows where this metric was actually logged.
        logged_rows = current_history_df.loc[current_history_df[metric].notnull(), [metric, EPOCH_KEY]]
        # Column-selected mean instead of `apply(lambda x: x.mean())`, which
        # also operated on the grouping column (deprecated in pandas).
        run_metrics.append(logged_rows.groupby(EPOCH_KEY)[metric].mean().to_dict())
    # Epochs are taken from the first run; every other run is assumed to have
    # logged the same epochs (a missing epoch raises KeyError below).
    epochs = run_metrics[0].keys() if run_metrics else ()

    mean_aggregated_metrics = {}
    std_aggregated_metrics = {}
    for epoch in epochs:
        values_across_runs = [run[epoch] for run in run_metrics]
        mean_aggregated_metrics[epoch] = np.mean(values_across_runs)
        # NOTE(review): np.std defaults to the population std (ddof=0);
        # confirm whether the sample std (ddof=1) is wanted across runs.
        std_aggregated_metrics[epoch] = np.std(values_across_runs)
    aggregate[metric] = {"mean": mean_aggregated_metrics, "std": std_aggregated_metrics}
# As before, only the last metric in `metrics_of_interest` is printed;
# the results for all metrics are available in `aggregate`.
print(mean_aggregated_metrics)
print(std_aggregated_metrics)

{0.0: 0.3656998038291931, 1.0: 0.38486196398735045, 2.0: 0.3921057641506195, 3.0: 0.39617117047309874, 4.0: 0.39960357546806335}
{0.0: 0.010673600776803214, 1.0: 0.009712695409236307, 2.0: 0.0063447227788231, 3.0: 0.003367728289059368, 4.0: 0.0013683929234165457}


In [8]:
# TODO: Table showing classwise metrics for the mIoU/accuracy of each modality with standard deviation

## Report Label Fraction Experiments ##

In [None]:
# TODO: Graph showing the mIoU/accuracy per label fraction used for each modality