First, let's set some of the parameters used throughout this notebook. We'll be focused on a single geographic split -- change the split ID to see another run.

In [None]:
import pathlib

# Folder holding the geographic-split experiments.
geo_dir = pathlib.Path("/datadrive", "glaciers", "expers", "geographic")
# The single split examined in this notebook; change the trailing ID to
# inspect another run.
split_dir = geo_dir.joinpath("splits", "2")
# Local checkout of the glacier_mapping project.
root_dir = pathlib.Path("/home/kris", "glacier_mapping")

Next, let's load in the final trained model from this experiment.

In [None]:
import torch
import yaml
from glacier_mapping.models.frame import Framework
from glacier_mapping.models.metrics import diceloss
from glacier_mapping.data.data import fetch_loaders
from glacier_mapping.train import validate
from addict import Dict

# Read the training configuration. The context manager closes the file
# handle as soon as parsing finishes instead of leaving it open.
with open(root_dir / "conf/train.yaml", "r") as conf_file:
    conf = Dict(yaml.safe_load(conf_file))

# Rebuild the framework from the options stored in the configuration, using
# a dice loss with a channel-wise softmax activation and weights [1, 1, 0].
frame = Framework(
    model_opts=conf.model_opts,
    optimizer_opts=conf.optim_opts,
    reg_opts=conf.reg_opts,
    loss_fn=diceloss(act=torch.nn.Softmax(dim=1), w=[1, 1, 0]),
)

# Restore the final checkpoint saved under this split's "demo" run.
state = torch.load(split_dir / "runs/demo/models/model_final.pt")
frame.model.load_state_dict(state)

At this point, we can compute metrics across all the batches in the train, development, and test sets.

In [None]:
from glacier_mapping.models.metrics import tp_fp_fn

# Loaders for this split; the result is indexed by "train" and "val" below.
# NOTE(review): the positional 16 is presumably the batch size -- confirm
# against fetch_loaders.
train_loaders = fetch_loaders(split_dir, 16)
# Same call with dev_folder="test"; its "val" entry is what the notebook
# later reports as the "test" split (presumably the held-out data -- confirm).
test_loaders = fetch_loaders(split_dir, 16, dev_folder = "test")


def error_row(rates, ix=1, split="train"):
    """Build one metrics record for a single channel of a batch.

    Args:
        rates: A ``(tp, fp, fn)`` triple of torch tensors, each indexable by
            ``ix`` (presumably one entry per class channel -- confirm against
            ``tp_fp_fn``).
        ix: Index of the channel to report.
        split: Label stored under the record's ``"split"`` key.

    Returns:
        A dict with keys ``"split"``, ``"tp"``, ``"fp"``, ``"fn"`` and
        ``"IoU"``, where IoU is ``tp / (tp + fp + fn)``. When that
        denominator is zero the IoU is NaN.
    """
    import numpy as np

    # detach() keeps the NumPy conversion valid even if a tensor tracks
    # gradients; it is a no-op otherwise.
    tp, fp, fn = (r.detach().cpu().numpy()[ix] for r in rates)

    # A batch with no pixels of this class gives 0 / 0. The result stays NaN,
    # but the NumPy RuntimeWarning is silenced so strict warning filters do
    # not turn an expected case into a failure.
    with np.errstate(divide="ignore", invalid="ignore"):
        iou = tp / (tp + fp + fn)

    return {
        "split": split,
        "tp": tp,
        "fp": fp,
        "fn": fn,
        "IoU": iou,
    }


def aggregate_rates(metrics, loader, split="train", threshold=0.4, device=None):
    """Append per-batch error records for every batch yielded by ``loader``.

    Uses the module-level ``frame`` for inference and ``tp_fp_fn`` /
    ``error_row`` to turn each batch into records.

    Args:
        metrics: Dict with list values under ``"debris"`` and
            ``"clean_ice"``; it is extended in place.
        loader: Iterable yielding ``(x, y)`` tensor pairs.
        split: Label recorded on every appended row.
        threshold: Predictions strictly greater than this value are treated
            as positive. Defaults to the previously hard-coded 0.4.
        device: Device the batches are moved to. Defaults to CUDA when it is
            available and to the CPU otherwise.

    Returns:
        The same ``metrics`` dict, for convenient reassignment.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Evaluation only: no autograd graph is needed for these forward passes.
    with torch.no_grad():
        for x, y in loader:
            y_hat = frame.infer(x.to(device))
            rates = tp_fp_fn(y_hat > threshold, y.to(device))
            # Channel 1 is recorded as debris, channel 0 as clean ice.
            metrics["debris"].append(error_row(rates, 1, split))
            metrics["clean_ice"].append(error_row(rates, 0, split))

    return metrics

# Collect per-batch records for each evaluation split. The loaders built with
# dev_folder="test" expose their data under the "val" key, and those rows are
# labelled "test".
metrics = {"debris": [], "clean_ice": []}
for loader, split in (
    (train_loaders["train"], "train"),
    (train_loaders["val"], "val"),
    (test_loaders["val"], "test"),
):
    metrics = aggregate_rates(metrics, loader, split)

Now we can compute the overall IoU for each split by summing the true positive, false positive, and false negative pixel counts over all batches: IoU = TP / (TP + FP + FN).

In [None]:
import pandas as pd

debris = pd.DataFrame(metrics["debris"])
clean_ice = pd.DataFrame(metrics["clean_ice"])


def _overall_iou(rows):
    """Return the pooled IoU of ``rows``: sum(tp) / (sum(tp) + sum(fp) + sum(fn)).

    Returns NaN when the rows contain no counted pixels at all.
    """
    tp, fp, fn = rows["tp"].sum(), rows["fp"].sum(), rows["fn"].sum()
    denom = tp + fp + fn
    return tp / denom if denom > 0 else float("nan")


# Report one pooled IoU per split and class. Each line is labelled so the
# numbers can be read without counting output rows.
for split in ["train", "val", "test"]:
    debris_ = debris[debris["split"] == split]
    clean_ = clean_ice[clean_ice["split"] == split]
    print(f"{split} debris IoU: {_overall_iou(debris_)}")
    print(f"{split} clean ice IoU: {_overall_iou(clean_)}")

It'd be useful to have some context about which slices get the highest / lowest errors. Do you get worse performance the further away you get from the training area? To answer this, we'll want to look at the slice metadata.

In [None]:
import geopandas as gpd
# Load the slice metadata and reproject it to EPSG:4326.
# NOTE(review): crs= is not a documented read_file parameter and is passed on
# to the underlying reader; confirm it really sets the source CRS. If it does
# not, set_crs("EPSG:3857", allow_override=True) is probably what was meant.
slice_meta = gpd.read_file(
    "/datadrive/glaciers/processed_exper/slices/slices.geojson",
    crs="EPSG:3857",
).to_crs("EPSG:4326")
# File name of each slice image without directory or extension, used as a
# lookup key.
slice_meta["img_basename"] = [
    pathlib.Path(img_path).stem for img_path in slice_meta["img_slice"]
]

In [None]:
# Look up the metadata row(s) for one example slice.
# NOTE(review): this expression is not the last statement in its cell, so a
# notebook will presumably not display it -- confirm whether that is intended.
slice_meta[slice_meta["img_basename"] == "slice_11_img_081"]

# Image files behind each evaluation split, keyed by the split labels used
# for the metrics.
paths = {
    "train": train_loaders["train"].dataset.img_files,
    "val": train_loaders["val"].dataset.img_files,
    "test": test_loaders["val"].dataset.img_files,
}