In [15]:
import rasterio as rio
import geopandas as gpd
from pathlib import Path
import pandas as pd
import numpy as np
import omnicloudmask

In [16]:
# Record the installed OmniCloudMask version; it is interpolated into the
# name of the predictions folder below.
ocm_version = omnicloudmask.__version__
ocm_version

'1.5.0'

In [17]:
# Predictions live in a version-specific sub-folder of ./dataset, resolved
# relative to the current working directory.
dataset_dir = Path.cwd().joinpath("dataset")
preds_dir = dataset_dir.joinpath(f"OCM preds v{ocm_version}")
# Display whether the predictions folder for this version is present.
preds_dir.exists()

True

In [18]:
# Load the validation points (one row per point, carrying the raster name,
# the reference class and the point geometry) from the GeoPackage that sits
# in the current working directory.
val_points_path = Path.cwd().joinpath("PlanetScope_validation.gpkg")
val_points = gpd.read_file(val_points_path)
val_points.head()

Unnamed: 0,raster name,class_name,geometry
0,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.8056 64.17455)
1,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.79537 64.18172)
2,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.74997 64.17825)
3,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.7839 64.20779)
4,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.73451 64.19856)


In [19]:
# Collect every GeoTIFF in the predictions folder. Path.glob yields entries in
# arbitrary, filesystem-dependent order, so sort them to make the processing
# order (and any diagnostics printed while sampling) reproducible across runs.
masks = sorted(preds_dir.glob("*.tif"))
len(masks)

18

In [20]:
# Mask pixel values grouped by the evaluation class they are remapped to
# when the predictions are sampled.
clear_values = [0]
cloud_values = [1, 2]
shadow_values = [3]

In [None]:
# For every prediction mask, sample the mask at the validation points that
# belong to the same scene and store the predicted class name in the
# "Prediction" column of val_points.
for mask_path in masks:
    file_name = mask_path.stem
    # Scene id = first five "_"-separated tokens of the mask file name plus a
    # fixed suffix; this is the key matched against the "raster name" column.
    scene_name = "_".join(file_name.split("_")[:5]) + "_AnalyticMS_SR_8b"
    filter_val_points = val_points[val_points["raster name"] == scene_name]

    # Open the raster in a context manager so the dataset handle is closed
    # after sampling instead of leaking one open file per mask.
    with rio.open(mask_path) as src:
        # Points must be in the raster's CRS before they can be sampled.
        reprojected_val_points = filter_val_points.to_crs(src.crs)
        x, y = (
            reprojected_val_points["geometry"].x.to_list(),
            reprojected_val_points["geometry"].y.to_list(),
        )
        # sample_gen is lazy, so it has to be consumed while src is still open.
        # Only the first band of each sample is used.
        pred_values = [
            int(pred[0])
            for pred in rio.sample.sample_gen(src, zip(x, y), masked=False)  # type: ignore
        ]

    remapped_values = []
    for val in pred_values:
        if val in cloud_values:
            remapped_values.append("cloud")
        elif val in shadow_values:
            remapped_values.append("shadow")
        elif val in clear_values:
            remapped_values.append("clear")
        else:
            print(f"unexpected value: {val}")
            # Keep one entry per point so the list stays aligned with the
            # index used below; the point is recorded as having no prediction.
            remapped_values.append(None)

    # place back into the original dataframe
    val_points.loc[filter_val_points.index, "Prediction"] = remapped_values

In [22]:
# Inspect the validation points now that the "Prediction" column is filled in.
val_points

Unnamed: 0,raster name,class_name,geometry,Prediction
0,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.8056 64.17455),cloud
1,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.79537 64.18172),cloud
2,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.74997 64.17825),cloud
3,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.7839 64.20779),cloud
4,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.73451 64.19856),cloud
...,...,...,...,...
5218,20231023_100346_43_24bf_3B_AnalyticMS_SR_8b,clear,POINT (-6.06016 12.37267),clear
5219,20231023_100346_43_24bf_3B_AnalyticMS_SR_8b,clear,POINT (-6.07231 12.37101),clear
5220,20231023_100346_43_24bf_3B_AnalyticMS_SR_8b,clear,POINT (-6.08445 12.3684),clear
5221,20231023_100346_43_24bf_3B_AnalyticMS_SR_8b,clear,POINT (-6.06939 12.34468),clear


In [23]:
# One-hot encode the reference labels ("class_name") and the model
# predictions ("Prediction"). Reference indicators keep the bare class name;
# prediction indicators get a "pred_" prefix.
truth_dummies = pd.get_dummies(val_points["class_name"])
pred_dummies = pd.get_dummies(val_points["Prediction"], prefix="pred")

# Drop indicator columns left over from a previous run of this cell. Without
# this, re-running the cell appends duplicate column names, and selecting
# val_points["cloud"] would then return a DataFrame instead of a Series.
# On a first run there is nothing to drop (errors="ignore").
indicator_columns = [*truth_dummies.columns, *pred_dummies.columns]
val_points = pd.concat(
    [
        val_points.drop(columns=indicator_columns, errors="ignore"),
        truth_dummies,
        pred_dummies,
    ],
    axis=1,
)

val_points.head(5)

Unnamed: 0,raster name,class_name,geometry,Prediction,clear,cloud,shadow,pred_clear,pred_cloud,pred_shadow
0,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.8056 64.17455),cloud,False,True,False,False,True,False
1,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.79537 64.18172),cloud,False,True,False,False,True,False
2,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.74997 64.17825),cloud,False,True,False,False,True,False
3,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.7839 64.20779),cloud,False,True,False,False,True,False
4,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.73451 64.19856),cloud,False,True,False,False,True,False


In [24]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is zero."""
    if denominator == 0:
        return 0
    return numerator / denominator


def get_stats(labels, preds):
    """Compute binary confusion-matrix counts and accuracy metrics.

    Args:
        labels: Reference indicators (boolean or 0/1 values), one per sample.
        preds: Predicted indicators (boolean or 0/1 values), same length as
            ``labels``.

    Returns:
        dict with the keys:
            "TP", "TN", "FP", "FN": confusion-matrix counts.
            "UA": TP / (TP + FP).
            "PA": TP / (TP + FN).
            "OA": (TP + TN) / number of samples.
            "BOA": mean of PA and TN / (TN + FP).
        Any ratio whose denominator is zero is reported as 0, so that empty
        input or input without negative samples yields finite values rather
        than NaN.
    """
    tp = np.sum(labels * preds)
    tn = np.sum((1 - labels) * (1 - preds))
    fp = np.sum((1 - labels) * preds)
    fn = np.sum(labels * (1 - preds))

    ua = _safe_ratio(tp, tp + fp)
    pa = _safe_ratio(tp, tp + fn)
    # TN / (TN + FP): the second term of the balanced accuracy. It was divided
    # without a guard before, unlike UA and PA.
    specificity = _safe_ratio(tn, tn + fp)

    return {
        "TP": tp,
        "TN": tn,
        "FP": fp,
        "FN": fn,
        "UA": ua,
        "PA": pa,
        "OA": _safe_ratio(tp + tn, tp + tn + fp + fn),
        "BOA": 0.5 * (pa + specificity),
    }

In [25]:
# Classes to evaluate; each name is looked up in val_points both as a
# reference column and, with a "pred_" prefix, as a prediction column.
classes = ["clear", "cloud", "shadow"]

In [26]:
# Inspect the validation points together with their one-hot indicator columns.
val_points

Unnamed: 0,raster name,class_name,geometry,Prediction,clear,cloud,shadow,pred_clear,pred_cloud,pred_shadow
0,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.8056 64.17455),cloud,False,True,False,False,True,False
1,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.79537 64.18172),cloud,False,True,False,False,True,False
2,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.74997 64.17825),cloud,False,True,False,False,True,False
3,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.7839 64.20779),cloud,False,True,False,False,True,False
4,20231021_172556_48_24ab_3B_AnalyticMS_SR_8b,cloud,POINT (-103.73451 64.19856),cloud,False,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...
5218,20231023_100346_43_24bf_3B_AnalyticMS_SR_8b,clear,POINT (-6.06016 12.37267),clear,True,False,False,True,False,False
5219,20231023_100346_43_24bf_3B_AnalyticMS_SR_8b,clear,POINT (-6.07231 12.37101),clear,True,False,False,True,False,False
5220,20231023_100346_43_24bf_3B_AnalyticMS_SR_8b,clear,POINT (-6.08445 12.3684),clear,True,False,False,True,False,False
5221,20231023_100346_43_24bf_3B_AnalyticMS_SR_8b,clear,POINT (-6.06939 12.34468),clear,True,False,False,True,False,False


In [27]:
# Per-class statistics: compare each reference indicator column with its
# "pred_"-prefixed prediction column.
class_stats = {
    name: get_stats(val_points[name], val_points[f"pred_{name}"])
    for name in classes
}

In [28]:
# Build a table with one row per class and one column per statistic, then
# turn the class name from the index into a regular "Class" column.
stats_by_class = pd.DataFrame(class_stats).T
stats_by_class.index.name = "Class"
model_summary = stats_by_class.reset_index()
model_summary

Unnamed: 0,Class,TP,TN,FP,FN,UA,PA,OA,BOA
0,clear,1699.0,3433.0,49.0,42.0,0.971968,0.975876,0.982577,0.980902
1,cloud,1716.0,3455.0,27.0,25.0,0.984509,0.98564,0.990044,0.988943
2,shadow,1707.0,3457.0,25.0,34.0,0.985566,0.980471,0.988704,0.986646
