# Setup

In [None]:
import geopandas as gpd
import pandas as pd
import leafmap as leafmap
from shapely.ops import unary_union
from shapely.geometry import Point, mapping, box, shape
import shapely
from typing import List
import os
from tqdm import tqdm
# Register tqdm with pandas so that DataFrame.progress_apply (used further down) is available.
tqdm.pandas()

import sys
# Make the parent directory importable. The entry is relative, so it is
# resolved against whatever the working directory is at import time.
sys.path.append("..")

# NOTE(review): this cell changes the process working directory. Re-running it
# moves one level further up each time and therefore changes `root`; restart
# the kernel before executing it again.
os.getcwd()  # return value is discarded
os.chdir("..")
# root is the parent directory of the new working directory
root = os.path.dirname(os.getcwd())
# root = root + "/workspaces/mine-segmentation" # uncomment when running in Lightning Studios
root

In [None]:
# Input files, both located below the project root:
# - the raw GeoPackage holding the manually validated test annotations
# - the processed GeoPackage that the rest of the notebook reads from
test_dataset_annotations = f"{root}/data/raw/mining_tiles_test_annotations.gpkg"
DATASET = f"{root}/data/processed/mining_tiles_with_masks_and_bounding_boxes.gpkg"

## ONE-TIME Step: Add validated polygons to Dataset

In [None]:
# Copy the validated annotations into DATASET the first time this runs; on
# later runs the layer is simply read back from DATASET.
existing_layers = gpd.list_layers(DATASET).name.to_list()
print(existing_layers)

if "test_polygons_validated" not in existing_layers:
    # The raw annotation file is only read for the first import, so only
    # require it here. Once the layer is stored in DATASET the raw file may
    # be absent without breaking this cell.
    if not os.path.exists(test_dataset_annotations):
        raise FileNotFoundError(f"Dataset {test_dataset_annotations} does not exist")

    test_polygons_validated = gpd.read_file(test_dataset_annotations, layer="polygons_annotated")
    test_polygons_validated.to_file(DATASET, layer="test_polygons_validated", driver="GPKG")
    print("Layer test_polygons_validated created in DATASET")
else:
    test_polygons_validated = gpd.read_file(DATASET, layer="test_polygons_validated")
    print("Layer test_polygons_validated already exists, read from DATASET")

## Check the validated polygons

In [None]:
# load the tiles and keep only those assigned to the test split
tiles = gpd.read_file(DATASET, layer="tiles")
test_tiles = tiles[tiles["split"] == "test"]
test_tile_ids = test_tiles[["tile_id"]]

# preferred (original) polygons, left-joined so that every test tile keeps a row
test_polygons_original = pd.merge(
    test_tile_ids,
    gpd.read_file(DATASET, layer="preferred_polygons"),
    on="tile_id",
    how="left",
)

# validated polygons, joined the same way
test_polygons_validated = pd.merge(
    test_tile_ids,
    gpd.read_file(DATASET, layer="test_polygons_validated"),
    on="tile_id",
    how="left",
)

# per-source original polygons, restricted to the test tiles whose preferred
# dataset is that source
maus_tile_ids = test_tiles[test_tiles["preferred_dataset"] == "maus"][["tile_id"]]
maus_test_polygons_original = pd.merge(
    maus_tile_ids,
    gpd.read_file(DATASET, layer="maus_polygons"),
    on="tile_id",
    how="left",
)

tang_tile_ids = test_tiles[test_tiles["preferred_dataset"] == "tang"][["tile_id"]]
tang_test_polygons_original = pd.merge(
    tang_tile_ids,
    gpd.read_file(DATASET, layer="tang_polygons"),
    on="tile_id",
    how="left",
)

In [None]:
# preview the first three rows of the original polygons joined to the test tiles
test_polygons_original.head(3)

In [None]:
# preview the first three rows of the validated polygons joined to the test tiles
test_polygons_validated.head(3)

In [None]:
# Join original and validated polygons per tile. Columns present in both
# frames are disambiguated with the "_original" / "_validated" suffixes.
test_polygons = pd.merge(
    left=test_polygons_original,
    right=test_polygons_validated,
    on="tile_id",
    suffixes=("_original", "_validated"),
)
test_polygons.head(3)

In [None]:
# attach each tile's own outline as the (unsuffixed) "geometry" column
tile_outlines = test_tiles[["tile_id", "geometry"]]
test_polygons = pd.merge(left=test_polygons, right=tile_outlines, on="tile_id")

In [None]:
# calculate the metrics for the original and validated polygons
def _is_missing_geometry(geom) -> bool:
    """Return True when *geom* is a missing-value placeholder instead of a geometry."""
    # A join that finds no polygon for a tile leaves None or a float NaN in
    # the geometry column; a real geometry object is never a float.
    return geom is None or isinstance(geom, float)


def _safe_ratio(numerator: float, denominator: float) -> float:
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


def calculate_metrics(row: pd.Series) -> pd.Series:
    """Compute area-based agreement metrics between two polygons of one tile.

    The original polygon is treated as the prediction and the validated
    polygon as the reference. All areas are in the units of the geometries'
    coordinate system.

    Args:
        row: A record exposing ``tile_id``, ``geometry_original``,
            ``geometry_validated`` and ``geometry`` (the tile outline). The
            geometries must provide ``area``, ``intersection``, ``difference``
            and ``union``. A missing original or validated geometry
            (``None`` / NaN) is treated as an empty area.

    Returns:
        A Series with ``tile_id``, the ratio metrics ``accuracy``,
        ``precision``, ``recall``, ``f1``, ``iou``, ``sensitivity`` and
        ``specificity``, and the four confusion-matrix areas. A ratio whose
        denominator is zero is reported as NaN instead of raising.
    """
    original = row.geometry_original
    validated = row.geometry_validated
    tile = row.geometry

    if _is_missing_geometry(original) or _is_missing_geometry(validated):
        # At least one side is empty, so nothing overlaps and each side's
        # whole area is a disagreement.
        original_area = 0.0 if _is_missing_geometry(original) else original.area
        validated_area = 0.0 if _is_missing_geometry(validated) else validated.area
        true_positive_area = 0.0
        false_negative_area = validated_area
        false_positive_area = original_area
        union_area = original_area + validated_area
    else:
        # both original and validated are present
        true_positive_area = original.intersection(validated).area
        # validated is present but original is not
        false_negative_area = validated.difference(original).area
        # original is present but validated is not
        false_positive_area = original.difference(validated).area
        union_area = original.union(validated).area

    # neither original nor validated are present
    true_negative_area = tile.area - union_area

    accuracy = _safe_ratio(true_positive_area + true_negative_area, tile.area)
    precision = _safe_ratio(true_positive_area, true_positive_area + false_positive_area)
    recall = _safe_ratio(true_positive_area, true_positive_area + false_negative_area)
    # Equivalent to the harmonic mean of precision and recall, but still
    # defined (as 0) when there is no overlap at all.
    f1 = _safe_ratio(
        2 * true_positive_area,
        2 * true_positive_area + false_positive_area + false_negative_area,
    )
    iou = _safe_ratio(true_positive_area, union_area)
    # Sensitivity is the true-positive rate, i.e. TP / (TP + FN). Dividing by
    # the original polygon's area would reproduce precision instead.
    sensitivity = recall
    specificity = _safe_ratio(true_negative_area, true_negative_area + false_positive_area)

    return pd.Series({
        "tile_id": row.tile_id,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "iou": iou,
        "sensitivity": sensitivity,
        "specificity": specificity,
        "true_positive_area": true_positive_area,
        "false_negative_area": false_negative_area,
        "false_positive_area": false_positive_area,
        "true_negative_area": true_negative_area
    })

# Score every row of test_polygons with calculate_metrics (axis=1 passes one
# row at a time); progress_apply shows a tqdm progress bar while doing so.
test_polygons_metrics = test_polygons.progress_apply(calculate_metrics, axis=1)
# preview the first three rows of the result
test_polygons_metrics.head(3)

In [None]:
# column-wise mean of every ratio metric over all scored rows
metric_columns = ["accuracy", "precision", "recall", "f1", "iou", "sensitivity", "specificity"]
test_polygons_metrics[metric_columns].mean()

In [None]:
# one histogram per ratio metric, showing its distribution over the scored rows
metric_columns = ["accuracy", "precision", "recall", "f1", "iou", "sensitivity", "specificity"]
test_polygons_metrics[metric_columns].hist(figsize=(20, 20))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay

# mean area of each confusion-matrix component over all scored rows
true_positive_area = test_polygons_metrics["true_positive_area"].mean()
false_negative_area = test_polygons_metrics["false_negative_area"].mean()
false_positive_area = test_polygons_metrics["false_positive_area"].mean()
true_negative_area = test_polygons_metrics["true_negative_area"].mean()
sum_areas = true_positive_area + false_negative_area + false_positive_area + true_negative_area

# calculate percentages
true_positive_area = true_positive_area / sum_areas * 100
false_negative_area = false_negative_area / sum_areas * 100
false_positive_area = false_positive_area / sum_areas * 100
true_negative_area = true_negative_area / sum_areas * 100

# Rows hold the validated (reference) class and columns the original
# (predicted) class, matching the axis labels set below.
confusion_matrix_values = np.array([[true_positive_area, false_negative_area], [false_positive_area, true_negative_area]])
class_names = ['Mining Area', 'No Mining Area']

disp = ConfusionMatrixDisplay(confusion_matrix=confusion_matrix_values, display_labels=class_names)
# colorbar=False removes the colour bar, which the original comment asked for
# but the code never did.
disp.plot(cmap=plt.cm.Blues, colorbar=False)
plt.xlabel('Original Polygon')
plt.ylabel('Validated Polygon')

plt.show()

In [None]:
# attach the per-tile metrics to the polygon table, then show it ordered by
# IoU (lowest agreement first)
test_polygons = pd.merge(left=test_polygons, right=test_polygons_metrics, on="tile_id")
test_polygons.sort_values(by="iou")

In [None]:
# keep the table ordered by ascending IoU with a fresh 0..n-1 index, so that
# position 0 is the tile with the lowest agreement
test_polygons = test_polygons.sort_values(by="iou", ignore_index=True)
test_polygons.head(3)

In [None]:
# row of the IoU-sorted table to inspect
index = 0

# look up the two polygons for that row once
original_polygon = test_polygons.geometry_original[index]
validated_polygon = test_polygons.geometry_validated[index]

# centre the map on the original polygon's centroid, given as [y, x]
map_center = original_polygon.centroid
m = leafmap.Map(
    center=[map_center.y, map_center.x],
    zoom=12,
    height="900px",
)

# add satellite
m.add_basemap("SATELLITE")

print(test_polygons.iou[index])
print(test_polygons.tile_id[index])

# original in blue, validated in red, tile outline in orange without fill
map_layers = [
    (original_polygon, "original", {"color": "blue", "fillOpacity": 0.5}),
    (validated_polygon, "validated", {"color": "red", "fillOpacity": 0.5}),
    (test_polygons.geometry[index], "tile", {"color": "orange", "fillOpacity": 0.0}),
]
for layer_geometry, layer_label, layer_style in map_layers:
    m.add_geojson(mapping(layer_geometry), layer_name=layer_label, style=layer_style)
m

## Calculate metrics for Maus and Tang Polygons

In [None]:
def _score_against_validated(original_polygons):
    """Pair *original_polygons* with the validated polygons and tile outlines, then score each row.

    Returns a tuple ``(polygons_with_metrics, metrics)``.
    """
    paired = pd.merge(original_polygons, test_polygons_validated, on="tile_id", suffixes=("_original", "_validated"))
    # add the tile geometry to the dataframe
    paired = pd.merge(paired, test_tiles[["tile_id", "geometry"]], on="tile_id")
    metrics = paired.progress_apply(calculate_metrics, axis=1)
    return pd.merge(paired, metrics, on="tile_id"), metrics


# same pipeline for both source datasets
test_polygons_maus, test_polygons_maus_metrics = _score_against_validated(maus_test_polygons_original)
test_polygons_tang, test_polygons_tang_metrics = _score_against_validated(tang_test_polygons_original)


### Maus

In [None]:
# column-wise mean of every ratio metric over the Maus rows
metric_columns = ["accuracy", "precision", "recall", "f1", "iou", "sensitivity", "specificity"]
test_polygons_maus[metric_columns].mean()

In [None]:
# one histogram per ratio metric for the Maus rows
metric_columns = ["accuracy", "precision", "recall", "f1", "iou", "sensitivity", "specificity"]
test_polygons_maus[metric_columns].hist(figsize=(20, 20))

### Tang

In [None]:
# column-wise mean of every ratio metric over the Tang rows
metric_columns = ["accuracy", "precision", "recall", "f1", "iou", "sensitivity", "specificity"]
test_polygons_tang[metric_columns].mean()

In [None]:
# one histogram per ratio metric for the Tang rows
metric_columns = ["accuracy", "precision", "recall", "f1", "iou", "sensitivity", "specificity"]
test_polygons_tang[metric_columns].hist(figsize=(20, 20))