In [None]:
from IPython.display import clear_output
import geopandas as gpd
import pandas as pd
from shapely import wkt
from shapely.geometry import Point, MultiPolygon, Polygon
from matplotlib import pyplot as plt
import numpy as np
from scipy.spatial import cKDTree
from types import SimpleNamespace
from geoalchemy2 import WKTElement
import os

In [None]:
import sys

# Make the local cerulean-cloud checkout importable so the analyzer can be loaded.
# NOTE(review): this is a machine-specific absolute path; adjust it (or install the
# package) when running the notebook anywhere else.
sys.path.append(r"C:\Users\ebeva\SkyTruth\git\cerulean-cloud")
from cerulean_cloud.cloud_function_asa.utils.analyzer import InfrastructureAnalyzer

In [None]:
import warnings

# Suppresses every warning category for the rest of the kernel session.
# NOTE(review): this also hides pandas/geopandas warnings raised by the cells below;
# narrow the filter if those diagnostics are needed.
warnings.filterwarnings("ignore")

In [None]:
class VersionedInfrastructureAnalyzer(InfrastructureAnalyzer):
    """InfrastructureAnalyzer whose distance-decay scoring formula is selectable.

    For every infrastructure point, each neighboring extremity point within the
    radius of interest gets a score ``C_i`` built from its weight and its
    distance; the point's coincidence score is the largest ``C_i`` clipped to
    ``[0, 1]``. The ``version`` selects the formula:

    * 1 -- ``weight - decay * dist / radius``
    * 2 -- ``weight * (1 - decay * dist / radius)``
    * 3 -- ``weight * exp(-decay * dist / radius)``
    """

    # Versions that compute_coincidence_scores_for_infra knows how to evaluate.
    SUPPORTED_VERSIONS = (1, 2, 3)

    def __init__(self, s1_scene, version=1, **kwargs):
        """Remember the scoring version and delegate the rest to the base class.

        Args:
            s1_scene: Scene object forwarded unchanged to the base class.
            version: Scoring formula to use (1, 2 or 3); see the class docstring.
            **kwargs: Forwarded unchanged to ``InfrastructureAnalyzer.__init__``.
        """
        super().__init__(s1_scene, **kwargs)
        self.version = version

    def compute_coincidence_scores_for_infra(
        self,
        infra_gdf: gpd.GeoDataFrame,
        extremity_tree: cKDTree,
        all_extremity_points: np.ndarray,
        all_weights: np.ndarray,
        radius_of_interest: float,
        decay_factor: float,
        version: int = None,
    ) -> np.ndarray:
        """Score every infrastructure point against nearby extremity points.

        Args:
            infra_gdf: Frame whose geometries expose ``.x`` and ``.y``.
            extremity_tree: KD-tree queried with ``query_ball_point``.
            all_extremity_points: Coordinates indexed by the tree's neighbor ids.
            all_weights: Weights indexed by the tree's neighbor ids.
            radius_of_interest: Search radius, also used to normalise distances.
            decay_factor: Accepted for signature compatibility.
                NOTE(review): as in the original implementation, the value
                actually applied is ``self.decay_factor``; confirm callers
                always pass the same number.
            version: Formula to use; ``None`` falls back to ``self.version``.

        Returns:
            One score in ``[0, 1]`` per row of ``infra_gdf``; points with no
            neighbor inside the radius score 0.

        Raises:
            ValueError: If the resolved version is not in ``SUPPORTED_VERSIONS``.
        """
        if version is None:
            version = self.version  # Use instance version if not provided
        # Fail with a clear message instead of hitting an unbound ``C_i`` later.
        if version not in self.SUPPORTED_VERSIONS:
            raise ValueError(
                f"Unsupported scoring version {version!r}; "
                f"expected one of {self.SUPPORTED_VERSIONS}."
            )

        if len(infra_gdf) == 0:
            return np.zeros(0)

        infra_coords = np.array([(geom.x, geom.y) for geom in infra_gdf.geometry])
        extremity_indices = extremity_tree.query_ball_point(
            infra_coords, r=radius_of_interest
        )
        coincidence_scores = np.zeros(len(infra_coords))

        for i, neighbors in enumerate(extremity_indices):
            if not neighbors:
                continue  # nothing within the radius: score stays 0

            neighbor_points = all_extremity_points[neighbors]
            neighbor_weights = all_weights[neighbors]
            dists = np.linalg.norm(neighbor_points - infra_coords[i], axis=1)
            scaled_dists = self.decay_factor * dists / radius_of_interest

            # Branch on the resolved ``version`` so an explicit argument is honoured.
            if version == 1:
                C_i = neighbor_weights - scaled_dists
            elif version == 2:
                C_i = neighbor_weights * (1 - scaled_dists)
            else:  # version == 3
                C_i = neighbor_weights * np.exp(-scaled_dists)

            coincidence_scores[i] = np.clip(C_i.max(), 0, 1)

        return coincidence_scores

In [None]:
def get_s1_scene(scene_id, download_path=None):
    """
    Return metadata for a S1 scene, downloading its GeoJSON file if it hasn't
    been downloaded already.

    Args:
        scene_id: Scene identifier; used in the query URL and the cached file name.
        download_path: Directory holding the cached ``<scene_id>.geojson`` files.
            When ``None`` the ``ASA_DOWNLOAD_PATH`` environment variable is read
            at call time.

    Returns:
        SimpleNamespace with ``scene_id``, ``scihub_ingestion_time``,
        ``start_time``, ``end_time`` and ``geometry`` (a ``WKTElement``), all taken
        from the first feature of the GeoJSON.

    Raises:
        ValueError: If no download directory is available, or the GeoJSON
            contains no features.
        subprocess.CalledProcessError: If curl reports a failed download.
    """
    import subprocess
    from urllib.parse import quote

    # Resolve the directory at call time; a definition-time default would freeze
    # whatever the environment held when the cell was first run (possibly None).
    if download_path is None:
        download_path = os.getenv("ASA_DOWNLOAD_PATH")
    if not download_path:
        raise ValueError(
            "No download directory: pass download_path or set ASA_DOWNLOAD_PATH."
        )

    url = (
        "https://api.cerulean.skytruth.org/collections/public.sentinel1_grd/items"
        f"?scene_id={quote(str(scene_id), safe='')}&f=geojson"
    )
    geojson_file_path = os.path.join(download_path, f"{scene_id}.geojson")
    if not os.path.exists(geojson_file_path):
        print(f"Downloading GeoJSON file for Scene {scene_id}...")
        os.makedirs(download_path, exist_ok=True)
        # Download to a temporary name and rename on success, so a failed or
        # interrupted transfer is never mistaken for a cached file on the next run.
        partial_path = f"{geojson_file_path}.part"
        try:
            # Argument list (no shell) avoids quoting/injection issues; --fail
            # makes curl exit non-zero on HTTP errors so check=True can raise.
            subprocess.run(["curl", "--fail", url, "-o", partial_path], check=True)
            os.replace(partial_path, geojson_file_path)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print(f"Downloaded GeoJSON to {geojson_file_path}")
    else:
        print(f"GeoJSON file already exists at {geojson_file_path}. Skipping download.")

    s1_gdf = gpd.read_file(geojson_file_path)
    if s1_gdf.empty:
        raise ValueError(f"No features found in {geojson_file_path}")

    first_feature = s1_gdf.iloc[0]
    s1_scene = SimpleNamespace(
        scene_id=scene_id,
        scihub_ingestion_time=first_feature.scihub_ingestion_time,
        start_time=first_feature.start_time,
        end_time=first_feature.end_time,
        geometry=WKTElement(str(first_feature.geometry)),
    )
    return s1_scene

In [None]:
def plot_metrics_by_decay(
    decay_rates,
    true_association_scores,
    false_association_scores,
    top_source_rate,
    top_3_source_rate,
    avg_max_score,
    version="v1",
):
    """Draw a one-row, four-panel line chart of metrics against decay rate.

    Panels, left to right: true-minus-false association score, top source rate,
    top-3 source rate, and average max non-source score. Every argument except
    ``version`` is a sequence with one entry per element of ``decay_rates``;
    ``version`` only appears in the figure title.
    """
    score_gap = [
        hit - miss
        for hit, miss in zip(true_association_scores, false_association_scores)
    ]

    plt.figure(figsize=(24, 6))
    plt.suptitle(
        f"Metrics by Decay Rate for version {version}", fontsize=16, fontweight="bold"
    )

    # Decay rates are laid out as evenly spaced categories, not on a numeric axis.
    tick_positions = np.arange(len(decay_rates))

    # (values, line color, y label, title, y-axis upper limit, labels may go below)
    panels = (
        (
            score_gap,
            "cornflowerblue",
            "True - False Association Scores",
            "Difference of Avg Source Score and Avg Non-Source Score",
            1,
            True,
        ),
        (
            top_source_rate,
            "gold",
            "Top Source Rate",
            "Top Source Rate by Decay Rate",
            1.1,
            False,
        ),
        (
            top_3_source_rate,
            "mediumseagreen",
            "Top 3 Source Rate",
            "Top 3 Source Rate by Decay Rate",
            1.1,
            False,
        ),
        (
            avg_max_score,
            "purple",
            "Avg Max Score",
            "Avg Max Score of Non-Source by Decay Rate",
            0.5,
            False,
        ),
    )

    for slot, (values, color, y_label, title, y_top, signed) in enumerate(
        panels, start=1
    ):
        plt.subplot(1, 4, slot)
        plt.plot(tick_positions, values, marker="o", color=color)
        plt.xlabel("Decay Rate")
        plt.ylabel(y_label)
        plt.title(title)
        plt.xticks(tick_positions, decay_rates)
        plt.ylim(0, y_top)
        for position, value in enumerate(values):
            # Only the difference panel flips its label below the marker, and
            # only when the value is not >= 0.
            label_below = signed and not value >= 0
            plt.text(
                position,
                value - 0.01 if label_below else value + 0.01,
                f"{value:.3f}",
                ha="center",
                va="top" if label_below else "bottom",
            )

    # Leave room at the top for the suptitle.
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()


def show_plots(
    true_association_scores,
    false_association_scores,
    top_source_rate,
    top_3_source_rate,
):
    """Draw a one-row, three-panel bar chart comparing v1, v2, v3 and the baseline.

    Each argument is a sequence with one entry per label (v1, v2, v3, baseline).
    The association-score entries are reduced with ``np.mean`` before being
    subtracted; the rate entries are plotted as given.
    """
    labels = ["v1", "v2", "v3", "baseline"]
    bar_width = 0.35
    x = np.arange(len(labels))

    mean_true = [np.mean(entry) for entry in true_association_scores]
    mean_false = [np.mean(entry) for entry in false_association_scores]
    score_gap = [hit - miss for hit, miss in zip(mean_true, mean_false)]

    plt.figure(figsize=(18, 6))

    # (bar heights, bar color, y label, title, labels may go below the bar)
    panels = (
        (
            score_gap,
            "cornflowerblue",
            "True - False Association Scores",
            "Avg Source Score minus Avg non-source Score",
            True,
        ),
        (top_source_rate, "gold", "Top Source Rate", "Top Source Rate by Version", False),
        (
            top_3_source_rate,
            "mediumseagreen",
            "Top 3 Source Rate",
            "Top 3 Source Rate by Version",
            False,
        ),
    )

    for slot, (heights, color, y_label, title, signed) in enumerate(panels, start=1):
        plt.subplot(1, 3, slot)
        plt.bar(x, heights, color=color, width=bar_width)
        plt.xlabel("Versions")
        plt.ylabel(y_label)
        plt.title(title)
        plt.xticks(x, labels)
        plt.ylim(0, 1.1)
        for position, value in enumerate(heights):
            # Only the difference panel flips its label below the bar, and only
            # when the value is not >= 0.
            label_below = signed and not value >= 0
            plt.text(
                position,
                value - 0.01 if label_below else value + 0.01,
                f"{value:.3f}",
                ha="center",
                va="top" if label_below else "bottom",
            )

In [None]:
def point_to_polygon_probability(
    points_gdf, polygons_gdf, decay_constant=200, buffer=0.025
):
    """Score each point by its distance to the nearest polygon boundary.

    The score is ``exp(-decay_constant * d)`` where ``d`` is the distance from the
    point to the closest exterior ring among all polygons. Because the exterior
    ring is used, a point lying inside a polygon is scored by its distance to the
    boundary, not given a score of 1. Points outside the polygons' overall
    bounding box grown by ``buffer`` are skipped and keep a score of 0.

    Distances and ``buffer`` are in the units of the geometries' coordinates.

    Args:
        points_gdf: Frame of point geometries (exposing ``.x`` / ``.y``). It is
            not modified.
        polygons_gdf: Frame of Polygon / MultiPolygon geometries.
        decay_constant: Exponential decay rate applied to the distance.
        buffer: Margin added on every side of the bounding-box prefilter.

    Returns:
        ``np.ndarray`` with one score per row of ``points_gdf``, in row order.

    Raises:
        TypeError: If a polygon geometry is neither Polygon nor MultiPolygon.
    """
    probabilities = np.zeros(len(points_gdf))
    if len(points_gdf) == 0 or len(polygons_gdf) == 0:
        return probabilities

    # Collect every exterior ring once instead of re-walking the polygons per point.
    rings = []
    for polygon in polygons_gdf.geometry:
        if isinstance(polygon, MultiPolygon):
            rings.extend(part.exterior for part in polygon.geoms)
        elif isinstance(polygon, Polygon):
            rings.append(polygon.exterior)
        else:
            raise TypeError(
                "Polygon geometries must be either Polygon or MultiPolygon types."
            )

    # Prefilter with the bounds of ALL polygons (identical to the first polygon's
    # bounds when a single polygon is passed).
    minx, miny, maxx, maxy = polygons_gdf.total_bounds
    xs = points_gdf.geometry.x.to_numpy()
    ys = points_gdf.geometry.y.to_numpy()
    in_window = (
        (xs >= minx - buffer)
        & (xs <= maxx + buffer)
        & (ys >= miny - buffer)
        & (ys <= maxy + buffer)
    )

    # Work with row positions so the result lines up with points_gdf whatever its
    # index labels are, and without adding a helper column to the caller's frame.
    point_geoms = points_gdf.geometry
    for position in np.flatnonzero(in_window):
        point = point_geoms.iloc[position]
        min_distance = min(point.distance(ring) for ring in rings)
        probabilities[position] = np.exp(-decay_constant * min_distance)
    return probabilities

In [None]:
def get_utm_proj_string(gdf):
    """Build a PROJ string for the UTM zone containing the frame's mean centroid.

    The frame is reprojected to EPSG:4326, the centroid longitudes and latitudes
    are averaged, and the zone number and hemisphere are derived from those means.

    Args:
        gdf: Frame supporting ``to_crs`` whose geometries expose ``centroid``.

    Returns:
        String of the form ``"+proj=utm +zone=<1-60> +north|+south +type=crs"``.

    Raises:
        ValueError: If the mean centroid is not finite (for example, no rows).
    """
    centroids = gdf.to_crs("EPSG:4326").geometry.centroid
    mean_longitude = centroids.x.mean()
    mean_latitude = centroids.y.mean()

    if not (np.isfinite(mean_longitude) and np.isfinite(mean_latitude)):
        raise ValueError(
            "Cannot derive a UTM zone: the GeoDataFrame has no usable geometry."
        )

    # Zones are 6 degrees wide starting at -180. Clamp to 1..60 because a
    # longitude of exactly 180 would otherwise produce the non-existent zone 61.
    utm_zone = min(max(int((mean_longitude + 180) / 6) + 1, 1), 60)

    hemisphere = "+north" if mean_latitude >= 0 else "+south"

    return f"+proj=utm +zone={utm_zone} {hemisphere} +type=crs"

In [None]:
def calculate_associated_infra(groundtruth_slicks, gfw_gdf, asa, baseline=False):
    """Rank candidate infrastructure for every slick, one slick at a time.

    Args:
        groundtruth_slicks: Frame of slicks to evaluate (one row per slick). Any
            slick frame can be passed; the name is historical.
        gfw_gdf: Infrastructure frame; only used directly in baseline mode.
        asa: In baseline mode, a callable ``asa(gfw_gdf, slick) -> array`` of
            per-row scores. Otherwise an analyzer exposing ``crs_meters``,
            ``s1_scene.scene_id`` and ``compute_coincidence_scores(slick)``; both
            attributes are overwritten for every slick.
        baseline: Select baseline mode.

    Returns:
        List with one frame per slick, sorted by ``coincidence_score`` descending.
        A slick for which the analyzer returns ``None`` gets an empty frame.
    """
    associated_infra = []
    for i in range(len(groundtruth_slicks)):
        slick_of_interest = groundtruth_slicks.iloc[[i]]
        if baseline:
            probabilities = np.asarray(asa(gfw_gdf, slick_of_interest))
            is_candidate = probabilities > 0
            # Copy before adding a column so the scores land on an independent
            # frame rather than on a slice of gfw_gdf.
            potential_sources = gfw_gdf[is_candidate].copy()
            potential_sources["coincidence_score"] = probabilities[is_candidate]
        else:
            asa.crs_meters = get_utm_proj_string(slick_of_interest)
            asa.s1_scene.scene_id = slick_of_interest["scene_id"].values[0]
            potential_sources = asa.compute_coincidence_scores(slick_of_interest)

        if potential_sources is None:
            potential_sources = gpd.GeoDataFrame(
                columns=["structure_id", "coincidence_score"]
            )

        associated_infra.append(
            potential_sources.sort_values(by="coincidence_score", ascending=False)
        )

    # Clear the output accumulated while scoring. This call used to sit after
    # the return statement and therefore never ran.
    clear_output()
    return associated_infra

In [None]:
def highest_false_score(associated_infra, groundtruth_slicks):
    """Return, per slick, the highest score given to a non-source structure.

    Args:
        associated_infra: One candidate frame per slick, with ``structure_id``
            and ``coincidence_score`` columns.
        groundtruth_slicks: Frame whose ``structure_ids`` column holds, at the
            same row position, a string such as ``"[12, 34]"`` (or ``"[]"``)
            listing the true source ids.

    Returns:
        List with one value per candidate frame: the maximum score among
        candidates whose id is not a true source, or 0 when there is none.
    """
    max_non_source_score = []
    for i, potential_sources in enumerate(associated_infra):
        raw_ids = groundtruth_slicks["structure_ids"].iloc[i]
        # Skip blank tokens so "[]" parses to an empty list instead of int("").
        ground_truth_sources = [
            int(token) for token in raw_ids.strip("[]").split(",") if token.strip()
        ]

        scores = potential_sources["coincidence_score"].values
        struct_ids = potential_sources["structure_id"].values

        # Seed with 0 so a slick with no false candidates still yields a value.
        false_association_scores = [0] + [
            score
            for struct_id, score in zip(struct_ids, scores)
            if struct_id not in ground_truth_sources
        ]
        max_non_source_score.append(max(false_association_scores))
    return max_non_source_score

In [None]:
def scores_and_rates(associated_infra, groundtruth_slicks):
    """Collect per-candidate scores and per-slick hit flags against ground truth.

    Args:
        associated_infra: One candidate frame per slick, ordered best-first, with
            ``structure_id`` and ``coincidence_score`` columns.
        groundtruth_slicks: Frame whose ``structure_ids`` column holds, at the
            same row position, a string such as ``"[12, 34]"`` (or ``"[]"``)
            listing the true source ids.

    Returns:
        Tuple ``(association_scores, false_association_scores, top_source_rate,
        top_3_source_rate, total_sources)``:

        * scores of candidates that are true sources, over all slicks;
        * scores of candidates that are not, over all slicks;
        * one bool per slick: the first candidate is a true source;
        * one bool per slick: a true source is among the first three candidates;
        * every ground-truth id, concatenated over all slicks.
    """
    association_scores = []
    false_association_scores = []
    top_source_rate = []
    top_3_source_rate = []
    total_sources = []

    for i, potential_sources in enumerate(associated_infra):
        raw_ids = groundtruth_slicks["structure_ids"].iloc[i]
        # Skip blank tokens so "[]" parses to an empty list instead of int("").
        ground_truth_sources = [
            int(token) for token in raw_ids.strip("[]").split(",") if token.strip()
        ]
        total_sources.extend(ground_truth_sources)

        scores = potential_sources["coincidence_score"].values
        struct_ids = potential_sources["structure_id"].values

        # Candidates arrive sorted best-first, so position 0 is the top guess.
        top_source = struct_ids[0] if len(struct_ids) > 0 else None
        top_source_rate.append(top_source in ground_truth_sources)
        top_3_source_rate.append(
            any(struct_id in ground_truth_sources for struct_id in struct_ids[:3])
        )

        for struct_id, score in zip(struct_ids, scores):
            if struct_id in ground_truth_sources:
                association_scores.append(score)
            else:
                false_association_scores.append(score)

    return (
        association_scores,
        false_association_scores,
        top_source_rate,
        top_3_source_rate,
        total_sources,
    )

In [None]:
def generate_metrics(
    asa_v1, asa_v2, asa_v3, base_algo, gfw_gdf, groundtruth_slicks, unassociated_slicks
):
    """Evaluate the three analyzer versions and the baseline on both slick sets.

    Args:
        asa_v1, asa_v2, asa_v3: Analyzers passed to ``calculate_associated_infra``
            in non-baseline mode.
        base_algo: Baseline callable passed in baseline mode.
        gfw_gdf: Infrastructure frame.
        groundtruth_slicks: Slicks with known source structures.
        unassociated_slicks: Slicks with no known source structure.

    Returns:
        Tuple of five lists, each ordered ``[v1, v2, v3, baseline]``:

        * mean score of true-source candidates (ground-truth slicks);
        * mean score of non-source candidates (ground-truth slicks);
        * fraction of ground-truth slicks whose top candidate is a true source;
        * fraction of ground-truth slicks with a true source in the top three;
        * mean, over unassociated slicks, of the highest candidate score.

        A mean over an empty collection is reported as NaN.
    """

    def _mean(values):
        """Arithmetic mean, or NaN (not ZeroDivisionError) when empty."""
        return sum(values) / len(values) if len(values) else float("nan")

    def _max_candidate_scores(candidate_frames):
        """Highest candidate score per frame, 0 for a frame with no candidates."""
        return [
            max([0, *frame["coincidence_score"].values]) for frame in candidate_frames
        ]

    # (algorithm, baseline flag) in the order results are reported.
    algorithms = [
        (asa_v1, False),
        (asa_v2, False),
        (asa_v3, False),
        (base_algo, True),
    ]

    associated = [
        calculate_associated_infra(groundtruth_slicks, gfw_gdf, algo, is_baseline)
        for algo, is_baseline in algorithms
    ]
    unassociated = [
        calculate_associated_infra(unassociated_slicks, gfw_gdf, algo, is_baseline)
        for algo, is_baseline in algorithms
    ]

    true_association_scores = []
    false_association_scores = []
    top_source_rate = []
    top_3_source_rate = []
    for candidate_frames in associated:
        true_scores, false_scores, top_hits, top_3_hits, _total = scores_and_rates(
            candidate_frames, groundtruth_slicks
        )
        true_association_scores.append(_mean(true_scores))
        false_association_scores.append(_mean(false_scores))
        top_source_rate.append(_mean(top_hits))
        top_3_source_rate.append(_mean(top_3_hits))

    # Unassociated slicks have no true source, so every candidate is a non-source
    # and the metric is simply the highest candidate score per slick. Previously
    # these frames were matched row-by-row against the ground-truth slicks' ids,
    # which excluded unrelated structures and raised IndexError whenever there
    # were more unassociated than ground-truth slicks. The v3 average also used
    # the v1 list length as its denominator.
    avg_max_non_source_score = [
        _mean(_max_candidate_scores(candidate_frames))
        for candidate_frames in unassociated
    ]

    return (
        true_association_scores,
        false_association_scores,
        top_source_rate,
        top_3_source_rate,
        avg_max_non_source_score,
    )

In [None]:
# Load the validation slick table; its geometry column is stored as WKT text.
# NOTE(review): machine-specific absolute path.
df = pd.read_csv(r"C:\Users\ebeva\SkyTruth\cv3\infrastructure_validation_points.csv")
df["geometry"] = [wkt.loads(wkt_text) for wkt_text in df["geometry"]]
slick_gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

In [None]:
# Load the infrastructure table and build point geometries from its lon/lat columns.
# NOTE(review): machine-specific absolute path.
df = pd.read_csv(r"C:\Users\ebeva\SkyTruth\cv3\nonoise_SAR_Fixed_Infrastructure.csv")
infra_points = [Point(lon, lat) for lon, lat in zip(df["lon"], df["lat"])]
gfw_gdf = gpd.GeoDataFrame(df, geometry=infra_points, crs="EPSG:4326")  # WGS84

In [None]:
# NOTE(review): machine-specific absolute path.
download_path = r"C:\Users\ebeva\SkyTruth\cv3\s1_scene_envelope"
scene_id_to_gdf = {}
# Only the first (sorted, unique) scene id is fetched; calculate_associated_infra
# overwrites the scene id on the analyzers' scene object for every slick.
for scene_id in np.unique(slick_gdf["scene_id"].values)[:1]:
    s1_scene = get_s1_scene(scene_id, download_path=download_path)

In [None]:
# A slick is "ground truth" when its structure_ids string is anything but "[]".
has_known_sources = slick_gdf["structure_ids"] != "[]"
groundtruth_slicks = slick_gdf[has_known_sources]
unassociated_slicks = slick_gdf[~has_known_sources]
print(len(groundtruth_slicks), len(unassociated_slicks), sep="\n")

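Algorithm Versions Referenced in Analysis

In each version, `C_i` is the score contributed by one neighboring point, `dists` is its distance to the infrastructure point, and `decay` is the analyzer's `decay_factor`. The coincidence score of an infrastructure point is the largest `C_i`, clipped to the range [0, 1].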

**Version 1**
```
C_i = neighbor_weights - decay * dists / radius_of_interest
```
**Version 2**
```
C_i = neighbor_weights * (1 - decay * dists / radius_of_interest)
```
**Version 3**
```
C_i = neighbor_weights * np.exp(-decay * dists / radius_of_interest)
```


In [None]:
# Parse the structure lifetime columns into datetimes (overwrites them in place).
for date_column in ("structure_start_date", "structure_end_date"):
    gfw_gdf[date_column] = pd.to_datetime(gfw_gdf[date_column])

In [None]:
# Sweep point decay_factor = 0.01: one analyzer per scoring version (1, 2, 3).
asa_v1, asa_v2, asa_v3 = (
    VersionedInfrastructureAnalyzer(
        s1_scene, version=version_number, decay_factor=0.01, infra_gdf=gfw_gdf
    )
    for version_number in (1, 2, 3)
)
(
    true_association_scores_001,
    false_association_scores_001,
    top_source_rate_001,
    top_3_source_rate_001,
    avg_max_score_001,
) = generate_metrics(
    asa_v1=asa_v1,
    asa_v2=asa_v2,
    asa_v3=asa_v3,
    base_algo=point_to_polygon_probability,
    gfw_gdf=gfw_gdf,
    groundtruth_slicks=groundtruth_slicks,
    unassociated_slicks=unassociated_slicks,
)
clear_output()
show_plots(
    true_association_scores=true_association_scores_001,
    false_association_scores=false_association_scores_001,
    top_source_rate=top_source_rate_001,
    top_3_source_rate=top_3_source_rate_001,
)

In [None]:
# Sweep point decay_factor = 0.05: one analyzer per scoring version (1, 2, 3).
asa_v1, asa_v2, asa_v3 = (
    VersionedInfrastructureAnalyzer(
        s1_scene, version=version_number, decay_factor=0.05, infra_gdf=gfw_gdf
    )
    for version_number in (1, 2, 3)
)
(
    true_association_scores_005,
    false_association_scores_005,
    top_source_rate_005,
    top_3_source_rate_005,
    avg_max_score_005,
) = generate_metrics(
    asa_v1=asa_v1,
    asa_v2=asa_v2,
    asa_v3=asa_v3,
    base_algo=point_to_polygon_probability,
    gfw_gdf=gfw_gdf,
    groundtruth_slicks=groundtruth_slicks,
    unassociated_slicks=unassociated_slicks,
)
clear_output()

show_plots(
    true_association_scores=true_association_scores_005,
    false_association_scores=false_association_scores_005,
    top_source_rate=top_source_rate_005,
    top_3_source_rate=top_3_source_rate_005,
)

In [None]:
# Sweep point decay_factor = 0.1: one analyzer per scoring version (1, 2, 3).
asa_v1, asa_v2, asa_v3 = (
    VersionedInfrastructureAnalyzer(
        s1_scene, version=version_number, decay_factor=0.1, infra_gdf=gfw_gdf
    )
    for version_number in (1, 2, 3)
)
(
    true_association_scores_01,
    false_association_scores_01,
    top_source_rate_01,
    top_3_source_rate_01,
    avg_max_score_01,
) = generate_metrics(
    asa_v1=asa_v1,
    asa_v2=asa_v2,
    asa_v3=asa_v3,
    base_algo=point_to_polygon_probability,
    gfw_gdf=gfw_gdf,
    groundtruth_slicks=groundtruth_slicks,
    unassociated_slicks=unassociated_slicks,
)
clear_output()

# show_plots(true_association_scores_01, false_association_scores_01, top_source_rate_01, top_3_source_rate_01)

In [None]:
# Sweep point decay_factor = 0.5: one analyzer per scoring version (1, 2, 3).
asa_v1, asa_v2, asa_v3 = (
    VersionedInfrastructureAnalyzer(
        s1_scene, version=version_number, decay_factor=0.5, infra_gdf=gfw_gdf
    )
    for version_number in (1, 2, 3)
)
(
    true_association_scores_05,
    false_association_scores_05,
    top_source_rate_05,
    top_3_source_rate_05,
    avg_max_score_05,
) = generate_metrics(
    asa_v1=asa_v1,
    asa_v2=asa_v2,
    asa_v3=asa_v3,
    base_algo=point_to_polygon_probability,
    gfw_gdf=gfw_gdf,
    groundtruth_slicks=groundtruth_slicks,
    unassociated_slicks=unassociated_slicks,
)
clear_output()

# show_plots(true_association_scores_05, false_association_scores_05, top_source_rate_05, top_3_source_rate_05)

In [None]:
# Sweep point decay_factor = 1.0: one analyzer per scoring version (1, 2, 3).
asa_v1, asa_v2, asa_v3 = (
    VersionedInfrastructureAnalyzer(
        s1_scene, version=version_number, decay_factor=1.0, infra_gdf=gfw_gdf
    )
    for version_number in (1, 2, 3)
)
(
    true_association_scores_1,
    false_association_scores_1,
    top_source_rate_1,
    top_3_source_rate_1,
    avg_max_score_1,
) = generate_metrics(
    asa_v1=asa_v1,
    asa_v2=asa_v2,
    asa_v3=asa_v3,
    base_algo=point_to_polygon_probability,
    gfw_gdf=gfw_gdf,
    groundtruth_slicks=groundtruth_slicks,
    unassociated_slicks=unassociated_slicks,
)
clear_output()

# show_plots(true_association_scores_1, false_association_scores_1, top_source_rate_1, top_3_source_rate_1)

In [None]:
# Sweep point decay_factor = 2.0: one analyzer per scoring version (1, 2, 3).
asa_v1, asa_v2, asa_v3 = (
    VersionedInfrastructureAnalyzer(
        s1_scene, version=version_number, decay_factor=2.0, infra_gdf=gfw_gdf
    )
    for version_number in (1, 2, 3)
)
(
    true_association_scores_2,
    false_association_scores_2,
    top_source_rate_2,
    top_3_source_rate_2,
    avg_max_score_2,
) = generate_metrics(
    asa_v1=asa_v1,
    asa_v2=asa_v2,
    asa_v3=asa_v3,
    base_algo=point_to_polygon_probability,
    gfw_gdf=gfw_gdf,
    groundtruth_slicks=groundtruth_slicks,
    unassociated_slicks=unassociated_slicks,
)
clear_output()

# show_plots(true_association_scores_2, false_association_scores_2, top_source_rate_2, top_3_source_rate_2)

In [None]:
# Sweep point decay_factor = 4.0: one analyzer per scoring version (1, 2, 3).
asa_v1, asa_v2, asa_v3 = (
    VersionedInfrastructureAnalyzer(
        s1_scene, version=version_number, decay_factor=4.0, infra_gdf=gfw_gdf
    )
    for version_number in (1, 2, 3)
)
(
    true_association_scores_4,
    false_association_scores_4,
    top_source_rate_4,
    top_3_source_rate_4,
    avg_max_score_4,
) = generate_metrics(
    asa_v1=asa_v1,
    asa_v2=asa_v2,
    asa_v3=asa_v3,
    base_algo=point_to_polygon_probability,
    gfw_gdf=gfw_gdf,
    groundtruth_slicks=groundtruth_slicks,
    unassociated_slicks=unassociated_slicks,
)
clear_output()

# show_plots(true_association_scores_4, false_association_scores_4, top_source_rate_4, top_3_source_rate_4)

In [None]:
# Sweep point decay_factor = 8.0: one analyzer per scoring version (1, 2, 3).
asa_v1, asa_v2, asa_v3 = (
    VersionedInfrastructureAnalyzer(
        s1_scene, version=version_number, decay_factor=8.0, infra_gdf=gfw_gdf
    )
    for version_number in (1, 2, 3)
)
(
    true_association_scores_8,
    false_association_scores_8,
    top_source_rate_8,
    top_3_source_rate_8,
    avg_max_score_8,
) = generate_metrics(
    asa_v1=asa_v1,
    asa_v2=asa_v2,
    asa_v3=asa_v3,
    base_algo=point_to_polygon_probability,
    gfw_gdf=gfw_gdf,
    groundtruth_slicks=groundtruth_slicks,
    unassociated_slicks=unassociated_slicks,
)
clear_output()

# show_plots(true_association_scores_8, false_association_scores_8, top_source_rate_8, top_3_source_rate_8)

In [None]:
# Regroup the sweep results: for each scoring version (index v -> version v + 1),
# gather that version's entry from every decay run and plot it against decay rate.
# Despite the `_v1` suffix, the lists below hold the values for version v + 1.
# Index 3 (the baseline) is not plotted here.
for v in range(3):
    decay_rates = [0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 4.0, 8.0]

    true_association_scores_v1 = [
        per_decay[v]
        for per_decay in (
            true_association_scores_001,
            true_association_scores_005,
            true_association_scores_01,
            true_association_scores_05,
            true_association_scores_1,
            true_association_scores_2,
            true_association_scores_4,
            true_association_scores_8,
        )
    ]
    false_association_scores_v1 = [
        per_decay[v]
        for per_decay in (
            false_association_scores_001,
            false_association_scores_005,
            false_association_scores_01,
            false_association_scores_05,
            false_association_scores_1,
            false_association_scores_2,
            false_association_scores_4,
            false_association_scores_8,
        )
    ]
    top_source_rate_v1 = [
        per_decay[v]
        for per_decay in (
            top_source_rate_001,
            top_source_rate_005,
            top_source_rate_01,
            top_source_rate_05,
            top_source_rate_1,
            top_source_rate_2,
            top_source_rate_4,
            top_source_rate_8,
        )
    ]
    top_3_source_rate_v1 = [
        per_decay[v]
        for per_decay in (
            top_3_source_rate_001,
            top_3_source_rate_005,
            top_3_source_rate_01,
            top_3_source_rate_05,
            top_3_source_rate_1,
            top_3_source_rate_2,
            top_3_source_rate_4,
            top_3_source_rate_8,
        )
    ]
    avg_max_non_source_score = [
        per_decay[v]
        for per_decay in (
            avg_max_score_001,
            avg_max_score_005,
            avg_max_score_01,
            avg_max_score_05,
            avg_max_score_1,
            avg_max_score_2,
            avg_max_score_4,
            avg_max_score_8,
        )
    ]

    plot_metrics_by_decay(
        decay_rates,
        true_association_scores_v1,
        false_association_scores_v1,
        top_source_rate_v1,
        top_3_source_rate_v1,
        avg_max_non_source_score,
        version=v + 1,
    )