In [None]:
import os
import sys
from dotenv import load_dotenv

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from IPython.display import clear_output

load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty, so the failure names
            the missing variable instead of surfacing later as an opaque
            ``TypeError`` on ``None``.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name!r} is not set; "
            "define it in the shell or in the .env file."
        )
    return value


# Set the download path for demonstration and ensure the folder exists.
download_path = _require_env("ASA_DOWNLOAD_PATH")
os.makedirs(download_path, exist_ok=True)

# GIT_FOLDER stays optional (an unset value is simply not added to sys.path);
# CV3_FOLDER is required because later cells build data paths from it.
git_path = os.getenv("GIT_FOLDER")
cv3_path = _require_env("CV3_FOLDER")
for _extra_path in (git_path, cv3_path):
    # Skip unset entries and avoid piling up duplicates when the cell is re-run.
    if _extra_path and _extra_path not in sys.path:
        sys.path.append(_extra_path)

In [None]:
from cerulean_cloud.cloud_function_asa.utils.analyzer import (
    AISAnalyzer,
    InfrastructureAnalyzer,
    DarkAnalyzer,
)

In [None]:
# Import the evaluation helpers from the asa_analysis package.
from asa_analysis.evaluation.source_analyzer_evaluation import (
    label_dark_vessel_results_with_distance,
    label_results_with_st_name,
    apply_labeling,
    process_groundtruth_on_analyzer,
)

In [None]:
collation_datasets_folder = cv3_path + "/asa_analysis/evaluation/"


def _read_collation_csv(file_name):
    """Read one CSV file located in ``collation_datasets_folder``."""
    return pd.read_csv(f"{collation_datasets_folder}/{file_name}")


# Dark-vessel ground truth and the SAR detections that accompany it.
dark_vess_df = _read_collation_csv("refined_dark_vessel_dataset.csv")
sar_detections = _read_collation_csv("sar_detections_hitl_dark_ds.csv")

# SAR detections for the HITL vessel/infrastructure slicks. The "Unnamed: 0"
# column is dropped (presumably a stray index column from an earlier export).
sar_detections_infra_vess = _read_collation_csv(
    "gfw_sar_detections_for_hitl.csv"
).drop(columns="Unnamed: 0")


def _as_point_gdf(frame, lon_column, lat_column):
    """Wrap ``frame`` in a WGS84 GeoDataFrame with one point per row."""
    point_geometry = gpd.points_from_xy(frame[lon_column], frame[lat_column])
    return gpd.GeoDataFrame(frame, geometry=point_geometry, crs="EPSG:4326")


def _without_structures(detections_gdf):
    """Keep detections whose ``structure_id`` is NaN and renumber the rows.

    ``reset_index()`` is called without ``drop=True``, so the previous index
    is preserved as an ``index`` column.
    """
    unmatched = detections_gdf[detections_gdf["structure_id"].isna()]
    return unmatched.reset_index()


# Dark-vessel ground truth. The NaN-mmsi filter below is NOT applied, so every
# row of dark_vess_df is kept:
# true_dark_vess_df = dark_vess_df[dark_vess_df['mmsi'].isna()]
dark_vessel_groundtruth = _as_point_gdf(dark_vess_df, "lon", "lat")

# SAR detections for the dark-vessel dataset; passed as points_gdf to the
# DarkAnalyzer run on dark_vessel_groundtruth further down.
sar_detections_gdf = _without_structures(
    _as_point_gdf(sar_detections, "detect_lon", "detect_lat")
)

# SAR detections for the HITL vessel / infrastructure slicks.
sar_detections_infra_vess_gdf = _without_structures(
    _as_point_gdf(sar_detections_infra_vess, "detect_lon", "detect_lat")
)

# Load the HITL CSV that holds vessel and infrastructure ground truth.
csv_path = f"{collation_datasets_folder}/slick_to_source_2025-3-20.csv"
hitl_df = pd.read_csv(csv_path)


def _verified_groundtruth(hitl, source_type):
    """Return HITL-verified rows of one ``type``, with ``slick`` renamed to ``slick_id``.

    The rename only happens when a ``slick`` column is present; otherwise the
    filtered selection is returned untouched.
    """
    selected = hitl[(hitl["type"] == source_type) & (hitl["hitl_verification"])]
    if "slick" in selected.columns:
        selected = selected.rename(columns={"slick": "slick_id"})
    return selected


# type == 2 -> infrastructure, type == 1 -> vessel.
infrastructure_groundtruth = _verified_groundtruth(hitl_df, 2)
vessel_groundtruth = _verified_groundtruth(hitl_df, 1)

# Fixed-infrastructure locations, used as points_gdf for the
# InfrastructureAnalyzer runs below.
df = pd.read_csv(f"{collation_datasets_folder}/nonoise_SAR_Fixed_Infrastructure.csv")
infrastructure_points = [Point(lon, lat) for lon, lat in zip(df["lon"], df["lat"])]
gfw_gdf = gpd.GeoDataFrame(
    df,
    geometry=infrastructure_points,
    crs="EPSG:4326",  # WGS84
)

In [None]:
run = "/3-25-2025d"  # when running multiple per day I use extra alphabetic identifier

# Build the output folder with os.path.join so the path is valid on every OS;
# hard-coded backslashes would become part of a directory *name* on POSIX.
# The separators around `run` are stripped so join does not treat it as an
# absolute path and discard the preceding components.
save_location = os.path.join(
    cv3_path,
    "asa_analysis",
    "evaluation",
    "saved_results",
    "collation datasets",
    run.strip("/\\"),
)

In [None]:
def _run_ais_analyzer(groundtruth, **extra_options):
    """Run AISAnalyzer over ``groundtruth`` with the options shared by this cell.

    A fresh ``analyzer_params`` dict is created on every call; additional
    keyword options are forwarded unchanged.
    """
    return process_groundtruth_on_analyzer(
        AISAnalyzer,
        groundtruth,
        analyzer_params={},
        reuse_ais_trajectories=True,
        **extra_options,
    )


# Vessel slicks: only this run passes filter_ais_infra=True, and the results
# are labeled against the vessel ground truth straight away.
results_vessel_on_vessel_asa = _run_ais_analyzer(
    vessel_groundtruth, filter_ais_infra=True
)
results_vessel_on_vessel_asa = apply_labeling(
    results_vessel_on_vessel_asa, vessel_groundtruth, label_results_with_st_name
)

# Infrastructure slicks and dark-vessel slicks scored against AIS sources.
results_infra_on_vessel = _run_ais_analyzer(infrastructure_groundtruth)
results_dark_on_vessel_asa = _run_ais_analyzer(dark_vessel_groundtruth)

# Wipe whatever the analyzer runs printed into the cell output.
clear_output()

In [None]:
def _run_infrastructure_analyzer(groundtruth):
    """Run InfrastructureAnalyzer over ``groundtruth`` using ``gfw_gdf`` as candidate points."""
    return process_groundtruth_on_analyzer(
        InfrastructureAnalyzer,
        groundtruth,
        points_gdf=gfw_gdf,
        analyzer_params={},
    )


# Same analyzer and candidate points for each of the three ground-truth sets.
results_vessel_on_infra_asa = _run_infrastructure_analyzer(vessel_groundtruth)
results_infra_on_infra = _run_infrastructure_analyzer(infrastructure_groundtruth)
results_dark_on_infra = _run_infrastructure_analyzer(dark_vessel_groundtruth)

# Wipe whatever the analyzer runs printed into the cell output.
clear_output()

In [None]:
def _run_dark_analyzer(groundtruth, candidate_points):
    """Run DarkAnalyzer over ``groundtruth`` against ``candidate_points``."""
    return process_groundtruth_on_analyzer(
        DarkAnalyzer,
        groundtruth,
        points_gdf=candidate_points,
        analyzer_params={},
    )


# Vessel and infrastructure slicks use the HITL SAR detections ...
results_vessel_on_dark_asa = _run_dark_analyzer(
    vessel_groundtruth, sar_detections_infra_vess_gdf
)
results_infra_on_dark = _run_dark_analyzer(
    infrastructure_groundtruth, sar_detections_infra_vess_gdf
)

# ... while the dark-vessel slicks use the detections from the dark-vessel dataset.
results_dark_on_dark_asa = _run_dark_analyzer(
    dark_vessel_groundtruth, sar_detections_gdf
)

# Label the dark-on-dark results against the dark-vessel ground truth.
results_dark_on_dark_asa = apply_labeling(
    results_dark_on_dark_asa,
    dark_vessel_groundtruth,
    label_dark_vessel_results_with_distance,
)

# Wipe whatever the analyzer runs printed into the cell output.
clear_output()

In [None]:
# Row counts per (analyzer source type, ground-truth slick type) combination.
# Variables are named results_<slick type>_on_<source analyzer>; the labels
# now state the combination each variable actually holds.
print("Infra sources on Infra slicks:", len(results_infra_on_infra))
print("Vessel sources on Infra slicks:", len(results_infra_on_vessel))
print("Vessel sources on Vessel slicks:", len(results_vessel_on_vessel_asa))
print("Infra sources on Vessel slicks:", len(results_vessel_on_infra_asa))

In [None]:
# Store st_name as integers on the vessel-on-vessel results.
vessel_st_name = results_vessel_on_vessel_asa["st_name"]
results_vessel_on_vessel_asa["st_name"] = vessel_st_name.astype(int)

In [None]:
# Label the vessel-on-vessel results against the vessel ground truth again.
# NOTE(review): the same labeling was already applied right after the
# AISAnalyzer run; presumably it is repeated because st_name has since been
# cast to int — confirm whether the earlier call is still needed.
results_vessel_on_vessel_asa = apply_labeling(
    results_vessel_on_vessel_asa, vessel_groundtruth, label_results_with_st_name
)

In [None]:
# Copy structure_id into st_name so the infrastructure results carry an
# st_name column before being passed to label_results_with_st_name
# (presumably the labeler matches on st_name — confirm).
results_infra_on_infra["st_name"] = results_infra_on_infra["structure_id"]
# Same-type results labeled against their own ground truth.
# NOTE(review): the vessel and dark results were already passed through
# apply_labeling in earlier cells; whether apply_labeling mutates its input
# or returns a copy is not visible here — confirm that relabeling is harmless.
results_vessel_labeled = apply_labeling(
    results_vessel_on_vessel_asa, vessel_groundtruth, label_results_with_st_name
)
results_infra_labeled = apply_labeling(
    results_infra_on_infra, infrastructure_groundtruth, label_results_with_st_name
)
results_dark_labeled = apply_labeling(
    results_dark_on_dark_asa,
    dark_vessel_groundtruth,
    label_dark_vessel_results_with_distance,
)

In [None]:
# Type codes follow the HITL "type" column: 1 = vessel, 2 = infrastructure.
# Results from running an analyzer on the *other* slick type are all marked
# as negatives (truth = False).

# Infrastructure slicks scored against AIS vessel sources.
results_infra_on_vessel["truth"] = False
results_infra_on_vessel["slick_type"] = 2
results_infra_on_vessel["source_type"] = 1

# Vessel slicks scored against infrastructure sources.
results_vessel_on_infra_asa["truth"] = False
results_vessel_on_infra_asa["slick_type"] = 1
results_vessel_on_infra_asa["source_type"] = 2

# Infrastructure slicks vs. infrastructure sources: keep only the true matches.
results_infra_labeled["slick_type"] = 2
results_infra_labeled["source_type"] = 2
results_infra_labeled_truth = results_infra_labeled[results_infra_labeled["truth"]]

# Vessel slicks vs. vessel sources: keep only the true matches.
results_vessel_labeled["slick_type"] = 1
results_vessel_labeled["source_type"] = 1
results_vessel_labeled_truth = results_vessel_labeled[results_vessel_labeled["truth"]]

# Cell output: number of true vessel matches. Selecting the export columns
# first also checks that they are all present.
len(
    results_vessel_labeled_truth[
        ["slick_id", "source_type", "slick_type", "truth", "coincidence_score"]
    ]
)

In [None]:
# Type codes: 2 = infrastructure, 3 = dark vessel.
# Results from running an analyzer on the *other* slick type are all marked
# as negatives (truth = False).

# Infrastructure slicks scored against dark (SAR-detected) sources.
results_infra_on_dark["truth"] = False
results_infra_on_dark["slick_type"] = 2
results_infra_on_dark["source_type"] = 3

# Dark-vessel slicks scored against infrastructure sources.
results_dark_on_infra["truth"] = False
results_dark_on_infra["slick_type"] = 3
results_dark_on_infra["source_type"] = 2

# Infrastructure slicks vs. infrastructure sources: keep only the true matches.
# Same constant tagging as in the vessel/infrastructure cell; re-applying it
# here gives the same result.
results_infra_labeled["slick_type"] = 2
results_infra_labeled["source_type"] = 2
results_infra_labeled_truth = results_infra_labeled[results_infra_labeled["truth"]]

# Dark-vessel slicks vs. dark sources: keep only the true matches.
results_dark_labeled["slick_type"] = 3
results_dark_labeled["source_type"] = 3
results_dark_labeled_truth = results_dark_labeled[results_dark_labeled["truth"]]

# Cell output: number of true dark matches. Selecting the export columns
# first also checks that they are all present.
len(
    results_dark_labeled_truth[
        ["slick_id", "source_type", "slick_type", "truth", "coincidence_score"]
    ]
)

In [None]:
# Each combined frame stacks the cross-type negatives with the same-type true
# matches for one slick type; rows are renumbered from 0.

# Infrastructure slicks: vessel-source negatives + true infrastructure matches.
results_full_infra_on_vessel = pd.concat(
    (results_infra_on_vessel, results_infra_labeled_truth),
    ignore_index=True,
)

# Vessel slicks: true vessel matches + infrastructure-source negatives.
results_full_vessel_on_infra = pd.concat(
    (results_vessel_labeled_truth, results_vessel_on_infra_asa),
    ignore_index=True,
)

# Dark-vessel slicks: true dark matches + infrastructure-source negatives.
results_full_dark_on_infra = pd.concat(
    (results_dark_labeled_truth, results_dark_on_infra),
    ignore_index=True,
)

# Infrastructure slicks: dark-source negatives + true infrastructure matches.
results_full_infra_on_dark = pd.concat(
    (results_infra_on_dark, results_infra_labeled_truth),
    ignore_index=True,
)

In [None]:
# Create the output folder; exist_ok=True keeps the cell re-runnable when the
# folder for this run already exists.
os.makedirs(save_location, exist_ok=True)

In [None]:
# Export infrastructure slicks scored against vessel sources.
# index must be the boolean False: the string "False" is truthy, so pandas
# would still write the index column.
results_full_infra_on_vessel[
    ["slick_id", "source_type", "slick_type", "truth", "coincidence_score"]
].to_csv(save_location + "/infra_slick_vessel_source.csv", index=False)

In [None]:
# Export vessel slicks scored against infrastructure sources.
# index must be the boolean False: the string "False" is truthy, so pandas
# would still write the index column.
results_full_vessel_on_infra[
    ["slick_id", "source_type", "slick_type", "truth", "coincidence_score"]
].to_csv(save_location + "/vessel_slick_infra_source.csv", index=False)

In [None]:
# Export dark-vessel slicks scored against infrastructure sources.
# index must be the boolean False: the string "False" is truthy, so pandas
# would still write the index column.
results_full_dark_on_infra[
    ["slick_id", "source_type", "slick_type", "truth", "coincidence_score"]
].to_csv(save_location + "/dark_slick_infra_source.csv", index=False)

In [None]:
# Export infrastructure slicks scored against dark sources.
# index must be the boolean False: the string "False" is truthy, so pandas
# would still write the index column.
results_full_infra_on_dark[
    ["slick_id", "source_type", "slick_type", "truth", "coincidence_score"]
].to_csv(save_location + "/infra_slick_dark_source.csv", index=False)

In [None]:
# Store st_name as integers so it can be joined against the integer MMSI key
# built from the dark-vessel ground truth in the next cell.
dark_on_vessel_st_name = results_dark_on_vessel_asa["st_name"]
results_dark_on_vessel_asa["st_name"] = dark_on_vessel_st_name.astype(int)

In [None]:
# Ground-truth dark-vessel rows that do have an MMSI, exposed under the
# st_name key used by the AIS results (NaN MMSIs are dropped before the cast).
known_mmsi_groundtruth = dark_vessel_groundtruth.dropna(subset=["mmsi"]).assign(
    st_name=lambda groundtruth: groundtruth["mmsi"].astype(int)
)

# Left anti-join: keep the AIS candidates whose (slick_id, st_name) pair does
# NOT appear in the ground truth.
dark_on_vessel_merged = results_dark_on_vessel_asa.merge(
    known_mmsi_groundtruth,
    on=["slick_id", "st_name"],
    how="left",
    indicator=True,
)
filtered_vessel_results_on_dark = (
    dark_on_vessel_merged.query('_merge == "left_only"')
    .drop(columns=["_merge"])
    # Restore the left-hand column names that the merge suffixed with "_x".
    .rename(
        columns={
            "coincidence_score_x": "coincidence_score",
            "scene_id_x": "scene_id",
        }
    )
)

# Type codes: 1 = vessel, 3 = dark vessel.
results_dark_on_dark_asa["source_type"] = 3
results_dark_on_dark_asa["slick_type"] = 3

# The remaining AIS candidates on dark slicks are negatives by construction.
filtered_vessel_results_on_dark["source_type"] = 1
filtered_vessel_results_on_dark["slick_type"] = 3
filtered_vessel_results_on_dark["truth"] = False

# Negatives first, followed by the dark-on-dark rows labeled as true.
true_dark_matches = results_dark_on_dark_asa[results_dark_on_dark_asa["truth"]]
dark_truth_with_vessel = pd.concat(
    [filtered_vessel_results_on_dark, true_dark_matches]
)

In [None]:
# Vessel ground-truth rows with a known st_name, exposed under the ssvid key
# used by the dark-analyzer results (NaN st_name rows are dropped before the cast).
known_vessel_groundtruth = vessel_groundtruth.dropna(subset=["st_name"]).assign(
    ssvid=lambda groundtruth: groundtruth["st_name"].astype(int)
)

# Left anti-join: keep the dark candidates whose (slick_id, ssvid) pair does
# NOT appear in the vessel ground truth.
vessel_on_dark_merged = results_vessel_on_dark_asa.merge(
    known_vessel_groundtruth,
    on=["slick_id", "ssvid"],
    how="left",
    indicator=True,
)
filtered_dark_results_on_vessel = (
    vessel_on_dark_merged.query('_merge == "left_only"')
    .drop(columns=["_merge"])
    # Restore the left-hand column names that the merge suffixed with "_x".
    .rename(
        columns={
            "coincidence_score_x": "coincidence_score",
            "scene_id_x": "scene_id",
        }
    )
)

# Type codes: 1 = vessel, 3 = dark vessel.
results_vessel_on_vessel_asa["source_type"] = 1
results_vessel_on_vessel_asa["slick_type"] = 1

# The remaining dark candidates on vessel slicks are negatives by construction.
filtered_dark_results_on_vessel["source_type"] = 3
filtered_dark_results_on_vessel["slick_type"] = 1
filtered_dark_results_on_vessel["truth"] = False

# Negatives first, followed by the vessel-on-vessel rows labeled as true.
true_vessel_matches = results_vessel_on_vessel_asa[
    results_vessel_on_vessel_asa["truth"]
]
vessel_truth_with_dark = pd.concat(
    [filtered_dark_results_on_vessel, true_vessel_matches]
)

In [None]:
# Export dark-vessel slicks scored against vessel sources (no index column).
dark_slick_vessel_columns = [
    "slick_id",
    "source_type",
    "slick_type",
    "truth",
    "coincidence_score",
]
dark_slick_vessel_csv = save_location + "/dark_slick_vessel_source.csv"
dark_truth_with_vessel[dark_slick_vessel_columns].to_csv(
    dark_slick_vessel_csv, index=False
)

In [None]:
# Export vessel slicks scored against dark sources (no index column).
vessel_slick_dark_columns = [
    "slick_id",
    "source_type",
    "slick_type",
    "truth",
    "coincidence_score",
]
vessel_slick_dark_csv = save_location + "/vessel_slick_dark_source.csv"
vessel_truth_with_dark[vessel_slick_dark_columns].to_csv(
    vessel_slick_dark_csv, index=False
)

In [None]:
# Sanity check: True when at least one vessel-on-vessel result is labeled as truth.
results_vessel_on_vessel_asa["truth"].any()