In [None]:
import os
import sys
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt

load_dotenv()

# Set the download path for demonstration and ensure the folder exists.
download_path = os.getenv("ASA_DOWNLOAD_PATH")
if not download_path:
    # os.makedirs(None) would fail with an opaque TypeError; name the missing
    # variable so the notebook user knows what to configure.
    raise RuntimeError(
        "ASA_DOWNLOAD_PATH is not set; define it in the environment or in the .env file."
    )
os.makedirs(download_path, exist_ok=True)

# Source checkouts to make importable for the cells below.
git_path = os.getenv("GIT_FOLDER")
cv3_path = os.getenv("CV3_FOLDER")
for _env_name, _extra_path in (("GIT_FOLDER", git_path), ("CV3_FOLDER", cv3_path)):
    if not _extra_path:
        # Never push None/"" onto sys.path; the packages may still be importable
        # if they are installed, so warn instead of failing here.
        print(f"{_env_name} is not set; not adding it to sys.path.", file=sys.stderr)
    elif _extra_path not in sys.path:
        # Guard against duplicate entries when this cell is re-run.
        sys.path.append(_extra_path)

In [None]:
from cerulean_cloud.cloud_function_asa.utils.analyzer import AISAnalyzer

In [None]:
# Import the evaluation helpers from the asa_analysis package.
from asa_analysis.evaluation.source_analyzer_evaluation import (
    label_results_with_st_name,
    apply_labeling,
    calculate_metrics,
    plot_metrics,
    process_groundtruth_on_analyzer,
    add_missing_groundtruth,
)

In [None]:
def plot_3_gdfs(
    gdf1, gdf2, gdf3, figsize=(10, 10), title="", labels=("gdf1", "gdf2", "gdf3")
):
    """Overlay up to three layers on one set of axes and show the figure.

    Each layer only needs a ``.plot(ax=..., **style)`` method (presumably a
    GeoDataFrame, going by the parameter names). Pass ``None`` to skip a layer.

    Parameters
    ----------
    gdf1 : object with ``.plot`` or None
        Drawn in semi-transparent blue with a black edge.
    gdf2 : object with ``.plot`` or None
        Drawn in red with a dashed red edge.
    gdf3 : object with ``.plot`` or None
        Drawn in opaque green with a black edge and "o" markers.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    title : str
        Axes title.
    labels : sequence of three str
        Legend text for ``gdf1``, ``gdf2`` and ``gdf3`` respectively.
    """
    # Local import so the function does not depend on an extra top-level import.
    from matplotlib.patches import Patch

    fig, ax = plt.subplots(figsize=figsize)
    layers = (
        (gdf1, {"color": "blue", "alpha": 0.5, "edgecolor": "black"}),
        (gdf2, {"color": "red", "edgecolor": "red", "linestyle": "--"}),
        (gdf3, {"color": "green", "alpha": 1.0, "edgecolor": "black", "marker": "o"}),
    )

    # The plotted artists carry no labels, so a bare legend() call finds nothing
    # to show and only emits a warning. Build explicit proxy handles instead.
    handles = []
    for (layer, style), label in zip(layers, labels):
        if layer is None:
            continue
        layer.plot(ax=ax, **style)
        handles.append(
            Patch(
                facecolor=style["color"],
                edgecolor=style["edgecolor"],
                alpha=style.get("alpha"),
                linestyle=style.get("linestyle", "-"),
                label=label,
            )
        )

    if handles:
        ax.legend(handles=handles)
    ax.set_title(title)
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    plt.show()

In [None]:
# Read the human-in-the-loop (HITL) export that holds vessel and
# infrastructure groundtruth.
csv_path = r"../slick_to_source_2025-3-20.csv"
hitl_df = pd.read_csv(csv_path)

# Keep only HITL-verified rows, split by source type code
# (2 -> infrastructure, 1 -> vessel).
infrastructure_groundtruth = hitl_df.loc[
    hitl_df["type"].eq(2) & (hitl_df["hitl_verification"])
]
vessel_groundtruth = hitl_df.loc[
    hitl_df["type"].eq(1) & (hitl_df["hitl_verification"])
]

# Later cells look the slick identifier up as "slick_id".
if "slick" in infrastructure_groundtruth:
    infrastructure_groundtruth = infrastructure_groundtruth.rename(
        columns={"slick": "slick_id"}
    )
if "slick" in vessel_groundtruth:
    vessel_groundtruth = vessel_groundtruth.rename(columns={"slick": "slick_id"})

In [None]:
# os.listdir returns entries in arbitrary order and includes everything in the
# folder (checkpoints, hidden files, ...). Keep only the CSV results and sort
# them so every run processes the same files in the same order.
finetuned_results_files = sorted(
    name for name in os.listdir("saves/buffer_finetuning") if name.endswith(".csv")
)

In [None]:
# Display the discovered result file names to check what the next cell will load.
finetuned_results_files

In [None]:
# Label every saved buffer-finetuning run, keyed by its
# "before, after, buffer" triple parsed from the file name
# (vessel_results_<before>_<after>_<buffer>.csv).
buffer_results = {}
for file in finetuned_results_files:
    if not file.endswith(".csv"):
        # The name parsing below assumes a ".csv" suffix; skip anything else.
        continue
    f_list = file[:-4].split("_")
    b_a_buff = f_list[2] + ", " + f_list[3] + ", " + f_list[4]
    res = pd.read_csv(f"saves/buffer_finetuning/{file}")
    res["st_name"] = res["st_name"].astype(int)
    res_labelled = apply_labeling(res, vessel_groundtruth, label_results_with_st_name)
    res_labelled = add_missing_groundtruth(res_labelled, vessel_groundtruth)
    buffer_results[b_a_buff] = res_labelled
    # No `break` here: the previous early exit left only one configuration in
    # buffer_results, so the comparison plot had nothing to compare.

In [None]:
# Compute metrics for the buffer-finetuning results and chart them.
metrics_df = calculate_metrics(buffer_results)
plot_metrics(
    metrics_df,
    legend_title="Before, After, Buffer",
    title="HOURS_BEFORE, HOURS_AFTER, AIS_BUFFER:",
)

In [None]:
# Result files for the before/after sweep. Going by the labels used elsewhere
# in this notebook, the two numbers are hours-before and hours-after; the
# trailing 20000 is the same in every file.
finetuned_results_files2 = [
    f"vessel_results_{before}_{after}_20000.csv"
    for before, after in (
        (1, 1),
        (4, 2),
        (6, 4),
        (8, 4),
        (8, 6),
        (9, 5),
        (10, 4),
        (10, 6),
        (12, 6),
        (14, 8),
        (16, 8),
    )
]

In [None]:
# "<before>, <after>" tick labels, taken from the 3rd and 4th underscore-separated
# fields of each file name (extension stripped first).
intervals = [
    ", ".join(os.path.splitext(name)[0].split("_")[2:4])
    for name in finetuned_results_files2
]

In [None]:
# Label each before/after run and store it under its "before, after, buffer" key.
before_after_results = {}
for file in finetuned_results_files2:
    f_list = os.path.splitext(file)[0].split("_")
    b_a_buff = ", ".join(f_list[2:5])
    res = pd.read_csv(f"saves/before_after_finetuning/{file}")
    # st_name is cast to int before labelling against the groundtruth.
    res = res.astype({"st_name": int})
    res_labelled = add_missing_groundtruth(
        apply_labeling(res, vessel_groundtruth, label_results_with_st_name),
        vessel_groundtruth,
    )
    before_after_results[b_a_buff] = res_labelled

In [None]:
# Compute metrics for the before/after sweep and chart them.
metrics_df = calculate_metrics(before_after_results)
plot_metrics(
    metrics_df,
    legend_title="Before, After, Buffer",
    title="HOURS_BEFORE, HOURS_AFTER, AIS_BUFFER:",
)

In [None]:
# Take the "runtime" value from the first row of each labelled frame. The dict
# preserves insertion order, so the values line up with `intervals`.
runtimes = [frame.iloc[0]["runtime"] for frame in before_after_results.values()]

plt.plot(intervals, runtimes)
plt.title("Runtime vs Intervals")
plt.xlabel("Intervals")
plt.ylabel("Runtime")
plt.show()

In [None]:
# Show the "before, after, buffer" label left over from the last loop iteration that assigned b_a_buff.
b_a_buff

In [None]:
# Spread values to compare; one saved CSV exists per value.
spreads = [1, 100, 1000, 2000, 5000, 7500, 10000, 15000, 20000, 50000, 100000]
spread_results = {}
for spread in spreads:
    res = pd.read_csv(
        f"saves/spread_finetuning/vessel_results_spread_8_6_{spread}.csv"
    ).astype({"st_name": int})
    # Label against the vessel groundtruth, then append groundtruth rows that
    # are missing from the results.
    res_labelled = add_missing_groundtruth(
        apply_labeling(res, vessel_groundtruth, label_results_with_st_name),
        vessel_groundtruth,
    )
    spread_results[str(spread)] = res_labelled

In [None]:
# `res_labelled` is whatever the most recently executed labelling loop left
# behind (the last spread value when the notebook is run top to bottom).
truth_rows = res_labelled[res_labelled["truth"]]

# Only the last expression of a cell is displayed, so print the count
# explicitly instead of leaving it as a bare (discarded) expression.
print(len(truth_rows))

# Groundtruth rows with a coincidence score of exactly 0.
truth_rows[truth_rows["coincidence_score"] == 0]

In [None]:
# NOTE(review): hard-coded ratio with no visible derivation — presumably counts read off an earlier output; confirm and compute it from the data instead.
83 / 88

In [None]:
# Compute metrics for the spread sweep and chart them.
metrics_df = calculate_metrics(spread_results)
plot_metrics(
    metrics_df,
    title="Metrics at different Spread Rates",
    # The series here are keyed by spread value, not by a
    # "before, after, buffer" triple, so label the legend accordingly.
    legend_title="Spread Rate",
    value_font_size=8,
)

In [None]:
# All slick ids present in the vessel groundtruth.
slick_ids = vessel_groundtruth["slick_id"].values
# Use the first one as the example slick for the single-slick run below;
# this raises IndexError if the groundtruth is empty.
slick_id = slick_ids[0]

In [None]:
# Analyzer parameters for this single-slick run.
kwargs = dict(hours_before=10, hours_after=6, ais_buffer=100)

# Run the AIS analyzer on the groundtruth rows of the selected slick only;
# return_analyzer=True also hands back the analyzer object.
results_vessel_local, analyzer = process_groundtruth_on_analyzer(
    AISAnalyzer,
    vessel_groundtruth.loc[vessel_groundtruth["slick_id"] == slick_id],
    analyzer_params=kwargs,
    reuse_ais_gdf=True,
    return_analyzer=True,
)

# Cast in place (the frame may be shared with the analyzer), then label and
# append groundtruth rows that are missing from the results.
results_vessel_local["st_name"] = results_vessel_local["st_name"].astype(int)
results_vessel_local = add_missing_groundtruth(
    apply_labeling(results_vessel_local, vessel_groundtruth, label_results_with_st_name),
    vessel_groundtruth,
)

In [None]:
# Grace distances to compare; one saved CSV exists per value.
graces = [1, 100, 500, 1000, 2500, 5000, 10000]
grace_results = {}
for grace in graces:
    res = pd.read_csv(f"saves/grace_finetuning/vessel_results_grace_{grace}.csv")
    res = res.assign(st_name=res["st_name"].astype(int))
    # Label against the vessel groundtruth, then append groundtruth rows that
    # are missing from the results.
    res_labelled = add_missing_groundtruth(
        apply_labeling(res, vessel_groundtruth, label_results_with_st_name),
        vessel_groundtruth,
    )
    grace_results[str(grace)] = res_labelled

In [None]:
# Compute metrics for the grace-distance sweep and chart them.
metrics_df = calculate_metrics(grace_results)
plot_metrics(
    metrics_df,
    title="Metrics at different Grace Distances",
    # The series here are keyed by grace distance, not by a
    # "before, after, buffer" triple, so label the legend accordingly.
    legend_title="Grace Distance",
    value_font_size=8,
)

In [None]:
# Drift settings 0 through 7; one saved CSV exists per value.
drifts = list(range(8))
drift_results = {}
for drift in drifts:
    res = pd.read_csv(
        f"saves/drift_finetuning/vessel_results_drift_{drift}.csv"
    ).astype({"st_name": int})
    # Label against the vessel groundtruth, then append groundtruth rows that
    # are missing from the results.
    res_labelled = add_missing_groundtruth(
        apply_labeling(res, vessel_groundtruth, label_results_with_st_name),
        vessel_groundtruth,
    )
    drift_results[str(drift)] = res_labelled

In [None]:
# Compute metrics for the drift sweep and chart them.
metrics_df = calculate_metrics(drift_results)
plot_metrics(
    metrics_df,
    title="Metrics at different Max Slick Drifting Hours",
    # The series here are keyed by drift hours, not by a
    # "before, after, buffer" triple, so label the legend accordingly.
    legend_title="Max Drift Hours",
    value_font_size=8,
)

In [None]:
# Take the "runtime" value from the first row of each labelled frame. The dict
# preserves insertion order, so the values line up with `drifts`.
runtimes = [frame.iloc[0]["runtime"] for frame in drift_results.values()]

plt.plot(drifts, runtimes)
plt.title("FULL ASA Runtime vs Drift Parameter")
plt.xlabel("Drift Parameter")
plt.ylabel("Runtime (seconds)")
plt.show()

In [None]:
# NOTE(review): hard-coded ratio with no visible derivation — presumably counts read off an earlier output; confirm and compute it from the data instead.
71 / 83

In [None]:
# Display the labelled single-slick results produced by the analyzer run earlier in the notebook.
results_vessel_local