## Data visualization

In [1]:
import os
import time

import fsspec
import geopandas as gpd
import holoviews as hv
import hvplot.pandas
import pandas as pd
import panel as pn
import xarray as xr
from dotenv import load_dotenv
from pyproj import CRS

# Load variables from a local .env file. override=True is passed so that values
# from .env take precedence over variables already set in the environment.
load_dotenv(override=True)

# NOTE: access tokens to the data are available upon request from Floris Calkoen
account_name = "coclico"
# None when AZURE_STORAGE_SAS_TOKEN is not set.
sas_token = os.environ.get("AZURE_STORAGE_SAS_TOKEN")
storage_options = dict(account_name=account_name, credential=sas_token)

In [2]:
# Glob pattern matching every test-prediction parquet file in the container.
TEST_PREDICTIONS_PREFIX = "az://typology/test/*.parquet"

fs = fsspec.filesystem("az", **storage_options)
files = fs.glob(TEST_PREDICTIONS_PREFIX)

# Map a display name (file name without its extension) to the full blob path.
# os.path.splitext drops only the trailing extension, whereas
# str.replace(".parquet", "") would also strip the substring from the middle of
# a name and could make two different files collide on the same key.
test_layer_options = {
    os.path.splitext(path.rsplit("/", 1)[-1])[0]: path for path in files
}

In [3]:
# Dropdown offering the available test-prediction layers by name.
file_browser = pn.widgets.Select(options=list(test_layer_options))
file_browser

In [4]:
def fetch_data(fs, urlpath, storage_options):
    """Open ``urlpath`` on ``fs`` in binary mode and read it with ``gpd.read_parquet``.

    Args:
        fs: Filesystem object exposing ``open(path, mode=..., **kwargs)``.
        urlpath: Path of the parquet file to read.
        storage_options: Mapping forwarded as keyword arguments to ``fs.open``.

    Returns:
        The object returned by ``gpd.read_parquet`` for the opened file.
    """
    # NOTE(review): ``fs`` is presumably already authenticated, so forwarding
    # ``storage_options`` here may be redundant - confirm before removing.
    with fs.open(urlpath, mode="rb", **storage_options) as handle:
        return gpd.read_parquet(handle)


# Load the layer currently selected in the dropdown. The result depends on the
# widget state at the time this cell runs.
selected_urlpath = test_layer_options[file_browser.value]
df = fetch_data(fs, selected_urlpath, storage_options)

In [64]:
import json

fs = fsspec.filesystem("az", **storage_options)

# One listing of JSON label records per label folder, in the same order as
# files1, files2, files3.
files1, files2, files3 = (
    fs.glob(pattern)
    for pattern in (
        "az://typology/labels/*.json",
        "az://typology/labels2/*.json",
        "az://typology/labels3/*.json",
    )
)


def get_signed_url(container, record_name: str) -> str:
    """Build the URL of ``record_name`` under ``container`` with the SAS token appended.

    The token is taken from the module-level ``storage_options["credential"]``
    and placed after ``?`` as the query string.
    """
    sas_query = storage_options["credential"]
    return "{}/{}?{}".format(container, record_name, sas_query)


def read_record(container, record_name: str) -> dict:
    """Fetch one JSON record over HTTPS and return the parsed content.

    Args:
        container: Base URL of the folder that holds the record.
        record_name: File name of the record inside ``container``.

    Returns:
        The decoded JSON document.
    """
    # HTTPS is used for the read to avoid issues in Panel apps.
    url = get_signed_url(container, record_name)
    with fsspec.open(url, mode="r") as handle:
        return json.load(handle)


def read_label_records(container, urlpaths):
    """Read every record listed in ``urlpaths`` from ``container``.

    Replaces three copy-pasted loops that differed only in the container URL
    and the file listing.

    Args:
        container: Base HTTPS URL of the folder holding the records.
        urlpaths: Iterable of blob paths; only the final path component (the
            file name) of each is used.

    Returns:
        A list with one parsed record per entry of ``urlpaths``, in order.
    """
    return [read_record(container, urlpath.split("/")[-1]) for urlpath in urlpaths]


records1 = read_label_records(
    "https://coclico.blob.core.windows.net/typology/labels", files1
)
records2 = read_label_records(
    "https://coclico.blob.core.windows.net/typology/labels2", files2
)
records3 = read_label_records(
    "https://coclico.blob.core.windows.net/typology/labels3", files3
)

In [65]:
import geopandas as gpd
import shapely.wkt

def records_to_gdf(records):
    """Convert a list of label records into a GeoDataFrame in EPSG:4326.

    Replaces three copy-pasted conversion stanzas that differed only in the
    input list.

    Args:
        records: List of record dicts, each with a ``"geometry"`` entry holding
            a WKT string.

    Returns:
        A GeoDataFrame whose ``geometry`` column holds the parsed shapes and
        whose CRS is set to EPSG:4326.
    """
    gdf = gpd.GeoDataFrame.from_records(records)
    # The geometries arrive as WKT text and must be parsed before the column
    # can serve as the active geometry.
    gdf["geometry"] = gdf["geometry"].apply(shapely.wkt.loads)
    return gdf.set_geometry("geometry").set_crs(epsg=4326)


gdf1 = records_to_gdf(records1)
gdf2 = records_to_gdf(records2)
gdf3 = records_to_gdf(records3)

In [63]:
# Number of gdf1 rows flagged by duplicated() on the uuid column.
gdf1["uuid"].duplicated().sum()

70

In [76]:
import panel as pn
# Preview of a three-level confidence slider, defaulting to "medium".
pn.widgets.DiscreteSlider(
    name="Confidence",
    options=["low", "medium", "high"],
    value="medium",
)

In [38]:
# (rows, columns) of the frame built from the labels3 records.
gdf3.shape

(1086, 18)

In [39]:
# (rows, columns) of the frame built from the labels records.
gdf1.shape

(1113, 16)

In [40]:
# Number of distinct transects labelled in gdf1.
gdf1["transect_id"].nunique()

1078

In [41]:
# Number of distinct transects labelled in gdf3.
gdf3["transect_id"].nunique()

1078

In [55]:
# Check that the gdf1 rows whose uuid also occurs in gdf3 are identical to gdf3
# once gdf3's "confidence" and "is_validated" columns are removed.
gdf1_in_gdf3 = gdf1[gdf1["uuid"].isin(gdf3.uuid)].reset_index(drop=True)
gdf3_shared_columns = gdf3.reset_index(drop=True).drop(
    columns=["confidence", "is_validated"]
)
gdf1_in_gdf3.equals(gdf3_shared_columns)

True

In [43]:
# Check that every user has the same number of distinct transects in gdf1 and gdf3.
transects_per_user_1 = gdf1.groupby("user")["transect_id"].nunique()
transects_per_user_3 = gdf3.groupby("user")["transect_id"].nunique()
transects_per_user_1.equals(transects_per_user_3)

True

In [36]:
# (rows, columns) of the frame built from the labels2 records.
gdf2.shape

(1106, 18)

In [50]:
# All gdf1 records for a single transect, to inspect its entries.
gdf1.loc[gdf1["transect_id"] == "cl50994s00tr00397146"]

Unnamed: 0,uuid,user,transect_id,lon,lat,geometry,datetime_created,datetime_updated,shore_type,coastal_type,landform_type,is_built_environment,has_defense,is_challenging,comment,link
1010,92fcef258ca6,floris-calkoen,cl50994s00tr00397146,110.932495,1.522849,"LINESTRING (110.93066 1.51399, 110.93433 1.53171)",2024-10-24T13:28:50.733999+00:00,2024-10-24T13:28:50.733999+00:00,muddy_sediments,inlet,,True,False,False,,
1011,43c46aaf3328,floris-calkoen,cl50994s00tr00397146,110.932495,1.522849,"LINESTRING (110.93066 1.51399, 110.93433 1.53171)",2024-10-24T13:28:50.733999+00:00,2024-10-24T13:31:00.968840+00:00,muddy_sediments,inlet,,True,True,False,,


In [51]:
# All gdf3 records for the same single transect, for comparison with gdf1.
gdf3.loc[gdf3["transect_id"] == "cl50994s00tr00397146"]

Unnamed: 0,uuid,user,transect_id,lon,lat,geometry,datetime_created,datetime_updated,shore_type,coastal_type,landform_type,is_built_environment,has_defense,is_challenging,comment,link,confidence,is_validated
988,43c46aaf3328,floris-calkoen,cl50994s00tr00397146,110.932495,1.522849,"LINESTRING (110.93066 1.51399, 110.93433 1.53171)",2024-10-24T13:28:50.733999+00:00,2024-10-24T13:31:00.968840+00:00,muddy_sediments,inlet,,True,True,False,,,medium,False


In [47]:
# gdf1 records whose uuid does not appear in gdf2.
r1 = gdf1.loc[~gdf1["uuid"].isin(gdf2["uuid"])]
r1
# Optional follow-ups: all records sharing a transect with the rows above.
# gdf1[gdf1["transect_id"].isin(r1.transect_id)]
# gdf2[gdf2["transect_id"].isin(r1.transect_id)]

Unnamed: 0,uuid,user,transect_id,lon,lat,geometry,datetime_created,datetime_updated,shore_type,coastal_type,landform_type,is_built_environment,has_defense,is_challenging,comment,link
13,15099a5b2753,floris-calkoen,cl02385s00tr01276339,-122.043831,36.950954,"LINESTRING (-122.04528 36.95989, -122.04239 36...",2024-10-25T14:45:48.396143+00:00,2024-10-25T14:45:48.396143+00:00,rocky_shore_platform_or_large_boulders,bedrock_plain,,True,True,False,,
16,fa30e00d2675,floris-calkoen,cl02385s00tr01277939,-122.026497,36.953918,"LINESTRING (-122.03383 36.96075, -122.01916 36...",2024-10-25T14:45:19.047681+00:00,2024-10-25T14:45:19.047681+00:00,rocky_shore_platform_or_large_boulders,bedrock_plain,,True,False,False,,
481,2277c511b1f6,floris-calkoen,cl30909s00tr00199761,-19.090302,63.404961,"LINESTRING (-19.09243 63.41388, -19.08818 63.3...",2024-10-23T09:18:51.706057+00:00,2024-10-23T09:18:51.706057+00:00,sandy_gravel_or_small_boulder_sediments,sediment_plain,,False,False,False,Volcanic sand.,https://www.google.com/maps/place/Reynisfjara+...
484,a45f68365244,floris-calkoen,cl30909s00tr00200761,-19.07037,63.404308,"LINESTRING (-19.06737 63.41318, -19.07337 63.3...",2024-10-23T09:18:09.056026+00:00,2024-10-23T09:18:09.056026+00:00,sandy_gravel_or_small_boulder_sediments,sediment_plain,,False,False,False,Volcanic sand.,"https://www.google.com/maps/@63.4035619,-19.04..."
990,0a7d66e2b674,floris-calkoen,cl48218s00tr01229437,73.91935,15.089984,"LINESTRING (73.92414 15.08224, 73.91455 15.09773)",2024-10-24T09:55:05.304693+00:00,2024-10-24T09:55:05.304693+00:00,no_sediment_or_shore_platform,cliffed_or_steep,,False,False,False,,
1010,92fcef258ca6,floris-calkoen,cl50994s00tr00397146,110.932495,1.522849,"LINESTRING (110.93066 1.51399, 110.93433 1.53171)",2024-10-24T13:28:50.733999+00:00,2024-10-24T13:28:50.733999+00:00,muddy_sediments,inlet,,True,False,False,,
1066,b2358b30f7c9,rosh,cl43742s00tr00399368,27.900759,42.888348,"LINESTRING (27.88851 42.88844, 27.913 42.88826)",2024-10-16T12:23:23.399543+00:00,2024-10-16T12:23:23.399543+00:00,sandy_gravel_or_small_boulder_sediments,cliffed_or_steep,mainland_coast,False,False,True,poor image quality,


In [22]:
# (rows, columns) of the frame built from the labels2 records.
# NOTE(review): this repeats an earlier `gdf2.shape` cell; the execution counts
# are out of order, so confirm which run this output reflects.
gdf2.shape

(1106, 18)