see https://kraina-ai.github.io/srai/latest/examples/regionalizers/voronoi_regionalizer/

It seems that there is a maximum number of particles you can load (there may also be a problem with points at the same location — too low precision?).
But it still more or less works. Maybe a workaround would be to start … (TODO: this note was left unfinished)

In [None]:
#update reading in packages when rerunning this cell
%load_ext autoreload
%autoreload 2

import geopandas as gpd
import numpy as np
import plotly.express as px
from shapely.geometry import Point, shape
from shapely import geometry
from shapely.ops import unary_union
from srai.loaders.osm_loaders import OSMOnlineLoader
from srai.loaders.osm_loaders.filters.popular import get_popular_tags
from functional import seq

from srai.constants import WGS84_CRS
from srai.plotting.folium_wrapper import plot_regions
from srai.regionalizers import VoronoiRegionalizer, geocode_to_region_gdf, AdministrativeBoundaryRegionalizer
import srai as srai
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors 
import branca.colormap as cm_branca
import sys
sys.path.append("/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/src")
from analysis_functions import  make_PDF, Haversine

from datetime import datetime, timedelta
import xarray as xr

# set plotscale
plt.style.use('../python_style_Meike.mplstyle')

## create sea mask polygon

In [None]:
# Build the sea mask: rectangular simulation domain minus the land of the
# surrounding countries, written out as GeoJSON.
#
# Country boundaries come from a GeoJSON file downloaded from
# https://ec.europa.eu/eurostat/web/gisco/geodata/administrative-units/countries
# (make sure you download the data in the EPSG:4326 = WGS84 projection).
europe = gpd.read_file("/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/input_data/europe.geojson")

# countries that overlap the domain
country_names = [
    'France', 'Germany', 'Denmark', 'Belgium', 'Netherlands',
    'Bailiwick of Jersey', 'Jersey', 'Guernsey', 'Bailiwick of Guernsey',
    'Isle of Man', 'United Kingdom', 'Ireland', 'Norway', 'Switzerland',
    'Luxembourg', 'Italy', 'Liechtenstein', 'Austria',
]

# keep only the rows whose English name is in the list (file order preserved)
in_domain = europe[europe['NAME_ENGL'].isin(country_names)]
for matched_name in in_domain['NAME_ENGL']:
    print(matched_name)
polygon = list(in_domain['geometry'])

# merge all country geometries into one land geometry
land = unary_union(polygon)
landgdf = gpd.GeoDataFrame(geometry=gpd.GeoSeries(land, crs=WGS84_CRS))

# simulation domain: bounding box pulled in by 0.5 degrees on every side
lon_east = 9.977004051208496-0.5
lon_west = -15.996014595031738+0.5
lat_north = 61.28188705444336-0.5
lat_south = 46.00364303588867+0.5
domain_shell = [
    (lon_east, lat_north),
    (lon_west, lat_north),
    (lon_west, lat_south),
    (lon_east, lat_south),
    (lon_east, lat_north),
]
domaingdf = gpd.GeoDataFrame(
    geometry=[geometry.Polygon(shell=domain_shell)],
    crs=WGS84_CRS,
)

# sea = domain with the land cut out; stored for reuse by later cells
seagdf = domaingdf.overlay(landgdf, how='difference')
seagdf.to_file(
    '/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/input_data/NWES_sea_mask.geojson',
    driver='GeoJSON',
)


## Or read in already created seamask polygon

In [None]:

# Read the previously stored sea-mask polygon instead of rebuilding it.
sea_mask_path = '/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/input_data/NWES_sea_mask.geojson'
seagdf = gpd.GeoDataFrame.from_file(sea_mask_path)
# seagdf.explore()

## Create Voronoi diagram for simulation data

### preprocess data

In [None]:
# Open the simulation output (test dataset: tracer particles, September).
base_directory = '/storage/shared/oceanparcels/output_data/data_Meike/MR_advection/NWES/'
input_file_tracer_base = (
    base_directory
    + '{particle_type}/{loc}_start{y_s:04d}_{m_s:02d}_{d_s:02d}'
    + '_end{y_e:04d}_{m_e:02d}_{d_e:02d}_RK4_{land_handling}.zarr'
)

# run configuration
loc = 'NWES'
runtime = timedelta(days=30)  # alternative: timedelta(days=10)
nparticles = 88347  # 52511
chunck_time = 100
land_handling = 'anti_beaching'
coriolis = True
starttime = datetime(2023, 9, 1, 0, 0, 0, 0)
endtime = starttime + runtime

# values substituted into the file-name template; `cor_on` has no placeholder
# in the template and is ignored by str.format
template_fields = dict(
    loc=loc,
    y_s=starttime.year,
    m_s=starttime.month,
    d_s=starttime.day,
    y_e=endtime.year,
    m_e=endtime.month,
    d_e=endtime.day,
    land_handling=land_handling,
    cor_on=coriolis,
    particle_type='tracer',
)
input_file_tracer = input_file_tracer_base.format(**template_fields)
print(input_file_tracer)

# lazily open the zarr store; variables B, tau and z are not loaded and the
# time axis is left undecoded
ds = xr.open_dataset(
    input_file_tracer,
    engine='zarr',
    chunks={'trajectory': nparticles, 'obs': chunck_time},
    drop_variables=['B', 'tau', 'z'],
    decode_times=False,
)  # ,decode_cf=False)




In [None]:

# Build the Voronoi seed points from the particle positions at one output step.
# NOTE(review): assumes ds.lon / ds.lat are indexed as (trajectory, obs) — confirm.
T = 700  # index along the second axis of ds.lon / ds.lat

lons_at_T = ds.lon[:, T].values
lats_at_T = ds.lat[:, T].values

# Coordinates are rounded to 5 decimals; entries with a NaN in EITHER coordinate
# are skipped (the previous filter only looked at the longitude).
pointlist = [
    Point(np.round(lon, 5), np.round(lat, 5))
    for lon, lat in zip(lons_at_T, lats_at_T)
    if not (np.isnan(lon) or np.isnan(lat))
]

# Drop exact duplicates while keeping first-occurrence order. dict.fromkeys is
# used instead of set() so the seed order — and therefore the integer index
# assigned below — is reproducible between runs.
pointlist_unique = list(dict.fromkeys(pointlist))

# number of duplicate positions that were removed
print(len(pointlist) - len(pointlist_unique))
# print(len(pointlist_unique))

# optional sub-selection of the seeds (defaults to all of them)
begin = 0  # 70255
end = len(pointlist_unique)  # 77255
selected_points = pointlist_unique[begin:end]
seeds_gdf = gpd.GeoDataFrame(
    {"geometry": selected_points},
    index=list(range(len(selected_points))),
    crs=WGS84_CRS,
)

### Create Voronoi diagrams and calculate their area

In [None]:
# Build a Voronoi regionalizer from the particle seed points and apply it to the
# sea-mask GeoDataFrame; the resulting regions are stored in `sea_results`.
vr = VoronoiRegionalizer(seeds=seeds_gdf)
sea_results = vr.transform(gdf = seagdf)

In [None]:
# Per-region area (in units of a reference area) and its inverse, the density.
M2_PER_KM2 = 1E6
# NOTE(review): presumably the average H3 resolution-6 hexagon area in km^2 — confirm.
REFERENCE_AREA_KM2 = 36.129062164

# areas are measured in the projected CRS EPSG:25832 (alternative tried: 3395)
change_units = sea_results.to_crs(epsg=25832)
projected_area_m2 = change_units.geometry.area
sea_results["area"] = projected_area_m2 / M2_PER_KM2 / REFERENCE_AREA_KM2
sea_results["density"] = 1 / sea_results["area"]
sea_results = sea_results.sort_index()

In [None]:
# Report the smallest and largest normalised region area.
# (A stray bare `print` expression that did nothing was removed.)
print(sea_results["area"].min())
print(sea_results["area"].max())

In [None]:

# Colour every Voronoi region on a folium map using a logarithmic magma scale.

# The data range of the density column is computed first, but the limits are
# then fixed by hand, so the hard-coded values are the ones actually used.
density_column = sea_results["density"]
vmin, vmax = density_column.min(), density_column.max()
vmin, vmax = 0.1, 100
norm = colors.LogNorm(vmin=vmin, vmax=vmax)


def _magma_hex(value):
    """Return the magma colour (hex string) for `value` under the current `norm`."""
    return colors.to_hex(cm.magma(norm(value)))


# legend: 12 colours sampled at evenly spaced values between vmin and vmax
color_list = [_magma_hex(v) for v in np.linspace(vmin, vmax, 12)]
colormap = cm_branca.LinearColormap(
    colors=color_list,
    vmin=vmin,
    vmax=vmax,
    caption="Polygon Area"
)
# norm = colors.Normalize(vmin=12400, vmax=20000)

# one colour per region, derived from the "area" column
sea_results["color"] = sea_results["area"].apply(_magma_hex)

# draw the regions with the per-region colour list, without borders
folium_map = plot_regions(
    sea_results,
    colormap=list(sea_results["color"]),  # Passing as a list of colors
    tiles_style="CartoDB positron",
    show_borders=False
)

# sea_results.explore(
#     m=folium_map,
#     style_kwds=dict(
#         color="#444",  # Border color (can be ignored if weight is 0)
#         weight=0,  # Set the line width to zero to remove borders
#         fillOpacity=1
#     )
# )

# attach the legend and display the map as the cell output
colormap.add_to(folium_map)
folium_map

In [None]:
# Plot the density values in ascending order against their rank (log x-axis).
fig, ax = plt.subplots()
sorted_density = np.sort(list(sea_results['density']))
ax.plot(sorted_density, 'o')
ax.set_xscale('log')
# ax.set_xlim(0,10)

In [None]:
# Mean and (population) standard deviation of the region densities.
density_values = list(sea_results['density'])
mean = np.mean(density_values)
std = np.std(density_values)
print(mean)
print(std)

In [None]:
# Display the density at index 10 of the ascending sort (the 11th-smallest value).
np.sort(list(sea_results['density']))[10]

In [None]:
# Region areas in km^2 as a plain NumPy array.
# See https://epsg.io/25832-1149 for the EPSG:25832 projection; it seems fine, but
# it may be worth checking with someone who has more experience with projections.
arealist = sea_results.to_crs(epsg=25832).area
# print(arealist.type)

# projected areas divided by 10**6 (m^2 -> km^2 for a metre-based CRS)
areas = np.array([region_area / 10**6 for region_area in arealist])
print(areas.size)

In [None]:
# Plot the region areas in ascending order and print their mean.
fig, ax = plt.subplots()
sorted_areas = np.sort(areas)
ax.plot(sorted_areas, 'o')
mean_area = np.mean(areas)  # np.sort(areas)[1000:-1000])
print(mean_area)

In [None]:
# PDF of the region densities, computed with the project helper make_PDF.
# NOTE(review): nbins=200000000 is extremely large — confirm this is intended.
# NOTE(review): the x-axis label says "area", but the values passed in come from
# the "density" column — confirm which quantity is meant.
density_values = np.array(list(sea_results['density']))
bins, pdf = make_PDF(density_values, nbins=200000000, norm=True)
# mean_area=np.mean(areas)
# print(mean_area)

fig, ax = plt.subplots()
shifted_bins = bins + bins[1]
ax.plot(shifted_bins, pdf[:], '--o', color='navy')

# reference lines at 1 (grey, drawn behind the data) and at 1/36.129062164
ax.axvline(1, color='grey', zorder=-5)
# ax.set_xscale('log')
ax.set_xlabel('area / $\\langle$ area $\\rangle$')
ax.set_ylabel('PDF')
ax.set_xlim(0, 0.2)
ax.axvline(1/36.129062164)

In [None]:
# Keep the current PDF around under "old" names so it can be compared with a later
# run. These are plain assignments (no copies): the old names refer to the same
# objects until `bins` / `pdf` are recomputed.
binold = bins
pdfold = pdf
# NOTE(review): hard-coded; presumably the mean cell area of the earlier run — confirm.
mean_area_old = 225

In [None]:
# Overlay the current PDF (navy) and the stored "old" PDF (red); each x-axis is
# divided by its own mean area and the first bin is skipped.
fig, ax = plt.subplots()
curves = (
    (bins, pdf, mean_area, 'navy'),
    (binold, pdfold, mean_area_old, 'red'),
)
for bin_values, pdf_values, scale, line_color in curves:
    ax.plot((bin_values[1:] + bin_values[1]) / scale, pdf_values[1:], '--o', color=line_color)


# testing initial position 
The Voronoi cells at early timesteps were not hexagonal, which was suspicious. So below I check this by constructing a hexagonal distribution on a subset. I found a mistake in the initialization, which I have fixed; the Voronoi cells now also become hexagons.



In [None]:
# needed packages
#update reading in packages when rerunning this cell
%load_ext autoreload
%autoreload 2

import h3
sys.path.append("/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/release")
import h3_tools
import matplotlib.pyplot as plt
import cartopy as cart



In [None]:


# set particles on hexagonal grid in region
# The sea mask is (re)loaded from disk so this section can run on its own.
seagdf = gpd.GeoDataFrame.from_file('/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/input_data/NWES_sea_mask.geojson')
# GeoJSON-style polygon of the simulation domain: [lon, lat] corners of the
# bounding box, pulled in by 0.5 degrees on every side, ring closed on the first corner.
NWES_domain = { 
    "type":"Polygon",
    "coordinates": [
   [[9.977004051208496-0.5,61.28188705444336-0.5],
     [-15.996014595031738+0.5,61.28188705444336-0.5],
     [-15.996014595031738+0.5,46.00364303588867+0.5],
     [9.977004051208496-0.5,46.00364303588867+0.5],
     [9.977004051208496-0.5,61.28188705444336-0.5]]
     ]}

# Same polygon with every corner swapped to [lat, lon].
# NOTE(review): not used anywhere in the visible notebook — the un-flipped domain
# is what is passed to initGrid below.
NWES_domain_Flipped = {
      "type": "Polygon",
      "coordinates": [[[lat, lon] for lon, lat in NWES_domain["coordinates"][0]]]}
# Create the particle grid with the project helper h3_tools at h3_res=3
# (presumably one particle per H3 cell centroid inside the domain — confirm in h3_tools).
NWESParticles = h3_tools.initGrid(NWES_domain, h3_res=3)
# shapely geometry of the domain; only referenced by the commented-out plots below
NWES_shape = shape(NWES_domain)
# quick sanity check: longitude of the first particle
print(NWESParticles.centroid_lons[0])
# print(f"Number of particles: {NWESParticles.size}")

# # plot partciles together with region
# fig = plt.figure(figsize=(10, 8))
# ax = plt.axes(projection=cart.crs.PlateCarree())
# ax.add_feature(cart.feature.LAND)
# ax.add_feature(cart.feature.OCEAN)
# ax.add_feature(cart.feature.COASTLINE)
# ax.add_feature(cart.feature.BORDERS, linestyle=':')
# ax.gridlines(draw_labels=True, linestyle='--', color='gray', alpha=0.5, linewidth=0.5)

# ax.scatter(NWESParticles.centroid_lons, NWESParticles.centroid_lats, transform=cart.crs.PlateCarree(), s=0.5, c='r')
# ax.add_geometries([NWES_shape], cart.crs.PlateCarree(), facecolor='lightblue', edgecolor='black', alpha=0.5)
# ax.set_xlim(-20,15)
# ax.set_ylim(40,70)

# plt.show()

# #set mask for new dataset
# Take the first time step and first depth level of one CMEMS North-West-Shelf file.
mask = xr.open_dataset('/storage/shared/oceanparcels/input_data/CopernicusMarineService/NORTHWESTSHELF_ANALYSIS_FORECAST_PHY_004_013/CMEMS_v6r1_NWS_PHY_NRT_NL_01hav3D_20231204_20231204_R20231205_HC01.nc').isel(time=0).isel(depth=0)
# 2-D coordinate grids; with indexing='ij' both have shape (n_latitude, n_longitude)
lats, lons = np.meshgrid(mask.latitude.values,mask.longitude.values,indexing='ij') 
# True where the `uo` field is not NaN; note the array is stored TRANSPOSED here
# (presumably (lon, lat), assuming uo is (lat, lon) — confirm)
full_water =~np.isnan(mask.uo.values.T)

# fig = plt.figure(figsize=(10, 8))
# ax = plt.axes(projection=cart.crs.PlateCarree())
# ax.add_feature(cart.feature.LAND)
# ax.add_feature(cart.feature.OCEAN)
# ax.add_feature(cart.feature.COASTLINE)
# ax.add_feature(cart.feature.BORDERS, linestyle=':')
# ax.gridlines(draw_labels=True, linestyle='--', color='gray', alpha=0.5, linewidth=0.5)

# ax.scatter(lons[full_water.T], lats[full_water.T], transform=cart.crs.PlateCarree(), s=0.5, c='r')
# ax.add_geometries([NWES_shape], cart.crs.PlateCarree(), facecolor='lightblue', edgecolor='black', alpha=0.5)
# ax.set_xlim(-20,15)
# ax.set_ylim(40,70)

# plt.show()

# # use mask on particles
# full_water is transposed back before being passed, alongside the lons/lats grids.
# Presumably this removes particles that do not lie on water cells — confirm in h3_tools.
NWESParticles.mask(lons, lats, full_water.T)

# fig = plt.figure(figsize=(10, 8))
# ax = plt.axes(projection=cart.crs.PlateCarree())
# ax.add_feature(cart.feature.LAND)
# ax.add_feature(cart.feature.OCEAN)
# ax.add_feature(cart.feature.COASTLINE)
# ax.add_feature(cart.feature.BORDERS, linestyle=':')
# ax.gridlines(draw_labels=True, linestyle='--', color='gray', alpha=0.5, linewidth=0.5)

# ax.scatter(NWESParticles.centroid_lons, NWESParticles.centroid_lats, transform=cart.crs.PlateCarree(), s=0.5, c='r')
# ax.add_geometries([NWES_shape], cart.crs.PlateCarree(), facecolor='lightblue', edgecolor='black', alpha=0.5)

# ax.set_xlim(-20,15)
# ax.set_ylim(40,70)

# plt.show()
# report the size of the particle set after masking
print(f"Number of particles: {NWESParticles.size}")

In [None]:
# display the particle longitudes as the cell output
NWESParticles.centroid_lons

In [None]:
# One shapely Point(lon, lat) per particle of the initial hexagonal grid.
pointlist_initial = list(
    map(Point, NWESParticles.centroid_lons, NWESParticles.centroid_lats)
)

In [None]:
# Wrap the initial points in a WGS84 GeoDataFrame indexed 0..N-1 for use as seeds.
initial_index = list(range(len(pointlist_initial)))
seeds_initial = gpd.GeoDataFrame(
    {"geometry": pointlist_initial},
    index=initial_index,
    crs=WGS84_CRS,
)
print(seeds_initial)

In [None]:
# Voronoi regionalizer seeded with the initial (hexagonal-grid) particle positions,
# applied to the sea-mask GeoDataFrame.
vr_initial = VoronoiRegionalizer(seeds=seeds_initial)
sea_initial_results = vr_initial.transform(gdf = seagdf)

In [None]:
# Project the initial Voronoi cells to EPSG:3395 and store each cell's area
# (projected area divided by 1E6, i.e. km^2 for a metre-based CRS).
# NOTE(review): EPSG:3395 is World Mercator, which is not an equal-area projection,
# so areas at these latitudes are inflated and vary with latitude. The areas of the
# simulation-data cells in this notebook are computed in EPSG:25832 instead, so the
# two sets of values are not directly comparable. Consider an equal-area CRS
# (e.g. EPSG:3035); any hard-coded colour limits would need re-tuning afterwards.
change_units = sea_initial_results.to_crs(epsg=3395)
sea_initial_results["area"] = change_units.geometry.area/1E6
sea_initial_results = sea_initial_results.sort_index()
# smallest and largest cell area
print(sea_initial_results["area"].min())
print(sea_initial_results["area"].max())

In [None]:
import matplotlib.cm as cm
import matplotlib.colors as colors 
import branca.colormap as cm_branca

# Colour the initial Voronoi cells by area on a folium map (linear viridis scale).

# The data range is computed first, but the limits are then fixed by hand, so
# the hard-coded values are the ones actually used.
initial_area = sea_initial_results["area"]
vmin, vmax = initial_area.min(), initial_area.max()
vmin, vmax = 30000, 40000
norm = colors.Normalize(vmin=vmin, vmax=vmax)


def _viridis_hex(value):
    """Return the viridis colour (hex string) for `value` under the current `norm`."""
    return colors.to_hex(cm.viridis(norm(value)))


# legend: 12 colours sampled at evenly spaced values between vmin and vmax
color_list = [_viridis_hex(v) for v in np.linspace(vmin, vmax, 12)]
colormap = cm_branca.LinearColormap(
    colors=color_list,
    vmin=vmin,
    vmax=vmax,
    caption="Polygon Area"
)
# norm = colors.Normalize(vmin=12400, vmax=20000)

# one colour per cell, derived from its area
sea_initial_results["color"] = sea_initial_results["area"].apply(_viridis_hex)

# draw the cells with the per-cell colour list
folium_map = plot_regions(
    sea_initial_results,
    colormap=list(sea_initial_results["color"]),  # Passing as a list of colors
    tiles_style="CartoDB positron"
)

# attach the legend and display the map as the cell output
colormap.add_to(folium_map)
folium_map

In [None]:
# display the initial-cell table ordered by index (the result is not stored)
sea_initial_results.sort_index()

In [None]:
# Draw the initial Voronoi cells and overlay them filled according to their area.
# NOTE(review): `colormap` is the colour-scale object left in scope by an earlier
# cell; confirm that plot_regions accepts that object for its `colormap` argument.
folium_map = plot_regions(
    sea_initial_results,
    colormap=colormap,
    tiles_style="CartoDB positron"
)

# Add the polygons with color based on area.
# GeoDataFrame.explore expects `style_kwds` to be a dict; a per-feature callable
# has to be supplied under its "style_function" key (passing the lambda directly
# as `style_kwds` does not work).
sea_initial_results.explore(
    m=folium_map,
    style_kwds=dict(
        style_function=lambda feature: {
            "color": "#444",
            "opacity": 0,  # fully transparent outline
            "fillColor": colormap(feature["properties"]["area"]),
            "fillOpacity": 1,
        }
    ),
    marker_kwds=dict(radius=1),
)

# Add colormap to the map for reference
colormap.add_to(folium_map)