see https://kraina-ai.github.io/srai/latest/examples/regionalizers/voronoi_regionalizer/

If two particles are at the same location (or too close together), the Voronoi regionalizer raises an error, so particles that end up at the same location are removed first (these are only a small minority).


In [None]:
#update reading in packages when rerunning this cell
%load_ext autoreload
%autoreload 2

import geopandas as gpd
import numpy as np

from shapely.geometry import Point, shape, Polygon, MultiPolygon
from shapely import geometry
from shapely.ops import unary_union
from srai.loaders.osm_loaders import OSMOnlineLoader
from srai.loaders.osm_loaders.filters.popular import get_popular_tags
from functional import seq

from srai.constants import WGS84_CRS
from srai.plotting.folium_wrapper import plot_regions
from srai.regionalizers import VoronoiRegionalizer, geocode_to_region_gdf, AdministrativeBoundaryRegionalizer
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors 

import cartopy.crs as ccrs
import cartopy.feature as cfeature


import branca.colormap as cm_branca
import sys
sys.path.append("/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/src")
from analysis_functions import  make_PDF, make_lognormal_PDF
from voronoi_functions import * 

from datetime import datetime, timedelta
import xarray as xr

# set plotscale
plt.style.use('../python_style_Meike.mplstyle')

## create sea mask polygon

In [None]:
# Country outlines come from a GeoJSON file with the boundaries of all countries, downloaded from
# https://ec.europa.eu/eurostat/web/gisco/geodata/administrative-units/countries
# (make sure to download the EPSG:4326 = WGS84 version).
europe = gpd.read_file("/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/input_data/europe.geojson")

# Countries in the domain; handed to create_sea_mask_polygon below.
country_names = [
    'France', 'Germany', 'Denmark', 'Belgium', 'Netherlands',
    'Bailiwick of Jersey', 'Jersey', 'Guernsey', 'Bailiwick of Guernsey',
    'Isle of Man', 'United Kingdom', 'Ireland', 'Norway', 'Switzerland',
    'Luxembourg', 'Italy', 'Liechtenstein', 'Austria',
]

# Domain corners: every side of the raw extent is moved inwards by 0.5.
lon_min, lon_max = -15.996014595031738 + 0.5, 9.977004051208496 - 0.5
lat_min, lat_max = 46.00364303588867 + 0.5, 61.28188705444336 - 0.5

domaingdf = create_simulation_domain(lon_min=lon_min, lon_max=lon_max, lat_min=lat_min, lat_max=lat_max)

seagdf = create_sea_mask_polygon(
    region=domaingdf,
    land_boundaries=europe,
    countries_name_engl=country_names,
)
# seagdf.explore()

# Store both layers as GeoJSON (the sea mask is loaded again from this path further down).
seagdf.to_file('/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/input_data/NWES_sea_mask.geojson', driver='GeoJSON')
domaingdf.to_file('/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/input_data/NWES_sim_domain.geojson', driver='GeoJSON')


## Or read in already created seamask polygon

In [None]:

# Load the previously stored sea mask instead of rebuilding it.
seagdf = gpd.read_file('/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/input_data/NWES_sea_mask.geojson')
# seagdf.explore()

## Create Voronoi tessellation for simulation data

### preprocess data

In [None]:
# Load the particle-tracking output (test dataset: tracer run, September).
base_directory = '/storage/shared/oceanparcels/output_data/data_Meike/MR_advection/NWES/'
input_file_tracer_base = (base_directory + '{particle_type}/{loc}_start{y_s:04d}_{m_s:02d}_{d_s:02d}'
                   '_end{y_e:04d}_{m_e:02d}_{d_e:02d}_RK4_{land_handling}.zarr')

loc = 'NWES'
# Single definition of the run length (it used to be assigned twice with the
# same value); an alternative tried earlier was timedelta(days=10).
runtime = timedelta(days=30)
nparticles = 88347 # 52511
chunck_time = 100
land_handling = 'anti_beaching'
# Not part of the tracer filename template, so it is no longer passed to
# .format() below; the variable itself is kept.
coriolis = True
starttime = datetime(2023, 9, 1, 0, 0, 0, 0)
endtime = starttime + runtime

# Fill in the filename template from the start/end dates of the run.
input_file_tracer = input_file_tracer_base.format(
    particle_type='tracer',
    loc=loc,
    y_s=starttime.year,
    m_s=starttime.month,
    d_s=starttime.day,
    y_e=endtime.year,
    m_e=endtime.month,
    d_e=endtime.day,
    land_handling=land_handling,
)
print(input_file_tracer)

# Open the zarr store with one chunk spanning all trajectories and
# `chunck_time` observations; the variables B, tau and z are dropped and time
# values are left undecoded.
ds = xr.open_dataset(input_file_tracer,
                        engine='zarr',
                        chunks={'trajectory':nparticles, 'obs':chunck_time},
                        drop_variables=['B','tau','z'],
                        decode_times=False) #,decode_cf=False)




In [None]:
# Output of a run stored under 'inertial_SM_Rep_constant'; the path is spelled out in full.
inputfile =  '/storage/shared/oceanparcels/output_data/data_Meike/MR_advection/NWES/inertial_SM_Rep_constant/NWES_start2023_09_01_end2023_10_01_RK4__Rep_0000_B0680_tau2994_anti_beaching_cor_True_gradient_True.zarr'

# Read options: zarr engine, one chunk over all trajectories, B/tau/z dropped,
# time left undecoded. Note that this rebinds `ds`.
zarr_read_options = dict(
    engine='zarr',
    chunks={'trajectory': nparticles, 'obs': chunck_time},
    drop_variables=['B', 'tau', 'z'],
    decode_times=False,
)
ds = xr.open_dataset(inputfile, **zarr_read_options)


In [None]:
import random 
# NOTE(review): T is forwarded to from_dataset_to_points; 719 matches the "T0719h" tag in the
# output filename written further down — presumably a time/observation index, confirm.
T=719
pointlist = from_dataset_to_points(ds = ds, T= T, sea_domain=seagdf)
# Randomly pick 1000 points. No seed is set in this cell, so the subset differs between runs
# unless the random state is fixed elsewhere; random.sample raises ValueError when the
# population holds fewer than 1000 items.
pointlist_selection = random.sample(pointlist, 1000)
# NOTE(review): make_unique_seeds is defined outside this notebook — presumably it drops
# coinciding points (see the remark at the top of the notebook); confirm.
seedsgdf = make_unique_seeds(points = pointlist_selection)

In [None]:
# Build the Voronoi tessellation for the selected seeds, passing the sea mask as region
# (helper is defined outside this notebook).
voronoi_cells = make_regional_voronoi_tesselation(unique_seeds=seedsgdf, sea_region=seagdf)


In [None]:
# Write the Voronoi cells to GeoJSON; the filename is hard-coded to the run and time step used above.
outputfile = '/storage/shared/oceanparcels/output_data/data_Meike/MR_advection/NWES/inertial_SM_Rep_constant/voronoi_data/NWES_start2023_09_01_T0719h_Rep_0000B0680_tau2994_cor_True_gradient_True.geojson'
voronoi_cells.to_file(outputfile, driver='GeoJSON') 

### Create Voronoi diagrams and calculate their area

### Create Voronoi diagrams and calculate their area

In [None]:
# Save the results as GeoJSON (one of several formats written here; their file sizes are listed further down).
# NOTE(review): `sea_results` is not assigned in any cell visible in this notebook — it must come from an earlier session/cell.
sea_results.to_file('data/geojson_sea_results.geojson', driver='GeoJSON') 

In [None]:
# Save the results as a shapefile; no driver is passed, so the format is left to geopandas' default handling of the path.
sea_results.to_file('data/shp_sea_result.shp') 

In [None]:
# Save the results as a GeoPackage, in a layer literally called 'name'.
sea_results.to_file('data/gpkg_sea_result.gpkg', driver='GPKG', layer='name')  

In [None]:
# Save the results as Parquet (the same path, including its spelling, is read back in a later cell).
sea_results.to_parquet('data/parquet_sea_ressults.parquet')

In [None]:
# Round-trip check: read back the shapefile written by
# sea_results.to_file('data/shp_sea_result.shp').
# The path previously pointed at 'data/test_sea_result.shp', which no cell in
# this notebook writes.
test_read = gpd.read_file('data/shp_sea_result.shp')

In [None]:
# Round-trip check: read back the Parquet file written by sea_results.to_parquet(...) above.
test_read_parquet = gpd.read_parquet('data/parquet_sea_ressults.parquet')

In [None]:
# import python modules
import os

# Directory whose files are listed together with their sizes.
path = "/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/analysis/data"

# Collect (name, size in bytes) for every regular file directly inside
# `path`; sub-directories are skipped.
size_of_file = []
for entry_name in os.listdir(path):
    entry_path = os.path.join(path, entry_name)
    if os.path.isfile(entry_path):
        size_of_file.append((entry_name, os.stat(entry_path).st_size))

# Print one line per file; the size is the byte count divided by 1024*1024,
# rounded to three decimals.
for file_name, n_bytes in size_of_file:
    size_in_mb = round(n_bytes/(1024*1024),3)
    print("{} : {}MB".format(file_name, size_in_mb))

#  parquet is smallest so will use that one to save data (still big though)

In [None]:

# Build a logarithmic colour normalisation (LogNorm) for the map below.
# NOTE(review): the data-derived limits on the next line are overwritten immediately by the
# fixed values 0.1 and 100, so the min/max of "density" has no effect.
vmin, vmax = sea_results["density"].min(), sea_results["density"].max()
vmin = 0.1
vmax = 100
norm = colors.LogNorm(vmin=vmin, vmax=vmax)

# Legend colours: magma sampled (through the log norm) at 12 linearly spaced values between vmin and vmax.
color_list = [colors.to_hex(cm.magma(norm(v))) for v in np.linspace(vmin, vmax, 12)]

colormap = cm_branca.LinearColormap(
    colors=color_list,
    vmin=vmin,
    vmax=vmax,
    caption="Polygon Area"
)
# norm = colors.Normalize(vmin=12400, vmax=20000)

# Generate one hex colour per polygon by passing its value through `norm` and the magma colormap.
# NOTE(review): the limits above are read from the "density" column, whereas the colours here are
# computed from "area" and the legend caption says "Polygon Area" — confirm which quantity is intended.
sea_results["color"] = sea_results["area"].apply(lambda x: colors.to_hex(cm.magma(norm(x))))

# Use the 'color' column in the plot function
folium_map = plot_regions(
    sea_results,
    colormap=list(sea_results["color"]),  # Passing as a list of colors
    tiles_style="CartoDB positron",
    show_borders=False
)

# sea_results.explore(
#     m=folium_map,
#     style_kwds=dict(
#         color="#444",  # Border color (can be ignored if weight is 0)
#         weight=0,  # Set the line width to zero to remove borders
#         fillOpacity=1
#     )
# )


# Add colormap to the map for reference
colormap.add_to(folium_map)
# # Add colormap to the map for reference
# Last expression of the cell: displays the map in the notebook.
folium_map

In [None]:
# Map axes on a PlateCarree projection with land drawn in light grey.
plate_carree = ccrs.PlateCarree()
fig, ax = plt.subplots(figsize=(25, 15), subplot_kw={"projection": plate_carree})
ax.add_feature(cfeature.LAND, edgecolor="black", color="lightgray")

# Options forwarded to plot_voronoi: colour by 'area', magma colormap,
# limits 0.01 to 10, 'log' colour scale.
voronoi_plot_options = dict(
    voronoi_cells=voronoi_cells,
    color_scale_type='area',
    colormap=cm.magma,
    vmin=0.01,
    vmax=10,
    colormap_scale='log',
)
plot_voronoi(fig=fig, ax=ax, **voronoi_plot_options)

# Invisible grid lines (linewidth 0) that only provide tick labels on the
# bottom and left sides.
gl = ax.gridlines(
    crs=plate_carree,
    draw_labels=True,
    linewidth=0,
    color='gray',
    alpha=0.5,
    linestyle='--',
)
gl.top_labels = False
gl.right_labels = False
gl.xlabel_style = {'size': 15}
gl.ylabel_style = {'size': 15}

In [None]:
# PDF of the cell densities via make_PDF (100 bins, norm=True, vmin=0, vmax=0.01), plotted on linear axes.
fig, ax = plt.subplots()
bins, pdf = make_PDF(np.array(voronoi_cells['density'].values),nbins=100, norm = True, vmin = 0, vmax = 0.01)
ax.plot(bins,pdf,'--o')
# ax.set_xscale('log')


In [None]:
# Same densities through make_lognormal_PDF (100 bins, norm=True), shown with a logarithmic x-axis.
logbins, logpdf = make_lognormal_PDF(np.array(voronoi_cells['density'].values),nbins=100, norm = True)
plt.plot(logbins,logpdf,'--o')
plt.xscale('log')

In [None]:
# Histogram of the cell areas in 1000 bins. np.histogram is called without density=True, so
# `pdf` holds raw counts here; the name `pdf` is rebound by this cell.
pdf, bin_edges = np.histogram(np.array(voronoi_cells['area'].values), bins=1000)#s, range=(vmin, vmax))

In [None]:
# Sanity check of the histogram -> PDF recipe on a tiny, known sample.
# The range limits are called `lower`/`upper` instead of `min`/`max`, so the
# Python builtins are not rebound for the rest of the notebook session.
lower = 0
upper = 15
nbins = 16
test = np.array([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15])

# Bin width and bin centres (left edges shifted by half a bin width).
dx = (upper - lower) / nbins
pdf, bin_edges = np.histogram(test, bins=nbins, range=(lower, upper))
bins = bin_edges[:-1] + 0.5*dx
# Normalise the counts by the sample size.
# NOTE(review): this rebinds the notebook-level name `norm` (used for colour
# normalisation in other cells) to an integer.
norm = test.size
plt.plot(bins, pdf/norm)


In [None]:
# geometric mean (I think better): exp of the mean of the logs,
# printed first for 'density' and then for 'area'.
for _column in ('density', 'area'):
    _values = np.array(voronoi_cells[_column].values)
    print(np.exp(np.mean(np.log(_values))))


In [None]:
# Arithmetic means after discarding the single smallest and the single
# largest value of each column.
sorted_density = np.sort(np.array(voronoi_cells['density'].values))
sorted_area = np.sort(np.array(voronoi_cells['area'].values))
mean_density = np.mean(sorted_density[1:-1])
mean_area = np.mean(sorted_area[1:-1])

# Compare each mean with the reciprocal of the other.
print(f'<rho> = {mean_density}, 1/<A> = {1/mean_area}')
print(f'<A> = {mean_area}, 1/<rho> = {1/mean_density}')

In [None]:
# Display the Voronoi cell table (notebook output of the last expression).
voronoi_cells

In [None]:
# Bounding box of the sea mask; only the two minimum values are printed,
# one per line.
minx, miny, maxx, maxy = seagdf.total_bounds
print(minx, miny, sep='\n')

In [None]:
# Display the seed GeoDataFrame (notebook output of the last expression).
seedsgdf

In [None]:
# Areas of the regions, for analysis.
# see https://epsg.io/25832-1149 for epsg 25832 projection seems fine, but maybe find someone who has more experience with projections
# NOTE(review): EPSG:25832 is a UTM zone 32N projection; the domain used in
# this notebook extends far west of that zone, so the areas may be noticeably
# distorted there — confirm, or consider an equal-area CRS.
arealist = sea_results.to_crs(epsg =25832).area
# print(arealist.type)

# Divide by 10**6 in a single vectorised step (m^2 -> km^2, assuming the
# projected CRS uses metres) instead of appending element by element.
areas = np.asarray(arealist) / 10**6
print(areas.size)

In [None]:
# Sorted areas as a quick visual check, plus the mean over all areas.
fig, ax = plt.subplots()
sorted_areas = np.sort(areas)
ax.plot(sorted_areas, 'o')
mean_area = areas.mean()  # alternative tried earlier: np.mean(np.sort(areas)[1000:-1000])
print(mean_area)

In [None]:
# PDF of the 'density' column of sea_results via make_PDF.
# NOTE(review): nbins=200000000 is extremely large (memory/time) — confirm this is intended.
bins, pdf = make_PDF(np.array(list(sea_results['density'])),nbins=200000000,norm=True)
# mean_area=np.mean(areas)
# print(mean_area)
fig, ax = plt.subplots()
# NOTE(review): x values are shifted by bins[1] — presumably a bin-position offset; confirm against make_PDF.
ax.plot((bins+bins[1]),pdf[:],'--o',color='navy')
ax.axvline(1,color='grey',zorder=-5)
# ax.set_xscale('log')
# NOTE(review): the axis label refers to area / <area>, but the plotted data come from the 'density' column.
ax.set_xlabel('area / $\\langle$ area $\\rangle$')
ax.set_ylabel('PDF')
ax.set_xlim(0,0.2)
# NOTE(review): hard-coded reference line at 1/36.129062164 — origin of the constant is not shown here.
ax.axvline(1/36.129062164	)

In [None]:
# Keep the current PDF under separate names so it can be overlaid on a newer one later.
binold = bins
pdfold = pdf
# NOTE(review): hard-coded value used to scale the stored curve — origin not shown here.
mean_area_old = 225

In [None]:
# Overlay the current (navy) and stored (red) PDFs; each x-axis is divided by its own mean area
# and the first bin of each curve is skipped.
fig, ax = plt.subplots()
ax.plot((bins[1:]+bins[1])/mean_area,pdf[1:],'--o',color='navy')
ax.plot((binold[1:]+binold[1])/mean_area_old,pdfold[1:],'--o',color='red')


# testing initial position 
The Voronoi cells at early time steps were not hexagonal, which was suspicious. Below I check this by building a hexagonal distribution on a subset. I found a mistake in the initialization, which I fixed; the Voronoi cells now also become hexagons.



In [None]:
# needed packages
#update reading in packages when rerunning this cell
%load_ext autoreload
%autoreload 2

import h3
sys.path.append("/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/release")
import h3_tools
import matplotlib.pyplot as plt
import cartopy as cart



In [None]:


# Set particles on a hexagonal grid in the region and mask out those that are not in water.
seagdf = gpd.GeoDataFrame.from_file('/nethome/4291387/Maxey_Riley_advection/Maxey_Riley_advection/input_data/NWES_sea_mask.geojson')
# GeoJSON-style polygon of the domain; corner pairs are ordered [lon, lat] and the ring is closed
# (first corner repeated at the end). Each bound is moved inwards by 0.5.
NWES_domain = { 
    "type":"Polygon",
    "coordinates": [
   [[9.977004051208496-0.5,61.28188705444336-0.5],
     [-15.996014595031738+0.5,61.28188705444336-0.5],
     [-15.996014595031738+0.5,46.00364303588867+0.5],
     [9.977004051208496-0.5,46.00364303588867+0.5],
     [9.977004051208496-0.5,61.28188705444336-0.5]]
     ]}

# Copy of the polygon with every corner swapped to [lat, lon].
# NOTE(review): this flipped version is built but not used in this cell; the grid below is
# initialised from NWES_domain — confirm that is the coordinate order h3_tools.initGrid expects.
NWES_domain_Flipped = {
      "type": "Polygon",
      "coordinates": [[[lat, lon] for lon, lat in NWES_domain["coordinates"][0]]]}
NWESParticles = h3_tools.initGrid(NWES_domain, h3_res=3)
NWES_shape = shape(NWES_domain)
print(NWESParticles.centroid_lons[0])
# print(f"Number of particles: {NWESParticles.size}")

# # plot partciles together with region
# fig = plt.figure(figsize=(10, 8))
# ax = plt.axes(projection=cart.crs.PlateCarree())
# ax.add_feature(cart.feature.LAND)
# ax.add_feature(cart.feature.OCEAN)
# ax.add_feature(cart.feature.COASTLINE)
# ax.add_feature(cart.feature.BORDERS, linestyle=':')
# ax.gridlines(draw_labels=True, linestyle='--', color='gray', alpha=0.5, linewidth=0.5)

# ax.scatter(NWESParticles.centroid_lons, NWESParticles.centroid_lats, transform=cart.crs.PlateCarree(), s=0.5, c='r')
# ax.add_geometries([NWES_shape], cart.crs.PlateCarree(), facecolor='lightblue', edgecolor='black', alpha=0.5)
# ax.set_xlim(-20,15)
# ax.set_ylim(40,70)

# plt.show()

# Water mask from a velocity field: first time step and first depth level of the file below.
mask = xr.open_dataset('/storage/shared/oceanparcels/input_data/CopernicusMarineService/NORTHWESTSHELF_ANALYSIS_FORECAST_PHY_004_013/CMEMS_v6r1_NWS_PHY_NRT_NL_01hav3D_20231204_20231204_R20231205_HC01.nc').isel(time=0).isel(depth=0)
# 2D coordinate arrays with latitude along the first axis ('ij' indexing).
lats, lons = np.meshgrid(mask.latitude.values,mask.longitude.values,indexing='ij') 
# True where the transposed `uo` field is not NaN; it is transposed back again when passed to
# NWESParticles.mask below, i.e. the mask handed over has the orientation of `uo` itself.
full_water =~np.isnan(mask.uo.values.T)

# fig = plt.figure(figsize=(10, 8))
# ax = plt.axes(projection=cart.crs.PlateCarree())
# ax.add_feature(cart.feature.LAND)
# ax.add_feature(cart.feature.OCEAN)
# ax.add_feature(cart.feature.COASTLINE)
# ax.add_feature(cart.feature.BORDERS, linestyle=':')
# ax.gridlines(draw_labels=True, linestyle='--', color='gray', alpha=0.5, linewidth=0.5)

# ax.scatter(lons[full_water.T], lats[full_water.T], transform=cart.crs.PlateCarree(), s=0.5, c='r')
# ax.add_geometries([NWES_shape], cart.crs.PlateCarree(), facecolor='lightblue', edgecolor='black', alpha=0.5)
# ax.set_xlim(-20,15)
# ax.set_ylim(40,70)

# plt.show()

# Apply the water mask to the particles (h3_tools method; presumably removes particles outside
# water — confirm against h3_tools).
NWESParticles.mask(lons, lats, full_water.T)

# fig = plt.figure(figsize=(10, 8))
# ax = plt.axes(projection=cart.crs.PlateCarree())
# ax.add_feature(cart.feature.LAND)
# ax.add_feature(cart.feature.OCEAN)
# ax.add_feature(cart.feature.COASTLINE)
# ax.add_feature(cart.feature.BORDERS, linestyle=':')
# ax.gridlines(draw_labels=True, linestyle='--', color='gray', alpha=0.5, linewidth=0.5)

# ax.scatter(NWESParticles.centroid_lons, NWESParticles.centroid_lats, transform=cart.crs.PlateCarree(), s=0.5, c='r')
# ax.add_geometries([NWES_shape], cart.crs.PlateCarree(), facecolor='lightblue', edgecolor='black', alpha=0.5)

# ax.set_xlim(-20,15)
# ax.set_ylim(40,70)

# plt.show()
# Particle count after masking.
print(f"Number of particles: {NWESParticles.size}")

In [None]:
# Display the longitudes of the particle centroids (notebook output of the last expression).
NWESParticles.centroid_lons

In [None]:
# One shapely Point(lon, lat) per particle centroid.
pointlist_initial = [
    Point(*lon_lat)
    for lon_lat in zip(NWESParticles.centroid_lons, NWESParticles.centroid_lats)
]

In [None]:
# Seed GeoDataFrame with one row per initial point, indexed 0..N-1 and
# tagged with the WGS84 CRS constant.
n_seeds = len(pointlist_initial)
seeds_initial = gpd.GeoDataFrame(
    {"geometry": pointlist_initial},
    index=list(range(n_seeds)),
    crs=WGS84_CRS,
)
print(seeds_initial)

In [None]:
# Voronoi regionalizer seeded with the initial hexagonal points, applied to the sea-mask geometry.
# NOTE(review): presumably the resulting regions are clipped to seagdf — confirm in the srai documentation.
vr_initial = VoronoiRegionalizer(seeds=seeds_initial)
sea_initial_results = vr_initial.transform(gdf = seagdf)

In [None]:
# Area per region: reproject to EPSG:3395 and divide by 1E6 (m^2 -> km^2, assuming metre units).
# NOTE(review): EPSG:3395 is World Mercator, which does not preserve area, so values at these
# latitudes are inflated relative to the true surface area. An equal-area CRS would give physical
# areas, but the fixed colour limits in the following cell are tuned to the current values —
# confirm before changing.
change_units = sea_initial_results.to_crs(epsg=3395)
sea_initial_results["area"] = change_units.geometry.area/1E6
sea_initial_results = sea_initial_results.sort_index()
# Range of the computed areas.
print(sea_initial_results["area"].min())
print(sea_initial_results["area"].max())

In [None]:
import matplotlib.cm as cm
import matplotlib.colors as colors 
import branca.colormap as cm_branca

# Linear colour normalisation for the initial cell areas.
area_values = sea_initial_results["area"]
vmin, vmax = area_values.min(), area_values.max()  # data range, replaced by the fixed limits below
vmin, vmax = 30000, 40000
norm = colors.Normalize(vmin=vmin, vmax=vmax)


def _viridis_hex(value):
    """Return the viridis colour of ``value`` under ``norm`` as a hex string."""
    return colors.to_hex(cm.viridis(norm(value)))


# Legend: 12 viridis samples at evenly spaced values between vmin and vmax.
color_list = [_viridis_hex(v) for v in np.linspace(vmin, vmax, 12)]
colormap = cm_branca.LinearColormap(
    colors=color_list,
    vmin=vmin,
    vmax=vmax,
    caption="Polygon Area",
)
# norm = colors.Normalize(vmin=12400, vmax=20000)

# One hex colour per region, derived from its area.
sea_initial_results["color"] = area_values.apply(_viridis_hex)

# Draw the regions using the per-region colours.
region_colors = list(sea_initial_results["color"])  # Passing as a list of colors
folium_map = plot_regions(
    sea_initial_results,
    colormap=region_colors,
    tiles_style="CartoDB positron",
)

# Add colormap to the map for reference
colormap.add_to(folium_map)
folium_map

In [None]:
# Display the initial regions ordered by index (sort_index returns a sorted copy; nothing is reassigned).
sea_initial_results.sort_index()

In [None]:
# Interactive map of the initial regions, coloured by area.
# NOTE(review): `colormap` is the branca colormap object built in an earlier
# cell and is handed to plot_regions unchanged — confirm that plot_regions
# accepts a colormap object here.
folium_map = plot_regions(
    sea_initial_results,
    colormap=colormap,
    tiles_style="CartoDB positron"
)

# Add the polygons with color based on area.
# GeoDataFrame.explore expects `style_kwds` to be a dict; a per-feature
# callable has to be supplied under its "style_function" key (passing the
# lambda directly as `style_kwds` fails inside explore).
sea_initial_results.explore(
    m=folium_map,
    style_kwds=dict(
        style_function=lambda feature: {
            "color": "#444",
            "opacity": 0,  # fully transparent borders
            "fillColor": colormap(feature["properties"]["area"]),
            "fillOpacity": 1,
        },
    ),
    marker_kwds=dict(radius=1),
)

# Add colormap to the map for reference
colormap.add_to(folium_map)

In [None]:
# Check that the Voronoi data written by the stand-alone script can be read back.
test = gpd.read_file('/storage/shared/oceanparcels/output_data/data_Meike/MR_advection/NWES/tracer/voronoi_data/NWES_start2023_09_01_T0719h.geojson')

In [None]:
# PDF of the 'density' column of the re-loaded data via make_lognormal_PDF (100 bins, norm=True),
# drawn with a logarithmic x-axis. Note that `bins` and `pdf` are rebound here.
bins, pdf  = make_lognormal_PDF(test['density'].values,nbins =100, norm =True)
fig, ax = plt.subplots()
ax.plot(bins, pdf,'--o')
ax.set_xscale('log')