In [None]:
import numpy as np
import xarray as xr

In [3]:
# Create sample data: 3x3 grid with 30 days
lat = np.arange(3)
lon = np.arange(3)
time = np.arange(30)

# Use a seeded, local generator so the event mask is identical on every
# Restart & Run All (the legacy global np.random state is left untouched).
rng = np.random.default_rng(42)

# Boolean event mask with dims (lat, lon, time): a cell is True when its
# uniform [0, 1) draw exceeds 0.8, so the number of events differs per location.
# The shape is taken from the coordinate arrays so the two cannot drift apart.
data = rng.random((lat.size, lon.size, time.size)) > 0.8
da = xr.DataArray(
    data,
    coords={"lat": lat, "lon": lon, "time": time},
    dims=["lat", "lon", "time"],
)

In [21]:
# Case: store ragged per-cell event indices with a netCDF4 variable-length (VLEN) type
import numpy as np
from netCDF4 import Dataset, VLType
import os
import tempfile

# Write into the system temp directory, where write permission is expected
temp_dir = tempfile.gettempdir()
file_path = os.path.join(temp_dir, 'event_positions.nc')

try:
    # Create a new NetCDF file with HDF5 format (NETCDF4 is required for VLEN
    # types). The context manager closes the file even if a write below
    # raises, so a failed run does not leave an open handle on file_path.
    with Dataset(file_path, 'w', format='NETCDF4') as nc:
        # Define dimensions
        lat_dim = nc.createDimension('lat', len(lat))
        lon_dim = nc.createDimension('lon', len(lon))

        # Use the boolean (lat, lon, time) event mask from the previous cell
        sample_data = data

        # Create the VLEN type for variable-length arrays of int32 through
        # the Dataset factory method rather than the VLType constructor.
        vlen_type = nc.createVLType(np.int32, 'event_indices')

        # Create variable with VLEN type: one ragged index array per grid cell
        event_positions = nc.createVariable('event_positions', vlen_type, ('lat', 'lon'))
        event_positions.long_name = 'Time positions of events'
        event_positions.units = 'time index'

        # Fill with event positions. VLEN variables are assigned one element
        # at a time because every cell holds an array of a different length.
        for i in range(len(lat)):
            for j in range(len(lon)):
                # Time indices where the mask is True, cast to the VLEN base type
                indices = np.flatnonzero(sample_data[i, j, :]).astype(np.int32)
                event_positions[i, j] = indices

        # Create a variable for event counts
        event_counts = nc.createVariable('event_counts', np.int32, ('lat', 'lon'))
        event_counts.long_name = 'Number of events'
        event_counts.units = 'count'

        # Fill with counts: summing the mask over the time axis gives the
        # number of events per (lat, lon) cell in a single vectorised write.
        event_counts[:] = sample_data.sum(axis=-1).astype(np.int32)

    print(f"Successfully saved NetCDF file to: {file_path}")
except Exception as e:
    print(f"Error saving NetCDF file: {str(e)}")
    # Re-raise so later cells do not silently read a missing or stale file.
    raise

Successfully saved NetCDF file to: /tmp/event_positions.nc


In [None]:
# Re-open the file written by the previous cell with xarray to inspect how the
# VLEN variable (event_positions) and the plain int32 variable (event_counts)
# are read back.
# NOTE(review): the dataset is never closed here; presumably it keeps a handle
# on file_path, which could interfere with re-running the writer cell — confirm.
ds_vlt = xr.open_dataset(file_path)
# Bare last expression so the notebook renders the dataset summary.
ds_vlt

In [29]:
# Look at the entry stored for the first grid cell (position 0 along both dims)
ds_vlt["event_positions"][0, 0]

In [None]:
# Sizes for this self-contained example: 3x3 grid, time indices drawn from 0..29
n_lat, n_lon, n_time = 3, 3, 30

# Seeded, local generator so the ragged arrays (and the printed cell below)
# are the same on every run.
events_rng = np.random.default_rng(0)

# Create empty object array; pre-allocating with dtype=object lets each cell
# hold its own 1-D array regardless of length.
events_array = np.empty((n_lat, n_lon), dtype=object)

# Fill with arrays of different lengths
for i in range(n_lat):
    for j in range(n_lon):
        # Length is drawn from 1..9 (upper bound exclusive)
        n_events = events_rng.integers(1, 10)
        # Indices are sampled with replacement, so a value may repeat
        events_array[i, j] = events_rng.choice(n_time, size=n_events)

# Create xarray DataArray with object dtype.
# Note: this rebinds `da`, replacing the boolean event-mask DataArray
# created in the sample-data cell.
da = xr.DataArray(
    events_array,
    coords={'lat': range(n_lat), 'lon': range(n_lon)},
    dims=['lat', 'lon']
)

# This works fine in memory
print(da.sel(lat=1, lon=2))

<xarray.DataArray ()> Size: 8B
array(array([12, 13, 24, 21, 23, 13]), dtype=object)
Coordinates:
    lat      int64 8B 1
    lon      int64 8B 2
