In [1]:
import xarray as xr
import numpy as np

In [8]:
def get_dataset_size(ds):
    """Report the in-memory size of ``ds`` in bytes, "MB" and "GB".

    Code generated by AI.

    The size is read from ``ds.nbytes`` and scaled by powers of 1024, so the
    "MB" and "GB" figures are binary units (MiB / GiB) rather than decimal
    ones.

    Parameters
    ----------
    ds : object exposing an ``nbytes`` attribute
        In this notebook, an ``xarray.Dataset``.

    Returns
    -------
    tuple
        ``(size_bytes, size_mb, size_gb)``: the raw ``nbytes`` value, that
        value divided by 1024 * 1024, and the latter divided by 1024 again.
    """
    bytes_per_mb = 1024 * 1024
    mb_per_gb = 1024

    total = ds.nbytes
    in_mb = total / bytes_per_mb
    return total, in_mb, in_mb / mb_per_gb

In [2]:
def generate_xarray_dataset(nx=1531, ny=3836, num_vars=60, seed=None):
    """Build a synthetic dataset for testing netCDF memory requirements.

    Code generated by AI.

    Creates ``num_vars`` two-dimensional variables named ``var_01``,
    ``var_02``, ... on a shared ``(y, x)`` grid, each filled with uniform
    random float64 values in [0, 1). Every variable therefore holds
    ``ny * nx`` 8-byte values, all resident in memory at once.

    Parameters
    ----------
    nx : int
        Length of the ``x`` dimension.
    ny : int
        Length of the ``y`` dimension.
    num_vars : int
        Number of data variables to create.
    seed : int or None, optional
        When given, values are drawn from ``np.random.default_rng(seed)`` so
        repeated calls (and kernel restarts) yield identical data. When
        ``None`` (the default), values come from NumPy's global
        ``np.random.rand``, as before.

    Returns
    -------
    xarray.Dataset
        Dataset with integer ``x`` and ``y`` coordinates (``0..nx-1`` and
        ``0..ny-1``) and ``num_vars`` data variables of shape ``(ny, nx)``.
    """
    # A dedicated generator is only created on request; the default path keeps
    # using the global NumPy random state so existing callers see no change.
    rng = None if seed is None else np.random.default_rng(seed)

    data_vars = {}
    for i in range(num_vars):
        if rng is None:
            data = np.random.rand(ny, nx)  # shape (y, x)
        else:
            data = rng.random((ny, nx))  # shape (y, x)
        data_vars[f"var_{i+1:02d}"] = xr.DataArray(data, dims=["y", "x"])

    return xr.Dataset(
        data_vars=data_vars,
        coords={"x": np.arange(nx), "y": np.arange(ny)},
    )


# Generate the dataset using the function's default grid size and variable count
dataset = generate_xarray_dataset()

# Print the plain-text summary of the dataset (dimensions, coordinates, variables)
print(dataset)

# Optionally, save the dataset to a netCDF file (left disabled here):
# dataset.to_netcdf('my_dataset.nc')

<xarray.Dataset> Size: 3GB
Dimensions:  (x: 1531, y: 3836)
Coordinates:
  * x        (x) int64 12kB 0 1 2 3 4 5 6 ... 1524 1525 1526 1527 1528 1529 1530
  * y        (y) int64 31kB 0 1 2 3 4 5 6 ... 3829 3830 3831 3832 3833 3834 3835
Data variables: (12/60)
    var_01   (y, x) float64 47MB 0.7765 0.6972 0.9397 ... 0.1708 0.9281 0.2102
    var_02   (y, x) float64 47MB 0.7813 0.1953 0.717 ... 0.1385 0.6088 0.9443
    var_03   (y, x) float64 47MB 0.1913 0.2922 0.6537 ... 0.951 0.2383 0.8777
    var_04   (y, x) float64 47MB 0.4951 0.5844 0.1716 ... 0.6754 0.8291 0.7249
    var_05   (y, x) float64 47MB 0.5504 0.0277 0.2132 ... 0.71 0.5847 0.9827
    var_06   (y, x) float64 47MB 0.8139 0.2557 0.9237 ... 0.8163 0.9135 0.2995
    ...       ...
    var_55   (y, x) float64 47MB 0.5073 0.8398 0.844 ... 0.7439 0.7439 0.7681
    var_56   (y, x) float64 47MB 0.6664 0.1288 0.9335 ... 0.1832 0.6852 0.1857
    var_57   (y, x) float64 47MB 0.2302 0.05561 0.2571 ... 0.6241 0.4211 0.05165
    var_58   (y,

In [9]:
# Measure the dataset once, then emit the whole report with a single print call.
size_bytes, size_mb, size_gb = get_dataset_size(dataset)
print(
    "\nDataset size in memory:",
    f"  {size_bytes:,} bytes",
    f"  {size_mb:.2f} MB",
    f"  {size_gb:.2f} GB",
    sep="\n",
)


Dataset size in memory:
  2,819,042,616 bytes
  2688.45 MB
  2.63 GB
