In [21]:
import numpy as np
import pandas as pd
import xarray as xr
import dask.array as da


#############################################
# Function
#############################################
def day_cumsum_reaches_threshold_linear(
    degree_days, start_index, start_time_values, threshold
):
    cumsum = np.cumsum(degree_days[start_index:])
    threshold_reached = np.where(cumsum >= threshold)[0]
    if len(threshold_reached) == 0:
        print("error")
        return np.datetime64("NaT", "ns")
    first_reached_index = threshold_reached[0]
    result_date = start_time_values[start_index + first_reached_index]
    return result_date


#############################################
# Input data
#############################################

vday_cumsum_reaches_threshold_linear = np.vectorize(day_cumsum_reaches_threshold_linear)


time = pd.date_range("2000-01-01", periods=50, freq="D").to_numpy(
    dtype="datetime64[ns]"
)
lat = np.linspace(-90, 90, 10)
lon = np.linspace(-180, 180, 10)
degree_days = xr.DataArray(
    da.random.random((10, 10, 50)),
    coords=[lat, lon, time],
    dims=["lat", "lon", "time"],
)
start_dates = xr.DataArray(
    np.random.choice(time[:5], size=(10, 10)), coords=[lat, lon], dims=["lat", "lon"]
)
start_indices = np.array(
    [np.where(degree_days.time.values == d)[0][0] for d in start_dates.values.flatten()]
).reshape(start_dates.shape)
threshold = 15

#############################################
# Apply function
#############################################


result_raw = xr.apply_ufunc(
    day_cumsum_reaches_threshold_linear,
    degree_days,
    start_indices,
    degree_days.time.values.astype("datetime64[ns]"),
    threshold,
    input_core_dims=[["time"], [], ["time"], []],
    output_core_dims=[[]],
    vectorize=True,
    dask="parallelized",
    output_dtypes=["datetime64[ns]"],
)

result_raw.compute()

TypeError: Cannot cast NumPy timedelta64 scalar from metadata [ns] to  according to the rule 'same_kind'

In [22]:
degree_days

Unnamed: 0,Array,Chunk
Bytes,39.06 kiB,39.06 kiB
Shape,"(10, 10, 50)","(10, 10, 50)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 39.06 kiB 39.06 kiB Shape (10, 10, 50) (10, 10, 50) Dask graph 1 chunks in 1 graph layer Data type float64 numpy.ndarray",50  10  10,

Unnamed: 0,Array,Chunk
Bytes,39.06 kiB,39.06 kiB
Shape,"(10, 10, 50)","(10, 10, 50)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [18]:
import dask

dask

<module 'dask' from '/home/thom/miniforge3/envs/fruitflypheno_hdf5/lib/python3.12/site-packages/dask/__init__.py'>

In [12]:
len(result_raw.values[0][0])

TypeError: object of type 'numpy.float64' has no len()

In [None]:
# REPRO EXAMPLE
import numpy as np
import pandas as pd
import xarray as xr
import dask.array as da


# Define a simple function to apply
def example_function(data, index, time_values):
    if index >= len(data):
        return np.nan  # Return NaN if index is out of range
    return time_values[index].view("int64")  # Return as int64 to avoid dtype issues


# Generate example data
time = pd.date_range("2000-01-01", periods=10, freq="D").to_numpy(
    dtype="datetime64[ns]"
)
data = xr.DataArray(
    da.random.random((5, 5, 10), chunks=(5, 5, -1)),  # Random data with time dimension
    coords=[np.arange(5), np.arange(5), time],
    dims=["lat", "lon", "time"],
)
indices = xr.DataArray(
    np.random.randint(0, 10, size=(5, 5)),
    coords=[np.arange(5), np.arange(5)],
    dims=["lat", "lon"],
)

# Apply the function using xr.apply_ufunc
result_raw = xr.apply_ufunc(
    example_function,
    data,
    indices,
    time.astype("datetime64[ns]"),  # Ensure correct dtype
    input_core_dims=[["time"], [], ["time"]],
    output_core_dims=[[]],
    vectorize=True,
    dask="parallelized",
    output_dtypes=[np.float64],  # Output as float64 to handle NaN values
)

# Print the results
print("Raw Result:")
print(result_raw)

In [35]:
import numpy as np
import pandas as pd
import xarray as xr
import dask.array as da


# Define a simple function to apply
def example_function(data, index, time_values):
    if index >= len(data):
        return np.nan  # Return NaN if index is out of range
    return time_values[index]  # Return as int64 to avoid dtype issues


# Generate example data
time = pd.date_range("2000-01-01", periods=10, freq="D").to_numpy(
    dtype="datetime64[ns]"
)
data = xr.DataArray(
    da.random.random((5, 5, 10), chunks=(5, 5, -1)),  # Random data with time dimension
    coords=[np.arange(5), np.arange(5), time],
    dims=["lat", "lon", "time"],
)
indices = xr.DataArray(
    np.random.randint(0, 10, size=(5, 5)),
    coords=[np.arange(5), np.arange(5)],
    dims=["lat", "lon"],
)

# Apply the function using xr.apply_ufunc
result_raw = xr.apply_ufunc(
    example_function,
    data,
    indices,
    time.astype("datetime64[ns]"),  # Ensure correct dtype
    input_core_dims=[["time"], [], ["time"]],
    output_core_dims=[[]],
    vectorize=True,
    dask="parallelized",
    output_dtypes=["datetime64[ns]"],  
)

# Print the results
print("Raw Result:")
result_raw.compute()

Raw Result:


TypeError: Cannot cast NumPy timedelta64 scalar from metadata [ns] to  according to the rule 'same_kind'

In [29]:
result_raw.compute()

TypeError: Cannot cast NumPy timedelta64 scalar from metadata [ns] to  according to the rule 'same_kind'

In [24]:
def convert_to_datetime64_ns(value):
    if np.isnan(value):
        return np.datetime64("NaT", "ns")
    return np.datetime64(int(value), "ns")


result = xr.apply_ufunc(
    np.vectorize(convert_to_datetime64_ns),
    result_raw,
    dask="parallelized",
    output_dtypes=[np.datetime64],
)

In [25]:
result.compute()