####################################
# Our first attempts at detecting heat waves using Daymet data - Hawaii region
####################################

In [1]:
import dask_gateway

# Number of Dask workers requested from the gateway (tune to the available quota).
N_WORKERS = 40

# Start a Dask Gateway cluster, obtain a client for it, and request the workers.
cluster = dask_gateway.GatewayCluster()
client = cluster.get_client()
cluster.scale(N_WORKERS)
# Print the dashboard link so the computation can be monitored.
print(cluster.dashboard_link)

/services/dask-gateway/clusters/prod.0a37b8122baf41788f3dcc01ddf5e838/status


In [2]:
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
from matplotlib.patches import Rectangle

from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import fsspec

# Notebook display setup: draw matplotlib figures inline, using the 'retina'
# figure format of the inline backend.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Default figure size for every plot in this notebook, given as (width, height).
plt.rcParams['figure.figsize'] = 12,8

In [4]:
################################
# Read the dataset (only reads the metadata) - HAWAII
################################

# Ref: dataset at
# https://daac.ornl.gov/DAYMET/guides/Daymet_Daily_V4.html#datasetoverview

import pystac
import fsspec
import xarray as xr

# Presumably the storage account / container that hosts the Daymet Zarr data.
# NOTE(review): neither name is referenced again in the visible cells --
# confirm whether they are still needed.
account_name = "daymeteuwest"
container_name = "daymet-zarr"

# Load the STAC collection description of the "daymet-daily-hi" collection.
collection = pystac.Collection.from_file(
    "https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-daily-hi"
)
# Select the collection-level asset registered under the "zarr-https" key.
asset = collection.assets["zarr-https"]

# Map the asset's href to a key/value store and open it with xarray, forwarding
# the open keyword arguments that the asset itself carries.
store = fsspec.get_mapper(asset.href)
ds = xr.open_zarr(store, **asset.extra_fields["xarray:open_kwargs"])

# NOTE(review): `.values` converts the whole `tmax` variable into a NumPy array,
# i.e. it loads the data (not just the metadata, as the cell header states)
# only to print its shape. `ds['tmax'].shape` yields the same tuple without
# loading anything -- confirm `arr_tmax` is not used by later cells before
# changing this.
arr_tmax = ds['tmax'].values
print('shape z,y,x=', arr_tmax.shape)

shape z,y,x= (14965, 584, 284)


In [3]:
################################
# PART II - run the algorithm for each x,y coordinate pair
################################

# Algorithm coefficients:
TEMP_DIFF_THRESHOLD = 1 # Celsius (a temperature difference, so identical in K)
PERSISTED_FOR_MIN = 3 # days

def flag_heat_periods(iX:int, iY:int) -> np.ndarray:
    """
    Flag the days of a single pixel that belong to a summer heat period.

    This is a slightly different logic than in the first version: instead of
    listing events, it 'flags' every entry of the time axis with 1 when the
    day is part of a 'long' and 'hot' window that fits our heat-extreme
    criteria, and with 0 otherwise.

    Criteria:
      * hot  -- tmax exceeds its centred 15-day rolling mean by more than
                TEMP_DIFF_THRESHOLD;
      * long -- at least PERSISTED_FOR_MIN days of one uninterrupted hot run
                fall within the summer months (May..September).

    Parameters
    ----------
    iX, iY : int
        Positional indices along the `x` and `y` dimensions of the global
        dataset `ds` (read via `ds['tmax'].isel(x=iX, y=iY)`).

    Returns
    -------
    np.ndarray
        1-D integer array with one element per time step of the pixel's
        series: 1 for flagged days, 0 elsewhere.
    """

    xr_tmax = ds['tmax'].isel(x=iX, y=iY)
    xr_tmax_avg = xr_tmax.rolling(time=15, center=True).mean()
    # pandas Series indexed by time; NaN where the rolling window is incomplete
    diff = (xr_tmax - xr_tmax_avg).to_pandas()

    # Size the output from the pixel's own series rather than from a global
    # defined in another cell, so the function works on a fresh kernel.
    n_days = len(diff)
    flags = np.zeros((n_days,), dtype=int)

    summer_months = [5,6,7,8,9]
    # NaN differences compare as False, so incomplete windows are never 'hot'.
    hot = pd.Series((diff > TEMP_DIFF_THRESHOLD).to_numpy())
    is_summer = pd.Series(diff.index.month.isin(summer_months))

    candidates = hot & is_summer
    if not candidates.any():
        return flags  # nothing hot during summer for this pixel

    # Label runs of consecutive equal 'hot' values: the label increases every
    # time the value changes from one day to the next.
    run_id = hot.ne(hot.shift(fill_value=False)).cumsum()

    # For every run, the first/last position and the number of hot summer days.
    positions = pd.Series(np.arange(n_days))
    spells = (
        positions[candidates]
        .groupby(run_id[candidates])
        .agg(['min', 'max', 'count'])
    )
    spells = spells[spells['count'] >= PERSISTED_FOR_MIN]

    for first, last in zip(spells['min'], spells['max']):
        flags[first:last + 1] = 1 # +1 because slicing excludes the end index

    return flags

In [78]:
%%time
################################
# Populate a 3D array with flagged heat events
################################

tmax = ds['tmax']

# create grid for all (x, y) index pairs [0,0],[0,1],..[283,583]
shape_yx = tmax.data.shape[1:] # np.shape order -> zyx
arr_y = np.arange(shape_yx[0])
arr_x = np.arange(shape_yx[1])
all_coordinates = np.array(np.meshgrid(arr_x, arr_y)).T.reshape(-1, 2)

# create empty 3D array to fill with heat event flags; allocate it as int
# directly instead of building a float array first and converting a copy
arr3d = np.zeros(tmax.data.shape, dtype=int)

# flags used for pixels without any data (built once, reused for each of them)
no_flags = np.zeros((len(tmax),), dtype=int)

# loop through all iX,iY pairs
for i, j in all_coordinates:

    # pixels whose whole time series is NaN cannot contain a heat event
    no_data = np.isnan(tmax.isel(x=i, y=j).values).all()
    arr1d = no_flags if no_data else flag_heat_periods(i, j)

    arr3d[:,j,i] = arr1d

np.save('./arr3d.npy', arr3d) # persist expensive output
arr3d.shape, arr3d.sum() # just to print total number of flagged days

CPU times: user 8.62 s, sys: 237 ms, total: 8.85 s
Wall time: 1min 49s


((14965, 5, 5), 1532)