In [29]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob as glob
import datetime as dt

from metpy.calc import relative_humidity_from_dewpoint
from metpy.units import units
import metpy

In [3]:
def Wetbulb(tmp, rh):
    """
    Calculate wet-bulb temperature from air temperature and relative humidity.

    Inputs:
        tmp - Air temperature in degrees Celsius (e.g. 20)
        rh  - Relative humidity in percent, not a fraction (e.g. 50)

    Outputs:
        Wet-bulb temperature; same shape/type rules as the numpy arithmetic
        applied to the inputs.

    NOTE(review): the coefficients look like the Stull (2011) empirical
    fit -- confirm the source and its stated validity range.
    """
    # Each term of the empirical fit is named separately; they are summed
    # left to right in the same order as a single-line expression would be.
    temperature_term = tmp * np.arctan(0.151977 * (rh + 8.313659)**(1/2))
    combined_term = np.arctan(tmp + rh)
    humidity_offset_term = np.arctan(rh - 1.676331)
    humidity_power_term = (0.00391838 * (rh)**(3/2)) * np.arctan(0.023101 * rh)

    return (temperature_term
            + combined_term
            - humidity_offset_term
            + humidity_power_term
            - 4.686035)

def Heat_Index(T, RH):
    """
    https://www.wpc.ncep.noaa.gov/html/heatindex_equation.shtml

    Calculates the NOAA/NWS heat index for an array.

    The procedure follows the page linked above:
      1. Compute the simple formula and average it with the temperature.
      2. Where that value is 80 F or higher, use the Rothfusz regression
         instead, including the low-humidity and high-humidity adjustments.

    Inputs:
        T  (DataArray) - Should be in Kelvins. The array may carry any name
                         (or none); the result does not depend on it.
        RH (DataArray) - Should be in decimal format (0-1)

    Outputs:
        hi_alone (DataArray) - Heat index array (in F), named 'heat_index'
    """
    # Convert to Fahrenheit
    T_F = ((T - 273.15) * 1.8) + 32

    # Convert relative humidity from a fraction to percent
    RH_p = RH * 100

    # Simple formula, then averaged with the temperature as NOAA prescribes
    simple = 0.5 * (T_F + 61.0 + ((T_F - 68.0) * 1.2) + (RH_p * 0.094))
    simple = 0.5 * (simple + T_F)

    # Rothfusz regression, used where the averaged simple value reaches 80 F
    rothfusz = (-42.379 + 2.04901523*T_F + 10.14333127*RH_p - 0.22475541*T_F*RH_p
                - 6.83783e-3*T_F**2 - 5.481717e-2*RH_p**2 + 1.22874e-3*T_F**2*RH_p
                + 8.5282e-4*T_F*RH_p**2 - 1.99e-6*T_F**2*RH_p**2)

    # Adjustment for relative humidity under 13% and temps between 80 and 112 F.
    # The radicand is clamped at zero: outside that temperature range it would
    # be negative, and the square root would emit NaNs and a RuntimeWarning even
    # though those cells are discarded by the mask below.
    dry_adjustment = ((13 - RH_p) / 4) * np.sqrt(np.maximum(17 - abs(T_F - 95), 0) / 17)
    is_dry = (RH_p < 13) & (T_F > 80) & (T_F < 112)
    rothfusz = rothfusz.where(~is_dry, rothfusz - dry_adjustment)

    # Adjustment for relative humidity over 85% and temps between 80 and 87 F
    humid_adjustment = ((RH_p - 85) / 10) * ((87 - T_F) / 5)
    is_humid = (RH_p > 85) & (T_F > 80) & (T_F < 87)
    rothfusz = rothfusz.where(~is_humid, rothfusz + humid_adjustment)

    # Keep the simple value below 80 F; everywhere else take the (adjusted)
    # regression. Missing inputs stay missing because both branches are NaN.
    hi_alone = simple.where(simple < 80, rothfusz)

    return hi_alone.rename('heat_index')

In [4]:
def Load_Data(file_path):
    """
    Open every file matching a glob pattern as an xarray Dataset.

    Inputs:
        file_path (str) - Glob pattern (wildcards allowed) selecting the files

    Outputs:
        list of Datasets, one per matching file, ordered by sorted path.
        An empty list is returned when nothing matches the pattern.
    """
    # Sorting the paths makes the order of the returned datasets deterministic
    matching_paths = sorted(glob.glob(file_path))
    return [xr.open_dataset(path) for path in matching_paths]

# 2 m dewpoint temperature: open every matching file, then join along time
ds_dp = Load_Data(
    "/data/cristi/a/kchoo3/ERA5/reanalysis/analysis-ready/ERA5_MIDWEST_2m_dewpoint_temperature_*.nc"
)
full_ds_dp = xr.concat(ds_dp, dim="time")

# 2 m air temperature: same procedure as for the dewpoint files
ds_tmp = Load_Data(
    "/data/cristi/a/kchoo3/ERA5/reanalysis/analysis-ready/ERA5_MIDWEST_2m_temperature_*.nc"
)
full_ds_tmp = xr.concat(ds_tmp, dim="time")

In [7]:
# Combine the dewpoint and temperature datasets into a single Dataset
full_ds = xr.merge((full_ds_dp, full_ds_tmp))

In [8]:
# Switch to the short variable names used by the rest of the notebook.
# NOTE(review): this cell rebinds full_ds, so running it a second time will
# fail because the long names no longer exist -- re-run the merge cell first.
full_ds = full_ds.rename(
    {
        "2m_temperature": "t2m",
        "2m_dewpoint_temperature": "d2m",
    }
)

In [9]:
# Show the dataset's repr as the cell output for a quick visual check
full_ds

In [33]:
# Start from the merged dataset and attach Celsius copies of both fields
heat_ds = full_ds.assign(
    t2m_c=full_ds.t2m - 273.15,
    d2m_c=full_ds.d2m - 273.15,
)

# Relative humidity in percent: MetPy needs unit-tagged inputs, and the units
# are stripped again afterwards so the result is a plain DataArray
rh = (
    relative_humidity_from_dewpoint(
        heat_ds.t2m_c * units.degC,
        heat_ds.d2m_c * units.degC,
    )
    * 100
).metpy.dequantify()

# Wet-bulb temperature takes Celsius and percent
heat_ds = heat_ds.assign(tmp_wb=Wetbulb(heat_ds.t2m_c, rh))

# Fahrenheit copy of the temperature (not used further in this cell)
tmp_f = (heat_ds.t2m_c * 9/5) + 32

# Heat index takes Kelvin and a 0-1 humidity fraction, hence rh / 100
heat_ds = heat_ds.assign(heat_index=Heat_Index(heat_ds.t2m, rh / 100))

# Make Daily highs: group every field by calendar date and keep the maximum
max_tmp, max_wb, max_hi = (
    heat_ds[field].groupby(heat_ds.time.dt.date).max()
    for field in ("t2m_c", "tmp_wb", "heat_index")
)

  result_data = func(*input_data)


In [34]:
# adding attributes and descriptions
# max_tmp is the daily maximum of t2m_c (Kelvin minus 273.15), so the values
# written to the file are in Celsius and must be labelled as such.
max_tmp.attrs['units'] = 'Celsius'
max_tmp.attrs['description'] = 'Temperature in Celsius'

max_wb.attrs['units'] = 'Celsius'
max_wb.attrs['description'] = 'Wet-bulb temperature in Celsius'

max_hi.attrs['units'] = 'Fahrenheit'
max_hi.attrs['description'] = 'NOAAs Heat index'


# Bundle the three daily-maximum fields into one Dataset with global metadata
ds = xr.Dataset({
    't2m': max_tmp,
    'wb': max_wb,
    'hi': max_hi
}, attrs={
    'title': 'Combined Maximum Temperature Data',
    'summary': 'ERA5 Dataset containing maximum daily of temperature, wet-bulb temperature, and heat index from 1940-2024 in June-July-August.'}
)

# Grouping by time.dt.date leaves the 'date' coordinate as Python date objects;
# convert it to datetime64 before writing the file
ds['date'] = np.array(ds['date'].values, dtype='datetime64[ns]')

ds.to_netcdf('/data/cristi/a/kchoo3/ERA5/reanalysis/analysis-ready/daily_max/ERA5_MIDWEST_heat_metrics_1940-2024_2025-04-15_daily_max.nc')