In [1]:
import xarray as xr
import pathlib
import numpy as np
import salem
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
from scipy import stats
from scipy.stats import gaussian_kde
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error
import sys

# Global font size for every figure produced in this notebook.
plt.rcParams['font.size'] = 22

# Select the data root for the machine the notebook runs on.
# A substring test ('win' in sys.platform) is also true for macOS ('darwin'),
# so match the platform prefix instead.
if sys.platform.startswith(('win', 'cygwin')):
    path = "E:/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/"
else:
    # geopandas is only imported on the non-Windows machine
    import geopandas as gpd
    path = "/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/"
# The AWS records sit in the same sub-folder of the climate directory on both machines.
aws_path = path + "AWS_Obleitner/"
# Sub-folder that holds the COSMO/CORDEX NetCDF files.
extra = "CORDEX-DKRZ/"

def fix_netcdf_dates(ds):
    """Crop ``ds`` to the 1999-2010 window and make sure it ends at 2010-01-01T00:00.

    The dataset is first restricted to ``1999-01-01`` .. ``2010-01-01T01:00``.
    If its last time step is not ``2010-01-01T00:00``, a copy of that last step
    is re-stamped with this time and appended along ``time``; otherwise the
    cropped dataset is returned unchanged.

    Parameters
    ----------
    ds : dataset with a ``time`` dimension (opened with xarray in this notebook)

    Returns
    -------
    The cropped dataset, extended by one time step when required.
    """
    final_stamp = np.datetime64("2010-01-01T00:00")
    cropped = ds.sel(time=slice("1999-01-01", "2010-01-01T01:00"))

    # Index with a list so the selection keeps its (length-1) time dimension.
    last_step = cropped.isel(time=[-1])
    if not (last_step.time != final_stamp):
        return cropped

    # Re-stamp the duplicated last record and append it to the series.
    last_step["time"] = ("time", np.reshape(final_stamp, (1)))
    return xr.concat([cropped, last_step], dim="time", data_vars="minimal", coords="minimal")


In [2]:
## Each COSMO field lives in its own NetCDF file: open them one by one
prec = xr.open_dataset(f"{path}{extra}cosmo_1999_2010_1h_RRR.nc")
snowfall = xr.open_dataset(f"{path}{extra}cosmo_1999_2010_1h_SNOWFALL-CSPYfix.nc")
pressure = xr.open_dataset(f"{path}{extra}cosmo_1999_2010_1h_PRESlinint.nc")

In [3]:
## Read both AWS records, using the parsed "time" column as index
aws_lower = pd.read_csv(f"{aws_path}Fix_HEFlower_01102003_24102004.csv", index_col="time", parse_dates=True)
aws_upper = pd.read_csv(f"{aws_path}Fix_HEFupper_01102003_24102004.csv", index_col="time", parse_dates=True)
## Shift air temperature by 273.15 (presumably °C -> K; verify against the station files)
aws_upper['T'] = aws_upper['T'].add(273.15)
aws_lower['T'] = aws_lower['T'].add(273.15)

In [None]:
# Get the model projection (from a COSMO file) and the glacier outline
# Match the platform prefix: a plain substring test for 'win' is also true on macOS ('darwin').
if sys.platform.startswith(('win', 'cygwin')):
    data_root = "E:/OneDrive/PhD/PhD/Data/Hintereisferner/"
else:
    data_root = "/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/"

dsr = salem.open_metum_dataset(data_root + "Climate/for_Niklas/cosmo_2009_1h.nc")
shp_file = data_root + "Static/RGI6/HEF_RGI6.shp"
try:
    hef = gpd.read_file(shp_file)
except Exception as err:
    # geopandas is not imported on the Windows machine (NameError); fall back to
    # salem's reader, but say so instead of silently swallowing every error.
    print(f"gpd.read_file unavailable ({type(err).__name__}: {err}); using salem.read_shapefile")
    hef = salem.read_shapefile(shp_file)

# Bring the outline into the coordinate system of the model grid
reproj_hef = hef.to_crs(dsr.pyproj_srs)
bounds = reproj_hef.bounds
print(bounds)

centroid = reproj_hef.dissolve().centroid
print(centroid)

# Grid indices of the cell whose rlat/rlon are closest to the glacier centroid
idx_lat = np.argmin(np.abs(prec.rlat.values  - centroid.y.values))
idx_lon = np.argmin(np.abs(prec.rlon.values  - centroid.x.values))
print(idx_lat, idx_lon)

In [None]:
## Derive solid precipitation at the AWSs from their surface-height ('sfc') records
snowcover = aws_lower[['sfc']].join(aws_upper[['sfc']], lsuffix='low', rsuffix='high')
# Reference each record to its first value (.iloc: explicit positional access;
# plain [0] on a datetime-indexed Series is deprecated label/position guessing)
snowcover['adj_sfclow'] = snowcover['sfclow'] - snowcover['sfclow'].iloc[0]
snowcover['adj_sfchigh'] = snowcover['sfchigh'] - snowcover['sfchigh'].iloc[0]
print(snowcover)
# The record is cumulative -> step-to-step differences give the increments
snowcover['incr_low'] = snowcover['adj_sfclow'].diff()
snowcover['incr_high'] = snowcover['adj_sfchigh'].diff()

## Keep only positive increments; everything else becomes NaN.
## Assign through a single .loc on the frame: snowcover['col'].loc[mask] = ... is
## chained assignment and does not modify the frame under pandas Copy-on-Write.
snowcover.loc[snowcover['incr_high'] <= 0, 'incr_high'] = np.nan
snowcover.loc[snowcover['incr_low'] <= 0, 'incr_low'] = np.nan

aws_lower['SF'] = snowcover['incr_low']
aws_upper['SF'] = snowcover['incr_high']
aws_lower

In [None]:
# Put both stations side by side; columns get the suffix 'low' / 'high'
aws_fields = ['T','RH','LWI','SF','SWI','P']
aws_comb = aws_lower[aws_fields].join(aws_upper[aws_fields], lsuffix='low', rsuffix='high')
elev_diff = 3048 - 2640
print(elev_diff)

##!! Note that we do not apply any corrections so far !!##
# Gradient per variable: (upper - lower) / elev_diff.
# A NaN at either station yields a NaN gradient (relevant for 'SF').
aws_comb = aws_comb.assign(**{
    lapse_name: (aws_comb[field + 'high'] - aws_comb[field + 'low']) / elev_diff
    for lapse_name, field in [('lapse_t2m', 'T'), ('lapse_rh', 'RH'), ('lapse_lw', 'LWI'), ('lapse_sf', 'SF')]
})
aws_comb

In [None]:
# COSMO snowfall for the AWS period, cropped to the cells within box_size of the centroid cell
box_size = 1
cosmo_sf = (
    snowfall
    .sel(time=slice("2003-10-01", "2004-10-24T00:00:00"))
    .isel(rlat=slice(idx_lat - box_size, idx_lat + box_size + 1),
          rlon=slice(idx_lon - box_size, idx_lon + box_size + 1))
)
cosmo_sf
#3x3

In [None]:
## Quick look: box-mean COSMO snowfall (top) against the AWS surface-height increments (bottom)
fig, axes = plt.subplots(2, 1, sharex=True, figsize=(16, 9), dpi=300)
cosmo_panel, aws_panel = axes

box_mean_snow = cosmo_sf['SNOW'].mean(dim=['rlat', 'rlon'])
cosmo_panel.plot(cosmo_sf['time'], box_mean_snow, label="COSMO-DKRZ Snowfall [m]")

for column, legend_text in (('incr_low', "SFC increment at lower AWS"),
                            ('incr_high', "SFC increment at upper AWS")):
    aws_panel.plot(snowcover[column], label=legend_text)

# Same legend, y-range and label on both panels
for ax in axes.flatten():
    ax.legend()
    ax.set_ylim(0, 0.18)
    ax.set_ylabel("Snowfall [m]")


In [None]:
## Relative humidity (values in 0-100): open, repair the time axis, show it
relhum = xr.open_dataset(f"{path}{extra}cosmo_1998-2010_1h_hurs.nc")
relhumfix = fix_netcdf_dates(relhum)
print(relhumfix.time)

## Near-surface air temperature: same treatment
tas = xr.open_dataset(f"{path}{extra}cosmo_1998-2010_1h_tas.nc")
tasfix = fix_netcdf_dates(tas)
print(tasfix.time)


In [None]:
# Cloud cover is given at every half-hour (time_bnds from 00h to 01h), so each value
# is assigned to the next full hour by replacing the time axis.
# "1h" replaces the deprecated "1H" alias and is accepted by old and new pandas alike.
time_range = pd.date_range("1998-11-01T01:00:00", "2010-01-01", freq="1h")
clt = xr.open_dataset(path+extra+"cosmo_1998-2010_1h_clt.nc") #values in 0-100
clt['time'] = ('time', time_range)
cltfix = fix_netcdf_dates(clt)
print(cltfix)


In [None]:
# Incoming shortwave is given at every half-hour (time_bnds from 00h to 01h), so each
# value is assigned to the next full hour by replacing the time axis.
# "1h" replaces the deprecated "1H" alias and is accepted by old and new pandas alike.
time_range = pd.date_range("1998-11-01T01:00:00", "2010-01-01", freq="1h")
## Load incoming shortwave radiation (rsds)
incsw = xr.open_dataset(path+extra+"cosmo_1998-2010_1h_rsds.nc")
incsw['time'] = ('time', time_range)
incswfix = fix_netcdf_dates(incsw)
print(incswfix)

In [None]:
# Re-open the raw rsds file, apply the hourly time axis (time_range from the cell above)
# and display everything from 1999 onwards
incsw = xr.open_dataset(f"{path}{extra}cosmo_1998-2010_1h_rsds.nc")
incsw['time'] = ('time', time_range)
incsw.sel(time=slice("1999-01-01", None))

In [None]:
# Static (time-invariant) orography file
static = xr.open_dataset(f"{path}{extra}cosmo_1998-2010_fx_orog.nc")
static

In [None]:
# Collect all fields in the precipitation dataset
prec['SNOWFALL'] = snowfall['SNOW'] #[in units m]
# The pressure file stores its variable as either 'ps' or 'ps_fixed'; choose explicitly
# rather than hiding every possible error behind a bare except.
pres_name = 'ps' if 'ps' in pressure else 'ps_fixed'
prec['PRES'] = pressure[pres_name] / 100 #Pa to hPa
prec['RH2'] = relhumfix['hurs']
prec['T2'] = tasfix['tas']
prec['N'] = cltfix['clt']/100  # 0-100 -> fraction
prec['orog'] = static['orog']
# NOTE(review): SNOWFALL is annotated as metres above; converting m to mm would be
# *1000, not /1000 -- confirm the intended units before using RAIN.
prec['RAIN'] = prec['RRR'] - prec['SNOWFALL']/1000 #to kg/m2 == mm
prec['G'] = incswfix['rsds']


print("Assigned the data to a singular file")


In [17]:
# Slope of a simple linear regression (scipy.stats.linregress)
def lin_regr(x,y):
    """Return the least-squares slope of ``y`` regressed on ``x``, wrapped in an ``xr.DataArray``."""
    fit = stats.linregress(x, y)
    return xr.DataArray(fit.slope)

def lin_regr_r2(x,y):
    """Return the squared correlation coefficient of the linear regression of ``y`` on ``x``.

    For a simple linear regression this equals the coefficient of determination.
    The value is wrapped in an ``xr.DataArray``.
    """
    fit = stats.linregress(x, y)
    return xr.DataArray(fit.rvalue ** 2)



## Half-width, in grid cells, of the box used for the lapse-rate regressions
box_size = 1

def create_box_sized_ds(ds, box_size, centroid):
    """Build a point dataset at the glacier centroid with per-time-step lapse rates.

    A box of ``2 * box_size + 1`` cells per side is cut out of ``ds`` around the
    grid indices ``idx_lat`` / ``idx_lon`` (notebook globals). For every time
    step each variable is regressed against orography over all cells of the box
    (``lin_regr`` for the slope, ``lin_regr_r2`` for R²). The slopes and R²
    values are attached to the grid cell nearest to ``centroid``.

    Parameters
    ----------
    ds : dataset with dims ``time``, ``rlat``, ``rlon`` and the variables
        ``orog``, ``T2``, ``RH2``, ``N``, ``RRR`` and ``SNOWFALL``.
    box_size : int
        Number of cells added on each side of the centre cell.
    centroid : geometry series; the first geometry's ``xy`` gives the
        (rlon, rlat) position used for the nearest-cell selection.

    Returns
    -------
    The nearest-cell dataset with added ``lr_*`` (slope) and ``lr_*_r2`` variables.
    """
    # Clamp the lower bounds: a negative slice start would wrap around the grid edge.
    lat_window = slice(max(idx_lat - box_size, 0), idx_lat + box_size + 1)
    lon_window = slice(max(idx_lon - box_size, 0), idx_lon + box_size + 1)
    dsr_hef = ds.isel(rlat=lat_window, rlon=lon_window)

    print(f"Using boxsize {box_size}, dataset consists of {len(dsr_hef.rlat) * len(dsr_hef.rlon)} grid cells.")

    # Repeat the static orography along time so it can be paired with each time step
    hsurf2_new = np.expand_dims(dsr_hef.orog.values, axis=0)
    hsurf2_new = np.repeat(hsurf2_new, len(dsr_hef.time.values), axis=0)
    dsr_hef['OROG2'] = (('time','rlat','rlon'), hsurf2_new)

    # Combine both horizontal dims into one so the regression runs over all box cells
    stack_ds = dsr_hef.stack(latlon=['rlat','rlon'])

    def _regress(func, var_name):
        """Apply ``func(orography, variable)`` across the box for every time step."""
        return xr.apply_ufunc(func, stack_ds.OROG2, stack_ds[var_name], dask='parallelized',
                              vectorize=True, input_core_dims=[['latlon'], ['latlon']])

    # output name -> regressed variable (insertion order fixes the variable order in the result)
    regressed = {'lr_t2m': 'T2', 'lr_rh2': 'RH2', 'lr_clt': 'N', 'lr_tp': 'RRR', 'lr_sf': 'SNOWFALL'}

    # Lapse rates (slopes) and their R² over time. Every *_r2 field uses lin_regr_r2;
    # the cloud-cover R² was previously computed with the slope function by mistake.
    slopes = {name: _regress(lin_regr, var_name) for name, var_name in regressed.items()}
    r2_scores = {name + '_r2': _regress(lin_regr_r2, var_name) for name, var_name in regressed.items()}

    # Create single file at centroid from which to distribute using lapse rates
    x_cords, y_cords = centroid.iloc[0].xy
    x_cord = x_cords[0]
    y_cord = y_cords[0]
    print(x_cord, y_cord)

    ds_closest = dsr_hef.sel(rlat=y_cord, rlon=x_cord, method='nearest')
    # add lapse rates first, then the r2 scores
    for name, values in {**slopes, **r2_scores}.items():
        ds_closest[name] = values

    return ds_closest


In [None]:
# Build the centroid datasets for box_size 1 and box_size 2
ds_box1, ds_box2 = (
    create_box_sized_ds(prec, box_size=size, centroid=centroid) for size in (1, 2)
)


In [None]:
## Mean diurnal cycles, used to check which time convention the files follow
aws_comb['mean_t2m'] = aws_comb[['Tlow', 'Thigh']].mean(axis="columns")

# Average by hour of day for the AWS frame and for the COSMO box-1 dataset
aws_hour = aws_comb.index.hour
mean_diurnal_aws = aws_comb.groupby(aws_hour).mean()
mean_diurnal_cosmo = ds_box1.groupby(ds_box1.time.dt.hour).mean()

mean_diurnal_aws.head(5)

In [None]:
# Winter-time part of the AWS record; copy so the new column is not written to a slice of aws_comb
aws_comb_wintertime = aws_comb.loc["2003-10-26":"2004-03-28"].copy()
print(aws_comb_wintertime.head(5))
print("\n---------------------------")
# Convert the index from UTC to Europe/Berlin. Build the values from the index itself:
# a Series taken from reset_index() carries a 0..n-1 index, so assigning it would align
# on labels and fill the whole column with NaT.
aws_comb_wintertime['new_time'] = aws_comb_wintertime.index.tz_localize('UTC').tz_convert('Europe/Berlin')
print(aws_comb_wintertime.new_time.head(5))
winter_diurnal_aws = aws_comb_wintertime.groupby(aws_comb_wintertime.index.hour).mean()
# Summer-time part and its mean diurnal cycle
aws_comb_summertime = aws_comb.loc["2004-03-29":"2004-10-24"]
summer_diurnal_aws = aws_comb_summertime.groupby(aws_comb_summertime.index.hour).mean()

In [None]:
# Mean diurnal cycle at the upper AWS against COSMO:
# incoming shortwave (top) and 2 m air temperature (bottom).
# (Hour-shifted and seasonal variants were tried while diagnosing the time offset; they are not drawn.)
fig, ax = plt.subplots(2, 1, sharex=True, figsize=(16, 9), dpi=150)
sw_panel, t2m_panel = ax

sw_panel.plot(mean_diurnal_aws.index, mean_diurnal_aws.SWIhigh, label="AWS SWin")
sw_panel.plot(mean_diurnal_cosmo.hour, mean_diurnal_cosmo.G, label="COSMO SWin")
sw_panel.set_xticks(np.arange(0, 23 + 1))
sw_panel.set_ylabel("Inc. SW [Wm-2]")

# Temperatures are stored in K; subtract 273.15 for display in °C
t2m_panel.plot(mean_diurnal_aws.index, mean_diurnal_aws.Thigh - 273.15, label="AWS T2M")
t2m_panel.plot(mean_diurnal_cosmo.hour, mean_diurnal_cosmo.T2 - 273.15, label="COSMO T2M")
t2m_panel.set_ylabel("2m air temp. [°C]")

for xs in ax:
    xs.grid()
    xs.legend()

In [None]:
# Make the files comparable: drop the last two time steps of both box datasets.
# Open question from the analysis: the AWS data seem closer to UTC than COSMO does,
# so the actual time stamp convention is unclear; no UTC -> local shift is applied here.
all_but_last_two = slice(None, -2)
cosmo_hef_box1 = ds_box1.isel(time=all_but_last_two)
cosmo_hef_box2 = ds_box2.isel(time=all_but_last_two)
cosmo_hef_box1

In [None]:
## Monthly means (month-start bins) of the COSMO T2M lapse rate / R² and of the AWS frame
lr_fields = ['lr_t2m', 'lr_t2m_r2']
monthly_lr_box1, monthly_lr_box2 = (
    box[lr_fields].resample(time="1MS").mean() for box in (cosmo_hef_box1, cosmo_hef_box2)
)
monthly_aws = aws_comb.resample("1MS").mean()

# Mean AWS lapse rate: from all records, then from the monthly means
for frame in (aws_comb, monthly_aws):
    print(frame['lapse_t2m'].mean())


In [None]:
## First plot: compare monthly T2M lapse rates (AWS vs. COSMO 9-cell and 25-cell boxes)
eval_period = slice("2003-10-01", "2004-10-24T00:00:00")
# NOTE: pandas >= 2.2 prefers the alias "2ME" for month-end; "2M" is kept for older versions.
x_axis_ticks = pd.date_range("2003-10-01","2004-10-24T00:00:00", freq="2M")

fig, ax = plt.subplots(3,1, figsize=(16,9), dpi=300)

# One panel per box: (axes, monthly COSMO data, line colour, legend label, title fragment)
panel_specs = [
    (ax[0], monthly_lr_box1, "blue", "COSMO 9-cell", "9-cell-box"),
    (ax[1], monthly_lr_box2, "green", "COSMO 25-cell", "25-cell-box"),
]
for panel, monthly_lr, colour, cosmo_label, box_label in panel_specs:
    aws_lr = monthly_aws['lapse_t2m']
    cosmo_lr = monthly_lr.sel(time=eval_period)['lr_t2m']
    bias = cosmo_lr - aws_lr

    panel.plot(monthly_aws.index, aws_lr, color="red", marker='.', alpha=0.5, label="AWS")
    panel.plot(cosmo_lr.time, cosmo_lr, marker='.', color=colour, alpha=0.5, label=cosmo_label)
    panel.plot(cosmo_lr.time, bias, marker='.', color="orange", label="COSMO - AWS", zorder=-1)
    panel.axhline(y=np.nanmean(bias), color="magenta", linestyle="dashed", label="Mean Bias °Cm$^{-1}$")
    panel.axhline(y=-0.0065, color="black", linestyle="dashed", label="-0.0065 °Cm$^{-1}$")
    panel.legend()
    panel.set_title("T2M Lapse Rates for: " + box_label + " in °C/m")
    panel.grid()

    # Scores are computed per metre and shown per kilometre (x1000), so both MAE and
    # RMSE carry °C/km (the 25-cell panel previously labelled its MAE as °C/m).
    r2 = r2_score(aws_lr, cosmo_lr)
    mae = mean_absolute_error(aws_lr, cosmo_lr)
    rmse = root_mean_squared_error(aws_lr, cosmo_lr)
    panel.annotate("R²: {},\nMAE: {}°C/km,\nRMSE: {}°C/km".format(round(r2,2),round(mae*1000,4),round(rmse*1000,4)),
                   xy=(0.73,0.72), xycoords='axes fraction', fontsize=13)
    panel.set_xticks(x_axis_ticks)

# Third panel: full monthly COSMO series of both boxes
ax[2].plot(monthly_lr_box1.time, monthly_lr_box1['lr_t2m'],marker='.', color="blue",label="9-cell-box")
ax[2].plot(monthly_lr_box2.time, monthly_lr_box2['lr_t2m'],marker='.', color="green",label="25-cell-box")
ax[2].axhline(y=-0.0065, color="black", linestyle="dashed",label="-0.0065 °Cm$^{-1}$")
ax[2].legend()
ax[2].set_title("T2M Lapse Rates for: " + "9-cell-box and 25-cell-box" + " in °C/m")
ax[2].grid()
ax[2].set_xticks(pd.date_range("1999-01-01","2010-01-01T00:00:00", freq="6MS"))
for label in ax[2].get_xticklabels():
    label.set_rotation(45)

In [26]:
# Restrict both COSMO box datasets to the AWS observation period
hef_box1_crop, hef_box2_crop = (
    box.sel(time=slice("2003-10-01", "2004-10-24T00:00:00"))
    for box in (cosmo_hef_box1, cosmo_hef_box2)
)

# Units per lapse-rate variable (per metre)
dict_units = {
    "lapse_t2m": "°C m$^{-1}$",
    "lapse_sf": "m m$^{-1}$",
    "lapse_rh": "% m$^{-1}$"
    }

# Units per lapse-rate variable (per kilometre)
dict_units_km = {
    "lapse_t2m": "K km⁻¹",
    "lapse_sf": "m km⁻¹",
    "lapse_rh": "% km⁻¹"
}

# Short variable name that opens each title
_title_prefix = {"lapse_t2m": "T2M", "lapse_sf": "SF", "lapse_rh": "RH2"}


def _titles_for(box_name):
    """Return the title per lapse-rate variable for the box description ``box_name``."""
    return {
        key: prefix + " Lapse Rates for: " + box_name + " in " + dict_units[key]
        for key, prefix in _title_prefix.items()
    }


# Titles for the 9-cell box, the 25-cell box, and the combined panel
dict_labels_9cell = _titles_for("9-cell-box")
dict_labels_25cell = _titles_for("25-cell-box")
dict_labels_both = _titles_for("9-cell-box and 25-cell-box")

In [27]:
# --- The Final Plotting Function ---
def plot_lapse_rate_comparison(aws_comb, hef_box1_crop, hef_box2_crop, aws_var, cosmo_var,
                             variable_name="T2M Lapse Rate", r2_filter=None, save_path=None):
    """
    Compare AWS-derived and COSMO-derived lapse rates in a 2 x 3 panel figure.

    Row 1 uses ``hef_box1_crop`` (labelled "COSMO 9-cell"), row 2 uses
    ``hef_box2_crop`` ("COSMO 25-cell"). Columns: time series, empirical
    cumulative distributions, and a density-coloured scatter annotated with
    mean bias and RMSE. All values are multiplied by 1000 before plotting;
    the displayed units come from the notebook-level ``dict_units_km``.

    Parameters
    ----------
    aws_comb : pandas.DataFrame
        Frame holding column ``aws_var``; it is reindexed (nearest neighbour)
        onto the time coordinate of ``hef_box1_crop``.
    hef_box1_crop, hef_box2_crop : datasets with a ``time`` coordinate
        Must contain ``cosmo_var`` and, when ``r2_filter`` is set,
        ``cosmo_var + "_r2"``. Both are assumed to share one time axis
        -- TODO confirm.
    aws_var : str
        Column name in ``aws_comb`` (also the key into ``dict_units_km``).
    cosmo_var : str
        Variable name in the COSMO datasets.
    variable_name : str
        Text used for the y-label of the time-series panels.
    r2_filter : float or None
        If given, COSMO values whose R² is below this threshold are masked.
    save_path : path-like or None
        If given, the figure is written there.

    Returns
    -------
    None
    """
    scaling_factor = 1000.0
    cosmo_time = hef_box1_crop.time
    unit_str = dict_units_km.get(aws_var, "")

    aws_data_aligned = aws_comb[aws_var].reindex(cosmo_time, method='nearest') * scaling_factor
    # Plain arrays for all masking/sorting below: integer or boolean indexing on the
    # datetime-indexed Series would rely on pandas' deprecated positional fallback.
    aws_values = aws_data_aligned.to_numpy()

    if r2_filter is None:
        box1_data = hef_box1_crop[cosmo_var] * scaling_factor
        box2_data = hef_box2_crop[cosmo_var] * scaling_factor
    else:
        print(f"Applying R² filter >= {r2_filter}")
        box1_data = hef_box1_crop[cosmo_var].where(hef_box1_crop[cosmo_var + "_r2"] >= r2_filter) * scaling_factor
        box2_data = hef_box2_crop[cosmo_var].where(hef_box2_crop[cosmo_var + "_r2"] >= r2_filter) * scaling_factor

    fig, axes = plt.subplots(
        2, 3,
        figsize=(24, 13),
        dpi=300,
        gridspec_kw={'width_ratios': [3, 1.5, 1.5]}
    )

    plot_data = [
        {'cosmo': box1_data, 'label': 'COSMO 9-cell', 'ax_row': axes[0], 'color': 'blue'},
        {'cosmo': box2_data, 'label': 'COSMO 25-cell', 'ax_row': axes[1], 'color': 'green'}
    ]

    for item in plot_data:
        cosmo_d = item['cosmo']
        label = item['label']
        ts_ax, cdf_ax, scatter_ax = item['ax_row']

        # Keep only time steps where both sources have a value
        cosmo_values = cosmo_d.to_numpy()
        valid_mask = ~np.isnan(cosmo_values) & ~np.isnan(aws_values)
        aws_clean = aws_values[valid_mask]
        cosmo_clean = cosmo_values[valid_mask]

        # --- Column 1: time series ---
        ts_ax.plot(cosmo_time, aws_data_aligned, color='black', alpha=0.8, label='AWS', linewidth=2.0)
        ts_ax.plot(cosmo_time, cosmo_d, color=item['color'], alpha=0.7, label=label)

        if aws_var == "lapse_t2m":
            ts_ax.axhline(-6.5, color='orange', linestyle='--', label=f'Environm. Lapse Rate (-6.5 {unit_str})')

        ts_ax.set_ylabel(f'{variable_name} ({unit_str})')
        ts_ax.grid(True, linestyle='--', alpha=0.6)
        ts_ax.legend(fontsize=18)

        locator = mdates.AutoDateLocator(minticks=5, maxticks=10)
        formatter = mdates.ConciseDateFormatter(locator)
        ts_ax.xaxis.set_major_locator(locator)
        ts_ax.xaxis.set_major_formatter(formatter)

        # The CDF and the kernel density estimate need a handful of paired samples.
        if aws_clean.size < 3:
            for empty_ax in (cdf_ax, scatter_ax):
                empty_ax.text(0.5, 0.5, "Not enough valid data", transform=empty_ax.transAxes,
                              ha='center', va='center', fontsize=18)
            continue

        # --- Column 2: empirical CDFs, in percent to match the axis label ---
        xmax = max(aws_clean.max(), cosmo_clean.max())
        for values, colour, name in ((cosmo_clean, item['color'], label), (aws_clean, 'black', 'AWS')):
            # Extend each curve to the common maximum so both end at 100 %
            cdf_x = np.append(np.sort(values), xmax)
            cdf_y = np.append(np.linspace(0, 100, len(values)), 100.0)
            cdf_ax.plot(cdf_x, cdf_y, color=colour, label=name, lw=2)

        cdf_ax.set_xlabel(f'Lapse Rate ({unit_str})')
        cdf_ax.set_ylabel('Probability (%)')
        cdf_ax.grid(True, linestyle='--', alpha=0.6)

        # --- Column 3: density-coloured scatter ---
        xy = np.vstack([aws_clean, cosmo_clean])
        z = gaussian_kde(xy)(xy)

        # Draw the densest points last so they end up on top
        idx = z.argsort()
        aws_clean, cosmo_clean, z = aws_clean[idx], cosmo_clean[idx], z[idx]

        scatter_ax.scatter(aws_clean, cosmo_clean, c=z, s=20, alpha=0.6, cmap='viridis')

        lim_min = min(aws_clean.min(), cosmo_clean.min())
        lim_max = max(aws_clean.max(), cosmo_clean.max())
        lims = [lim_min, lim_max]

        scatter_ax.plot(lims, lims, 'k--', alpha=0.75, zorder=0)  # 1:1 line
        scatter_ax.set_xlabel(f'AWS ({unit_str})')
        scatter_ax.set_ylabel(f'{label} ({unit_str})')
        scatter_ax.grid(True, linestyle='--', alpha=0.6)
        scatter_ax.set_aspect('equal', adjustable='box')
        scatter_ax.set_xlim(lims)
        scatter_ax.set_ylim(lims)

        mean_bias = np.mean(cosmo_clean - aws_clean)
        rmse = np.sqrt(mean_squared_error(aws_clean, cosmo_clean))
        stats_text = f"Mean Bias: {mean_bias:.3f} {unit_str}\nRMSE: {rmse:.3f} {unit_str}"
        scatter_ax.text(0.05, 0.95, stats_text, transform=scatter_ax.transAxes, fontsize=18,
                        verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    # Panel letters a) .. f), row by row
    subplot_labels = ['a)', 'b)', 'c)', 'd)', 'e)', 'f)']
    for panel_label, panel in zip(subplot_labels, axes.flatten()):
        panel.text(-0.11, 1.0, panel_label, transform=panel.transAxes,
                   fontsize=22, va='bottom', ha='left')

    fig.tight_layout(pad=1.5)

    if save_path:
        fig.savefig(save_path, bbox_inches="tight")
        print(f"Plot successfully saved to: {save_path}")

In [None]:
# --- T2M lapse rates, no R² screening, figure not saved ---
print("--- Plotting without R² filter ---")
plot_lapse_rate_comparison(aws_comb, hef_box1_crop, hef_box2_crop,
                           aws_var="lapse_t2m", cosmo_var="lr_t2m", save_path=None)



In [None]:
# --- T2M lapse rates, only time steps with R² >= 0.7, saved as PDF ---
# Derive the Figures folder from the platform-dependent `path` (".../Hintereisferner/Climate/")
# rather than hard-coding the Linux mount point, so the cell also works on the Windows machine.
plot_lapse_rate_comparison(
    aws_comb=aws_comb, 
    hef_box1_crop=hef_box1_crop, 
    hef_box2_crop=hef_box2_crop, 
    aws_var="lapse_t2m", 
    cosmo_var="lr_t2m",
    r2_filter=0.7,
    save_path=str(pathlib.Path(path).parent / "Figures" / "lapse_rates_t2m.pdf")
)

In [None]:
# Snowfall lapse rates, no R² screening (r2_filter=0.7 is the filtered variant), not saved
plot_lapse_rate_comparison(aws_comb, hef_box1_crop, hef_box2_crop,
                           aws_var="lapse_sf", cosmo_var="lr_sf",
                           variable_name="SF Lapse Rate")

In [None]:
# Snowfall lapse rates, only time steps with R² >= 0.7, saved as PDF.
# The Figures folder is derived from the platform-dependent `path` instead of a
# hard-coded Linux mount point.
plot_lapse_rate_comparison(
    aws_comb=aws_comb, 
    hef_box1_crop=hef_box1_crop, 
    hef_box2_crop=hef_box2_crop, 
    aws_var="lapse_sf", 
    cosmo_var="lr_sf",
    variable_name="SF Lapse Rate",
    r2_filter=0.7,
    save_path=str(pathlib.Path(path).parent / "Figures" / "lapse_rates_sf.pdf")
)

In [None]:
# Relative-humidity lapse rates, no R² screening (r2_filter=0.7 is the filtered variant), not saved
plot_lapse_rate_comparison(aws_comb, hef_box1_crop, hef_box2_crop,
                           aws_var="lapse_rh", cosmo_var="lr_rh2",
                           variable_name="RH2 Lapse Rate")

In [None]:
# Relative-humidity lapse rates, only time steps with R² >= 0.7, saved as PDF.
# The Figures folder is derived from the platform-dependent `path` instead of a
# hard-coded Linux mount point.
plot_lapse_rate_comparison(
    aws_comb=aws_comb, 
    hef_box1_crop=hef_box1_crop, 
    hef_box2_crop=hef_box2_crop, 
    aws_var="lapse_rh", 
    cosmo_var="lr_rh2",
    variable_name="RH2 Lapse Rate",
    r2_filter=0.7,
    save_path=str(pathlib.Path(path).parent / "Figures" / "lapse_rates_rh2.pdf")
)

In [41]:
# From here on the COSMO box datasets only cover the AWS observation period
aws_period = slice("2003-10-01", "2004-10-24T00:00:00")
cosmo_hef_box1 = cosmo_hef_box1.sel(time=aws_period)
cosmo_hef_box2 = cosmo_hef_box2.sel(time=aws_period)

In [None]:
## Join COSMO and AWS data per box and add the hour of day (basis for the day/night split)
joint_df_box1 = (
    cosmo_hef_box1.to_dataframe()
    .join(aws_comb)
    .assign(hour=lambda frame: frame.index.hour)
)
joint_df_box2 = (
    cosmo_hef_box2.to_dataframe()
    .join(aws_comb)
    .assign(hour=lambda frame: frame.index.hour)
)
joint_df_box1.columns

In [43]:
## Let's proceed with box1 because so far there has been no clear merit to using a larger grid box?

In [44]:
## Daytime: hours 9 to 18; nighttime: hours 21 to 3 (taken from the frame's 'hour' column)
day_hours = np.arange(9, 18 + 1, 1)
night_hours = [21, 22, 23, 0, 1, 2, 3]

daytime_box1 = joint_df_box1[joint_df_box1['hour'].isin(day_hours)]
nighttime_box1 = joint_df_box1[joint_df_box1['hour'].isin(night_hours)]
#
daytime_box2 = joint_df_box2[joint_df_box2['hour'].isin(day_hours)]
nighttime_box2 = joint_df_box2[joint_df_box2['hour'].isin(night_hours)]

In [None]:
## Histograms of the T2M lapse rate (AWS vs. COSMO 9-cell): full series, daytime, nighttime
bins = np.linspace(-0.015, 0.015, 100)

fig, ax = plt.subplots(3, 1, figsize=(16, 9), dpi=300)

# (frame, title, COSMO legend label, draw the -0.0065 reference before the mean lines?)
hist_panels = [
    (joint_df_box1, "Full timeseries LR (°C/m)", "COSMO", False),
    (daytime_box1, "Daytime LR (°C/m)", "COSMO", True),
    (nighttime_box1, "Nighttime LR (°C/m)", "COSMO 9-cell", True),
]
for row, (frame, title, cosmo_label, reference_first) in enumerate(hist_panels):
    panel = ax[row]
    panel.hist(frame["lapse_t2m"], bins, alpha=0.5, label='AWS', color="red")
    panel.hist(frame["lr_t2m"], bins, alpha=0.5, label=cosmo_label, color="blue")
    panel.set_title(title)

    mean_lines = [
        (np.nanmean(frame["lapse_t2m"]), "darkred", "AWS Mean"),
        (np.nanmean(frame["lr_t2m"]), "deepskyblue", "COSMO Mean"),
    ]
    reference = [(-0.0065, "black", "-0.0065 °Cm$^{-1}$")]
    ordered = reference + mean_lines if reference_first else mean_lines + reference
    for x_pos, colour, text in ordered:
        # Only the bottom panel carries line labels, since only it shows a legend
        panel.axvline(x_pos, color=colour, linestyle="dashed", label=text if row == 2 else None)

ax[2].legend(loc='upper right')

In [None]:
## Histograms of the T2M lapse rate (AWS vs. COSMO 25-cell): full series, daytime, nighttime
# Define the bin edges here as well: later cells rebind `bins` to other ranges, so
# relying on the leaked variable breaks when cells are re-run out of order.
bins = np.linspace(-0.015, 0.015, 100)

fig, ax = plt.subplots(3,1, figsize=(16,9), dpi=300)

# (frame, title, COSMO legend label, draw the -0.0065 reference before the mean lines?)
hist_panels = [
    (joint_df_box2, "Full timeseries LR (°C/m)", "COSMO", False),
    (daytime_box2, "Daytime LR (°C/m)", "COSMO", True),
    (nighttime_box2, "Nighttime LR (°C/m)", "COSMO 25-cell", True),
]
for row, (frame, title, cosmo_label, reference_first) in enumerate(hist_panels):
    panel = ax[row]
    panel.hist(frame["lapse_t2m"], bins, alpha=0.5, label='AWS', color="red")
    panel.hist(frame["lr_t2m"], bins, alpha=0.5, label=cosmo_label, color="green")
    panel.set_title(title)

    mean_lines = [
        (np.nanmean(frame["lapse_t2m"]), "darkred", "AWS Mean"),
        (np.nanmean(frame["lr_t2m"]), "springgreen", "COSMO Mean"),
    ]
    reference = [(-0.0065, "black", "-0.0065 °Cm$^{-1}$")]
    ordered = reference + mean_lines if reference_first else mean_lines + reference
    for x_pos, colour, text in ordered:
        # Only the bottom panel carries line labels, since only it shows a legend
        panel.axvline(x_pos, color=colour, linestyle="dashed", label=text if row == 2 else None)

ax[2].legend(loc='upper right')

In [None]:
# T2M lapse rates of box 1, restricted to time steps where the COSMO regression has R² > 0.7
t2m_box1 = joint_df_box1[['lapse_t2m','lr_t2m','lr_t2m_r2','hour']]
t2m_box1 = t2m_box1.loc[t2m_box1['lr_t2m_r2'] > 0.7]

## daytime 9 to 18 local time, nighttime 21 to 3am
# Build the masks from the filtered frame itself: a mask taken from the longer
# joint_df_box1 only works through implicit index alignment.
daytime_t2m_box1 = t2m_box1.loc[t2m_box1.hour.isin(np.arange(9,18+1,1))]
nighttime_t2m_box1 = t2m_box1.loc[t2m_box1.hour.isin([21,22,23,0,1,2,3])]

t2m_box1

In [None]:
## Repeat the T2M histograms for time steps where R² > 0.7 (9-cell box)
# Define the bin edges here as well: later cells rebind `bins` to other ranges, so
# relying on the leaked variable breaks when cells are re-run out of order.
bins = np.linspace(-0.015, 0.015, 100)

fig, ax = plt.subplots(3,1, figsize=(16,9), dpi=300)

# (frame, title, COSMO legend label, draw the -0.0065 reference before the mean lines?)
hist_panels = [
    (t2m_box1, "Full timeseries LR (°C/m), conditioned on R² > 0.7", "COSMO", False),
    (daytime_t2m_box1, "Daytime LR (°C/m)", "COSMO", True),
    (nighttime_t2m_box1, "Nighttime LR (°C/m)", "COSMO 9-cell", True),
]
for row, (frame, title, cosmo_label, reference_first) in enumerate(hist_panels):
    panel = ax[row]
    panel.hist(frame["lapse_t2m"], bins, alpha=0.5, label='AWS', color="red")
    panel.hist(frame["lr_t2m"], bins, alpha=0.5, label=cosmo_label, color="blue")
    panel.set_title(title)

    mean_lines = [
        (np.nanmean(frame["lapse_t2m"]), "darkred", "AWS Mean"),
        (np.nanmean(frame["lr_t2m"]), "deepskyblue", "COSMO Mean"),
    ]
    reference = [(-0.0065, "black", "-0.0065 °Cm$^{-1}$")]
    ordered = reference + mean_lines if reference_first else mean_lines + reference
    for x_pos, colour, text in ordered:
        # Only the bottom panel carries line labels, since only it shows a legend
        panel.axvline(x_pos, color=colour, linestyle="dashed", label=text if row == 2 else None)

ax[2].legend(loc='upper right')

In [None]:
## Same histograms for the snowfall lapse rate (9-cell box) - not really helpful
bins = np.linspace(-0.00001, 0.00001, 100)

fig, ax = plt.subplots(3, 1, figsize=(16, 9), dpi=300)

# (frame, title, COSMO legend label)
hist_panels = [
    (joint_df_box1, "Full timeseries LR (m/m)", "COSMO"),
    (daytime_box1, "Daytime LR (m/m)", "COSMO"),
    (nighttime_box1, "Nighttime LR (m/m)", "COSMO 9-cell"),
]
for row, (frame, title, cosmo_label) in enumerate(hist_panels):
    panel = ax[row]
    panel.hist(frame["lapse_sf"], bins, alpha=0.5, label='AWS', color="red")
    panel.hist(frame["lr_sf"], bins, alpha=0.5, label=cosmo_label, color="blue")
    panel.set_title(title)
    # Dashed lines mark the means; only the bottom panel labels them for its legend
    for column, colour, text in (("lapse_sf", "darkred", "AWS Mean"),
                                 ("lr_sf", "deepskyblue", "COSMO Mean")):
        panel.axvline(np.nanmean(frame[column]), color=colour, linestyle="dashed",
                      label=text if row == 2 else None)

ax[2].legend(loc='upper right')

In [None]:
## Same histograms for the relative-humidity lapse rate (9-cell box)
bins = np.linspace(-0.05, 0.05, 100)

fig, ax = plt.subplots(3, 1, figsize=(16, 9), dpi=300)

# (frame, title, COSMO legend label)
hist_panels = [
    (joint_df_box1, "Full timeseries LR (%/m)", "COSMO"),
    (daytime_box1, "Daytime LR (%/m)", "COSMO"),
    (nighttime_box1, "Nighttime LR (%/m)", "COSMO 9-cell"),
]
for row, (frame, title, cosmo_label) in enumerate(hist_panels):
    panel = ax[row]
    panel.hist(frame["lapse_rh"], bins, alpha=0.5, label='AWS', color="red")
    panel.hist(frame["lr_rh2"], bins, alpha=0.5, label=cosmo_label, color="blue")
    panel.set_title(title)
    # Dashed lines mark the means; only the bottom panel labels them for its legend
    for column, colour, text in (("lapse_rh", "darkred", "AWS Mean"),
                                 ("lr_rh2", "deepskyblue", "COSMO Mean")):
        panel.axvline(np.nanmean(frame[column]), color=colour, linestyle="dashed",
                      label=text if row == 2 else None)

ax[2].legend(loc='upper right')