In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import salem
import geopandas as gpd
import cartopy.crs as ccrs
import cartopy.feature as cfeature

In [2]:
def is_windows(platform: str = sys.platform) -> bool:
    """Return True only when ``platform`` names a Windows interpreter.

    A plain substring test (``'win' in platform``) also matches macOS, whose
    platform string is ``'darwin'``; a prefix test avoids that false positive.

    Parameters
    ----------
    platform : str
        Platform identifier in the format of ``sys.platform``.

    Returns
    -------
    bool
        True if the identifier starts with ``'win'`` (e.g. ``'win32'``).
    """
    return platform.startswith("win")


# Root folder of the CORDEX-DKRZ climate files; the location depends on the machine.
# The trailing slash matters: later cells build file names with `path + "<file>"`.
if is_windows():
    path = "E:/OneDrive - uibk.ac.at/PhD/PhD/Data/Hintereisferner/Climate/CORDEX-DKRZ/"
else:
    path = "/mnt/C4AEBBABAEBB9500/OneDrive - uibk.ac.at/PhD/PhD/Data/Hintereisferner/Climate/CORDEX-DKRZ/"

In [None]:
## First load snowfall data and fix timestamp
ds = xr.open_dataset(path+"cosmo_1998-2010_1d_prsn.nc")
print(ds.time.values[0], ds.time.values[-1])
print(ds.time_bnds.values[0])
# Temporarily keep the raw values so the resampled result can be verified below.
oldvals = ds['prsn'].values
#clean-up timestamps, given at noon of each day, bounds from 1999-01-01 to 1999-01-02 -> values should be assigned to 01-02
# Resampling to daily bins only relabels the timestamps here (presumably one value per day - the check below confirms it).
resampled = ds[['prsn']].resample(time="1D").sum()
print(resampled)
## Ensure they are equal
# assert_allclose returns None and raises AssertionError on mismatch, so it is
# called directly instead of being wrapped in print() (which only printed "None").
np.testing.assert_allclose(oldvals, resampled['prsn'], atol=1e-07)
del oldvals

In [None]:
## Prepare pr data
pr = xr.open_dataset(path+"cosmo_1998-2010_1h_pr.nc")
print(pr.time.values[0], pr.time.values[-1])
print(pr.time_bnds.values[0])

#given at every half-hour - time_bnds from 00h to 01h -> values should be assigned to 01h
# Lower-case "h" is the current pandas alias for hourly frequency (upper-case "H"
# is deprecated) and matches the "1h" used for the snowfall resampling further down.
time_range = pd.date_range("1998-11-01T01:00:00", "2010-01-01", freq="1h")

# Overwriting the time axis is only valid if the new axis has exactly one stamp per record.
if len(time_range) != pr.sizes["time"]:
    raise ValueError(
        f"New time axis has {len(time_range)} steps but the dataset has {pr.sizes['time']}"
    )
pr['time'] = ('time', time_range)
## Convert units
pr['pr'] = pr['pr'] * 3600 #kg/m2s to kg/m2h


In [None]:
### Do a check on magnitude! Noticed one outlier!

# Get proj and outlines, dsr is single downloaded file as a placeholder
# startswith() instead of a substring test: 'win' is also contained in 'darwin' (macOS).
if sys.platform.startswith('win'):
    dsr = salem.open_metum_dataset("E:/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/for_Niklas/cosmo_2009_1h.nc")
    hef = gpd.read_file("E:/OneDrive/PhD/PhD/Data/Hintereisferner/Static/RGI6/HEF_RGI6.shp")
else:
    dsr = salem.open_metum_dataset("/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/for_Niklas/cosmo_2009_1h.nc")
    hef = gpd.read_file("/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Static/RGI6/HEF_RGI6.shp")
# Bring the glacier outline into the projection reported by the placeholder dataset.
reproj_hef = hef.to_crs(dsr.pyproj_srs)
bounds = reproj_hef.bounds
print(bounds)

# Merge all features into one geometry and take its centroid.
centroid = reproj_hef.dissolve().centroid
print(centroid)

# Positional indices of the grid coordinates closest to the centroid (printed for information only).
idx_lat = np.argmin(np.abs(pr.rlat.values  - centroid.y.values))
idx_lon = np.argmin(np.abs(pr.rlon.values  - centroid.x.values))
print(idx_lat, idx_lon)

# Create single file at centroid from which to distribute using lapse rates
x_cords, y_cords = centroid.iloc[0].xy
x_cord = x_cords[0]
y_cord = y_cords[0]
print(x_cord, y_cord)

# Time series of the grid cell nearest to the centroid.
ds_closest = pr.sel(rlat=y_cord, rlon=x_cord, method='nearest')

fig, ax = plt.subplots(1,1, figsize=(16,9), dpi=150)
ax.plot(ds_closest.time, ds_closest['pr'])
ax.set_ylabel("Total Precipitation rate [kg/m2 h]")

In [None]:
## Get idx of outlier event
# Positional index along time of the largest value in the nearest-cell series.
time_max = ds_closest.pr.argmax()
# List every nearest-cell value above 100.
print(ds_closest.pr.where(ds_closest.pr > 100, drop=True))
## Find timestep and replace it with neighbouring values
# Largest value over the whole grid at that same time index.
print(np.nanmax(pr.pr.isel(time=time_max.values).values))
# Last expression of the cell: echo the index as cell output.
time_max



In [None]:
## Fix outlier by hand
## replace this timestep with mean of previous and next timestep
# The timestamp to repair is the single source of truth: both the neighbouring
# steps and the replacement mask are derived from it, so they cannot drift apart.
outlier_time = np.datetime64("2005-03-31T23:00:00")
outlier_hits = np.flatnonzero(pr.time.values == outlier_time)
if outlier_hits.size != 1:
    raise ValueError(f"Expected exactly one timestep at {outlier_time}, found {outlier_hits.size}")
outlier_idx = int(outlier_hits[0])
if outlier_idx == 0 or outlier_idx == pr.sizes["time"] - 1:
    # Without this guard index -1 would silently wrap around to the last record.
    raise ValueError("Outlier sits on the edge of the time axis; it has no two neighbours to average")
if outlier_idx != int(time_max.values):
    print(f"Warning: argmax index {int(time_max.values)} differs from the index of {outlier_time} ({outlier_idx})")

mean_surr = (pr.pr.isel(time=outlier_idx-1) + pr.pr.isel(time=outlier_idx+1)) / 2
pr["pr"] = xr.where(
    (pr.time == outlier_time), mean_surr, pr["pr"]
)

#repeat plot
# Re-extract the nearest-cell series from the repaired field.
ds_closest = pr.sel(rlat=y_cord, rlon=x_cord, method='nearest')

fig, ax = plt.subplots(1,1, figsize=(16,9), dpi=150)
ax.plot(ds_closest.time, ds_closest['pr'])
ax.set_ylabel("Fixed Total Precipitation rate [kg/m2 h]")


In [None]:
## Store precipitation fields to file!

# Cleaned hourly field.
pr.to_netcdf(path+"cosmo_1998-2010_1hcleaned_pr.nc")

# Daily sums of the hourly field.
pr_daily = pr[['pr']].resample(time="1D").sum()
pr_daily.to_netcdf(path+"cosmo_1998-2010_1d_pr.nc")
# Daily sums forward-filled back onto an hourly axis. "1h" is the current
# (non-deprecated) alias and matches the snowfall resampling later in the notebook.
pr_filled = pr_daily.resample(time="1h").pad()
pr_filled.to_netcdf(path+"cosmo_1998-2010_1hffilled_pr.nc")

#Beware last time step 2010-01-01 will consist of just one value!

In [None]:
## Current snowfall values need to be shifted by +1 but previous tests showed that this does not work (see 00-05_CORDEX-DKRZ)
## That's why we ignore this in the following steps. We copy the last timestep just in case.

## Append a copy of the last timestep relabelled as 2010-01-01 (the shift itself stays disabled, see string below)
missing_time = resampled.isel(time= [-1]) # index with a list so the time dimension is preserved
print(missing_time)
# Overwrite the time label of the copy (presumably the day after the last record - confirm against the printout)
timestep = np.datetime64("2010-01-01")
missing_time["time"] = ("time", np.reshape(timestep, (1)))
print(missing_time)
# Concatenate the original series and the relabelled copy along time.
fixed = xr.concat([resampled, missing_time], dim="time", data_vars="minimal", coords="minimal")
print(fixed)
print(fixed['prsn'][-3,:,:].values)
# NOTE: the triple-quoted string below holds the disabled shift step. Being the
# last expression of the cell, the string itself is echoed as the cell output.
"""
#shift fixed values by 1
fixed = fixed.shift(time=1)
print(fixed)
print(fixed['prsn'][-2,:,:].values)
"""

In [10]:
## convert kg/m2s to kg/m2 d
# The conversion used to be "only run once!": re-running the cell multiplied the
# already converted field again. The units attribute now records that the
# conversion happened, which makes the cell safe to re-run.
PRSN_DAILY_UNITS = "kg m-2 d-1"
if fixed['prsn'].attrs.get('units') != PRSN_DAILY_UNITS:
    fixed['prsn'] = fixed['prsn'] * 3600 * 24
    fixed['prsn'].attrs['units'] = PRSN_DAILY_UNITS

# repeat fix for timestep in total precipitation for snowfall to get rid of artefacts
mean_prsn = (fixed['prsn'].sel(time="2005-03-30") + fixed['prsn'].sel(time="2005-04-01")) / 2
## Fix outlier by hand
## replace this timestep with mean of previous and next timestep
fixed["prsn"] = xr.where(
    (fixed.time == np.datetime64("2005-03-31")), mean_prsn, fixed["prsn"]
)
# xr.where may drop attributes; restore the marker so the guard above keeps working.
fixed['prsn'].attrs['units'] = PRSN_DAILY_UNITS

In [None]:
# Cleaned daily snowfall.
fixed.to_netcdf(path+"cosmo_1998-2010_1dcleaned_prsn.nc")

# Daily snowfall forward-filled onto an hourly axis.
filled = fixed.resample(time="1h").pad()
print(filled)
filled.to_netcdf(path+"cosmo_1998-2010_1hffilled_prsn.nc")

## Check timestep where this super large value occurred
# time_max is a positional index into the hourly `pr` axis. The filled arrays do not
# necessarily start at the same timestamp, so the position is translated into a
# timestamp first and the arrays are then selected by label.
# Assumes `pr` still has the time axis it had when time_max was computed (not cropped yet).
outlier_time = pr.time.values[int(time_max.values)]
print(filled.prsn.sel(time=outlier_time).mean(dim=["rlat","rlon"]).values)
print(pr_filled.pr.sel(time=outlier_time).mean(dim=["rlat","rlon"]).values)

In [None]:
# time_max is a positional index into the hourly `pr` axis; convert it to a timestamp
# and select by label, because the filled arrays need not share pr's start time.
# Assumes `pr` still has the time axis it had when time_max was computed (not cropped yet).
outlier_time = pr.time.values[int(time_max.values)]
print(filled.prsn.sel(time=outlier_time).mean(dim=["rlat","rlon"]).values)
print(pr_filled.pr.sel(time=outlier_time).mean(dim=["rlat","rlon"]).values)

In [None]:
# Inspect the hourly forward-filled snowfall dataset.
print(filled)

In [None]:
# Optional reload of the files written earlier (disabled; the in-memory objects are used instead).
#pr_1h = xr.open_dataset(path+"cosmo_1998-2010_1hcleaned_pr.nc") #units in kg/m2/h 
#pr
#pr_daily = xr.open_dataset(path+"cosmo_1998-2010_1d_pr.nc") #units in kg/m2/d
#pr_daily
#pr_filled = xr.open_dataset(path+"cosmo_1998-2010_1hffilled_pr.nc") #units in kg/m2/d
#pr_filled

## Crop data to same timeframe starting from 1999-01-01 ##
# Each dataset is rebound to its cropped version; the printed lengths allow a quick comparison.
pr = pr.sel(time=slice("1999-01-01",None))
print(len(pr.time))
pr_daily = pr_daily.sel(time=slice("1999-01-01",None))
print(len(pr_daily.time))
pr_filled = pr_filled.sel(time=slice("1999-01-01",None))
print(len(pr_filled.time))
#Date chosen also because for snowfall we only have data from 1999-01-02 onwards

# Last expression: display the cropped forward-filled dataset.
pr_filled

In [None]:
## Test that the daily aggregation and the hourly forward-fill agree at one sample cell
# assert_allclose returns None and raises on mismatch, so it is called directly;
# wrapping it in print() only produced a misleading "None".
# 1) the mean of the first 24 forward-filled hourly values equals the first daily value
np.testing.assert_allclose(pr_daily.pr[0,1,1].values, pr_filled['pr'][0:24,1,1].mean(), atol=1e-07)
# 2) the sum of the first 24 hourly values equals the first daily value
np.testing.assert_allclose(pr_daily.pr[0,1,1].values, pr.pr[0:24,1,1].sum(), atol=1e-07)

In [16]:
## downscale hourly snowfall field
## implement temperature-based thresholding
# Folder holding the Jennings et al. (2018) station files.
# startswith() instead of a substring test: 'win' is also contained in 'darwin' (macOS).
if sys.platform.startswith('win'):
    extrapath = "E:/OneDrive - uibk.ac.at/PhD/PhD/Data/Hintereisferner/Climate/Jennings_etal_2018/"
else:
    extrapath = "/mnt/C4AEBBABAEBB9500/OneDrive - uibk.ac.at/PhD/PhD/Data/Hintereisferner/Climate/Jennings_etal_2018/"

station_locs = pd.read_csv(extrapath+"jennings_et_al_2018_file1_station_locs_elev.csv")
station_thres = pd.read_csv(extrapath+"jennings_et_al_2018_file3_temp50_observed_by_station.csv")

# Join on the columns both tables share and drop incomplete rows.
# Done as one expression (no inplace=True) so re-running the cell is side-effect free.
merged_thres = pd.merge(station_locs, station_thres).dropna()

# Keep stations inside the lon/lat box with Elevation > 1000 (presumably metres - confirm).
in_lon_box = (merged_thres['Longitude'] >= 6) & (merged_thres['Longitude'] <= 14)
in_lat_box = (merged_thres['Latitude'] >= 45.5) & (merged_thres['Latitude'] <= 47.5)
is_high = merged_thres['Elevation'] > 1000
# .copy() gives an independent frame, so columns added later do not trigger
# pandas' SettingWithCopyWarning on a slice of merged_thres.
merged_alps = merged_thres.loc[in_lon_box & in_lat_box & is_high].copy()


In [None]:
# Mean temp50 value over the selected stations (shown as cell output).
merged_alps.temp50.mean()

In [18]:
# Read the regional glacier outlines and the single HEF outline.
# startswith() instead of a substring test: 'win' is also contained in 'darwin' (macOS).
if sys.platform.startswith('win'):
    glaciers = salem.read_shapefile("E:/OneDrive - uibk.ac.at/PhD/PhD/Data/Spatial/RGI7/RGI2000-v7.0-G-11_central_europe/RGI2000-v7.0-G-11_central_europe.shp")
    hef = salem.read_shapefile("E:/OneDrive - uibk.ac.at/PhD/PhD/Data/Hintereisferner/Static/RGI6/HEF_RGI6.shp")
else:
    glaciers = salem.read_shapefile("/mnt/C4AEBBABAEBB9500/OneDrive - uibk.ac.at/PhD/PhD/Data/Spatial/RGI7/RGI2000-v7.0-G-11_central_europe/RGI2000-v7.0-G-11_central_europe.shp")
    hef = salem.read_shapefile("/mnt/C4AEBBABAEBB9500/OneDrive - uibk.ac.at/PhD/PhD/Data/Hintereisferner/Static/RGI6/HEF_RGI6.shp")
# Convert glacier geometries to lat/lon (EPSG:4326) if they are stored in another CRS
if glaciers.crs is not None and glaciers.crs != "EPSG:4326":
    glaciers = glaciers.to_crs("EPSG:4326")  # Convert to lat/lon if needed


In [None]:
# Convert station data to a GeoDataFrame (points built from the Longitude/Latitude columns, lat/lon CRS)
import geopandas as gpd
from shapely.geometry import Point
stations = gpd.GeoDataFrame(merged_alps, geometry=gpd.points_from_xy(merged_alps["Longitude"], merged_alps["Latitude"]), crs="EPSG:4326")

# Convert both datasets to a projected CRS (UTM Zone 32N, good for Alps)
utm_crs = "EPSG:32632"
stations_reproj = stations.to_crs(utm_crs)
hef_reproj = hef.to_crs(utm_crs)

# Get the single geometry (assuming only one feature in the shapefile)
glacier_geom = hef_reproj.geometry.iloc[0]

# Compute distances from each station to the glacier
stations_reproj["distance_km"] = stations_reproj.geometry.distance(glacier_geom) / 1000  # Convert meters to km

# Find the 10 closest stations
closest_stations = stations_reproj.nsmallest(10, "distance_km")

# Print the closest stations and the mean of their temp50 values
print(closest_stations[["Latitude", "Longitude", "temp50", "distance_km","Elevation"]])
print(closest_stations['temp50'].mean())

In [None]:
# Load the temp50 raster with xarray (opens as an xarray Dataset)
raster = xr.open_dataset(extrapath+"jennings_et_al_2018_file4_temp50_raster.tif")  # Opens as xarray dataset
# Keep the first band only.
raster = raster.isel(band=0)
## crop raster to extent (roughly)
# The y slice runs from high to low, which presumably matches a descending y axis - confirm.
crop_raster = raster.sel(x=slice(3, 17), y=slice(49.5, 43.5))

# Extract the raster value nearest to the glacier centroid (taken from the uncropped raster)
glacier_temp = raster.sel(x=hef.centroid.x.item(), y=hef.centroid.y.item(), method="nearest").band_data.values
print(glacier_temp)


In [None]:
# Extract the raster value nearest to each station location (row-wise lookup in the cropped raster)
stations["raster_temp"] = stations.apply(lambda row: 
    crop_raster.sel(x=row.geometry.x, y=row.geometry.y, method="nearest").band_data.item(), axis=1)

print(f"Temperature at glacier centroid: {glacier_temp}")
print(stations[["Latitude", "Longitude", "temp50", "raster_temp"]])

# --- PLOTTING ---
fig, ax = plt.subplots(figsize=(16, 9), dpi=150, subplot_kw={"projection": ccrs.PlateCarree()})

# Shared colour limits so raster and station markers use one scale.
vmin = min(crop_raster.band_data.min().values, stations["temp50"].min())  # Get min temperature
vmax = max(crop_raster.band_data.max().values, stations["temp50"].max())  # Get max temperature

# Plot raster with common colormap
img = crop_raster.band_data.plot.imshow(ax=ax, cmap="plasma", vmin=vmin, vmax=vmax, alpha=0.7, add_colorbar=False)

# Plot stations using the same colormap and limits (coloured by the observed temp50)
sc = ax.scatter(stations.geometry.x, stations.geometry.y, c=stations["temp50"], cmap="plasma",
                edgecolors="k", s=50, transform=ccrs.PlateCarree(), vmin=vmin, vmax=vmax, label="Stations")

# Add gridlines
gl = ax.gridlines(draw_labels=True, linestyle="--", linewidth=0.5, color="gray")

# Customize gridline labels
gl.top_labels = False  # Hide labels at the top
gl.right_labels = False  # Hide labels on the right
gl.xlabel_style = {"size": 10, "color": "black"}  # Customize x-axis labels
gl.ylabel_style = {"size": 10, "color": "black"}  # Customize y-axis labels

# Plot glacier (first feature of the HEF shapefile)
ax.add_geometries([hef.iloc[0].geometry], crs=ccrs.PlateCarree(), edgecolor="black", facecolor="red", linewidth=2, label="Glacier")

# Add features
ax.set_extent([6, 14, 45.5, 47.5], crs=ccrs.PlateCarree())
ax.add_feature(cfeature.COASTLINE)
ax.add_feature(cfeature.BORDERS, linestyle=":")
ax.add_feature(cfeature.LAND, edgecolor="black", facecolor="lightgray")

# Add colorbar (built from the station scatter; raster shares the same limits)
cbar = plt.colorbar(sc, ax=ax, orientation="horizontal", label="Temperature (°C)")

plt.legend()
plt.show()

In [22]:
## Build percentages, where division by 0 not possible, set to 0
# (the division itself is disabled here; a later cell computes pr_perc in a loop)
#pr['pr_perc'] = np.divide(pr['pr'], pr_filled['pr'], out=np.zeros_like(pr_filled['pr']),
#                          where=pr_filled['pr']!=0)
# Attach the forward-filled daily sums to the hourly dataset as a new variable.
pr['pr_daily'] = pr_filled['pr']

In [None]:
## Load snowfall data as comparison
# Work on copies of the daily (`fixed`) and hourly forward-filled (`filled`) snowfall datasets.
sf = fixed.copy()
print(sf)
#sf_daily = xr.open_dataset(path+"cosmo_1998-2010_1d_prsn.nc").sel(time=slice("1999-01-02",None)) #kg/m2/d
#print(sf_daily)
sf_filled = filled.copy()
print(sf_filled)

In [None]:
# The mean of the first 24 forward-filled hourly values must equal the first daily value
# at one sample cell. assert_allclose returns None and raises on mismatch, so it is
# called directly rather than inside print().
np.testing.assert_allclose(sf['prsn'][0,1,1], sf_filled['prsn'][0:24,1,1].mean(), atol=1e-07)

In [None]:
## Check daily precipitation vs snowfall sum
# Mask zero entries with NaN so that time steps without precipitation drop out of the comparison.
pr_daily_values = pr['pr_daily']
sf_daily_values = sf_filled['prsn']
pr['pr_onlyprec'] = (
    ('time','rlat','rlon'),
    np.where(pr_daily_values == 0, np.nan, pr_daily_values),
)
sf_filled['prsn_onlyprec'] = (
    ('time','rlat','rlon'),
    np.where(sf_daily_values == 0, np.nan, sf_daily_values),
)

## kg/m2 d
# Total precipitation minus snowfall; negative values mean more snowfall than precipitation.
dif_field = pr['pr_onlyprec'] - sf_filled['prsn_onlyprec']
print(dif_field.argmin(dim="time"))
dif_field.min(dim="time").plot()

In [None]:
# Histogram of (total precipitation - snowfall): all values on top, negative values only below.
fig, ax = plt.subplots(2,1, figsize=(16,9))
# 1-wide bins spanning the data range; computed once and reused for both panels and the ticks.
dif_bins = np.arange(round(np.nanmin(dif_field)-1), np.nanmax(dif_field)+1, 1)

dif_field.plot.hist(bins=dif_bins, ax=ax[0], edgecolor='black', density=True)
ax[0].set_xlabel("Tot PR - SF [kg/m2 d]")
#ax[0].set_xlim(-30,50)
ax[0].set_ylabel("Frequency")

negative_only = dif_field.where(dif_field < 0)
negative_only.plot.hist(bins=dif_bins, ax=ax[1], edgecolor='black', density=True)
ax[1].set_xlabel("(Tot PR - SF [kg/m2 d]) < 0")
# One tick per bar
ax[1].set_xticks(dif_bins)
ax[1].set_xlim(-20,1)
ax[1].set_ylabel("Frequency")

#Why does it look so different now?
## Treat negative values as they are and just use percentages derived from total precipitation percentages
## Calculate percentage of occurrences
## For distribution use 1°C first, go towards 1.5°C - distribute mismatched snowfall amounts

In [None]:
## Count occurrences, where there is more snowfall than precipitation
# For each threshold: number of values below it, number of valid values, and their ratio.
for threshold, label in (
    (0, "Percentage of negative values ="),
    (-20, "Percentage of values < -20 ="),
):
    test = np.where(dif_field < threshold, dif_field, np.nan)
    n_below = np.count_nonzero(~np.isnan(test))
    n_valid = np.count_nonzero(~np.isnan(dif_field))
    print(n_below)
    print(n_valid)
    print(label, n_below / n_valid)
    print("\n--------------------")


In [None]:
## Repeat just over HEF cells
# startswith() instead of a substring test: 'win' is also contained in 'darwin' (macOS).
if sys.platform.startswith('win'):
    hef_crop = xr.open_dataset("E:/OneDrive - uibk.ac.at/PhD/PhD/Data/Hintereisferner/Climate/COSMO/COSMO_HEF_crop.nc")
else:
    hef_crop = xr.open_dataset("/mnt/C4AEBBABAEBB9500/OneDrive - uibk.ac.at/PhD/PhD/Data/Hintereisferner/Climate/COSMO/COSMO_HEF_crop.nc")
print(hef_crop)
# Restrict the difference field to a fixed rlat/rlon window around the glacier.
cropdif = dif_field.sel(rlat=slice(-0.23, -0.17), rlon=slice(0.49, 0.55))
# Keep only negative differences (more snowfall than precipitation); everything else becomes NaN.
test = np.where(cropdif < 0, cropdif, np.nan)
print(np.count_nonzero(~np.isnan(test)))
print(np.count_nonzero(~np.isnan(cropdif)))
# Ratio of negative values to all valid values (a fraction, not multiplied by 100).
print("Percentage =", np.count_nonzero(~np.isnan(test)) / np.count_nonzero(~np.isnan(cropdif)))

In [None]:
# what happens if we exclude last timestep - basically the same (tested it, not shown)
# repeat figure over HEF
fig, ax = plt.subplots(2,1, figsize=(16,9))
# 1-wide bins spanning the cropped data range; computed once and reused below.
crop_bins = np.arange(round(np.nanmin(cropdif)-1), np.nanmax(cropdif)+1, 1)

cropdif.plot.hist(bins=crop_bins, ax=ax[0], edgecolor='black', density=True)
ax[0].set_xlabel("Tot PR - SF [kg/m2 d]")
#ax[0].set_xlim(-30,50)
ax[0].set_ylabel("Frequency")
ax[0].set_title("Over 3x3 HEF")

negative_crop = cropdif.where(cropdif < 0)
negative_crop.plot.hist(bins=crop_bins, ax=ax[1], edgecolor='black', density=True)
ax[1].set_xlabel("(Tot PR - SF [kg/m2 d]) < 0")
# One tick per bar
ax[1].set_xticks(crop_bins)
ax[1].set_xlim(-20,1)
ax[1].set_ylabel("Frequency")

In [None]:
# Same counts as for the full field, restricted to the window around the glacier:
# values below the threshold, valid values, and their ratio.
for threshold, label in (
    (0, "Percentage of negative values ="),
    (-20, "Percentage of values < -20 ="),
):
    test = np.where(cropdif < threshold, cropdif, np.nan)
    n_below = np.count_nonzero(~np.isnan(test))
    n_valid = np.count_nonzero(~np.isnan(cropdif))
    print(n_below)
    print(n_valid)
    print(label, n_below / n_valid)
    print("\n--------------------")

In [None]:
## Use Hantel et al., parameterisation to derive temperature-dependent function
#load temperature field
temp = xr.open_dataset(path+"cosmo_1998-2010_1h_tas.nc").sel(time=slice("1999-01-01",None))
## Append a copy of the last timestep relabelled as 2010-01-01T00:00 (no values are shifted here)
# NOTE(review): if the file already contains 2010-01-01T00:00 this creates a duplicate stamp - confirm.
missing_time = temp.isel(time= [-1]) # index with a list so the time dimension is preserved
# Overwrite the time label of the copy
timestep = np.datetime64("2010-01-01T00:00:00")
missing_time["time"] = ("time", np.reshape(timestep, (1)))
print(missing_time)
temp_fix = xr.concat([temp, missing_time], dim="time", data_vars="minimal", coords="minimal")

zero_temperature = 273.15 #K
# Centre of the transfer function: the raster value extracted at the glacier centroid.
center_snow_transfer_function = glacier_temp
spread_snow_transfer_function = 1.0 #from Hantel et al.
# Compute temperature-dependent snowfall fraction
# tanh transfer function: 0.5 at the centre temperature, approaching 1 below it and 0 above it.

f_snow = 0.5 * (-np.tanh((temp_fix.tas - zero_temperature - center_snow_transfer_function) * spread_snow_transfer_function) + 1.0)

In [None]:
# Display the temperature dataset with the appended timestep.
temp_fix

In [None]:
# Zero array with the same shape and dtype as the hourly precipitation data; show its shape.
empty_field = np.zeros_like(pr['pr'].data)
empty_field.shape

In [34]:
# Hourly share of the daily precipitation total, computed one time step at a time.
# NOTE(review): this gate is a substring test, so it is also true on macOS ('darwin');
# other cells use the same gate and rely on pr['pr_perc'] existing, so change them together.
if 'win' in sys.platform:
    empty_field = np.zeros_like(pr['pr'].data)

    # Divide hourly by forward-filled daily values; where the daily value is 0 the
    # pre-filled zero from `out` is kept instead of dividing.
    for t in range(empty_field.shape[0]):
        empty_field[t,:,:] = np.divide(pr['pr'][t,:,:], pr_filled['pr'][t,:,:], out=np.zeros_like(pr_filled['pr'][t,:,:]),
                            where=pr_filled['pr'][t,:,:]!=0)

    pr['pr_perc'] = (('time','rlat','rlon'), empty_field)
    

In [None]:
### Derive hourly snowfall fields
# Switch between the two methods below; the option-2 branch lives in the next cell.
option = 1
# Option 1: Use total precipitation, split into snow/rain based on temperature transfer function
# Option 2: Distribute daily snowfall amounts based on hourly total precipitation percentages and temperature
if option == 1:
    print("Distributing snowfall based on temperature transfer function")
    #Unit conversion? pr in kg/m2 h - snowfall needs to be in m for COSIPY -> current unit then in SWE but need to transfer it using density_fresh_snow/water_density?
    #SNOWFALL = (RRR[t]/1000.0)*(water_density/density_fresh_snow)*(0.5*(-np.tanh(((T2[t]-zero_temperature) - center_snow_transfer_function) * spread_snow_transfer_function) + 1.0))
    #RAIN = RRR[t]-SNOWFALL*(density_fresh_snow/water_density) * 1000.0
    # Previous approach (daily snowfall times hourly precipitation share), kept for comparison.
    # NOTE(review): substring gate is also true on macOS; needs pr['pr_perc'] from the gated cell above.
    if 'win' in sys.platform:
        sf_hourly_old = pr['pr_perc'] * sf_filled['prsn']
    # Snow part = hourly precipitation times the temperature-dependent snow fraction; rain is the remainder.
    sf_hourly = pr['pr'] * f_snow
    hourly_rain = pr['pr'] - sf_hourly

    sf_filled['prsn_1h'] = sf_hourly

In [36]:
# Option 2: redistribute the daily snowfall to hours, weighted by the hourly
# precipitation share and the temperature-dependent snow fraction.
# NOTE(review): needs pr['pr_perc'], which is only created inside a platform-gated cell.
if option == 2:
    print("Using daily snowfall fields.")
    ## use percentage to redistribute, technically should only do that where more or equal amount
    ## more does not make sense if pr says 0 but snowfall says something else .., percentage to daily will be 0 then .. can't change it

    #sf_filled['prsn_1h'] = (('time','rlat','rlon'),
    #                        np.where(sf_filled['prsn'] >= pr_1h['pr_daily'],
    #                                 sf_filled['prsn'] * pr_1h['pr_perc'], sf_filled['prsn']))

    ## Can we implement this with a temperature threshold?

    # Compute unnormalized hourly snowfall (old = share only, new = share times snow fraction)
    sf_hourly_old = pr['pr_perc'] * sf_filled['prsn']
    sf_hourly_unnormalized = pr['pr_perc'] * sf_filled['prsn'] * f_snow

    # Compute daily sums for renormalization (grouped by calendar date)
    sum_hourly_snow_frac = sf_hourly_unnormalized.groupby(sf_filled.time.dt.date).sum(dim="time")

    # Normalize each hourly value to preserve daily snowfall total #sel date=ds.time.dt.date repeats values
    sf_hourly = sf_hourly_unnormalized / sum_hourly_snow_frac.sel(date=sf_filled.time.dt.date) * sf_filled.prsn

    # Replace NaNs (if any) from division by zero where no snowfall occurs
    sf_hourly = sf_hourly.fillna(0)

    sf_filled['prsn_1h'] = sf_hourly

In [None]:
# Re-aggregate the hourly snowfall to daily sums and compare with the daily snowfall dataset.
test = sf_hourly.resample(time="1D").sum()
print(np.allclose(sf.prsn, test.data, atol=1e-4))  # Should return True

## does not match, because total prec. and snowfall dataset also didn't match so pr perc is not exactly correct either?
# Spatial minimum of (re-aggregated - original) per day.
diff = test-sf['prsn']
diff.min(dim=['rlat','rlon']).plot()

In [38]:
# Same comparison for the previous approach (sf_hourly_old), plus spot checks at one grid cell.
# NOTE(review): substring gate is also true on macOS; sf_hourly_old only exists where its gated cell ran.
if 'win' in sys.platform:
    test = sf_hourly_old.resample(time="1D").sum()
    print(np.allclose(sf.prsn, test.data, atol=1e-4))  # Should return True

    ## does not match, because total prec. and snowfall dataset also didn't match so pr perc is not exactly correct either?
    diff = test-sf['prsn']
    diff.min(dim=['rlat','rlon']).plot()
    
    # First 24 hours at cell [20, 13]: old hourly sum, mean of the filled daily value, new hourly sum.
    print(sf_hourly_old[0:24,20,13].sum())
    print(sf_filled['prsn'][0:24,20,13].mean())
    print(sf_filled['prsn_1h'][0:24,20,13].sum())

In [None]:
## Do a quick check at one example
# Indices where snowfall is at least as large as the (non-zero) daily precipitation.
print(np.where( (sf_filled['prsn'] >= pr['pr_daily']) & (pr['pr_daily'] > 0) ))
# First 24 hours at cell [20, 13]: derived hourly snowfall and hourly precipitation.
print(sf_filled['prsn_1h'][0:24,20,13].values)
print(pr['pr'][0:24,20,13].values)
print("\n----------------------------")
# Hourly snowfall sum versus the mean of the forward-filled daily value for the same window.
print(sf_filled['prsn_1h'][0:24,20,13].sum())
print(sf_filled['prsn'][0:24,20,13].mean())


In [40]:
## Look at sf_filled vals to see if it actually happened as we wanted it
# Drop the last timestep, then aggregate to days: sum of the derived hourly snowfall (test1)
# and mean of the forward-filled daily snowfall (test2).
test1 = sf_filled['prsn_1h'].isel(time=slice(None,-1)).resample(time="1D").sum()
test2 = sf_filled['prsn'].isel(time=slice(None,-1)).resample(time="1D").mean()

# NOTE(review): substring gate is also true on macOS; sf_hourly_old only exists where its gated cell ran.
if 'win' in sys.platform:
    # Daily sums of the previous approach, for comparison.
    test3 = sf_hourly_old.isel(time=slice(None,-1)).resample(time="1D").sum()

In [41]:
# Compare daily sums of the previous and the current approach (needs test3 from the gated cell above).
if 'win' in sys.platform:
    print(np.allclose(test3, test1, atol=1e-2))  # Should return True

In [42]:
## check why does it not match?

In [43]:
# Absolute difference between the daily sums of both approaches (needs test3 from a gated cell).
if 'win' in sys.platform:
    #compare to old approach
    diff_old_new = np.abs(test1 - test3)
    print(np.nanmax(diff_old_new))
    # Spatial maximum of the difference per day.
    diff_old_new.max(dim=['rlat','rlon']).plot()

In [44]:
# Hard check: raises AssertionError if the two approaches differ beyond the tolerance.
if 'win' in sys.platform:
    np.testing.assert_allclose(test1, test3, atol=1e-3)

In [None]:
# Spatial mean of (derived daily snowfall - original daily snowfall) over time.
fig, ax = plt.subplots(1,1, figsize=(6,3), dpi=300)
mean_daily_diff = (test1-test2).mean(dim=['rlat','rlon'])
ax.plot(test1.time, mean_daily_diff)
ax.set_ylabel("new_prsn - prsn [kg/m2 d]")

# Running total of that difference, ignoring NaNs.
print(np.nancumsum(mean_daily_diff))
## units is still in [mm] we're okay with that difference

In [46]:
# NOTE(review): substring gate is also true on macOS ('darwin').
if 'win' in sys.platform:
    ## Check RAIN amounts, where negative - need to fix values to 0
    # Rain = hourly precipitation minus derived hourly snowfall.
    rain = pr['pr'] - sf_filled['prsn_1h']
    print(rain)

    # Spatial mean of the rain field over time.
    fig, ax = plt.subplots(1,1, figsize=(6,3), dpi=300)
    ax.plot(rain.time, rain.mean(dim=['rlat','rlon']))

In [None]:
## Fix differences ... (don't do! Results in bad issues)
#sf_filled['prsn_1h'] = ('time','rlat','rlon'), np.where(pr['pr'] < sf_filled['prsn_1h'], pr['pr'], sf_filled['prsn_1h'])
#instead plot timeseries around HEF
fig, ax  = plt.subplots(1,1, figsize=(6,3), dpi=300)
# Mean hourly snowfall over a fixed rlat/rlon window, divided by 1000.
hef_window = sf_filled.sel(rlat=slice(-0.24, -0.16), rlon=slice(0.46, 0.56))
hef_mean_snowfall = hef_window['prsn_1h'].mean(dim=['rlat','rlon'])
ax.plot(hef_mean_snowfall / 1000)
ax.set_ylabel("Snowfall [m w.e.]")

In [48]:
## Save files
# Write the derived hourly snowfall as SNOWFALL. The auxiliary 'date' coordinate only
# exists when the groupby-based option was used, so it is dropped if present
# (errors='ignore'). This replaces a bare `except:` that retried the write and would
# also have hidden unrelated failures such as I/O errors.
(
    sf_filled[['prsn_1h']]
    .rename({'prsn_1h':'SNOWFALL'})
    .drop_vars('date', errors='ignore')
    .to_netcdf(path+"cosmo_1999_2010_1h_SNOWFALL.nc")
)

# Write the hourly total precipitation as RRR.
pr[['pr']].rename({'pr':'RRR'}).to_netcdf(path+"cosmo_1999_2010_1h_RRR.nc")


In [49]:
# Binned mean snowfall versus temperature for both snowfall estimates.
# NOTE(review): this gate is a substring test, so it is also true on macOS ('darwin');
# it is left unchanged because the cell needs pr['pr_perc'] from a cell with the same gate.
if 'win' in sys.platform:
    snowfall_flat = sf_hourly.values.flatten()
    snowfall_other = (sf_filled['prsn'] * pr['pr_perc']).values.flatten()
    temp_flat = temp_fix.tas.values.flatten()

    # Only time steps with actual snowfall are of interest.
    snowfall_flat = np.where(snowfall_flat > 0, snowfall_flat, np.nan)

    # One joint mask for all three arrays. Filtering each array by its own NaNs
    # could leave them with different lengths (and misaligned samples) whenever
    # temperature or the other snowfall estimate has NaNs of its own.
    valid = ~(np.isnan(snowfall_flat) | np.isnan(temp_flat) | np.isnan(snowfall_other))
    snowfall_flat = snowfall_flat[valid]
    temp_flat = temp_flat[valid]
    snowfall_other = snowfall_other[valid]

    # Define temperature bins (e.g., every 0.5K)
    temp_bins = np.arange(temp_flat.min(), temp_flat.max(), 0.5)
    bin_centers = (temp_bins[:-1] + temp_bins[1:]) / 2  # Midpoints for plotting

    # Compute mean snowfall per temperature bin
    df = pd.DataFrame({"temperature": temp_flat, "snowfall": snowfall_flat, "snowfall_other": snowfall_other})
    df["temp_bin"] = pd.cut(df["temperature"], bins=temp_bins, labels=bin_centers)
    # observed=False keeps empty bins in the result, so the means always have one
    # entry per bin centre and can be plotted against bin_centers.
    mean_snowfall_per_bin = df.groupby("temp_bin", observed=False)["snowfall"].mean()
    mean_snowfallother_per_bin = df.groupby("temp_bin", observed=False)["snowfall_other"].mean()

    # Plot binned mean snowfall vs. temperature
    plt.figure(figsize=(8, 6))
    plt.plot(bin_centers, mean_snowfall_per_bin, marker="o", linestyle="-", color="blue", label="Binned Mean Snowfall")
    plt.plot(bin_centers, mean_snowfallother_per_bin, marker="o", linestyle="-", color="red", label="Binned Mean Other Snowfall")

    # Overlay transfer function shape for comparison (the plot call is disabled below;
    # temp_range is still defined here because a later cell reuses it)
    temp_range = np.linspace(temp_flat.min(), temp_flat.max(), 100)
    f_snow_curve = 0.5 * (-np.tanh((temp_range - zero_temperature) * spread_snow_transfer_function) + 1.0)
    #f_snow_curve *= mean_snowfall_per_bin.max()  # Scale for comparison

    #plt.plot(temp_range, f_snow_curve, color="black", linestyle="dashed", label="Transfer Function Shape")

    # Labels & legend
    plt.xlabel("Temperature (K)")
    plt.ylabel("Mean Snowfall (mm)")
    plt.title("Binned Mean Snowfall vs. Temperature")
    plt.legend()
    plt.grid()
    plt.xlim(267,280)

    # Show the plot
    plt.show()

In [52]:
# Transfer function curve together with the computed snow fraction and the hourly
# snowfall for a fixed index window of the grid.
if 'win' in sys.platform:
    plt.figure()
    shifted_temp = temp_range - zero_temperature -center_snow_transfer_function
    f_snow_curve = 0.5 * (-np.tanh(shifted_temp * spread_snow_transfer_function) + 1.0)
    plt.plot(temp_range, f_snow_curve, 'k--', label="Transfer Function")

    # Same index window for temperature, snow fraction and snowfall.
    window = dict(rlat=slice(20,30), rlon=slice(19,30))
    window_temp = temp_fix.isel(**window).tas.values.flatten()
    plt.scatter(window_temp, f_snow.isel(**window).values.flatten(), s=1, alpha=0.5, label="Computed f_snow")
    window_temp = temp_fix.isel(**window).tas.values.flatten()
    plt.scatter(window_temp, sf_hourly.isel(**window).values.flatten(), s=1, alpha=0.5, label="Snowfall")

    plt.xlabel("Temperature (K)")
    plt.ylabel("Transfer Function Value")
    plt.legend()
    plt.show()


In [None]:
# Display the centre value used in the snow transfer function.
center_snow_transfer_function