In [1]:
import xarray as xr
import salem
import numpy as np

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pathlib
import sys

## Script requires a stored archive of the GEE-derived Landsat albedos

# Data locations on the two machines this notebook is run on.
# Each tuple is ordered as (path_tif, path_shp, aws_path).
_windows_paths = (
    "E:/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/HEF_Landsat_Albedo/",
    "E:/OneDrive/PhD/PhD/Data/Hintereisferner/Static/",
    "E:/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/AWS_Obleitner/",
)
_mounted_paths = (
    "/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/HEF_Landsat_Albedo/",
    "/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Static/",
    "/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/AWS_Obleitner/",
)

# NOTE: this substring test is also true for 'darwin' and 'cygwin', not only 'win32'.
path_tif, path_shp, aws_path = _windows_paths if 'win' in sys.platform else _mounted_paths

# Switch between rebuilding the processed albedo archive and loading it from disk.
generate_file = False

# These packages are only imported when the archive is rebuilt.
if generate_file:
    import rioxarray as rxr
    from rasterio.enums import Resampling

    import geopandas as gpd

In [2]:
# Recorded original coordinates could not be converted into lat/lon coordinates
# (remainder of this note was truncated in the exported notebook).
# Positions were reconstructed from later recorded GPS points of the AWS locations
# (+/-100 ...; remainder of this note was truncated in the exported notebook).
# HEF lower: 46.813570° N; 10.788977° E; 2640 m
# HEF upper: 46.790453° N; 10.747121° E; 3048 m

# Both station files are indexed by their parsed "time" column.
aws_lower = pd.read_csv(f"{aws_path}Fix_HEFlower_01102003_24102004.csv", index_col="time", parse_dates=True)
aws_upper = pd.read_csv(f"{aws_path}Fix_HEFupper_01102003_24102004.csv", index_col="time", parse_dates=True)

In [None]:
def _daily_aws_summary(aws_raw):
    """Aggregate one AWS record to daily values.

    Parameters
    ----------
    aws_raw : pandas.DataFrame
        Time-indexed station record with the columns T, Dir, U, SWI, SWO,
        LWO, LWI, sfc, RH and P.

    Returns
    -------
    pandas.DataFrame
        Daily means of every column, plus daily sums of SWI/SWO, the albedo
        ``alpha`` (daily SWO sum divided by daily SWI sum) and ``sfc``
        expressed relative to the first day of the record.
    """
    daily = aws_raw.resample("1D").agg(
        T=('T', "mean"),
        Dir=('Dir', "mean"),  # NOTE(review): plain arithmetic mean; confirm this is intended if Dir is an angle
        U=('U', "mean"),
        SWIsum=('SWI', "sum"),
        SWImean=("SWI", "mean"),
        SWOsum=('SWO', "sum"),
        SWOmean=("SWO", "mean"),
        LWO=('LWO', 'mean'),
        LWI=('LWI', 'mean'),
        sfc=('sfc', "mean"),
        RH=('RH', "mean"),
        P=('P', "mean"),
    )
    daily['alpha'] = daily['SWOsum'] / daily['SWIsum']
    # .iloc[0] is explicit positional access; `series[0]` on a DatetimeIndex is
    # deprecated and will be interpreted as a label lookup by pandas.
    daily['sfc'] = daily['sfc'] - daily['sfc'].iloc[0]
    return daily


# The raw frames are replaced by their daily summaries, so this cell can only
# be run once per load of the CSV files.
aws_lower = _daily_aws_summary(aws_lower)
aws_upper = _daily_aws_summary(aws_upper)
aws_upper

In [None]:
## Load MetaDataFile and Merge
# Every CSV next to the GeoTIFFs is read and the tables are stacked row-wise
# under a fresh integer index.
list_dfs = []
for fp in pathlib.Path(path_tif).glob('*.csv'):
    print(fp)
    list_dfs.append(pd.read_csv(fp))

metadata = pd.concat(list_dfs, ignore_index=True)
metadata
    

In [None]:
def _scene_timestamp(scene_name):
    """Parse the text following the last '193028_' in a scene name as a timestamp."""
    return pd.to_datetime(scene_name.split('193028_')[-1])


# Index the metadata by acquisition time and expose it as an xarray Dataset.
metadata = metadata.assign(time=metadata['NAME'].apply(_scene_timestamp)).set_index('time')
metadata_ds = metadata.to_xarray()

metadata_ds

In [6]:
def process_albedo_file(filename, shape):
    """Open a raster with salem and restrict it to a region of interest.

    Parameters
    ----------
    filename : path-like
        File handed to ``salem.open_xr_dataset``.
    shape : object
        Outline passed as ``shape=`` to ``Dataset.salem.roi``.

    Returns
    -------
    The dataset returned by ``salem.roi`` for the given outline.
    """
    return salem.open_xr_dataset(filename).salem.roi(shape=shape)
    

In [7]:
# Either rebuild the processed albedo archive from the individual GeoTIFFs
# (generate_file=True) or load the previously written NetCDF file.
if generate_file:

    ## Load Example Albedo file
    # This scene supplies the projection and the x/y grid used below.
    data = salem.open_xr_dataset(path_tif+"LE07_193028_20000204.tif")
    print(data)
    #data.data.plot()

    # Load static file and RGI6.0 shapefile
    rgi6 = salem.read_shapefile(path_shp+"/RGI6/HEF_RGI6.shp")
    print(rgi6.crs)
    # Bring the outline into the projection of the example scene.
    rgi6_reproj = rgi6.to_crs(data.pyproj_srs)
    #rgi6_reproj.plot()

    print(data.salem.grid)
    print(data)

    #reprojected using QGIS!
    test = salem.open_xr_dataset(path_shp+"reproj_nasadem_test.tif")
    # Subset the raster to the outline, sample it on the x/y coordinates of the
    # example scene (nearest neighbour) and mask it with the outline.
    nasadem_reproj = test.salem.subset(shape=rgi6_reproj)
    nasadem_reproj_crop = nasadem_reproj.interp(x=data.x, y=data.y, method="nearest")
    nasadem_reproj_crop = nasadem_reproj_crop.salem.roi(shape=rgi6_reproj)
    nasadem_reproj_crop.data.plot()

    # Print both grids so they can be compared by eye.
    print(nasadem_reproj_crop.salem.grid)
    print(data.salem.grid)

    # Mask the dataset using the shapefile
    # (masked_albedo is not used again in this cell.)
    masked_albedo = data.salem.roi(shape=rgi6_reproj)
    masked_albedo['HGT'] = nasadem_reproj_crop.data
    masked_albedo

    datasets = []
    data_sources = []

    # One dataset per GeoTIFF; file stems are split on '_': the first token is
    # stored as the sensor and the last token is read as YYYYMMDD.
    for fp in pathlib.Path(path_tif).glob('*.tif'):
        filename = str(fp.stem)
        print(filename)
        alb = process_albedo_file(fp, shape=rgi6_reproj)
        
        sensor = filename.split('_')[0]
        date = filename.split('_')[-1]
        timestamp = date[0:4]+'-'+date[4:6]+'-'+date[6:8]
        ts = pd.to_datetime(timestamp)
        
        # Add a length-1 time dimension and attach the sensor as a coordinate on it.
        alb = alb.expand_dims(time=[ts])
        alb = alb.assign_coords(sensor_type=("time", [sensor]))
        
        datasets.append(alb)
        data_sources.append(sensor)
        
    # Stack the scenes along time (in glob order; no sorting happens here).
    alb_dataset = xr.concat(datasets, dim="time")

    alb_dataset = alb_dataset.rename({"data": "albedo"})
    alb_dataset['month'] = alb_dataset.time.dt.month
    alb_dataset['HGT'] = nasadem_reproj_crop.data
    # Values equal to -999 are replaced by NaN.
    alb_dataset['albedo'] = alb_dataset['albedo'].where(alb_dataset['albedo'] != -999, np.nan)
    # NOTE: 'win' in sys.platform is also true for 'darwin' and 'cygwin'.
    if 'win' in sys.platform:
        alb_dataset.to_netcdf(r"E:\OneDrive\PhD\PhD\Data\Hintereisferner\Climate\HEF_processed_albedos.nc")
    else:
        alb_dataset.to_netcdf("/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/HEF_processed_albedos.nc")
else:
    # Load the archive written by the branch above.
    if 'win' in sys.platform:
        alb_dataset = xr.open_dataset(r"E:\OneDrive\PhD\PhD\Data\Hintereisferner\Climate\HEF_processed_albedos.nc")
    else:
        alb_dataset = xr.open_dataset("/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/HEF_processed_albedos.nc")


In [None]:
## Merge metadata
# Order the scenes chronologically and attach the per-scene CLOUDCOVER values
# from the metadata table.
alb_dataset = alb_dataset.sortby('time').assign(CLOUDCOVER=metadata_ds['CLOUDCOVER'])
alb_dataset

In [None]:
# Quick look at the HGT field stored with the albedo scenes.
alb_dataset['HGT'].plot()

In [10]:
def compute_histogram(dataset, bins=50):
    """Plot histogram, empirical CDF and map of the albedo of one scene.

    Parameters
    ----------
    dataset : xarray.Dataset
        Single-scene dataset providing ``albedo``, ``time``, ``sensor_type``,
        ``x`` and ``y``. ``time``/``sensor_type`` may be scalars or length-1.
    bins : int, optional
        Number of equally spaced bin edges between the smallest and largest
        valid albedo (i.e. ``bins - 1`` histogram bars). The default matches
        the previously hard-coded value of 50.

    Raises
    ------
    ValueError
        If the scene contains no valid (non-NaN) albedo value.
    """
    # atleast_1d lets both `isel(time=1)` and `isel(time=[1])` selections
    # produce a clean title instead of a bracketed array repr.
    timestamp = str(np.atleast_1d(dataset.time.values)[0]).split('T')[0]
    sensor = str(np.atleast_1d(dataset.sensor_type.values)[0])

    albedo = dataset.albedo.values.flatten()
    albedo = albedo[~np.isnan(albedo)]
    if albedo.size == 0:
        raise ValueError("compute_histogram: scene contains no valid albedo values")

    bin_edges = np.linspace(albedo.min(), albedo.max(), bins)

    fig, ax = plt.subplots(3, 1, figsize=(16, 9), dpi=300)
    ax[0].hist(albedo, bins=bin_edges, edgecolor="black", alpha=0.7)
    ax[0].set_title(f'Albedo Values on {sensor} {timestamp}')
    ax[0].set_xticks(np.arange(0, 1+0.1, 0.1))
    ax[0].set_xlim(0, 1)
    ax[0].set_ylabel('Frequency')
    ax[0].grid(True)

    # Empirical CDF: the i-th smallest of n values has cumulative probability
    # i/n. (A cumulative sum of the values themselves is not a probability.)
    sorted_data = np.sort(albedo)
    cdf = np.arange(1, sorted_data.size + 1) / sorted_data.size

    percentile_5 = np.percentile(sorted_data, 5)
    percentile_95 = np.percentile(sorted_data, 95)

    ax[1].plot(sorted_data, cdf, color='blue', label='CDF')
    ax[1].axvline(percentile_5, color='red', linestyle='--', label=f'5th Percentile ({percentile_5:.2f})')
    ax[1].axvline(percentile_95, color='green', linestyle='--', label=f'95th Percentile ({percentile_95:.2f})')
    ax[1].set_xticks(np.arange(0, 1+0.1, 0.1))
    ax[1].set_xlim(0, 1)
    ax[1].set_xlabel('Albedo (-)')
    ax[1].set_ylabel('Cumulative Probability')
    ax[1].legend()
    ax[1].grid(True)

    # Map view of the same scene on a fixed 0-1 colour scale.
    c = ax[2].pcolormesh(dataset['x'], dataset['y'], dataset['albedo'].squeeze(), cmap='viridis', shading='auto', vmin=0, vmax=1)
    ax[2].set_xlabel('X-Coordinate (m)')
    ax[2].set_ylabel('Y-Coordinate (m)')

    # Add a colorbar to the pcolormesh plot
    fig.colorbar(c, ax=ax[2])
    # Adjust layout to avoid overlapping
    fig.tight_layout()


In [None]:
# Metadata row(s) of one specific scene.
metadata.loc[metadata['system:index'].eq("LE07_193028_20000119")]



In [None]:
# basically all scenes taken around 10h
metadata['SCENE_CENTER_TIME'].values


In [None]:
## Test Load HORAYZON MASK
# The look-up table sits in the static-data folder, so reuse `path_shp` from
# the configuration cell instead of repeating both machine-specific paths.
horayzon = salem.open_xr_dataset(path_shp + "HEF_HORAYZON-LUT_30m.nc")
horayzon.sel(time="2020-01-19T10:00:00").sw_dir_cor.plot()

In [14]:
# The `.rio` accessor only exists once rioxarray has been imported. Elsewhere
# in this notebook that import is guarded by `generate_file`, so import it here
# to keep this cell working on a fresh kernel.
import rioxarray  # noqa: F401  (imported for its accessor registration)

# Declare which dimensions are spatial and which CRS they are in, then
# reproject onto the projection of the albedo scenes.
horayzon.rio.set_spatial_dims('lon', 'lat', inplace=True)
horayzon.rio.write_crs(horayzon.pyproj_srs, inplace=True)
test = horayzon.rio.reproject(alb_dataset.pyproj_srs)

In [None]:
# Same time step as plotted before the reprojection, now on the reprojected grid.
test['sw_dir_cor'].sel(time="2020-01-19T10:00:00").plot()

In [None]:
# test horayzon mask
img_hrz = test['sw_dir_cor'].sel(time="2020-01-19T10:00:00")
img_alb = alb_dataset['albedo'].isel(time=[1])

# Put the mask on the coordinates of the albedo scene (nearest neighbour),
# then blank every pixel whose sw_dir_cor value is exactly 0.
img_hrz_aligned = img_hrz.reindex_like(img_alb, method="nearest")  # or method="pad"
_nonzero_sw_dir_cor = img_hrz_aligned != 0
img_alb_masked = img_alb.where(_nonzero_sw_dir_cor, np.nan)

img_alb_masked.plot()



In [None]:
# Histogram / CDF / map for the masked test scene (converted to a Dataset first).
compute_histogram(img_alb_masked.to_dataset())

In [None]:
# Display the albedo dataset.
alb_dataset

In [None]:
# Same diagnostics for the scene at position 1 along time, without the mask.
compute_histogram(alb_dataset.isel(time=1))

In [None]:
## full histogram over all timesteps
# Pool every valid albedo value of every scene into one 1-D sample.
albedo_vals_full = alb_dataset.albedo.values.ravel()
albedo_vals_full = albedo_vals_full[np.logical_not(np.isnan(albedo_vals_full))]

_lowest, _highest = albedo_vals_full.min(), albedo_vals_full.max()
print(_lowest, _highest)
bins = np.linspace(_lowest, _highest, 100)  # Adjust the number of bins
hist, bin_edges = np.histogram(albedo_vals_full, bins=bins)

fig, ax = plt.subplots(1, 1, figsize=(16, 9), dpi=300)
ax.hist(albedo_vals_full, bins=bin_edges, edgecolor="black", alpha=0.7)
ax.set_title('Albedo Values over all timesteps')
ax.set_xticks(np.arange(0, 1+0.1, 0.1))
ax.set_xlim(0, 1)
ax.set_ylabel('Frequency')
ax.grid(True)

In [None]:
## Clean Data with HORAYZON and repeat figures
# Put the reprojected look-up table on the x/y coordinates of the albedo
# scenes by nearest-neighbour reindexing.
hrz_aligned = test.reindex(y=alb_dataset['y'], x=alb_dataset['x'], method="nearest")
hrz_aligned

In [None]:
# Mask every albedo scene with the sw_dir_cor field of the matching
# day-of-year / time-of-day in `hrz_aligned`.
albedo_copy = alb_dataset.copy()
# Shift the scene timestamps by 10 hours (the SCENE_CENTER_TIME cell notes that
# basically all scenes were taken around 10h).
alb_time_shifted = albedo_copy.time + np.timedelta64(10, 'h')

alb_dt = pd.to_datetime(alb_time_shifted.values)

alb_doy = alb_dt.day_of_year
alb_time_of_day = alb_dt.strftime("%H:%M:%S")  # Extract time in HH:MM:SS format
print(alb_doy)
print(alb_time_of_day)

# Rebuild each timestamp inside the year 2020 from its day-of-year and time of day.
# NOTE(review): 2020 is a leap year, so a day-of-year >= 60 taken from a
# non-leap year maps to the previous calendar day in 2020 - confirm this is acceptable.
alb_time_2020 = pd.to_datetime(f"2020-01-01") + pd.to_timedelta(alb_doy - 1, unit="D") + pd.to_timedelta(alb_time_of_day)
alb_time_2020

# replace time values in albedo_copy 
albedo_copy['time'] = alb_time_2020
# Sanity check: relabelling the time axis must not change the albedo values.
np.testing.assert_allclose(albedo_copy.isel(time=1).albedo, alb_dataset.isel(time=1).albedo)

# For every (relabelled) scene time pick the nearest time step of the look-up table.
sw_dir_cor_selected = hrz_aligned.sel(time=albedo_copy.time, method="nearest")['sw_dir_cor']
print(sw_dir_cor_selected)

# Keep values only where sw_dir_cor is non-zero. The mask is applied to the
# whole Dataset, not just to `albedo`.
alb_dataset_filtered = albedo_copy.where(sw_dir_cor_selected != 0, np.nan)
# Restore the original acquisition times and the HGT field of the unfiltered dataset.
alb_dataset_filtered['time'] = alb_dataset.time
alb_dataset_filtered['HGT'] = alb_dataset.HGT
alb_dataset_filtered
    

In [None]:
# Keep only entries whose CLOUDCOVER is at most 30 and report how many time
# steps remain.
_cloudcover_ok = alb_dataset_filtered['CLOUDCOVER'] <= 30
alb_dataset_filtered_cali2 = alb_dataset_filtered.where(_cloudcover_ok, drop=True)
print(alb_dataset_filtered_cali2['time'].size)

In [None]:
## Find all cloudy scenes
# Cloudy (parentheses kept from the original note):
# 2, 11, 24, (25), (27), (31), (40), (44),  47, 48, 52, 64, 70, 71, 72, 80, 86, 90, 94, 95, ((111))
#
# NOTE: these are positions along `time` of `alb_dataset_filtered_cali2`; they
# are only valid for the scene list and cloud-cover threshold used above.

# Total length of the time dimension (`.sizes` is the supported mapping;
# `Dataset.dims[...]` is deprecated for this use).
n_times = alb_dataset_filtered_cali2.sizes['time']

faulty_indices = [2, 11, 24, 25, 27, 31, 40, 44,  47, 48, 52, 64, 70, 71, 72, 80, 86, 90, 94, 95, 111]
if max(faulty_indices) >= n_times:
    raise IndexError(
        f"faulty_indices refers to position {max(faulty_indices)} but only {n_times} scenes are available"
    )

# Create a boolean mask: True for good indices, False for faulty
mask = np.ones(n_times, dtype=bool)
mask[faulty_indices] = False

# Select only good indices using isel
alb_dataset_filtered_cali = alb_dataset_filtered_cali2.isel(time=mask)
alb_dataset_filtered_cali

In [25]:
# (An alternative CLOUDCOVER <= 20 selection was tried here; 10% was used for albedo estimation.)
# Per-scene statistics over the spatial dimensions.
_albedo_field = alb_dataset_filtered_cali['albedo']
_spatial_dims = ['x', 'y']

non_nan_count = _albedo_field.count(dim=_spatial_dims)
alb_dataset_filtered_cali['non_nan_count'] = non_nan_count
# Despite its name this variable holds the spatial MEAN: the median would be
# more robust against outliers, but the mean is more consistent with literature.
alb_dataset_filtered_cali['median_albedo'] = _albedo_field.mean(dim=_spatial_dims)
alb_dataset_filtered_cali['std_albedo'] = _albedo_field.std(dim=_spatial_dims)

In [None]:
# Calculate N_Eff
pixel_size = 30  # 30m Landsat footprint
pixel_area = pixel_size * pixel_size
L_corr = 500  # assume 500m corr.

# Effective area per independent sample, and the valid area of each scene.
A_eff = np.pi * L_corr**2
A_total = alb_dataset_filtered_cali['non_nan_count'] * pixel_area
N_eff = A_total / A_eff
alb_dataset_filtered_cali['n_eff'] = N_eff

# Standard error from the effective sample size, combined in quadrature with a
# fixed 0.017 term.
standard_error = alb_dataset_filtered_cali['std_albedo'] / np.sqrt(alb_dataset_filtered_cali['n_eff'])
alb_dataset_filtered_cali['sigma_albedo'] = np.sqrt(standard_error**2 + 0.017**2)

alb_dataset_filtered_cali['sigma_albedo'].plot()

In [33]:
# Write the filtered scenes (with their per-scene statistics) to NetCDF.
# NOTE: 'win' in sys.platform is also true for 'darwin' and 'cygwin'.
_filtered_nc_path = (
    "E:/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/HEF_processed_HRZ-30CC-filter_albedos.nc"
    if 'win' in sys.platform
    else "/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/HEF_processed_HRZ-30CC-filter_albedos.nc"
)
alb_dataset_filtered_cali.to_netcdf(_filtered_nc_path)


In [None]:
# Free-text notes from a manual cloud screening pass. This is a bare string
# expression, so it is shown as the cell output rather than being used by code.
""" LIST CLOUDY from 92 points
scene 1, little bit but not much
scene 2, def. clouds!
scene 10, def. clouds
scene 20 prob. clouds, 23 prob. 30 prob. 35, 36, 40, 55, 69, 73, 74, 75
86
"""

In [None]:
## Find all cloudy scenes
# Visual check of the albedo map at position 92 along time.
alb_dataset_filtered_cali['albedo'].isel(time=92).plot()

In [None]:
# Visual check of the albedo map at position 2 along time.
alb_dataset_filtered_cali['albedo'].isel(time=2).plot()

In [None]:
# Working copy restricted to CLOUDCOVER <= 30 (10 or 30 were the candidates),
# with a month label per time step for the seasonal split below
# (mean winter, mean summer).
_low_cloud = alb_dataset_filtered['CLOUDCOVER'] <= 30
alb_play_data = alb_dataset_filtered.copy().where(_low_cloud, drop=True)
alb_play_data['month'] = alb_play_data['time'].dt.month
def season_label(month):
    """Return 'winter' for Dec-Mar, 'summer' for Jun-Sep and None otherwise."""
    winter_months = (12, 1, 2, 3)
    summer_months = (6, 7, 8, 9)
    if month in winter_months:
        return 'winter'
    return 'summer' if month in summer_months else None

# Label every time step with its season (None for months in neither window).
_month_values = alb_play_data['month'].values
alb_play_data['season'] = xr.DataArray(list(map(season_label, _month_values)), dims='time')
alb_play_data
# Group by glaciological year and season, then compute the mean
#seasonal_mean = alb_play_data.groupby('season').mean(skipna=True)
#seasonal_mean



In [30]:
# Flatten the albedo of all winter scenes into one NaN-free 1-D sample.
_is_winter = alb_play_data['season'] == 'winter'
winter = alb_play_data.where(_is_winter, drop=True)['albedo']
vals_winter = winter.data.ravel()
vals_winter = vals_winter[np.logical_not(np.isnan(vals_winter))]

# Same for the summer scenes.
_is_summer = alb_play_data['season'] == 'summer'
summer = alb_play_data.where(_is_summer, drop=True)['albedo']
vals_summer = summer.data.ravel()
vals_summer = vals_summer[np.logical_not(np.isnan(vals_summer))]


In [None]:
## statistics for winter
vals_winter_filtered = vals_winter[~np.isnan(vals_winter)]
print(np.nanmean(vals_winter_filtered) - 2*np.nanstd(vals_winter))

# Keep only values inside the 0.75-0.98 window (lower bound: cuffey paterson);
# anything outside is blanked and then dropped.
_outside_window = (vals_winter_filtered < 0.75) | (vals_winter_filtered > 0.98)
cut_winter_vals = np.where(_outside_window, np.nan, vals_winter_filtered)
cut_winter_vals = cut_winter_vals[~np.isnan(cut_winter_vals)]


In [None]:
## Fresh-snow albedo figure: box plot, histogram and empirical CDF of the
## winter albedo values inside the 0.75-0.98 window (cut_winter_vals).
fig, ax = plt.subplots(2, 2, figsize=(10, 8), dpi=300, sharex=True,
    gridspec_kw={"height_ratios": (.1, .9), "width_ratios": (1, 1)}
)
plt.subplots_adjust(hspace=0.05, wspace=0.2)  # Adjust spacing

# Assign axes
ax_box = ax[0, 0]  # Top-left (boxplot)
ax_main = ax[1, 0]  # Bottom-left (main plot)
fig.delaxes(ax[0, 1])  # Delete top-right subplot
fig.delaxes(ax[1, 1])  # Delete bottom-right subplot
ax_side = fig.add_subplot(1, 2, 2)  # Right-side plot spanning full height

# Bin edges span the cut sample; count the same sample that is drawn below.
bins_winter_cut = np.linspace(cut_winter_vals.min(), cut_winter_vals.max(), 100)  # Adjust the number of bins
hist, bin_edges_cut = np.histogram(cut_winter_vals, bins=bins_winter_cut)

_2th = np.quantile(cut_winter_vals, 0.02)
_25th = np.quantile(cut_winter_vals, 0.25)
_50th = np.quantile(cut_winter_vals, 0.5)
_75th = np.quantile(cut_winter_vals, 0.75)
_98th = np.quantile(cut_winter_vals, 0.98)

# Values strictly between the 2nd and 98th percentile, and their median.
_mask = np.where((cut_winter_vals > _2th) & (cut_winter_vals < _98th))
test_snow = cut_winter_vals[_mask]
_snow_median = np.quantile(test_snow, 0.5)
print(np.nanmin(test_snow))
print(_2th)
print(np.nanmax(test_snow))
print(_98th)
#Get upper 50% range
print("50th percentile of filtered data:", _snow_median)
print("Max of filtered data:", np.nanmax(test_snow))
print(np.nanstd(test_snow[np.where((test_snow > _snow_median))]))

# the boxplot
flierprops = dict(marker='o', markerfacecolor='black', markersize=3,
                  markeredgecolor='none')
ax_box.boxplot(cut_winter_vals, flierprops=flierprops, vert=False)
ax_box.set_ylabel("")
ax_box.set_yticklabels("")
ax_box.set_yticks([])

# keep the frame of the box plot visible
ax_box.spines['top'].set_visible(True)
ax_box.spines['right'].set_visible(True)
ax_box.spines['left'].set_visible(True)

# the histogram
ax_main.hist(cut_winter_vals, bins=bin_edges_cut, edgecolor="black", alpha=0.7)
ax_main.axvline(x=_98th, linestyle="--", color="green")
ax_main.axvline(x=_snow_median, linestyle="--", color="red")
ax_main.set_xlabel("Snow albedo (-)")
ax_main.set_ylabel("Count")
ax_main.set_xticks(np.arange(0.75, 1+0.05, 0.05))
ax_main.set_xlim(0.75, 1)

# the cdf plot: empirical CDF, i.e. the i-th smallest of n values has
# cumulative probability i/n (a cumulative sum of the albedo values
# themselves is not a probability).
sorted_data = np.sort(cut_winter_vals)
cdf = np.arange(1, sorted_data.size + 1) / sorted_data.size

ax_side.plot(sorted_data, cdf, color='blue', label='CDF')
ax_side.axvline(_snow_median, color='red', linestyle='--', label=f'50th Percentile ({_snow_median:.2f})')
ax_side.axvline(_98th, color='green', linestyle='--', label=f'98th Percentile ({_98th:.2f})')

ax_side.set_xticks(np.arange(0.75, 1+0.05, 0.05))
ax_side.set_xlim(0.75, 1)

ax_side.set_xlabel('Snow Albedo (-)')
ax_side.set_ylabel('Cumulative Probability')
ax_side.legend()
ax_side.grid(True)

# Derive the figure folder from the configured static-data path so the cell
# also works on the Windows machine; on the mounted drive this resolves to the
# same file as before.
_fig_dir = pathlib.Path(path_shp).parent / "Figures"
plt.savefig(_fig_dir / "FigS09_freshsnow_satellite_albedo.png", bbox_inches="tight")


In [None]:
## Repeat for ice albedo (from summer scenes in Cuffey and Paterson range )
# Keep summer values inside the 0.1-0.46 window; everything else is dropped.
cut_summer_vals = np.where(vals_summer < 0.1, np.nan, vals_summer) #cuffey paterson
cut_summer_vals = np.where(cut_summer_vals > 0.46, np.nan, cut_summer_vals)
cut_summer_vals = cut_summer_vals[~np.isnan(cut_summer_vals)]
mean = np.nanmean(cut_summer_vals)  # Mean of the distribution
std_dev = np.nanstd(cut_summer_vals)  # Standard deviation of the distribution
print(mean, std_dev)

fig, ax = plt.subplots(2, 2, figsize=(10, 8), dpi=300, sharex=True,
    gridspec_kw={"height_ratios": (.1, .9), "width_ratios": (1, 1)}
)
plt.subplots_adjust(hspace=0.05, wspace=0.2)  # Adjust spacing

# Assign axes
ax_box = ax[0, 0]  # Top-left (boxplot)
ax_main = ax[1, 0]  # Bottom-left (main plot)
fig.delaxes(ax[0, 1])  # Delete top-right subplot
fig.delaxes(ax[1, 1])  # Delete bottom-right subplot
ax_side = fig.add_subplot(1, 2, 2)  # Right-side plot spanning full height

# Bin edges span the ice sample itself (previously the snow-figure edges were
# passed here, which also made this cell depend on the snow cell).
bins_summer_cut = np.linspace(cut_summer_vals.min(), cut_summer_vals.max(), 100)  # Adjust the number of bins
hist, bin_edges_cut = np.histogram(cut_summer_vals, bins=bins_summer_cut)

_2th = np.quantile(cut_summer_vals, 0.02)
_25th = np.quantile(cut_summer_vals, 0.25)
_75th = np.quantile(cut_summer_vals, 0.75)
_98th = np.quantile(cut_summer_vals, 0.98)

# Values strictly between the 2nd and 98th percentile, and their median.
_mask = np.where((cut_summer_vals > _2th) & (cut_summer_vals < _98th))
test_ice = cut_summer_vals[_mask]
_ice_median = np.quantile(test_ice, 0.5)
print(np.nanmin(test_ice))
print(_2th)
print(np.nanmax(test_ice))
print(_98th)
print("50th percentile of filtered data:", _ice_median)
# The printed value is the minimum, so label it as such.
print("Min of filtered data:", np.nanmin(test_ice))
print("OG. 50th:", np.quantile(cut_summer_vals, 0.5))

# the boxplot
flierprops = dict(marker='o', markerfacecolor='black', markersize=3,
                  markeredgecolor='none')
ax_box.boxplot(cut_summer_vals, flierprops=flierprops, vert=False)
ax_box.set_ylabel("")
ax_box.set_yticklabels("")
ax_box.set_yticks([])

# keep the frame of the box plot visible
ax_box.spines['top'].set_visible(True)
ax_box.spines['right'].set_visible(True)
ax_box.spines['left'].set_visible(True)

# the histogram
ax_main.hist(cut_summer_vals, bins=bins_summer_cut, edgecolor="black", alpha=0.7)
ax_main.axvline(x=_2th, linestyle="--", color="red")
ax_main.axvline(x=_ice_median, linestyle="--", color="green")
ax_main.set_xlabel("Ice albedo (-)")
ax_main.set_ylabel("Count")
# Ticks/limits belong to the histogram axis here (they were applied to ax_side).
ax_main.set_xticks(np.arange(0.1, 0.4+0.05, 0.05))
ax_main.set_xlim(0.05, 0.45)

# the cdf plot: empirical CDF, i.e. the i-th smallest of n values has
# cumulative probability i/n (a cumulative sum of the albedo values
# themselves is not a probability).
sorted_data = np.sort(cut_summer_vals)
cdf = np.arange(1, sorted_data.size + 1) / sorted_data.size

ax_side.plot(sorted_data, cdf, color='blue', label='CDF')
ax_side.axvline(x=_2th, linestyle="--", color="red", label=f'2th Percentile ({_2th:.2f})')
ax_side.axvline(x=_ice_median, linestyle="--", color="green", label=f'50th Percentile ({_ice_median:.2f})')

ax_side.set_xticks(np.arange(0.1, 0.4+0.05, 0.05))
ax_side.set_xlim(0.05, 0.45)

ax_side.set_xlabel('Ice albedo (-)')
ax_side.set_ylabel('Cumulative Probability')
ax_side.legend()
ax_side.grid(True)

# Derive the figure folder from the configured static-data path so the cell
# also works on the Windows machine; on the mounted drive this resolves to the
# same file as before.
_fig_dir = pathlib.Path(path_shp).parent / "Figures"
plt.savefig(_fig_dir / "FigS07_ice_satellite_albedo.png", bbox_inches="tight")

In [None]:
## Firn selection ...
## Repeat for firn albedo (summer values between the ice and snow windows)
# Keep summer values inside the 0.46-0.75 window; everything else is dropped.
cut_firn_vals = np.where(vals_summer < 0.46, np.nan, vals_summer) #cuffey paterson
cut_firn_vals = np.where(cut_firn_vals > 0.75, np.nan, cut_firn_vals)
cut_firn_vals = cut_firn_vals[~np.isnan(cut_firn_vals)]
mean = np.nanmean(cut_firn_vals)  # Mean of the distribution
std_dev = np.nanstd(cut_firn_vals)  # Standard deviation of the distribution
print(mean, std_dev)

fig, ax = plt.subplots(2, 2, figsize=(10, 8), dpi=300, sharex=True,
    gridspec_kw={"height_ratios": (.1, .9), "width_ratios": (1, 1)}
)
plt.subplots_adjust(hspace=0.05, wspace=0.2)  # Adjust spacing

# Assign axes
ax_box = ax[0, 0]  # Top-left (boxplot)
ax_main = ax[1, 0]  # Bottom-left (main plot)
fig.delaxes(ax[0, 1])  # Delete top-right subplot
fig.delaxes(ax[1, 1])  # Delete bottom-right subplot
ax_side = fig.add_subplot(1, 2, 2)  # Right-side plot spanning full height

# Bin edges span the firn sample itself (previously the snow-figure edges were
# passed here, which also made this cell depend on the snow cell).
bins_firn_cut = np.linspace(cut_firn_vals.min(), cut_firn_vals.max(), 100)  # Adjust the number of bins
hist, bin_edges_cut = np.histogram(cut_firn_vals, bins=bins_firn_cut)

_2th = np.quantile(cut_firn_vals, 0.02)
_25th = np.quantile(cut_firn_vals, 0.25)
_75th = np.quantile(cut_firn_vals, 0.75)
_98th = np.quantile(cut_firn_vals, 0.98)

# Values strictly between the 2nd and 98th percentile, and their median.
_mask = np.where((cut_firn_vals > _2th) & (cut_firn_vals < _98th))
test_firn = cut_firn_vals[_mask]
_firn_median = np.quantile(test_firn, 0.5)
print(np.nanmin(test_firn))
print(_2th)
print(np.nanmax(test_firn))
print(_98th)
print("50th percentile of filtered data:", _firn_median)
print("Min of filtered data:", np.nanmin(test_firn))

# the boxplot
flierprops = dict(marker='o', markerfacecolor='black', markersize=3,
                  markeredgecolor='none')
ax_box.boxplot(cut_firn_vals, flierprops=flierprops, vert=False)
ax_box.set_ylabel("")
ax_box.set_yticklabels("")
ax_box.set_yticks([])

# keep the frame of the box plot visible
ax_box.spines['top'].set_visible(True)
ax_box.spines['right'].set_visible(True)
ax_box.spines['left'].set_visible(True)

# the histogram
ax_main.hist(cut_firn_vals, bins=bins_firn_cut, edgecolor="black", alpha=0.7)
ax_main.axvline(x=_2th, linestyle="--", color="red")
ax_main.axvline(x=_firn_median, linestyle="--", color="green")
ax_main.set_xlabel("Firn albedo (-)")
ax_main.set_ylabel("Count")
# Ticks/limits belong to the histogram axis here (they were applied to ax_side).
ax_main.set_xticks(np.arange(0.4, 0.7+0.05, 0.05))
ax_main.set_xlim(0.35, 0.75)

# the cdf plot: empirical CDF, i.e. the i-th smallest of n values has
# cumulative probability i/n (a cumulative sum of the albedo values
# themselves is not a probability).
sorted_data = np.sort(cut_firn_vals)
cdf = np.arange(1, sorted_data.size + 1) / sorted_data.size

ax_side.plot(sorted_data, cdf, color='blue', label='CDF')
# Same colours as in the histogram panel: 2nd percentile red, median green.
ax_side.axvline(_2th, color='red', linestyle='--', label=f'2th Percentile ({_2th:.2f})')
ax_side.axvline(x=_firn_median, linestyle="--", color="green", label=f'50th Percentile ({_firn_median:.2f})')

ax_side.set_xticks(np.arange(0.4, 0.7+0.05, 0.05))
ax_side.set_xlim(0.35, 0.75)

ax_side.set_xlabel('Firn albedo (-)')
ax_side.set_ylabel('Cumulative Probability')
ax_side.legend()
ax_side.grid(True)

# Derive the figure folder from the configured static-data path so the cell
# also works on the Windows machine; on the mounted drive this resolves to the
# same file as before.
_fig_dir = pathlib.Path(path_shp).parent / "Figures"
plt.savefig(_fig_dir / "FigS08_firn_satellite_albedo.png", bbox_inches="tight")

In [None]:
# Stacked histograms of all winter (top) and summer (bottom) albedo values on
# a shared 0-1 x-axis.
fig, ax = plt.subplots(2, 1, figsize=(16, 9), dpi=300, sharex=True)
ax_winter, ax_summer = ax[0], ax[1]

# Winter panel
bins_winter = np.linspace(vals_winter_filtered.min(), vals_winter_filtered.max(), 100)  # Adjust the number of bins
hist, bin_edges = np.histogram(vals_winter_filtered, bins=bins_winter)
ax_winter.hist(vals_winter_filtered, bins=bin_edges, edgecolor="black", alpha=0.7)
ax_winter.set_xticks(np.arange(0, 1+0.1, 0.1))
ax_winter.set_xlim(0, 1)
ax_winter.set_ylabel('Winter Albedo Frequency')
ax_winter.grid(True)

# Summer panel (finer tick spacing)
bins_summer = np.linspace(vals_summer.min(), vals_summer.max(), 100)  # Adjust the number of bins
hist, bin_edges = np.histogram(vals_summer, bins=bins_summer)
ax_summer.hist(vals_summer, bins=bin_edges, edgecolor="black", alpha=0.7)
ax_summer.set_xticks(np.arange(0, 1+0.05, 0.05))
ax_summer.set_xlim(0, 1)
ax_summer.set_ylabel('Summer Albedo Frequency')
ax_summer.grid(True)