# Glacier grids from SGI or GLAMOS:

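Creates monthly grid files that the MassBalanceMachine (MBM) uses to make PMB predictions over the whole glacier grid. The grids are built from the SGI grids (Option 1) or from the GLAMOS yearly DEMs (Option 2) and are complemented with OGGM topographical variables. Computing takes a long time because of the conversion to monthly format.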
## Setting up:

In [None]:
import os, sys
sys.path.append(os.path.join(os.getcwd(), '../../')) # Add root of repo to import MBM

import pandas as pd
import warnings
from tqdm.notebook import tqdm
import re
import massbalancemachine as mbm
import geopandas as gpd
import matplotlib.pyplot as plt
import geopandas as gpd
import geopandas as gpd

# scripts
from scripts.helpers import *
from scripts.glamos_preprocess import *
from scripts.plots import *
from scripts.geodata import *
from scripts.xgb_helpers import *
from scripts.config_CH import *

# Silence all Python warnings for the rest of the notebook session.
warnings.filterwarnings('ignore')
# IPython magics: enable the autoreload extension in mode 2.
%load_ext autoreload
%autoreload 2

# Switzerland configuration; the cells below read cfg.dataPath, cfg.seed and
# cfg.metaData from it.
cfg = mbm.SwitzerlandConfig()

In [None]:
# Seed with the configured seed, then free CUDA memory
# (presumably releases GPU memory left over from earlier runs - TODO confirm).
seed_all(cfg.seed)
free_up_cuda()

# Matplotlib style sheet used for all figures of this notebook.
path_style_sheet = 'scripts/example.mplstyle'
plt.style.use(path_style_sheet)

# Climate columns (smoothed in get_climate_features and kept in the monthly
# conversion further below).
vois_climate = [
    't2m',
    'tp',
    'slhf',
    'sshf',
    'ssrd',
    'fal',
    'str',
    'u10',
    'v10',
]

# Topographical columns kept in the monthly conversion further below.
voi_topographical = [
    "aspect", "slope", "hugonnet_dhdt", "consensus_ice_thickness", "millan_v",
    "topo"
]

In [None]:
# Entries of the GLAMOS LV95 DEM folder.
glamos_lv95_dir = os.path.join(cfg.dataPath, path_GLAMOS_topo, 'lv95/')
glaciers_glamos_dem = os.listdir(glamos_lv95_dir)

# Glacier outlines: SGI 2016 inventory shapefile and RGI outlines.
sgi_shapefile = os.path.join(cfg.dataPath, path_SGI_topo,
                             'inventory_sgi2016_r2020',
                             'SGI_2016_glaciers_copy.shp')
glacier_outline_sgi = gpd.read_file(sgi_shapefile)
glacier_outline_rgi = gpd.read_file(cfg.dataPath + path_rgi_outlines)

# Glacier areas; 'clariden' reuses the value stored under 'claridenL'.
gl_area = get_gl_area(cfg)
gl_area['clariden'] = gl_area['claridenL']

In [None]:
geodetic_mb = get_geodetic_MB(cfg)

# Unique start ('Astart') and end ('Aend') years of the geodetic periods,
# stored as one list per glacier name.
geodetic_by_glacier = geodetic_mb.groupby('glacier_name')
years_start_per_gl = {
    name: list(years)
    for name, years in geodetic_by_glacier['Astart'].unique().items()
}
years_end_per_gl = {
    name: list(years)
    for name, years in geodetic_by_glacier['Aend'].unique().items()
}

periods_per_glacier, geoMB_per_glacier = build_periods_per_glacier(geodetic_mb)

# Option 1: SGI (2015) grids (for option 2 see below - GLAMOS grids)

## Examine SGI data:
### Glacier masks over SGI aspect:
This checks that the SGI glacier outlines overlap well with the SGI aspect grids.

In [None]:
# Glacier outlines.
# NOTE: this reloads the outlines from 'SGI_2016_glaciers.shp' and replaces the
# ones loaded earlier from 'SGI_2016_glaciers_copy.shp'.
glacier_outline_sgi = gpd.read_file(
    os.path.join(cfg.dataPath, path_SGI_topo, 'inventory_sgi2016_r2020',
                 'SGI_2016_glaciers.shp'))  # Load the shapefile

# Clear output folder and make sure it exists before figures are written
emptyfolder('figures/SGI_mask/')
os.makedirs(os.path.join('figures', 'SGI_mask'), exist_ok=True)

# The aspect folder does not change while looping: list it only once
aspect_dir = os.path.join(cfg.dataPath, path_SGI_topo, 'aspect')
aspect_files = os.listdir(aspect_dir)

for glacier_name in tqdm(years_start_per_gl.keys(),
                         desc="Processing glaciers"):

    # Look up the SGI / RGI identifiers of the glacier
    sgi_id, rgi_id, rgi_shp = get_rgi_sgi_ids(cfg, glacier_name)

    # Skip if no SGI ID
    if not sgi_id:
        print(f'No SGI ID found for {glacier_name}')
        continue

    # Get glacier mask from SGI shapefile
    gdf_mask_gl = glacier_outline_sgi[glacier_outline_sgi['sgi-id'] == sgi_id]

    # Skip if no glacier mask found
    if gdf_mask_gl.empty:
        print(f'No glacier mask found for {glacier_name}')
        continue

    # Locate aspect grid file: first file whose name contains the SGI ID
    aspect_gl = next((f for f in aspect_files if sgi_id in f), None)

    # Skip if no aspect file found
    if not aspect_gl:
        print(f'No aspect file found for {glacier_name}')
        continue

    # Load grid file
    metadata_aspect, grid_data_aspect = load_grid_file(
        os.path.join(aspect_dir, aspect_gl))

    # Convert to xarray
    aspect = convert_to_xarray_geodata(grid_data_aspect, metadata_aspect)

    # Transform to WGS84 coordinates
    aspect_wgs84 = transform_xarray_coords_lv95_to_wgs84(aspect)

    # Plot the aspect grid with the glacier outline on top
    fig, ax = plt.subplots(figsize=(10, 10))
    aspect_wgs84.plot(ax=ax)
    gdf_mask_gl.plot(ax=ax, alpha=0.5)

    # Save and close this specific figure so figures do not accumulate
    output_path = os.path.join('figures', 'SGI_mask', f"{glacier_name}.png")
    fig.savefig(output_path, dpi=300)
    plt.close(fig)

### Compare one example grid of SGI to OGGM:

In [None]:
# Glacier name
glacier_name = 'rhone'

# Look up the SGI / RGI identifiers; fall back to empty strings so that the
# "missing data" branch below is taken when the glacier is unknown
try:
    sgi_id, rgi_id, rgi_shp = get_rgi_sgi_ids(cfg, glacier_name)
except KeyError:
    print(f"Error: {glacier_name} not found in rgi_df")
    sgi_id, rgi_id, rgi_shp = '', '', ''

if not sgi_id or not rgi_id or not rgi_shp:
    print(f"Warning: Missing data for {glacier_name}. Skipping...")
else:
    # Load SGI masked dataset
    ds = xr_SGI_masked_topo(glacier_outline_sgi, sgi_id, cfg)
    if ds is None:
        print(
            f"Warning: Failed to load SGI dataset for {glacier_name}. Skipping..."
        )
    else:
        # Load OGGM dataset
        oggm_path = os.path.join(cfg.dataPath, path_OGGM, 'xr_grids',
                                 f'{rgi_id}.zarr')

        try:
            ds_oggm = xr.open_dataset(oggm_path)
        except FileNotFoundError:
            print(
                f"Error: OGGM dataset not found for {glacier_name}. Skipping..."
            )
            ds_oggm = None

        # Calculate SGI resolution
        dx_sgi, dy_sgi = get_res_from_degrees(ds)
        print(f"Cell size of SGI: {dx_sgi:.2f} x {dy_sgi:.2f} meters")

        if ds_oggm is not None:
            # Calculate OGGM resolution. Convert the 0-d DataArrays to plain
            # floats: the ':.2f' format spec is not supported on DataArray
            # objects in every xarray version.
            dx_oggm = float(abs(ds_oggm.x[1] - ds_oggm.x[0]))
            dy_oggm = float(abs(ds_oggm.y[1] - ds_oggm.y[0]))
            print(f"Cell size of OGGM: {dx_oggm:.2f} x {dy_oggm:.2f} meters")

            # Top row: SGI, bottom row: OGGM
            fig, axs = plt.subplots(2, 4, figsize=(15, 8))

            # SGI Data
            sgi_panels = [
                ('masked_aspect', 'twilight_shifted', "Aspect SGI"),
                ('masked_slope', 'cividis', "Slope SGI"),
                ('masked_elev', 'terrain', "DEM SGI"),
                ('glacier_mask', 'binary', "Glacier mask SGI"),
            ]
            for ax, (var_name, cmap, title) in zip(axs[0], sgi_panels):
                ds[var_name].plot(ax=ax, cmap=cmap, add_colorbar=False)
                ax.set_title(title)

            # OGGM Data (only plotted when all four variables are present)
            oggm_panels = [
                ('aspect', 'twilight_shifted', "Aspect OGGM"),
                ('slope', 'cividis', "Slope OGGM"),
                ('topo', 'terrain', "DEM OGGM"),
                ('glacier_mask', 'binary', "Glacier mask OGGM"),
            ]
            if all(var_name in ds_oggm for var_name, _, _ in oggm_panels):
                for ax, (var_name, cmap, title) in zip(axs[1], oggm_panels):
                    ds_oggm[var_name].plot(ax=ax,
                                           cmap=cmap,
                                           add_colorbar=False)
                    ax.set_title(title)
            else:
                print(
                    f"Warning: Some OGGM variables are missing in {oggm_path}")

            # Set axis labels and drop a legend only if one exists (avoids
            # creating an empty legend just to remove it again)
            for ax in axs.flatten():
                ax.set_xlabel("x")
                ax.set_ylabel("y")
                if (legend := ax.get_legend()) is not None:
                    legend.remove()

            # Optimize layout
            plt.tight_layout()
            plt.show()

In [None]:
# Resample SGI grid:
# Coarsen to 30 m resolution
ds_resampled = coarsenDS(ds)

# Resolution of the resampled grid
dx_m, dy_m = get_res_from_degrees(ds_resampled)
print(f"Cell size of resampled grid: {dx_m:.2f} x {dy_m:.2f} meters")

# One panel per variable; only the aspect panel keeps its colorbar
fig, axs = plt.subplots(1, 4, figsize=(15, 6))
resampled_panels = [
    ('masked_aspect', 'twilight_shifted', "Aspect", {}),
    ('masked_slope', 'cividis', "Slope", {'add_colorbar': False}),
    ('masked_elev', 'terrain', "DEM", {'add_colorbar': False}),
    ('glacier_mask', 'binary', "Glacier mask", {'add_colorbar': False}),
]
for ax, (var_name, cmap, title, plot_kwargs) in zip(axs, resampled_panels):
    ds_resampled[var_name].plot(ax=ax, cmap=cmap, **plot_kwargs)
    ax.set_title(title)
plt.tight_layout()

## Monthly masked grids - dataframes:

In [None]:
# Step 1: build the masked topographical arrays, one per glacier.
# Set RUN to True to (re)create them.
RUN = False
glacier_list = sorted(years_start_per_gl)
if RUN:
    sgi_masked_grids_dir = os.path.join(cfg.dataPath, path_SGI_topo,
                                        'xr_masked_grids/')
    create_sgi_topo_masks(cfg,
                          glacier_list,
                          type='glacier_name',
                          path_save=sgi_masked_grids_dir)

In [None]:
# Build the monthly SGI grid dataframes: one parquet file per glacier and per
# year of its geodetic period. Set RUN to True to recompute everything (the
# output folder is emptied first).
RUN = False
path_xr_grids = os.path.join(cfg.dataPath, path_SGI_topo, 'xr_masked_grids/')
if RUN:
    emptyfolder(cfg.dataPath + path_glacier_grid_sgi)

    # Inputs that do not depend on the glacier or the year
    era5_climate_data = os.path.join(cfg.dataPath, path_ERA5_raw,
                                     'era5_monthly_averaged_data.nc')
    geopotential_data = os.path.join(cfg.dataPath, path_ERA5_raw,
                                     'era5_geopotential_pressure.nc')
    path_pcsr_zarr = os.path.join(cfg.dataPath, path_pcsr, 'zarr/')
    # OGGM variables of interest
    voi = ["hugonnet_dhdt", "consensus_ice_thickness", "millan_v"]

    for glacier_name in tqdm(years_start_per_gl.keys(),
                             desc="Processing glaciers"):
        folder_path = os.path.join(cfg.dataPath, path_glacier_grid_sgi,
                                   glacier_name)
        os.makedirs(folder_path, exist_ok=True)  # Ensure folder exists

        # Period covered: first listed start year to last listed end year.
        # Cast to int so that range() below also works if the years are stored
        # as floats.
        if glacier_name not in years_end_per_gl:
            print(f"Skipping {glacier_name}: missing start/end years")
            continue
        geodetic_period = (int(years_start_per_gl[glacier_name][0]),
                           int(years_end_per_gl[glacier_name][-1]))
        print('Geodetic period:', geodetic_period[0], '-', geodetic_period[1])

        # Get available .zarr files for this glacier
        nc_files = sorted(f for f in os.listdir(path_xr_grids)
                          if glacier_name in f)
        print(f"\nProcessing {glacier_name}:")

        if not nc_files:
            print(f"Warning: No DEM found for {glacier_name}. Skipping...")
            continue

        # The identifiers are the same for every year: check them once per
        # glacier instead of warning once per year
        sgi_id, rgi_id, rgi_shp = get_rgi_sgi_ids(cfg, glacier_name)
        if not sgi_id or not rgi_id or not rgi_shp:
            print(
                f"Warning: Missing SGI ID or RGI shapefile for {glacier_name}. Skipping..."
            )
            continue

        # SGI masked grid of this glacier (previously resampled)
        path = os.path.join(path_xr_grids, f"{glacier_name}.zarr")

        for year in tqdm(range(geodetic_period[0], geodetic_period[1] + 1),
                         desc='years',
                         leave=False):

            # Load SGI masked grid
            try:
                ds_coarsened = xr.open_dataset(path)
            except Exception as e:
                print(f"Error loading dataset for {glacier_name}: {e}")
                continue

            # Create glacier grid; the dataset is closed as soon as the grid
            # dataframe has been built
            try:
                with ds_coarsened:
                    df_grid = create_glacier_grid_SGI(glacier_name, year,
                                                      rgi_id, ds_coarsened)
                df_grid.reset_index(drop=True, inplace=True)
                dataset_grid = mbm.data_processing.Dataset(
                    cfg=cfg,
                    data=df_grid,
                    region_name='CH',
                    data_path=cfg.dataPath + path_PMB_GLAMOS_csv)
            except Exception as e:
                print(
                    f"Error creating glacier grid for {glacier_name} in {year}: {e}"
                )
                continue

            # Add climate data
            try:
                dataset_grid.get_climate_features(
                    climate_data=era5_climate_data,
                    geopotential_data=geopotential_data,
                    change_units=True,
                    smoothing_vois={
                        'vois_climate': vois_climate,
                        'vois_other': ['ALTITUDE_CLIMATE']
                    })
            except Exception as e:
                print(
                    f"Error adding climate data for {glacier_name} in {year}: {e}"
                )
                continue

            # Add potential clear sky radiation
            try:
                dataset_grid.get_potential_rad(path_pcsr_zarr)
            except Exception as e:
                print(
                    f"Error adding clear sky radiation for {glacier_name} in {year}: {e}"
                )
                continue

            # Add OGGM topographic data
            try:
                df_y_gl = dataset_grid.data
                df_y_gl.rename(columns={'RGIId': 'RGIId_old'}, inplace=True)

                # Add RGI IDs for OGGM data through intersection with shapefiles
                df_y_gl = mbm.data_processing.utils.get_rgi(
                    data=df_y_gl, glacier_outlines=glacier_outline_rgi)

                # Drop points without RGI ID (outside of RGI outlines)
                df_y_gl = df_y_gl.dropna(subset=['RGIId'])

                df_y_gl = add_OGGM_features(df_y_gl, voi,
                                            cfg.dataPath + path_OGGM)

                # Add GLWD_ID: hash of "<GLACIER>_<YEAR>", stored as string
                df_y_gl['GLWD_ID'] = df_y_gl.apply(
                    lambda x: mbm.data_processing.utils.get_hash(
                        f"{x.GLACIER}_{x.YEAR}"),
                    axis=1)
                df_y_gl['GLWD_ID'] = df_y_gl['GLWD_ID'].astype(str)

                dataset_grid = mbm.data_processing.Dataset(
                    cfg=cfg,
                    data=df_y_gl,
                    region_name='CH',
                    data_path=cfg.dataPath + path_PMB_GLAMOS_csv)
            except Exception as e:
                print(
                    f"Error adding OGGM data for {glacier_name} in {year}: {e}"
                )
                continue

            # Convert to monthly time resolution
            try:
                dataset_grid.convert_to_monthly(
                    meta_data_columns=cfg.metaData,
                    vois_climate=vois_climate + ['pcsr'],
                    vois_topographical=voi_topographical)
                # Explicit check instead of assert (asserts are stripped
                # when Python runs with -O)
                if 'pcsr' not in dataset_grid.data.columns:
                    raise ValueError("Missing 'pcsr' column after conversion")
            except Exception as e:
                print(
                    f"Error converting to monthly resolution for {glacier_name} in {year}: {e}"
                )
                continue

            # Rename columns (because slope & aspect not from OGGM)
            df_oggm = dataset_grid.data
            df_oggm.rename(columns={
                'aspect': 'aspect_sgi',
                'slope': 'slope_sgi'
            },
                           inplace=True)

            # Save gridded dataset
            save_path = os.path.join(folder_path,
                                     f"{glacier_name}_grid_{year}.parquet")

            try:
                df_oggm.to_parquet(save_path,
                                   engine="pyarrow",
                                   compression="snappy")
            except Exception as e:
                print(
                    f"Error saving dataset for {glacier_name} in {year}: {e}")

In [None]:
# Example: map a few climate and OGGM variables of one SGI monthly grid,
# restricted to the rows where MONTHS == 'sep'
glacier_name = 'aletsch'
year = 2016

grid_file = os.path.join(
    cfg.dataPath, path_glacier_grid_sgi,
    f"{glacier_name}/{glacier_name}_grid_{year}.parquet")
df = pd.read_parquet(grid_file)
df = df[df.MONTHS == 'sep']

# Variables to plot, one panel each
voi = [
    't2m',
    'tp',
    'ALTITUDE_CLIMATE',
    'ELEVATION_DIFFERENCE',
    'hugonnet_dhdt',
    'consensus_ice_thickness',
]
fig, axs = plt.subplots(2, 3, figsize=(15, 10))
axs = axs.flatten()
for ax, var in zip(axs, voi):
    sns.scatterplot(df,
                    x='POINT_LON',
                    y='POINT_LAT',
                    hue=var,
                    s=5,
                    alpha=0.5,
                    palette='twilight_shifted',
                    ax=ax)

# Option 2: GLAMOS grids:

GLAMOS did not use the SGI grids (from 2015) to compute its geodetic MB and gridded MB products, but its own yearly DEMs. These DEMs are not available for all years, but we still compute monthly grids for the glaciers and years that are available, in order to make the comparison with the geodetic MB fairer.

In [None]:
# Initialise the OGGM glacier directories (RGI region 11, version 6) from the
# pre-processed directories at the URL below, then export the OGGM grids.
oggm_base_url = (
    "https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L3-L5_files/2023.1/elev_bands/W5E5_w_data/"
)
gdirs, rgidf = initialize_oggm_glacier_directories(cfg,
                                                   rgi_region="11",
                                                   rgi_version="6",
                                                   base_url=oggm_base_url,
                                                   log_level='WARNING',
                                                   task_list=None)
export_oggm_grids(cfg, gdirs)

## Example of one glacier:

In [None]:
# script to convert lv03 to lv95 for Findelen
# glacier_name = 'findelen'
# sgi_id, rgi_id, rgi_shp = get_rgi_sgi_ids(glacier_name)

# folder_path = os.path.join(path_GLAMOS_topo, 'lv03', glacier_name)

# for fileName in os.listdir(folder_path):
#     year = int(fileName.split('_')[1].split('.grid')[0])  # Extract year from filename

#     # Example file
#     metadata, grid_data = load_grid_file(folder_path + '/' + fileName)

#     # Convert to xarray
#     dem_y = convert_to_xarray_geodata(grid_data, metadata)

#     dem_lv95_y = transform_xarray_coords_lv03_to_lv95(dem_y)

#     # save to lv95 folder:
#     filepath = os.path.join(path_GLAMOS_topo, 'lv95', glacier_name, f'gl_{year}_lv95.grid')

#     save_xarray_to_grid(dem_lv95_y, filepath, nodata_value=-9999)

# dem_lv95_y.plot()

In [None]:
# Example: build and plot the masked topography of one GLAMOS DEM
glacier_name = 'findelen'
sgi_id, rgi_id, rgi_shp = get_rgi_sgi_ids(cfg, glacier_name)

folder_path = os.path.join(cfg.dataPath, path_GLAMOS_topo, 'lv95',
                           glacier_name)

# Example file
fileName = 'gl_2008_lv95.grid'
metadata, grid_data = load_grid_file(os.path.join(folder_path, fileName))

# Convert to xarray
dem_y = convert_to_xarray_geodata(grid_data, metadata)

# Transform the coordinates to WGS84
dem_wgs84_y = transform_xarray_coords_lv95_to_wgs84(dem_y)

# Create a mask where 'elevation' is not NaN (1 if not NaN, 0 if NaN)
ds_gl = xr.Dataset({'dem': dem_wgs84_y})
ds_gl["glacier_mask"] = ds_gl["dem"].notnull().astype(np.uint8)

# Grid spacing of the transformed DEM. The coordinates are WGS84 at this
# point, so the spacing is reported in degrees rather than meters
# (NOTE(review): assumes the transform returns lon/lat in degrees - confirm).
dx = abs(ds_gl.x[1] - ds_gl.x[0]).values
dy = abs(ds_gl.y[1] - ds_gl.y[0]).values
print(f"Cell size of GLAMOS DEM: {dx} x {dy} degrees")

# Extract SGI topo and aspect over GLAMOS DEM
ds = xr_GLAMOS_masked_topo(cfg, sgi_id, ds_gl)

# Coarsen to 30 m resolution if needed
ds = coarsenDS(ds)
dx_m, dy_m = get_res_from_degrees(ds)
print(f"Coarsened ds resolution: {dx_m} x {dy_m} meters")

# Plot the masked data
fig, axs = plt.subplots(1, 4, figsize=(15, 6))
ds.masked_aspect.plot(ax=axs[0], cmap='twilight_shifted', add_colorbar=False)
ds.masked_slope.plot(ax=axs[1], cmap='cividis', add_colorbar=False)
ds.masked_elev.plot(ax=axs[2], cmap='terrain', add_colorbar=False)
ds.glacier_mask.plot(ax=axs[3], cmap='binary', add_colorbar=False)

axs[0].set_title("Aspect")
axs[1].set_title("Slope")
axs[2].set_title("DEM")
axs[3].set_title("Glacier mask")
plt.tight_layout()

## Yearly masked grids - xarrays:
Saves one .zarr xarray dataset per glacier and per year (not in monthly format); these are needed later in the MBM.

In [None]:
# Build one masked topography dataset (.zarr) per glacier and per GLAMOS DEM
# year. Set RUN to True to recompute everything (the output folder is emptied
# first).
RUN = False

path_xr_grids = os.path.join(cfg.dataPath, path_GLAMOS_topo,
                             'xr_masked_grids/')
glaciers_glamos_dems = os.listdir(
    os.path.join(cfg.dataPath, path_GLAMOS_topo, 'lv95'))

if RUN:
    emptyfolder(path_xr_grids)

    # Regular expression to extract years from filenames
    pattern = re.compile(r'gl_(\d{4})_lv95\.grid')

    for glacier_name in tqdm(glaciers_glamos_dems, desc="Processing glaciers"):
        print(f"\nProcessing {glacier_name}...")

        # Look up the SGI / RGI identifiers of the glacier
        sgi_id, rgi_id, rgi_shp = get_rgi_sgi_ids(cfg, glacier_name)

        # Skip glacier if required data is missing
        if not sgi_id or not rgi_shp:
            print(
                f"Warning: Missing SGI ID or shapefile for {glacier_name}. Skipping..."
            )
            continue

        # Define glacier folder path ('sanktanna' is stored as 'stanna')
        folder_path = os.path.join(
            cfg.dataPath, path_GLAMOS_topo, 'lv95',
            'stanna' if glacier_name == 'sanktanna' else glacier_name)

        # Check if folder exists
        if not os.path.exists(folder_path):
            print(
                f"Warning: Folder does not exist: {folder_path}. Skipping...")
            continue

        # Extract available years from filenames
        years = sorted({
            int(match.group(1))
            for filename in os.listdir(folder_path)
            if (match := pattern.match(filename))
        })

        if not years:
            print(
                f"Warning: No valid year files found in {folder_path}. Skipping..."
            )
            continue

        # The resolutions are printed only once per glacier
        printed_resolution_normal = False
        printed_resolution_res = False

        for year in years:
            if year < 1951:  # no ERA5 data available before 1951
                continue

            file_name = f'gl_{year}_lv95.grid'
            file_path = os.path.join(folder_path, file_name)

            try:
                # Load grid file
                metadata, grid_data = load_grid_file(file_path)

                # Convert to xarray
                dem_y = convert_to_xarray_geodata(grid_data, metadata)

                # Transform the coordinates to WGS84
                dem_wgs84_y = transform_xarray_coords_lv95_to_wgs84(dem_y)

                # Create a mask where 'elevation' is not NaN (1 if not NaN, 0 if NaN)
                ds_gl = xr.Dataset({'dem': dem_wgs84_y})
                ds_gl["glacier_mask"] = ds_gl["dem"].notnull().astype(np.uint8)

                # Apply GLAMOS masked topo function
                ds = xr_GLAMOS_masked_topo(cfg, sgi_id, ds_gl)

                # Resolution of THIS year's grid. It is computed for every
                # year so that the coarsening decision below never relies on
                # a value left over from a previous year; only the print is
                # limited to the first valid year.
                dx_m, dy_m = get_res_from_degrees(ds)
                if not printed_resolution_normal:
                    print(f"ds normal resolution: {dx_m} x {dy_m} meters")
                    printed_resolution_normal = True

                # Grids with a cell size above 20 m are resampled with
                # target_res_m=50; all other grids are saved as is
                if dx_m > 20:
                    ds = coarsenDS(ds, target_res_m=50)

                    # Print resolution of resampled data only once
                    if not printed_resolution_res:
                        dx_res_m, dy_res_m = get_res_from_degrees(ds)
                        print(
                            f"ds_resampled resolution: {dx_res_m} x {dy_res_m} meters"
                        )
                        printed_resolution_res = True

                # Save xarray dataset
                save_path = os.path.join(path_xr_grids,
                                         f"{glacier_name}_{year}.zarr")
                ds.to_zarr(save_path)

                # Plot the masked data (figures only for years after 2000)
                if year > 2000:
                    fig, axs = plt.subplots(1, 4, figsize=(15, 6))
                    ds.masked_aspect.plot(ax=axs[0],
                                          cmap='twilight_shifted',
                                          add_colorbar=False)
                    ds.masked_slope.plot(ax=axs[1],
                                         cmap='cividis',
                                         add_colorbar=False)
                    ds.masked_elev.plot(ax=axs[2],
                                        cmap='terrain',
                                        add_colorbar=False)
                    ds.glacier_mask.plot(ax=axs[3],
                                         cmap='binary',
                                         add_colorbar=False)
                    axs[0].set_title("Aspect")
                    axs[1].set_title("Slope")
                    axs[2].set_title("DEM")
                    axs[3].set_title("Glacier mask")

                    # Save the figure, then close it so figures do not pile up
                    fig_save_path = os.path.join(cfg.dataPath, 'figures',
                                                 'topography', glacier_name,
                                                 f"{glacier_name}_{year}.png")
                    os.makedirs(os.path.dirname(fig_save_path), exist_ok=True)
                    fig.savefig(fig_save_path, dpi=300)
                    plt.close(fig)

            except Exception as e:
                print(f"Error processing {glacier_name} in {year}: {e}")

In [None]:
# Example: plot the saved masked grid of Plaine Morte for 2010
ds = xr.open_dataset(os.path.join(path_xr_grids, 'plainemorte_2010.zarr'))
fig, axs = plt.subplots(1, 4, figsize=(15, 6))
masked_panels = [
    ('masked_aspect', 'twilight_shifted', "Aspect"),
    ('masked_slope', 'cividis', "Slope"),
    ('masked_elev', 'terrain', "DEM"),
    ('glacier_mask', 'binary', "Glacier mask"),
]
for ax, (var_name, cmap, title) in zip(axs, masked_panels):
    ds[var_name].plot(ax=ax, cmap=cmap, add_colorbar=False)
    ax.set_title(title)
plt.tight_layout()

## Monthly masked grids - dataframes:

In [None]:
# Build the monthly GLAMOS grid dataframes: one parquet file per glacier and
# per year for which a GLAMOS masked grid ('<glacier>_<year>.zarr') exists.
too_small_glaciers = ['vorab', 'blauschnee', 'joeri']

# If True, only years inside the geodetic period that have no parquet file yet
# are processed. If False, every available year from 1951 on is processed.
# NOTE(review): in that mode already processed years are not skipped - confirm
# that this is intended.
ONLY_GEODETIC_YEARS = True

RUN = True
if RUN:
    os.makedirs(cfg.dataPath + path_glacier_grid_glamos,
                exist_ok=True)  # Ensure folder exists
    #emptyfolder(cfg.dataPath + path_glacier_grid_glamos)

    # Inputs that do not depend on the glacier or the year
    era5_climate_data = os.path.join(cfg.dataPath, path_ERA5_raw,
                                     'era5_monthly_averaged_data.nc')
    geopotential_data = os.path.join(cfg.dataPath, path_ERA5_raw,
                                     'era5_geopotential_pressure.nc')
    path_pcsr_zarr = os.path.join(cfg.dataPath, path_pcsr, 'zarr/')
    # OGGM variables of interest
    voi = ["hugonnet_dhdt", "consensus_ice_thickness", "millan_v"]
    parquet_year_re = re.compile(r'_grid_(\d{4})\.parquet$')
    # Uses the path_xr_grids currently defined in the notebook
    all_grid_files = os.listdir(path_xr_grids)

    # NOTE: currently restricted to 'clariden'. To process every glacier use:
    # for glacier_name in tqdm(years_start_per_gl.keys(),
    #                          desc="Processing glaciers"):
    for glacier_name in ['clariden']:
        if glacier_name in too_small_glaciers:
            print(
                f"Skipping {glacier_name}: too small glacier, no aspect & slope"
            )
            continue

        # Get available .zarr files for this glacier. Only names of the exact
        # form '<glacier_name>_<year>.zarr' are kept, because that is the file
        # loaded below; a substring match would also pick up files of other
        # glaciers whose name contains this one.
        grid_file_re = re.compile(
            rf'{re.escape(glacier_name)}_(\d{{4}})\.zarr')
        nc_files = sorted(f for f in all_grid_files
                          if grid_file_re.fullmatch(f))

        print(f"\nProcessing {glacier_name}: {len(nc_files)} files found")

        if not nc_files:
            print(
                f"Warning: No GLAMOS DEM found for {glacier_name}. Skipping..."
            )
            continue

        folder_path = os.path.join(cfg.dataPath, path_glacier_grid_glamos,
                                   glacier_name)
        os.makedirs(folder_path, exist_ok=True)  # Ensure folder exists

        # Years for which a parquet file already exists
        existing_years = {
            int(m.group(1))
            for f in os.listdir(folder_path)
            if (m := parquet_year_re.search(f))
        }

        # Period covered: first listed start year to last listed end year
        if glacier_name in years_start_per_gl and glacier_name in years_end_per_gl:
            geodetic_period = (int(years_start_per_gl[glacier_name][0]),
                               int(years_end_per_gl[glacier_name][-1]))
            print('Geodetic period:', geodetic_period[0], '-',
                  geodetic_period[1])
        else:
            print(f"Skipping {glacier_name}: missing start/end years")
            continue

        # Identify the years to process (no ERA5 data before 1951)
        available_years = [
            int(grid_file_re.fullmatch(f).group(1)) for f in nc_files
        ]
        if ONLY_GEODETIC_YEARS:
            missing_years = [
                y for y in available_years
                if y >= 1951 and y not in existing_years
                and geodetic_period[0] <= y <= geodetic_period[1]
            ]
        else:
            missing_years = [y for y in available_years if y >= 1951]

        if not missing_years:
            print(
                f"All years processed for {glacier_name} or no overlap with geodetic period. Skipping..."
            )
            continue
        print(f"Years to process for {glacier_name}: {missing_years}")

        # The identifiers are the same for every year: look them up once per
        # glacier. Errors are reported and the glacier is skipped, in line
        # with the best-effort handling of the per-year processing below.
        try:
            sgi_id, rgi_id, rgi_shp = get_rgi_sgi_ids(cfg, glacier_name)
        except Exception as e:
            print(f"Error processing {glacier_name}: {e}")
            continue

        # Skip glacier if required data is missing
        if not sgi_id or not rgi_id or not rgi_shp:
            print(
                f"Warning: Missing SGI ID or RGI shapefile for {glacier_name}. Skipping..."
            )
            continue

        for year in tqdm(missing_years,
                         desc="Processing missing years",
                         leave=False):
            fileName = f"{glacier_name}_{year}.zarr"
            try:
                # Load GLAMOS masked grid and create the glacier grid; the
                # dataset is closed once the grid dataframe has been built
                file_path = os.path.join(path_xr_grids, fileName)
                with xr.open_dataset(file_path) as ds:
                    df_grid = create_glacier_grid_SGI(glacier_name, year,
                                                      rgi_id, ds)
                df_grid.reset_index(drop=True, inplace=True)
                dataset_grid = mbm.data_processing.Dataset(
                    cfg=cfg,
                    data=df_grid,
                    region_name='CH',
                    data_path=cfg.dataPath + path_PMB_GLAMOS_csv)

                # Add climate data
                dataset_grid.get_climate_features(
                    climate_data=era5_climate_data,
                    geopotential_data=geopotential_data,
                    change_units=True,
                    smoothing_vois={
                        'vois_climate': vois_climate,
                        'vois_other': ['ALTITUDE_CLIMATE']
                    })

                # Add potential clear sky radiation
                dataset_grid.get_potential_rad(path_pcsr_zarr)

                # Process OGGM data
                df_y_gl = dataset_grid.data
                df_y_gl.rename(columns={'RGIId': 'RGIId_old'}, inplace=True)

                # Add RGI IDs through intersection with shapefiles
                df_y_gl = mbm.data_processing.utils.get_rgi(
                    data=df_y_gl, glacier_outlines=glacier_outline_rgi)

                # Drop points without RGI ID
                df_y_gl = df_y_gl.dropna(subset=['RGIId'])

                # Add OGGM features
                df_y_gl = add_OGGM_features(df_y_gl, voi,
                                            cfg.dataPath + path_OGGM)

                # Add GLWD_ID: hash of "<GLACIER>_<YEAR>", stored as string
                df_y_gl['GLWD_ID'] = df_y_gl.apply(
                    lambda x: mbm.data_processing.utils.get_hash(
                        f"{x.GLACIER}_{x.YEAR}"),
                    axis=1)
                df_y_gl['GLWD_ID'] = df_y_gl['GLWD_ID'].astype(str)

                dataset_grid_oggm = mbm.data_processing.Dataset(
                    cfg=cfg,
                    data=df_y_gl,
                    region_name='CH',
                    data_path=cfg.dataPath + path_PMB_GLAMOS_csv)

                # Convert to monthly time resolution
                dataset_grid_oggm.convert_to_monthly(
                    meta_data_columns=cfg.metaData,
                    vois_climate=vois_climate + ['pcsr'],
                    vois_topographical=voi_topographical)

                # Explicit checks instead of asserts (asserts are stripped
                # when Python runs with -O)
                if 'pcsr' not in dataset_grid_oggm.data.columns:
                    raise ValueError("Missing 'pcsr' column after conversion")

                # Rename columns
                df_oggm = dataset_grid_oggm.data
                df_oggm.rename(columns={
                    'aspect': 'aspect_sgi',
                    'slope': 'slope_sgi'
                },
                               inplace=True)

                if 'POINT_ELEVATION' not in df_oggm.columns:
                    raise ValueError(
                        "Missing 'POINT_ELEVATION' column in the final DataFrame"
                    )

                # Save gridded dataset
                save_path = os.path.join(
                    folder_path, f"{glacier_name}_grid_{year}.parquet")
                df_oggm.to_parquet(save_path,
                                   engine="pyarrow",
                                   compression="snappy")
                print(f"Saved: {save_path}")

            except Exception as e:
                print(f"Error processing {glacier_name} ({year}): {e}")

##### Example:

In [None]:
# Example: load one GLAMOS monthly grid dataframe and map its variables
glacier_name = 'adler'
year = 2018

folder_path = os.path.join(cfg.dataPath, path_glacier_grid_glamos,
                           glacier_name)
# Load the dataset
df = pd.read_parquet(
    os.path.join(folder_path, f"{glacier_name}_grid_{year}.parquet"))

# Variables of interest, one panel each
voi = [
    't2m', 'tp', 'ALTITUDE_CLIMATE', 'ELEVATION_DIFFERENCE', 'hugonnet_dhdt',
    'consensus_ice_thickness', 'aspect_sgi', 'slope_sgi'
]
fig, axs = plt.subplots(2, 4, figsize=(15, 10))
axs = axs.flatten()
for i, var in enumerate(voi):
    sns.scatterplot(df,
                    x='POINT_LON',
                    y='POINT_LAT',
                    hue=var,
                    s=5,
                    alpha=0.5,
                    palette='twilight_shifted',
                    ax=axs[i])