# Glacier grids from RGI:

Creates monthly grid files for the MBM to make PMB predictions over the whole glacier grid. The files come from the RGI grid and use OGGM topography. Computing takes a long time because of the conversion to monthly format.

## Setting up:

In [9]:
import pandas as pd
import os
import warnings
from tqdm.notebook import tqdm
import re
import massbalancemachine as mbm
import geopandas as gpd
import matplotlib.pyplot as plt
from cmcrameri import cm
from oggm import utils, workflow
from oggm import cfg as oggmCfg
import geopandas as gpd
import geopandas as gpd
import traceback

# scripts
from scripts.helpers import *
from scripts.glamos_preprocess import *
from scripts.plots import *
from scripts.geodata import *
from scripts.xgb_helpers import *
from scripts.config_CH import *

warnings.filterwarnings('ignore')
%load_ext autoreload
%autoreload 2

cfg = mbm.SwitzerlandConfig()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# Seed with the value stored in the config, then call the project's CUDA
# clean-up helper (in case no memory is available).
seed_all(cfg.seed)
free_up_cuda()

# Plot styles:
path_style_sheet = 'scripts/example.mplstyle'
plt.style.use(path_style_sheet)

# Climate columns (passed to the monthly conversion together with 'pcsr')
vois_climate = [
    't2m',
    'tp',
    'slhf',
    'sshf',
    'ssrd',
    'fal',
    'str',
    'u10',
    'v10',
]

# Topographical columns (passed to the monthly conversion)
voi_topographical = [
    "aspect", "slope", "hugonnet_dhdt", "consensus_ice_thickness", "millan_v",
    "topo"
]


### Read PMB data:

In [11]:
# Glacier id lookup table: strip stray whitespace from the column headers,
# then order and index the rows by the glacier short name so that ids can be
# looked up with `rgi_df.at[<short_name>, <column>]`.
rgi_df = (pd.read_csv(path_glacier_ids, sep=',')
          .rename(columns=lambda col: col.strip())
          .sort_values(by='short_name')
          .set_index('short_name'))
rgi_df.head(2)

Unnamed: 0_level_0,full_name,sgi-id,rgi_id_v6_2016_shp,rgi_id.v6,rgi_id.v7,Issue
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
adler,Adler,B56-14,RGI60-11.B56-14,RGI60-11.02764,RGI2000-v7.0-G-11-01075,False
albigna,Albigna,C84-16,RGI60-11.C84-16,RGI60-11.02285,RGI2000-v7.0-G-11-02309,True


In [12]:
# PMB data:
data_glamos = pd.read_csv(path_PMB_GLAMOS_csv + 'CH_wgms_dataset_all.csv')

# Glaciers with data of potential clear sky radiation
# Format to same names as stakes:
# The glacier name is the part of the file name between `xr_direct_` and `.nc`.
# Directory entries that do not follow this pattern (hidden files, temporary
# files, ...) are skipped instead of failing on `None.group(1)`.
pcsr_file_pattern = re.compile(r'xr_direct_(.*?)\.nc')
pcsr_matches = (pcsr_file_pattern.search(f)
                for f in os.listdir(path_pcsr + 'csv/'))
glDirect = np.sort([m.group(1) for m in pcsr_matches if m is not None])

geodetic_mb = get_geodetic_MB()

# filter to glaciers with potential clear sky radiation data
geodetic_mb = geodetic_mb[geodetic_mb.glacier_name.isin(glDirect)]

# get years per glacier
# (lists of the distinct start / end years, in order of appearance in the table)
years_start_per_gl = geodetic_mb.groupby(
    'glacier_name')['Astart'].unique().apply(list).to_dict()
years_end_per_gl = geodetic_mb.groupby('glacier_name')['Aend'].unique().apply(
    list).to_dict()

# glacier name -> list of (start, end) periods / list of the matching 'Bgeod'
# values; both lists are filled in the same order.
periods_per_glacier = defaultdict(list)
geoMB_per_glacier = defaultdict(list)

# Iterate through the DataFrame rows
for _, row in geodetic_mb.iterrows():
    glacier_name = row['glacier_name']
    start_year = row['Astart']
    end_year = row['Aend']
    geoMB = row['Bgeod']

    # Append the (start, end) tuple to the glacier's list
    # Only if period is longer than 5 years
    if end_year - start_year >= 5:
        periods_per_glacier[glacier_name].append((start_year, end_year))
        geoMB_per_glacier[glacier_name].append(geoMB)

# sort by glacier_list
periods_per_glacier = dict(sorted(periods_per_glacier.items()))
geoMB_per_glacier = dict(sorted(geoMB_per_glacier.items()))

# Only glaciers with at least one period of 5 years or more remain.
glacier_list = list(periods_per_glacier)

# Sort glaciers by area
gl_area = get_gl_area()
# Make the area stored under 'claridenL' available under the name 'clariden'.
gl_area['clariden'] = gl_area['claridenL']

# Sort the lists by area if available in gl_area
def sort_by_area(glacier_list, gl_area):
    """Return the glacier names ordered by ascending area.

    Args:
        glacier_list: iterable of glacier names.
        gl_area: mapping with a ``.get`` method giving the area per name.

    Returns:
        A new list; names missing from ``gl_area`` are treated as area 0, and
        names with equal area keep their input order.
    """

    def _area_of(name):
        return gl_area.get(name, 0)

    return sorted(glacier_list, key=_area_of)

# Order the glaciers from smallest to largest area, then report the result.
glacier_list = sort_by_area(glacier_list, gl_area)
print(f'Number of glaciers: {len(glacier_list)}')
print(f'Glaciers: {glacier_list}')

Number of glaciers: 30
Glaciers: ['schwarzbach', 'taelliboden', 'sanktanna', 'corvatsch', 'sexrouge', 'murtel', 'plattalva', 'tortin', 'basodino', 'limmern', 'adler', 'hohlaub', 'albigna', 'tsanfleuron', 'silvretta', 'oberaar', 'gries', 'clariden', 'gietro', 'schwarzberg', 'forno', 'plainemorte', 'allalin', 'otemma', 'findelen', 'rhone', 'morteratsch', 'corbassiere', 'gorner', 'aletsch']


In [13]:
# Check which rgis are in the OGGM directory:
custom_working_dir = '../../../data/OGGM/'

# OGGM must be initialised before its parameters are overridden.
oggmCfg.initialize(logging_level="WARNING")
for param_name, param_value in {
        "border": 10,
        "use_multiprocessing": True,
        "continue_on_error": True,
}.items():
    oggmCfg.PARAMS[param_name] = param_value
oggmCfg.PATHS["working_dir"] = custom_working_dir

# Intersect dataframe with list of available glaciers in GLAMOS
# to reduce computation load in OGGM
glamos_rgi_ids = data_glamos.RGIId.unique()
rgidf = gpd.read_file(utils.get_rgi_region_file(region="11", version="6"))
rgidf = rgidf.loc[rgidf['RGIId'].isin(glamos_rgi_ids)]

# We use the directories with the shop data in it: "W5E5_w_data"
base_url = "https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L3-L5_files/2023.1/elev_bands/W5E5_w_data/"
gdirs = workflow.init_glacier_directories(
    rgidf,
    from_prepro_level=3,
    prepro_base_url=base_url,
    prepro_border=10,
    reset=True,
    force=True,
)

# RGI ids that have stake data and also an initialised glacier directory.
oggm_rgi_ids = {gdir.rgi_id for gdir in gdirs}
rgis = list(set(glamos_rgi_ids) & oggm_rgi_ids)
print('Number of rgis:', len(rgis))

2025-04-04 07:36:16: oggm.cfg: Reading default parameters from the OGGM `params.cfg` configuration file.
2025-04-04 07:36:16: oggm.cfg: Multiprocessing switched OFF according to the parameter file.
2025-04-04 07:36:16: oggm.cfg: Multiprocessing: using all available processors (N=32)
2025-04-04 07:36:16: oggm.cfg: PARAMS['border'] changed from `80` to `10`.
2025-04-04 07:36:16: oggm.cfg: Multiprocessing switched ON after user settings.
2025-04-04 07:36:16: oggm.cfg: PARAMS['continue_on_error'] changed from `False` to `True`.
2025-04-04 07:36:17: oggm.workflow: init_glacier_directories from prepro level 3 on 32 glaciers.
2025-04-04 07:36:17: oggm.workflow: Execute entity tasks [gdir_from_prepro] on 32 glaciers


Number of rgis: 32


## Compute glacier grids:
Add topographical and climate variables, then convert to monthly resolution (takes a long time).

#### Only for years with geodetic MB:

In [None]:
# Set this flag to enable/disable the script execution
RUN = True

# Manual RGI ID overrides
RGI_OVERRIDES = {'morteratsch': 'RGI60-11.01946', 'pers': 'RGI60-11.01946'}

if RUN:
    # Start from an empty output folder; a failure here is reported but does
    # not stop the run.
    try:
        emptyfolder(path_glacier_grid_rgi)
    except Exception as e:
        print(f"Error clearing folder '{path_glacier_grid_rgi}': {e}")

    # Paths to climate data (identical for every glacier, so built only once)
    era5_climate_data = os.path.join(path_ERA5_raw,
                                     'era5_monthly_averaged_data.nc')
    geopotential_data = os.path.join(path_ERA5_raw,
                                     'era5_geopotential_pressure.nc')

    for glacier_name in tqdm(glacier_list, desc='Processing glaciers'):
        try:
            folder_path = os.path.join(path_glacier_grid_rgi, glacier_name)
            os.makedirs(folder_path, exist_ok=True)  # Ensure folder exists
            print(f'\n{"-" * 35}\nProcessing: {glacier_name}')

            # Years covered by the geodetic MB of this glacier.
            # Checked before any expensive step so that a glacier without
            # start/end years is skipped immediately. The year lists are in
            # order of appearance, not sorted, so take the earliest start and
            # the latest end explicitly instead of the first/last entry.
            start_years = years_start_per_gl.get(glacier_name)
            end_years = years_end_per_gl.get(glacier_name)
            if not start_years or not end_years:
                print(f"Skipping {glacier_name}: missing start/end years")
                continue
            longest_period = (int(min(start_years)), int(max(end_years)))

            # Retrieve RGI ID with manual overrides if applicable.
            # The table lookup only runs when there is no override, so an
            # override-only glacier that is missing from `rgi_df` does not
            # raise a KeyError.
            if glacier_name in RGI_OVERRIDES:
                rgi_gl = RGI_OVERRIDES[glacier_name]
            else:
                rgi_gl = rgi_df.at[glacier_name, 'rgi_id.v6']

            # Load stake data for the glacier
            data_gl = data_glamos[data_glamos.RGIId == rgi_gl]
            if data_gl.empty:
                raise ValueError(
                    f"No stake data found for glacier '{glacier_name}' (RGI ID: {rgi_gl})"
                )

            dataset_gl = mbm.Dataset(cfg=cfg,
                                     data=data_gl,
                                     region_name='CH',
                                     data_path=path_PMB_GLAMOS_csv)

            # Create gridded glacier dataset from OGGM
            df_grid = dataset_gl.create_glacier_grid_RGI(custom_working_dir)
            if df_grid.empty:
                raise ValueError(
                    f"Failed to generate gridded dataset for glacier '{glacier_name}'"
                )

            df_grid["GLACIER"] = glacier_name
            df_grid.reset_index(drop=True, inplace=True)

            dataset_grid = mbm.Dataset(cfg=cfg,
                                       data=df_grid,
                                       region_name='CH',
                                       data_path=path_PMB_GLAMOS_csv)

            # Add climate data
            print('Adding climate data...')
            dataset_grid.get_climate_features(
                climate_data=era5_climate_data,
                geopotential_data=geopotential_data,
                change_units=True)

            # Add potential clear sky radiation
            print('Adding potential clear sky radiation...')
            dataset_grid.get_potential_rad(os.path.join(path_pcsr, 'csv/'))

            # Process each year separately (one parquet file per year)
            for year in range(longest_period[0], longest_period[1] + 1):
                try:
                    print(
                        f'Converting to monthly time resolution for {year}...')
                    df_grid_y = dataset_grid.data[dataset_grid.data.YEAR ==
                                                  year].copy()
                    if df_grid_y.empty:
                        # Nothing to convert or save for this year.
                        print(
                            f"Skipping {glacier_name} for year {year}: no grid data"
                        )
                        continue

                    # Add GLWD_ID: hash of "<glacier>_<year>", stored as string
                    df_grid_y['GLWD_ID'] = df_grid_y.apply(
                        lambda x: get_hash(f"{x.GLACIER}_{x.YEAR}"),
                        axis=1).astype(str)

                    dataset_grid_yearly = mbm.Dataset(
                        cfg=cfg,
                        data=df_grid_y,
                        region_name='CH',
                        data_path=path_PMB_GLAMOS_csv)

                    # Convert to monthly time resolution
                    dataset_grid_yearly.convert_to_monthly(
                        meta_data_columns=cfg.metaData,
                        vois_climate=vois_climate + ['pcsr'],
                        vois_topographical=voi_topographical,
                    )

                    # Ensure 'pcsr' column exists before saving
                    if 'pcsr' not in dataset_grid_yearly.data.columns:
                        raise ValueError(
                            f"'pcsr' column not found in dataset for glacier '{glacier_name}' in year {year}"
                        )

                    # Save the dataset for the specific year
                    save_path = os.path.join(
                        folder_path, f"{glacier_name}_grid_{year}.parquet")
                    print(f'Saving gridded dataset to: {save_path}')
                    dataset_grid_yearly.data.to_parquet(save_path,
                                                        engine="pyarrow",
                                                        compression="snappy")

                except Exception as year_error:
                    # A failing year is reported and the remaining years are
                    # still processed.
                    print(
                        f"⚠️ Error processing glacier '{glacier_name}' for year {year}: {year_error}"
                    )
                    traceback.print_exc()
                    continue  # Continue with the next year

        except Exception as glacier_error:
            # A failing glacier is reported and the remaining glaciers are
            # still processed.
            print(
                f"Error processing glacier '{glacier_name}': {glacier_error}")
            traceback.print_exc()
            continue  # Continue processing the next glacier

Processing glaciers:   0%|          | 0/30 [00:00<?, ?it/s]

2025-04-04 07:36:18: oggm.cfg: Reading default parameters from the OGGM `params.cfg` configuration file.
2025-04-04 07:36:18: oggm.cfg: Multiprocessing switched OFF according to the parameter file.
2025-04-04 07:36:18: oggm.cfg: Multiprocessing: using all available processors (N=32)
2025-04-04 07:36:18: oggm.cfg: PARAMS['border'] changed from `80` to `10`.
2025-04-04 07:36:18: oggm.cfg: Multiprocessing switched ON after user settings.
2025-04-04 07:36:18: oggm.cfg: PARAMS['continue_on_error'] changed from `False` to `True`.



-----------------------------------
Processing: schwarzbach


2025-04-04 07:36:18: oggm.workflow: init_glacier_directories from prepro level 3 on 1 glaciers.
2025-04-04 07:36:18: oggm.workflow: Execute entity tasks [gdir_from_prepro] on 1 glaciers
2025-04-04 07:36:18: oggm.workflow: Execute entity tasks [gridded_attributes] on 1 glaciers


Adding climate data...
Adding potential clear sky radiation...
Converting to monthly time resolution for 2010...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/schwarzbach/schwarzbach_grid_2010.parquet
Converting to monthly time resolution for 2011...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/schwarzbach/schwarzbach_grid_2011.parquet
Converting to monthly time resolution for 2012...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/schwarzbach/schwarzbach_grid_2012.parquet
Converting to monthly time resolution for 2013...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/schwarzbach/schwarzbach_grid_2013.parquet
Converting to monthly time resolution for 2014...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/schwarzbach/schwarzbach_grid_2014.parquet
Converting to monthly time resolution for 2015...
Saving gridded da

2025-04-04 07:37:02: oggm.cfg: Reading default parameters from the OGGM `params.cfg` configuration file.
2025-04-04 07:37:02: oggm.cfg: Multiprocessing switched OFF according to the parameter file.
2025-04-04 07:37:02: oggm.cfg: Multiprocessing: using all available processors (N=32)
2025-04-04 07:37:02: oggm.cfg: PARAMS['border'] changed from `80` to `10`.
2025-04-04 07:37:02: oggm.cfg: Multiprocessing switched ON after user settings.
2025-04-04 07:37:02: oggm.cfg: PARAMS['continue_on_error'] changed from `False` to `True`.
2025-04-04 07:37:02: oggm.workflow: init_glacier_directories from prepro level 3 on 1 glaciers.
2025-04-04 07:37:02: oggm.workflow: Execute entity tasks [gdir_from_prepro] on 1 glaciers


Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/schwarzbach/schwarzbach_grid_2021.parquet

-----------------------------------
Processing: taelliboden


2025-04-04 07:37:03: oggm.workflow: Execute entity tasks [gridded_attributes] on 1 glaciers


Adding climate data...
Adding potential clear sky radiation...
Converting to monthly time resolution for 2015...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/taelliboden/taelliboden_grid_2015.parquet
Converting to monthly time resolution for 2016...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/taelliboden/taelliboden_grid_2016.parquet
Converting to monthly time resolution for 2017...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/taelliboden/taelliboden_grid_2017.parquet
Converting to monthly time resolution for 2018...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/taelliboden/taelliboden_grid_2018.parquet
Converting to monthly time resolution for 2019...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/taelliboden/taelliboden_grid_2019.parquet
Converting to monthly time resolution for 2020...
Saving gridded da

2025-04-04 07:37:35: oggm.cfg: Reading default parameters from the OGGM `params.cfg` configuration file.
2025-04-04 07:37:35: oggm.cfg: Multiprocessing switched OFF according to the parameter file.
2025-04-04 07:37:35: oggm.cfg: Multiprocessing: using all available processors (N=32)
2025-04-04 07:37:35: oggm.cfg: PARAMS['border'] changed from `80` to `10`.
2025-04-04 07:37:35: oggm.cfg: Multiprocessing switched ON after user settings.
2025-04-04 07:37:35: oggm.cfg: PARAMS['continue_on_error'] changed from `False` to `True`.
2025-04-04 07:37:35: oggm.workflow: init_glacier_directories from prepro level 3 on 1 glaciers.
2025-04-04 07:37:35: oggm.workflow: Execute entity tasks [gdir_from_prepro] on 1 glaciers


Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/taelliboden/taelliboden_grid_2023.parquet

-----------------------------------
Processing: sanktanna


2025-04-04 07:37:36: oggm.workflow: Execute entity tasks [gridded_attributes] on 1 glaciers


Adding climate data...
Adding potential clear sky radiation...
Converting to monthly time resolution for 2010...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/sanktanna/sanktanna_grid_2010.parquet
Converting to monthly time resolution for 2011...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/sanktanna/sanktanna_grid_2011.parquet
Converting to monthly time resolution for 2012...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/sanktanna/sanktanna_grid_2012.parquet
Converting to monthly time resolution for 2013...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/sanktanna/sanktanna_grid_2013.parquet
Converting to monthly time resolution for 2014...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/sanktanna/sanktanna_grid_2014.parquet
Converting to monthly time resolution for 2015...
Saving gridded dataset to: ../../../d

2025-04-04 07:38:49: oggm.cfg: Reading default parameters from the OGGM `params.cfg` configuration file.
2025-04-04 07:38:49: oggm.cfg: Multiprocessing switched OFF according to the parameter file.
2025-04-04 07:38:49: oggm.cfg: Multiprocessing: using all available processors (N=32)
2025-04-04 07:38:49: oggm.cfg: PARAMS['border'] changed from `80` to `10`.
2025-04-04 07:38:49: oggm.cfg: Multiprocessing switched ON after user settings.
2025-04-04 07:38:49: oggm.cfg: PARAMS['continue_on_error'] changed from `False` to `True`.
2025-04-04 07:38:49: oggm.workflow: init_glacier_directories from prepro level 3 on 1 glaciers.
2025-04-04 07:38:49: oggm.workflow: Execute entity tasks [gdir_from_prepro] on 1 glaciers


Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/sanktanna/sanktanna_grid_2021.parquet

-----------------------------------
Processing: corvatsch


2025-04-04 07:38:49: oggm.workflow: Execute entity tasks [gridded_attributes] on 1 glaciers


Adding climate data...
Adding potential clear sky radiation...
Converting to monthly time resolution for 2015...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/corvatsch/corvatsch_grid_2015.parquet
Converting to monthly time resolution for 2016...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/corvatsch/corvatsch_grid_2016.parquet
Converting to monthly time resolution for 2017...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/corvatsch/corvatsch_grid_2017.parquet
Converting to monthly time resolution for 2018...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/corvatsch/corvatsch_grid_2018.parquet
Converting to monthly time resolution for 2019...
Saving gridded dataset to: ../../../data/GLAMOS/topo/gridded_topo_inputs/RGI_grid/corvatsch/corvatsch_grid_2019.parquet
Converting to monthly time resolution for 2020...
Saving gridded dataset to: ../../../d

## Check grids:

In [None]:
# Glacier whose OGGM grid attributes are plotted as a visual check.
glacier_name = 'rhone'

# Manual overrides take precedence. The table lookup only runs when there is
# no override, so an override-only glacier that is missing from `rgi_df` does
# not raise a KeyError.
if glacier_name in RGI_OVERRIDES:
    rgi_gl = RGI_OVERRIDES[glacier_name]
else:
    rgi_gl = rgi_df.at[glacier_name, 'rgi_id.v6']

# Load stake data for that glacier
data_gl = data_glamos[data_glamos.RGIId == rgi_gl]
dataset_gl = mbm.Dataset(cfg=cfg,
                         data=data_gl,
                         region_name='CH',
                         data_path=path_PMB_GLAMOS_csv)

ds, glacier_indices, gdir = dataset_gl.get_glacier_mask(custom_working_dir)
# Plot glacier attributes of oggm:
plotGlAttr(ds, cmap=cm.devon)