# Glacier grids from RGI:

This notebook creates monthly grid files that the MassBalanceMachine (MBM) uses to make PMB predictions over the whole glacier grid. The grids are derived from the RGI grid and use OGGM topography. Expect a long run time: converting the grids to monthly resolution is slow.

## Setting up:

In [None]:
import os, sys
sys.path.append(os.path.join(os.getcwd(), '../../')) # Add root of repo to import MBM

import pandas as pd
import warnings
from tqdm.notebook import tqdm
import re
import massbalancemachine as mbm
import geopandas as gpd
import matplotlib.pyplot as plt
import geopandas as gpd
import geopandas as gpd

# scripts
from scripts.helpers import *
from scripts.glamos_preprocess import *
from scripts.plots import *
from scripts.geodata import *
from scripts.xgb_helpers import *
from scripts.config_CH import *

# Silence every warning for the remainder of the notebook session.
warnings.filterwarnings('ignore')
# IPython magics: load the autoreload extension and re-import modules
# before each cell execution (mode 2).
%load_ext autoreload
%autoreload 2

# Switzerland configuration from MBM; the cells below read `cfg.seed` and `cfg.dataPath` from it.
cfg = mbm.SwitzerlandConfig()

In [None]:
# Seed with the value stored in the configuration, then release CUDA memory
# (in case none is available).
seed_all(cfg.seed)
free_up_cuda()

# Matplotlib style sheet applied to every figure of the notebook.
path_style_sheet = 'scripts/example.mplstyle'
plt.style.use(path_style_sheet)

# Names of the climate columns.
vois_climate = [
    't2m',
    'tp',
    'slhf',
    'sshf',
    'ssrd',
    'fal',
    'str',
    'u10',
    'v10',
]

# Names of the topographical columns.
voi_topographical = [
    "aspect", "slope", "hugonnet_dhdt",
    "consensus_ice_thickness", "millan_v", "topo",
]

### Read PMB data:

In [None]:
# Geodetic mass-balance table and GLAMOS stake measurements.
geodetic_mb = get_geodetic_MB(cfg)
data_glamos = getStakesData(cfg)

# For every glacier: the distinct 'Astart' values and the distinct 'Aend'
# values found in the geodetic table, each as a plain list keyed by glacier name.
years_start_per_gl = {
    name: list(values)
    for name, values in geodetic_mb.groupby('glacier_name')['Astart'].unique().items()
}
years_end_per_gl = {
    name: list(values)
    for name, values in geodetic_mb.groupby('glacier_name')['Aend'].unique().items()
}

periods_per_glacier, geoMB_per_glacier = build_periods_per_glacier(geodetic_mb)

# Glacier areas (used further down to order the glaciers);
# 'clariden' reuses the entry stored under 'claridenL'.
gl_area = get_gl_area(cfg)
gl_area['clariden'] = gl_area['claridenL']

# RGI glacier outlines.
glacier_outline_rgi = gpd.read_file(cfg.dataPath + path_rgi_outlines)

# Every glacier that has at least one entry in periods_per_glacier.
glacier_list = list(periods_per_glacier.keys())
# Sort the lists by area if available in gl_area
def sort_by_area(glacier_list, gl_area):
    """Return a new list of glacier names ordered by ascending area.

    Args:
        glacier_list: Iterable of glacier names to order.
        gl_area: Mapping from glacier name to area, queried with ``.get``;
            names without an entry are treated as having area 0.

    Returns:
        A new list; the input is left untouched and names with equal area
        keep their relative input order.
    """

    def _area_of(name):
        return gl_area.get(name, 0)

    ordered = list(glacier_list)
    ordered.sort(key=_area_of)
    return ordered


# Reorder the glaciers by ascending area (sort_by_area sorts with reverse=False).
glacier_list = sort_by_area(glacier_list, gl_area)
# Report how many glaciers will be processed and in which order.
print('Number of glaciers:', len(glacier_list))
print('Glaciers:', glacier_list)

In [None]:
# Glacier-ID lookup table: read the CSV, strip whitespace from the column
# names, then sort by and index on 'short_name'.
rgi_df = (
    pd.read_csv(cfg.dataPath + path_glacier_ids, sep=',')
    .rename(columns=lambda col: col.strip())
    .sort_values(by='short_name')
    .set_index('short_name')
)
rgi_df.head(2)

In [None]:
# Initialise the OGGM glacier directories for RGI region 11, RGI version 6.
# `base_url` points at the OGGM v1.6 L3-L5 elevation-band directories (2023.1, W5E5_w_data).
# NOTE(review): `task_list=None` presumably selects the helper's default tasks — confirm in scripts.
gdirs, rgidf = initialize_oggm_glacier_directories(
    cfg,
    rgi_region="11",
    rgi_version="6",
    base_url=
    "https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L3-L5_files/2023.1/elev_bands/W5E5_w_data/",
    log_level='WARNING',
    task_list=None,
)
# Export the grids of the glacier directories returned above.
export_oggm_grids(cfg, gdirs)

## Compute glacier grids:
Add topographical and climate variables, then convert the grids to monthly resolution.

#### Only for years with geodetic MB:

In [None]:
import traceback

# Builds, for every glacier in `glacier_list`, a gridded dataset with climate
# features and potential clear-sky radiation, converts it to monthly
# resolution year by year, and writes one parquet file per glacier and year
# to <dataPath>/<path_glacier_grid_rgi>/<glacier_name>/.
# Errors are reported and skipped per glacier and per year, so one failure
# never aborts the whole run.

# Set this flag to enable/disable the script execution
RUN = True

# Manual RGI ID overrides
RGI_OVERRIDES = {'morteratsch': 'RGI60-11.01946', 'pers': 'RGI60-11.01946'}

if RUN:
    # Start from an empty output folder. Best effort: a failure is reported,
    # and processing continues with whatever is on disk.
    try:
        emptyfolder(cfg.dataPath+path_glacier_grid_rgi)
    except Exception as e:
        print(f"Error clearing folder '{cfg.dataPath+path_glacier_grid_rgi}': {e}")

    # Paths to climate data (identical for every glacier, so built once)
    era5_climate_data = os.path.join(cfg.dataPath, path_ERA5_raw,
                                     'era5_monthly_averaged_data.nc')
    geopotential_data = os.path.join(cfg.dataPath, path_ERA5_raw,
                                     'era5_geopotential_pressure.nc')

    for glacier_name in tqdm(glacier_list, desc='Processing glaciers'):
        try:
            folder_path = os.path.join(cfg.dataPath, path_glacier_grid_rgi, glacier_name)
            os.makedirs(folder_path, exist_ok=True)  # Ensure folder exists
            print(f'\n{"-" * 35}\nProcessing: {glacier_name}')

            # Decide up front whether the glacier can be processed at all, so
            # that the expensive climate/radiation steps are not run for a
            # glacier that would be skipped anyway.
            if (glacier_name not in years_start_per_gl
                    or glacier_name not in years_end_per_gl):
                print(f"Skipping {glacier_name}: missing start/end years")
                continue

            # Longest geodetic period: earliest start year to latest end year.
            # min/max make this independent of the row order of the geodetic
            # table; int() gives range() plain Python integers.
            longest_period = (int(min(years_start_per_gl[glacier_name])),
                              int(max(years_end_per_gl[glacier_name])))

            # Retrieve RGI ID with manual overrides if applicable
            if glacier_name == 'clariden':
                rgi_id_v6 = rgi_df.at['claridenU', 'rgi_id.v6']
            else:
                rgi_id_v6 = rgi_df.at[glacier_name, 'rgi_id.v6']
            rgi_gl = RGI_OVERRIDES.get(glacier_name, rgi_id_v6)

            # Load stake data for the glacier
            data_gl = data_glamos[data_glamos.RGIId == rgi_gl]
            if data_gl.empty:
                raise ValueError(
                    f"No stake data found for glacier '{glacier_name}' (RGI ID: {rgi_gl})"
                )

            dataset_gl = mbm.data_processing.Dataset(cfg=cfg,
                                     data=data_gl,
                                     region_name='CH',
                                     region_id=11,
                                     data_path=cfg.dataPath+path_PMB_GLAMOS_csv)

            # Create gridded glacier dataset from OGGM
            df_grid = dataset_gl.create_glacier_grid_RGI(cfg.dataPath+path_OGGM)
            if df_grid.empty:
                raise ValueError(
                    f"Failed to generate gridded dataset for glacier '{glacier_name}'"
                )

            df_grid["GLACIER"] = glacier_name
            df_grid.reset_index(drop=True, inplace=True)

            dataset_grid = mbm.data_processing.Dataset(cfg=cfg,
                                       data=df_grid,
                                       region_name='CH',
                                       region_id=11,
                                       data_path=cfg.dataPath+path_PMB_GLAMOS_csv)

            # Add climate data
            print('Adding climate data...')
            dataset_grid.get_climate_features(
                climate_data=era5_climate_data,
                geopotential_data=geopotential_data,
                change_units=True,
                smoothing_vois={
                    'vois_climate': vois_climate,
                    'vois_other': ['ALTITUDE_CLIMATE']
                })

            # Add potential clear sky radiation
            print('Adding potential clear sky radiation...')
            dataset_grid.get_potential_rad(os.path.join(cfg.dataPath, path_pcsr, 'zarr/'))

            # Process each year separately (end year included)
            for year in range(longest_period[0], longest_period[1] + 1):
                try:
                    print(
                        f'Converting to monthly time resolution for {year}...')
                    df_grid_y = dataset_grid.data[dataset_grid.data.YEAR ==
                                                  year].copy()
                    # Add GLWD_ID: hash of "<GLACIER>_<YEAR>", stored as string
                    df_grid_y['GLWD_ID'] = df_grid_y.apply(
                        lambda x: mbm.data_processing.utils.get_hash(f"{x.GLACIER}_{x.YEAR}"), axis=1)
                    df_grid_y['GLWD_ID'] = df_grid_y['GLWD_ID'].astype(str)

                    dataset_grid_yearly = mbm.data_processing.Dataset(
                        cfg=cfg,
                        data=df_grid_y,
                        region_name='CH',
                        region_id=11,
                        data_path=cfg.dataPath+path_PMB_GLAMOS_csv)

                    # Convert to monthly time resolution
                    dataset_grid_yearly.convert_to_monthly(
                        meta_data_columns=cfg.metaData,
                        vois_climate=vois_climate + ['pcsr'],
                        vois_topographical=voi_topographical,
                    )

                    # Ensure 'pcsr' column exists before saving
                    if 'pcsr' not in dataset_grid_yearly.data.columns:
                        raise ValueError(
                            f"'pcsr' column not found in dataset for glacier '{glacier_name}' in year {year}"
                        )

                    # Save the dataset for the specific year
                    save_path = os.path.join(
                        folder_path, f"{glacier_name}_grid_{year}.parquet")
                    print(f'Saving gridded dataset to: {save_path}')
                    dataset_grid_yearly.data.to_parquet(save_path,
                                                        engine="pyarrow",
                                                        compression="snappy")

                except Exception as year_error:
                    # Report the failure and carry on with the next year.
                    print(
                        f"⚠️ Error processing glacier '{glacier_name}' for year {year}: {year_error}"
                    )
                    traceback.print_exc()

        except Exception as glacier_error:
            # Report the failure and carry on with the next glacier.
            print(
                f"Error processing glacier '{glacier_name}': {glacier_error}")
            traceback.print_exc()

## Check grids:

In [None]:
# Glacier to inspect. Its RGI ID comes from RGI_OVERRIDES when present, otherwise from rgi_df.
# The rgi_df lookup is evaluated in either case, so the name must exist in rgi_df.
glacier_name = 'schwarzbach'
rgi_gl = RGI_OVERRIDES.get(glacier_name, rgi_df.at[glacier_name, 'rgi_id.v6'])

# Load stake data for that glacier
data_gl = data_glamos[data_glamos.RGIId == rgi_gl]
dataset_gl = mbm.data_processing.Dataset(cfg=cfg,
                         data=data_gl,
                         region_name='CH',
                         region_id=11,
                         data_path=cfg.dataPath+path_PMB_GLAMOS_csv)

# Glacier mask from the OGGM data; only `ds` is used below.
ds, glacier_indices, gdir = dataset_gl.get_glacier_mask(cfg.dataPath+path_OGGM)
# Plot glacier attributes of oggm:
# NOTE(review): `cm` is not imported in the visible cells; presumably it comes from one of the star imports — confirm.
plotGlAttr(ds, cmap=cm.devon)