## Setting up:

In [None]:
import pandas as pd
import os
import warnings
from tqdm.notebook import tqdm
import re
import massbalancemachine as mbm
import geopandas as gpd
from shapely.geometry import Polygon, LineString, Point
from scipy.spatial.distance import cdist
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import GroupKFold, KFold, train_test_split, GroupShuffleSplit

import cupy as cp
import matplotlib.pyplot as plt
import seaborn as sns
from cmcrameri import cm
from oggm import cfg, utils, workflow, tasks
import logging
import geopandas as gpd
import xarray as xr

import config
from scripts.helpers import *
from scripts.glamos_preprocess import *
from scripts.plots import *

warnings.filterwarnings('ignore')
%load_ext autoreload
%autoreload 2

In [None]:
# Seed the run with the project-wide seed (seed_all is a project helper;
# presumably it seeds every RNG in use -- confirm in scripts.helpers).
seed_all(config.SEED)

# If the GPU has run out of memory, uncomment the next line:
# free_up_cuda()

# Directory used as OGGM's working directory further down in this notebook.
custom_working_dir = '../../../data/OGGM/'

# Apply the project's matplotlib style sheet to all figures.
path_style_sheet = 'scripts/example.mplstyle'
plt.style.use(path_style_sheet)

In [None]:
# Glacier lookup table: one row per glacier, indexed by its RGI v6 id and
# ordered by the glacier's short name.
path_rgi = '../../../data/GLAMOS/CH_glacier_ids_long.csv'
rgi_df = (
    pd.read_csv(path_rgi, sep=',')
    # Strip whitespace around the header names so that columns such as
    # 'short_name' can be addressed reliably.
    .rename(columns=lambda col: col.strip())
    .sort_values(by='short_name')
    .set_index('rgi_id.v6')
)
rgi_df.head(2)

In [None]:
# Load the stake measurements (CSV named after the WGMS dataset layout).
data_glamos = pd.read_csv(path_PMB_GLAMOS_csv + 'CH_wgms_dataset.csv')

# Distinct RGI ids and glacier names present in the measurements.
rgis = data_glamos['RGIId'].unique()
all_gl = list(data_glamos['GLACIER'].unique())

data_glamos.head(2)

In [None]:
# Initialise OGGM, fetch the pre-processed glacier directories and keep only
# the RGI ids of the stake dataset for which OGGM provides a directory.
cfg.initialize(logging_level="WARNING")
cfg.PARAMS["border"] = 10
cfg.PARAMS["use_multiprocessing"] = True
cfg.PARAMS["continue_on_error"] = True
cfg.PATHS["working_dir"] = custom_working_dir

# Glacier outlines of RGI region 11, version 6.
path = utils.get_rgi_region_file(region="11", version="6")
rgidf = gpd.read_file(path)

# We use the directories with the shop data in it: "W5E5_w_data"
base_url = "https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L3-L5_files/2023.1/elev_bands/W5E5_w_data/"
gdirs = workflow.init_glacier_directories(
    rgidf,
    from_prepro_level=3,
    prepro_base_url=base_url,
    prepro_border=10,
    reset=True,
    force=True,
)
oggm_rgis = [gdir.rgi_id for gdir in gdirs]

# A set gives O(1) membership tests instead of scanning the full list of
# glacier directories once per RGI id; the order of `rgis` is preserved.
oggm_rgi_set = set(oggm_rgis)
rgis_in_oggm = [rgi for rgi in rgis if rgi in oggm_rgi_set]
print('Number of rgis: ', len(rgis_in_oggm))
rgis = rgis_in_oggm

## Compute grids for all glaciers:

In [None]:
# Sorted names of the glaciers that have an 'xr_direct_<name>.nc' file in
# path_direct_save. Directory entries that do not follow this naming scheme
# (hidden files, checkpoints, ...) are skipped: calling .group(1) on the
# None returned by a failed search would raise AttributeError.
direct_file_pattern = re.compile(r'xr_direct_(.*?)\.nc')
glDirect = sorted(
    match.group(1)
    for match in map(direct_file_pattern.search, os.listdir(path_direct_save))
    if match is not None
)
glDirect

In [None]:
# Build and save one monthly gridded dataset per glacier that has not been
# processed yet.

# Set EMPTY = True to wipe the output folder and recompute every glacier.
EMPTY = False

# Suffix of the files written at the end of the loop below; also used to
# recognise the glaciers that were already processed.
grid_file_suffix = '_grid.csv'

if EMPTY:
    emptyfolder(path_glacier_grid)
    rest_rgis = rgis
    rest_gl = glDirect
else:
    # Recover the glacier names as the exact inverse of the save format
    # '<glacier>_grid.csv'. Stripping the known suffix (instead of splitting
    # on the first underscore) keeps names containing '_' intact and ignores
    # unrelated entries in the folder.
    already_processed = [
        file[:-len(grid_file_suffix)]
        for file in os.listdir(path_glacier_grid)
        if file.endswith(grid_file_suffix)
    ]
    already_processed_rgis = [
        rgi_df[rgi_df.short_name == gl].index.values[0]
        for gl in already_processed
    ]
    # NOTE(review): the two Diff calls pass (processed, all) and
    # (all, processed) respectively -- if Diff is not symmetric one of them
    # is probably reversed; confirm against scripts.helpers. rest_rgis is
    # not used in this cell.
    rest_rgis = Diff(already_processed_rgis, rgis)
    rest_gl = Diff(glDirect, already_processed)
    print(rest_gl)

# Glaciers whose stake data is stored under a fixed RGI id rather than the
# id listed for them in rgi_df.
rgi_overrides = {
    'morteratsch': 'RGI60-11.01946',
    'pers': 'RGI60-11.01946',
}

# Loop-invariant settings, defined once instead of on every iteration.
vois_climate = ['t2m', 'tp', 'slhf', 'sshf', 'ssrd', 'fal', 'str']
voi_topographical = [
    'aspect', 'slope', 'dis_from_border', 'topo', 'hugonnet_dhdt'
]
era5_climate_data = path_ERA5_raw + 'era5_monthly_averaged_data.nc'
geopotential_data = path_ERA5_raw + 'era5_geopotential_pressure.nc'

for glacierName in tqdm(rest_gl, desc='Processing glaciers'):
    print('\n-----------------------------------')
    print(glacierName)

    # Resolve the RGI id. The override is checked first so that no table
    # lookup (which raises IndexError when the name is missing) is done for
    # glaciers whose id is fixed anyway.
    if glacierName in rgi_overrides:
        rgi_gl = rgi_overrides[glacierName]
    else:
        rgi_gl = rgi_df[rgi_df.short_name == glacierName].index.values[0]

    # check that rgi_gl is a str (isinstance also accepts str subclasses
    # such as numpy.str_)
    if not isinstance(rgi_gl, str):
        raise ValueError('RGIId is not a string')

    # Load stake data for that glacier
    data_gl = data_glamos[data_glamos.RGIId == rgi_gl]
    dataset_gl = mbm.Dataset(data=data_gl,
                             region_name='CH',
                             data_path=path_PMB_GLAMOS_csv)

    # Get gridded glacier data from OGGM
    df_grid = dataset_gl.create_glacier_grid(custom_working_dir)

    # Add metadata that is not in WGMS dataset
    df_grid["PERIOD"] = "annual"
    df_grid['GLACIER'] = glacierName

    dataset_grid = mbm.Dataset(data=df_grid,
                               region_name='CH',
                               data_path=path_PMB_GLAMOS_csv)

    # Add climate data:
    dataset_grid.get_climate_features(climate_data=era5_climate_data,
                                      geopotential_data=geopotential_data,
                                      change_units=True)

    # Add potential clear sky radiation:
    print('Adding potential clear sky radiation')
    dataset_grid.get_potential_rad(path_direct_save)

    # For each record, convert to a monthly time resolution.
    # Fresh lists are passed so the shared definitions above cannot be
    # altered by the callee between iterations.
    print('Converting to monthly time resolution')
    dataset_grid.convert_to_monthly(meta_data_columns=config.META_DATA,
                                    vois_climate=vois_climate + ['pcsr'],
                                    vois_topographical=list(voi_topographical))
    assert 'pcsr' in dataset_grid.data.columns

    # Save gridded dataset:
    print('Saving gridded dataset')
    dataset_grid.data.to_csv(
        path_glacier_grid + f'{glacierName}{grid_file_suffix}',
        index=False)

In [None]:
# Display the folder the per-glacier '<glacier>_grid.csv' files are written to.
path_glacier_grid