# Glacier grids from SGI or GLAMOS:

Creates monthly grid files for the MBM to make PMB predictions over the whole glacier grid. The files come from the SGI grid and use OGGM topography. Computing takes a long time because of the conversion to monthly format.
## Setting up:

In [None]:
import pandas as pd
import os
import warnings
from tqdm.notebook import tqdm
import re
import massbalancemachine as mbm
import geopandas as gpd
import matplotlib.pyplot as plt
import geopandas as gpd
import geopandas as gpd

# scripts
from scripts.helpers import *
from scripts.glamos_preprocess import *
from scripts.plots import *
from scripts.geodata import *
from scripts.xgb_helpers import *
from scripts.config_CH import *

# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas/xarray deprecation and
# chained-assignment warnings raised by the cells below.
warnings.filterwarnings('ignore')
# IPython magics (only valid inside a notebook/IPython session): reload
# imported modules before each cell runs so edits to the local `scripts`
# package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Configuration object used below for the random seed (`cfg.seed`), the
# metadata columns (`cfg.metaData`) and every `mbm.Dataset` that is built.
cfg = mbm.SwitzerlandConfig()

In [None]:
# Seed with the value stored in the configuration.
# NOTE(review): `seed_all` comes from the `scripts` star imports; which
# random number generators it seeds is not visible here - confirm.
seed_all(cfg.seed)
# Helper from the `scripts` star imports; presumably releases cached GPU
# memory so the session does not run out of memory - confirm.
free_up_cuda()  # in case no memory

# Plot styles: apply the project's matplotlib style sheet to all figures.
path_style_sheet = 'scripts/example.mplstyle'
plt.style.use(path_style_sheet)

# Climate variable columns handed to the monthly conversion further down.
vois_climate = 't2m tp slhf sshf ssrd fal str u10 v10'.split()

# Topographical variable columns handed to the monthly conversion further down.
voi_topographical = [
    "aspect", "slope", "hugonnet_dhdt",
    "consensus_ice_thickness", "millan_v", "topo",
]

In [None]:
# Entries of the GLAMOS 'lv95' topography folder.
# NOTE(review): the cells below treat these entries as glacier short names.
_glamos_lv95_dir = os.path.join(path_GLAMOS_topo, 'lv95/')
glaciers_glamos_dem = os.listdir(_glamos_lv95_dir)

# Glacier outlines: the SGI 2016 inventory shapefile and the RGI outlines.
_sgi_shapefile = os.path.join(path_SGI_topo, 'inventory_sgi2016_r2020',
                              'SGI_2016_glaciers_copy.shp')
glacier_outline_sgi = gpd.read_file(_sgi_shapefile)
glacier_outline_rgi = gpd.read_file(path_rgi_outlines)

# Glacier areas (nothing is sorted here); the 'claridenL' entry is also
# made available under the alias 'clariden'.
gl_area = get_gl_area()
gl_area['clariden'] = gl_area['claridenL']

In [None]:
# Load the glacier ID table (short name <-> RGI / SGI IDs) and strip stray
# whitespace from its column names.
rgi_df = pd.read_csv(path_glacier_ids,
                     sep=',').rename(columns=lambda x: x.strip())

# Sort by short name. The round trip through the index leaves 'short_name'
# as the first column with a fresh 0..n-1 index on the sorted rows.
rgi_df.sort_values(by='short_name', inplace=True)
rgi_df.set_index('short_name', inplace=True)
rgi_df.reset_index(inplace=True)

# Load geodetic mass balance data
geodeticMB = pd.read_csv(f"{path_geodetic_MB_glamos}dV_DOI2024_allcomb.csv")

# SGI IDs of all glaciers that have an entry in the GLAMOS DEM folder
sgi_gl = rgi_df.loc[rgi_df.short_name.isin(glaciers_glamos_dem),
                    'sgi-id'].unique()

# 'claridenL' is added explicitly on top of the DEM-folder glaciers
clariden_L_sgi_id = rgi_df[rgi_df.short_name == 'claridenL']['sgi-id'].unique()
sgi_gl = np.concatenate((sgi_gl, clariden_L_sgi_id))

# Keep only the geodetic records of the relevant SGI IDs. The explicit
# copy makes the result an independent frame, so the column assignments
# below act on `geodeticMB` itself and not on a view of the full table.
geodeticMB = geodeticMB[geodeticMB['SGI-ID'].isin(sgi_gl)].copy()

# Mapping SGI ID -> glacier short name (if an SGI ID has several short
# names, the last one in sorted order wins in the dictionary).
sgi_to_glacier_name = rgi_df[[
    'sgi-id', 'short_name'
]].drop_duplicates().set_index('sgi-id')['short_name'].to_dict()

# Add glacier names based on SGI-ID mapping
geodeticMB['glacier_name'] = geodeticMB['SGI-ID'].map(sgi_to_glacier_name)

# Standardize naming convention: 'claridenU' is stored as 'clariden'.
# Assign the result back instead of calling replace(inplace=True) on the
# column: the chained in-place form modifies a temporary object under pandas
# Copy-on-Write and would silently leave the frame unchanged.
geodeticMB['glacier_name'] = geodeticMB['glacier_name'].replace(
    {'claridenU': 'clariden'})

# Keep only glaciers that have an entry in the GLAMOS DEM folder
geodeticMB = geodeticMB[geodeticMB.glacier_name.isin(glaciers_glamos_dem)]

# Unique start and end years of the geodetic periods per glacier.
# NOTE(review): the two column names are spelled differently ('Astart' vs
# 'A_end') - confirm both against the CSV header.
years_start_per_gl = geodeticMB.groupby(
    'glacier_name')['Astart'].unique().apply(list).to_dict()
years_end_per_gl = geodeticMB.groupby('glacier_name')['A_end'].unique().apply(
    list).to_dict()

# Glaciers for which geodetic mass balance periods are available
glacier_list_geod = years_start_per_gl.keys()
# Last expression of the cell: displayed as the notebook cell output
years_start_per_gl, years_end_per_gl

## Regional predictions (all CH glaciers)

In [None]:
# SGI IDs taken from the file names in the SGI 'aspect' folder: the text
# before the literal '.grid' is split on '_' and the second piece is the ID.
# Plain string splitting is used on purpose: as a regular expression the
# pattern '.grid' would match ANY character followed by 'grid'.
sgi_list = [
    f.split('.grid')[0].split('_')[1]
    for f in os.listdir(os.path.join(path_SGI_topo, 'aspect'))
]

# Unique SGI IDs, sorted so the processing order is the same on every run
# (plain set ordering of strings changes between Python sessions).
sgi_list = sorted(set(sgi_list))
print('Number of unique SGI IDs:', len(sgi_list))

glaciers_glamos_dems = os.listdir(os.path.join(path_GLAMOS_topo, 'lv95'))

# Switch: set to True to (re)build the masked grids.
RUN = False
if RUN:
    # Create SGI topographical masks
    # Note: This function will take a while to run
    # It creates a mask for each glacier in the SGI list
    # and saves them in the specified directory.
    create_sgi_topo_masks(sgi_list,
                          type='sgi_id',
                          path_save=os.path.join(path_SGI_topo,
                                                 'xr_masked_grids_sgi/'))

In [None]:
# Year for which the regional grids are built
year = 2016
# Output folder of the monthly grids; derived from `year` so the folder
# name and the processed year cannot drift apart.
path_save_monthly = f'../../../data/GLAMOS/topo/gridded_topo_inputs/SGI_regional_preds/{year}/'

# Switch: set to False to skip the (long) grid generation.
RUN = True
if RUN:
    # Create the output folder up front: otherwise every save at the end of
    # the expensive per-glacier pipeline fails when the folder is missing.
    os.makedirs(path_save_monthly, exist_ok=True)

    # Inputs that are identical for every glacier (built once, not per loop)
    path_save = os.path.join(path_SGI_topo, 'xr_masked_grids_sgi/')
    era5_climate_data = os.path.join(path_ERA5_raw,
                                     'era5_monthly_averaged_data.nc')
    geopotential_data = os.path.join(path_ERA5_raw,
                                     'era5_geopotential_pressure.nc')
    # No RGI ID is passed to the grid creation; RGI IDs are attached later
    # through the intersection with the RGI outlines.
    rgi_id = None
    # OGGM variables of interest
    voi = ["hugonnet_dhdt", "consensus_ice_thickness", "millan_v"]

    # Each stage is best-effort: a failure is reported and the glacier is
    # skipped, so one bad glacier does not abort the whole regional run.
    for sgi_id in tqdm(sgi_list, desc='Processing glaciers'):
        print(f"\n-----------------------------------\nProcessing {sgi_id}")

        # Load SGI masked grid (previously resampled)
        try:
            path = os.path.join(path_save, f"{sgi_id}.zarr")
            ds_coarsened = xr.open_dataset(path)
        except Exception as e:
            print(f"Error loading dataset for {sgi_id}: {e}")
            continue

        # Create glacier grid
        try:
            df_grid = create_glacier_grid_SGI(sgi_id, year, rgi_id,
                                              ds_coarsened)
            df_grid.reset_index(drop=True, inplace=True)
            dataset_grid = mbm.Dataset(cfg=cfg,
                                       data=df_grid,
                                       region_name='CH',
                                       data_path=path_PMB_GLAMOS_csv)
        except Exception as e:
            print(f"Error creating glacier grid for {sgi_id} in {year}: {e}")
            continue

        # Add climate data
        try:
            dataset_grid.get_climate_features(
                climate_data=era5_climate_data,
                geopotential_data=geopotential_data,
                change_units=True)
        except Exception as e:
            print(f"Error adding climate data for {sgi_id} in {year}: {e}")
            continue

        # Add OGGM topographic data
        try:
            df_y_gl = dataset_grid.data
            # Keep the previous RGIId column under another name; a new
            # 'RGIId' column is derived from the RGI outlines below.
            df_y_gl.rename(columns={'RGIId': 'RGIId_old'}, inplace=True)

            # Add RGI IDs for OGGM data through intersection with shapefiles
            df_y_gl = mbm.data_processing.utils.get_rgi(
                data=df_y_gl, glacier_outlines=glacier_outline_rgi)

            # Drop points without RGI ID (outside of RGI outlines)
            df_y_gl = df_y_gl.dropna(subset=['RGIId'])

            # Nothing left to process: skip with an explicit message instead
            # of failing later with a misleading OGGM error.
            if df_y_gl.empty:
                print(
                    f"No grid points inside RGI outlines for {sgi_id} in {year}; skipping"
                )
                continue

            df_y_gl = add_OGGM_features(df_y_gl, voi, path_OGGM)

            # Add GLWD_ID: hash of "<GLACIER>_<YEAR>" per row, stored as str
            df_y_gl['GLWD_ID'] = df_y_gl.apply(
                lambda x: get_hash(f"{x.GLACIER}_{x.YEAR}"), axis=1)
            df_y_gl['GLWD_ID'] = df_y_gl['GLWD_ID'].astype(str)

            # Rebuild the dataset around the enriched table
            dataset_grid = mbm.Dataset(cfg=cfg,
                                       data=df_y_gl,
                                       region_name='CH',
                                       data_path=path_PMB_GLAMOS_csv)
        except Exception as e:
            print(f"Error adding OGGM data for {sgi_id} in {year}: {e}")
            continue

        # Convert to monthly time resolution
        try:
            dataset_grid.convert_to_monthly(
                meta_data_columns=cfg.metaData,
                vois_climate=vois_climate,
                vois_topographical=voi_topographical)
        except Exception as e:
            print(
                f"Error converting to monthly resolution for {sgi_id} in {year}: {e}"
            )
            continue

        # Save gridded dataset as one parquet file per glacier and year
        save_path = os.path.join(path_save_monthly,
                                 f"{sgi_id}_grid_{year}.parquet")
        try:
            dataset_grid.data.to_parquet(save_path,
                                         engine="pyarrow",
                                         compression="snappy")
        except Exception as e:
            print(f"Error saving dataset for {sgi_id} in {year}: {e}")

In [None]:
# Visual check for one glacier: scatter the three OGGM variables of its
# saved monthly grid, restricted to the rows where MONTHS is 'sep'.
sgi_id = 'A50i-16'
fig, axs = plt.subplots(1, 3, figsize=(15, 5))

grid_file = os.path.join(path_save_monthly, f"{sgi_id}_grid_{year}.parquet")
df = pd.read_parquet(grid_file)
df = df[df.MONTHS == 'sep']

voi = ['hugonnet_dhdt', 'consensus_ice_thickness', 'millan_v']
# Settings shared by all panels; only the hue variable and the axis change.
scatter_kwargs = dict(x='POINT_LON',
                      y='POINT_LAT',
                      s=5,
                      alpha=0.5,
                      palette='twilight_shifted')
for ax, var in zip(axs, voi):
    sns.scatterplot(df, hue=var, ax=ax, **scatter_kwargs)