# Glacier grids from SGI or GLAMOS:

Creates monthly grid files for the MBM to make PMB predictions over the whole glacier grid. The files come from the SGI grid and use OGGM topography. Computing takes a long time because of the conversion to monthly format.
## Setting up:

In [None]:
import pandas as pd
import os
import warnings
from tqdm.notebook import tqdm
import re
import massbalancemachine as mbm
import geopandas as gpd
import matplotlib.pyplot as plt
import geopandas as gpd
import geopandas as gpd
import csv

# scripts
from scripts.helpers import *
from scripts.glamos_preprocess import *
from scripts.plots import *
from scripts.geodata import *
from scripts.xgb_helpers import *
from scripts.config_CH import *

warnings.filterwarnings('ignore')
%load_ext autoreload
%autoreload 2

# Shared configuration object; later cells read cfg.seed, cfg.metaData,
# cfg.numJobs and cfg.fieldsNotFeatures from it.
cfg = mbm.SwitzerlandConfig()

In [None]:
# Both helpers come from the star-imported scripts modules; presumably they
# seed the random generators and release GPU memory (TODO confirm).
seed_all(cfg.seed)
free_up_cuda()  # in case no memory

# Plot styles:
path_style_sheet = 'scripts/example.mplstyle'
plt.style.use(path_style_sheet)

# Climate columns: passed as `vois_climate` to the climate smoothing and to
# the monthly conversion in the grid-creation cell.
vois_climate = [
    't2m', 'tp', 'slhf', 'sshf', 'ssrd', 'fal', 'str', 'u10', 'v10'
]
# Topographical columns: passed as `vois_topographical` to
# `convert_to_monthly` in the grid-creation cell.
# NOTE(review): this name is `voi_topographical` (no "s"); the model cells
# use a separate `vois_topographical` list with different entries.
voi_topographical = [
    "aspect",
    "slope",
    "hugonnet_dhdt",
    "consensus_ice_thickness",
    "millan_v",
    "topo",
]

In [None]:
# Entries of the GLAMOS 'lv95' topography folder; later cells match them
# against glacier short names.
glaciers_glamos_dem = os.listdir(os.path.join(path_GLAMOS_topo, 'lv95/'))

# Glacier outlines:
glacier_outline_sgi = gpd.read_file(
    os.path.join(path_SGI_topo, 'inventory_sgi2016_r2020',
                 'SGI_2016_glaciers_copy.shp'))  # Load the shapefile
glacier_outline_rgi = gpd.read_file(path_rgi_outlines)

# Glacier areas, looked up by glacier name (nothing is sorted here)
gl_area = get_gl_area()
# Expose the 'claridenL' area under the key 'clariden' as well
gl_area['clariden'] = gl_area['claridenL']

In [None]:
# Load the glacier ID table; header names are stripped of surrounding
# whitespace so columns such as 'short_name' and 'sgi-id' can be addressed.
rgi_df = pd.read_csv(path_glacier_ids,
                     sep=',').rename(columns=lambda x: x.strip())

# Sort by short name. The set_index/reset_index round trip leaves
# 'short_name' as the first column on a fresh 0..n-1 index.
rgi_df = (rgi_df.sort_values(by='short_name')
          .set_index('short_name')
          .reset_index())

# Load geodetic mass balance data
geodeticMB = pd.read_csv(f"{path_geodetic_MB_glamos}dV_DOI2024_allcomb.csv")

# SGI IDs of the glaciers that have a GLAMOS DEM folder
sgi_gl = rgi_df.loc[rgi_df.short_name.isin(glaciers_glamos_dem),
                    'sgi-id'].unique()

# add clariden
clariden_L_sgi_id = rgi_df.loc[rgi_df.short_name == 'claridenL',
                               'sgi-id'].unique()

# add to sgi_gl
sgi_gl = np.concatenate((sgi_gl, clariden_L_sgi_id))

# Filter geodeticMB for relevant SGI IDs. The explicit copy makes the column
# assignments below write to an independent frame instead of a slice of the
# CSV frame.
geodeticMB = geodeticMB[geodeticMB['SGI-ID'].isin(sgi_gl)].copy()

# Create a mapping dictionary for glacier names
sgi_to_glacier_name = rgi_df[[
    'sgi-id', 'short_name'
]].drop_duplicates().set_index('sgi-id')['short_name'].to_dict()

# Add glacier names based on SGI-ID mapping
geodeticMB['glacier_name'] = geodeticMB['SGI-ID'].map(sgi_to_glacier_name)

# Standardize naming convention. The result is assigned back because
# `df[col].replace(..., inplace=True)` is a chained in-place call that does
# not update the parent frame under pandas copy-on-write.
geodeticMB['glacier_name'] = geodeticMB['glacier_name'].replace(
    {'claridenU': 'clariden'})

# filter to glacier_list
geodeticMB = geodeticMB[geodeticMB.glacier_name.isin(glaciers_glamos_dem)]

# Extract unique start and end years per glacier
years_start_per_gl = geodeticMB.groupby(
    'glacier_name')['Astart'].unique().apply(list).to_dict()
years_end_per_gl = geodeticMB.groupby('glacier_name')['A_end'].unique().apply(
    list).to_dict()

# Glaciers that have at least one geodetic period (a live dict-keys view)
glacier_list_geod = years_start_per_gl.keys()
years_start_per_gl, years_end_per_gl

## Regional predictions (all CH glaciers)

In [None]:
# Derive the SGI IDs from the file names in the 'aspect' folder: the ID is
# the second '_'-separated token of the part before '.grid'.
# A literal str.split is used because the previous regex pattern '.grid' had
# an unescaped dot that matched any character followed by 'grid'.
# The set removes duplicates; sorting makes the processing order
# reproducible between runs.
sgi_list = sorted({
    f.split('.grid')[0].split('_')[1]
    for f in os.listdir(os.path.join(path_SGI_topo, 'aspect'))
})
print('Number of unique SGI IDs:', len(sgi_list))

glaciers_glamos_dems = os.listdir(os.path.join(path_GLAMOS_topo, 'lv95'))

RUN = False
if RUN:
    # Create SGI topographical masks
    # Note: This function will take a while to run
    # It creates a mask for each glacier in the SGI list
    # and saves them in the specified directory.
    create_sgi_topo_masks(sgi_list,
                          type='sgi_id',
                          path_save=os.path.join(path_SGI_topo,
                                                 'xr_masked_grids_sgi/'))

In [None]:
# Visual check: plot the masked aspect of one stored glacier grid
path = os.path.join(path_SGI_topo, 'xr_masked_grids_sgi/')
xr.open_dataset(path + 'A10g-02.zarr')['masked_aspect'].plot()

## Data exploration:

In [None]:
# Stake measurements and glacier areas
data_glamos = pd.read_csv(path_PMB_GLAMOS_csv + 'CH_wgms_dataset_all.csv')
gl_area = get_gl_area()

# Areas of the glaciers that appear in the stake dataset and have a known area
areas_train_set = [
    gl_area[gl] for gl in data_glamos['GLACIER'].unique() if gl in gl_area
]

# histogram
plt.hist(areas_train_set, bins=50)
plt.xlabel('Area (km2)')
plt.title('Histogram of glacier areas with stakes')

In [None]:
# Read the SGI 2016 glacier inventory
shapefile_path = os.path.join(path_SGI_topo, 'inventory_sgi2016_r2020',
                              'SGI_2016_glaciers.shp')
gdf_shapefiles = gpd.read_file(shapefile_path)

# Left panel: histogram with KDE; right panel: boxplot.
# The geometry area is divided by 10**6 and labelled as km2.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
sns.histplot(gdf_shapefiles.area / (10**6),
             bins=50,
             kde=True,
             color='blue',
             ax=axs[0])
sns.boxplot(x=gdf_shapefiles.area / (10**6), ax=axs[1], color='blue')

# Same x label on both panels
axs[0].set_xlabel('Area (km2)')
axs[1].set_xlabel('Area (km2)')

plt.suptitle('Histogram and Boxplot of all glaciers in SGI 2016')

## Create grids:

### 2016 - 2022:

In [None]:
# === Set up logging ===
# NOTE: the log file and the CSV progress file are created every time this
# cell runs, even when RUN is False.
log_filename = f"process_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
logging.basicConfig(filename=log_filename,
                    level=logging.INFO,
                    format='%(asctime)s [%(levelname)s] %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')

# === Set up CSV progress log ===
csv_log_path = f"swiss_wide_progress_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
with open(csv_log_path, mode='w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["sgi_id", "year", "status", "message"])


def _log_progress(sgi_id, year, status, message=""):
    """Append one (sgi_id, year, status, message) row to the CSV progress log."""
    with open(csv_log_path, 'a', newline='') as f:
        csv.writer(f).writerow([sgi_id, year, status, message])


def _report_failure(sgi_id,
                    year,
                    msg,
                    detail,
                    status="error",
                    level=logging.ERROR):
    """Log `msg` at `level`, print it, and record `detail` in the CSV log."""
    logging.log(level, msg)
    print(msg)
    _log_progress(sgi_id, year, status, detail)


years = range(2016, 2023)
RUN = False

if RUN:
    # Loop invariants: input locations and the OGGM variables to attach
    path_save = os.path.join(path_SGI_topo, 'xr_masked_grids_sgi/')
    era5_climate_data = os.path.join(path_ERA5_raw,
                                     'era5_monthly_averaged_data.nc')
    geopotential_data = os.path.join(path_ERA5_raw,
                                     'era5_geopotential_pressure.nc')
    voi = ["hugonnet_dhdt", "consensus_ice_thickness", "millan_v"]

    for year in years:
        # One output folder per year; an existing folder is emptied first
        path_save_monthly = f'../../../data/GLAMOS/topo/gridded_topo_inputs/SGI_regional_preds/{year}/'
        if not os.path.exists(path_save_monthly):
            os.makedirs(path_save_monthly)
            logging.info(f"Created directory {path_save_monthly}")
        else:
            emptyfolder(path_save_monthly)
            logging.info(f"Emptied directory {path_save_monthly}")

        for sgi_id in tqdm(sgi_list, desc='Processing glaciers'):
            # Every step below is best effort: a failure is logged and the
            # loop moves on to the next glacier.

            # 1) Open the masked topography grid of this glacier
            try:
                path = os.path.join(path_save, f"{sgi_id}.zarr")
                ds_coarsened = xr.open_dataset(path)
            except Exception as e:
                _report_failure(sgi_id, year,
                                f"Error loading dataset for {sgi_id}: {e}",
                                str(e))
                continue

            # 2) Build the glacier grid and wrap it in an mbm.Dataset
            try:
                rgi_id = None
                df_grid = create_glacier_grid_SGI(sgi_id, year, rgi_id,
                                                  ds_coarsened)
                df_grid.reset_index(drop=True, inplace=True)
                dataset_grid = mbm.Dataset(cfg=cfg,
                                           data=df_grid,
                                           region_name='CH',
                                           data_path=path_PMB_GLAMOS_csv)
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error creating glacier grid for {sgi_id} in {year}: {e}",
                    str(e))
                continue

            # 3) Add the climate features
            try:
                dataset_grid.get_climate_features(
                    climate_data=era5_climate_data,
                    geopotential_data=geopotential_data,
                    change_units=True,
                    smoothing_vois={
                        'vois_climate': vois_climate,
                        'vois_other': ['ALTITUDE_CLIMATE']
                    })

                if dataset_grid.data.empty:
                    raise ValueError(
                        f"No climate data for glacier {sgi_id} in {year}")
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error adding climate data for {sgi_id} in {year}: {e}",
                    str(e))
                continue

            # 4) Attach RGI IDs from the RGI outlines. Unexpected exceptions
            # are reported with their own message; previously they were all
            # logged as "No RGI intersection", which hid the real cause.
            try:
                df_y_gl = dataset_grid.data
                df_y_gl.rename(columns={'RGIId': 'RGIId_old'}, inplace=True)
                df_y_gl = mbm.data_processing.utils.get_rgi(
                    data=df_y_gl, glacier_outlines=glacier_outline_rgi)
                df_y_gl = df_y_gl.dropna(subset=['RGIId'])
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error matching RGI outlines for {sgi_id} in {year}: {e}",
                    str(e))
                continue

            # Only a genuinely empty result counts as "no intersection"
            if df_y_gl.empty:
                _report_failure(
                    sgi_id,
                    year,
                    f"No RGI intersection for {sgi_id} in {year}. Skipping...",
                    "No RGI intersection",
                    status="skipped",
                    level=logging.WARNING)
                continue

            # 5) Add the OGGM variables and an ID built from glacier and year
            try:
                df_y_gl = add_OGGM_features(df_y_gl, voi, path_OGGM)
                df_y_gl['GLWD_ID'] = df_y_gl.apply(
                    lambda x: get_hash(f"{x.GLACIER}_{x.YEAR}"),
                    axis=1).astype(str)

                dataset_grid = mbm.Dataset(cfg=cfg,
                                           data=df_y_gl,
                                           region_name='CH',
                                           data_path=path_PMB_GLAMOS_csv)
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error adding OGGM data for {sgi_id} in {year}: {e}",
                    str(e))
                continue

            # 6) Convert to monthly format
            try:
                dataset_grid.convert_to_monthly(
                    meta_data_columns=cfg.metaData,
                    vois_climate=vois_climate,
                    vois_topographical=voi_topographical)
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error converting to monthly for {sgi_id} in {year}: {e}",
                    str(e))
                continue

            # 7) Final save. The column rename and elevation copy now sit
            # inside the try so a malformed grid is logged, not fatal.
            save_path = os.path.join(path_save_monthly,
                                     f"{sgi_id}_grid_{year}.parquet")
            try:
                df_oggm = dataset_grid.data
                df_oggm.rename(columns={
                    'aspect': 'aspect_sgi',
                    'slope': 'slope_sgi'
                },
                               inplace=True)
                df_oggm['POINT_ELEVATION'] = df_oggm['topo']

                dataset_grid.data.to_parquet(save_path,
                                             engine="pyarrow",
                                             compression="snappy")
                logging.info(f"Successfully saved {sgi_id} for {year}")
                _log_progress(sgi_id, year, "success", "")
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error saving dataset for {sgi_id} in {year}: {e}",
                    str(e))

In [None]:
# Inspect one saved monthly grid
year = 2021
path_save_monthly = f'../../../data/GLAMOS/topo/gridded_topo_inputs/SGI_regional_preds/{year}/'

sgi_id = 'B36-26'  # Aletsch
df = pd.read_parquet(
    os.path.join(path_save_monthly, f"{sgi_id}_grid_{year}.parquet"))
# Keep only the rows whose MONTHS value is 'sep'
df = df[df['MONTHS'] == 'sep']

# One panel per variable on a flattened 2 x 3 grid
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))
voi = [
    't2m', 'tp', 'ALTITUDE_CLIMATE', 'ELEVATION_DIFFERENCE', 'hugonnet_dhdt',
    'consensus_ice_thickness'
]
axs = axs.flatten()
for i, var in enumerate(voi):
    sns.scatterplot(data=df,
                    x='POINT_LON',
                    y='POINT_LAT',
                    hue=var,
                    palette='twilight_shifted',
                    alpha=0.5,
                    s=5,
                    ax=axs[i])

## Train ML model:

### Set up model:

In [None]:
# Climate feature columns used by the model
vois_climate = [
    't2m', 'tp', 'slhf', 'sshf', 'ssrd', 'fal', 'str', 'u10', 'v10'
]

# Topographical feature columns; aspect and slope use the '_sgi' names that
# the grid-creation cell writes into the saved parquet files.
vois_topographical = [
    "aspect_sgi",
    "slope_sgi",
    "hugonnet_dhdt",
    "consensus_ice_thickness",
    "millan_v",
]

data_glamos = pd.read_csv(path_PMB_GLAMOS_csv + 'CH_wgms_dataset_all.csv')

# Initialize logging
# NOTE: logging.basicConfig does nothing if the root logger already has
# handlers, e.g. when the grid-creation cell ran earlier in this session.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Transform data to monthly format (run or load data):
paths = {
    'csv_path': path_PMB_GLAMOS_csv,
    'era5_climate_data': path_ERA5_raw + 'era5_monthly_averaged_data.nc',
    'geopotential_data': path_ERA5_raw + 'era5_geopotential_pressure.nc',
    'radiation_save_path': path_pcsr + 'zarr/'
}
RUN = False
dataloader_gl = process_or_load_data(
    run_flag=RUN,
    data_glamos=data_glamos,
    paths=paths,
    cfg=cfg,
    vois_climate=vois_climate,
    add_pcsr=False,
    vois_topographical=vois_topographical,
    output_file='CH_wgms_dataset_monthly_swisswide.csv',
)
# Monthly-format table held by the returned loader
data_monthly = dataloader_gl.data

### CV splits:

In [None]:
# test_glaciers = [
#     'tortin', 'plattalva', 'sanktanna', 'schwarzberg', 'hohlaub', 'pizol',
#     'corvatsch', 'tsanfleuron', 'forno'
# ]

# No glacier is held out in this notebook
test_glaciers = []

# Report requested test glaciers that are absent from the dataset
existing_glaciers = set(dataloader_gl.data['GLACIER'].unique())
missing_glaciers = [g for g in test_glaciers if g not in existing_glaciers]

if missing_glaciers:
    print(
        f"Warning: The following test glaciers are not in the dataset: {missing_glaciers}"
    )

# Every glacier that is not a test glacier is used for training
train_glaciers = [i for i in existing_glaciers if i not in test_glaciers]

data_test = dataloader_gl.data[dataloader_gl.data['GLACIER'].isin(
    test_glaciers)]
print('Size of test data:', len(data_test))

data_train = dataloader_gl.data[dataloader_gl.data['GLACIER'].isin(
    train_glaciers)]
print('Size of train data:', len(data_train))

# The percentage is the test size relative to the train size
if len(data_train) > 0:
    test_perc = (len(data_test) / len(data_train)) * 100
    print(f'Percentage of test size: {test_perc:.2f}%')
else:
    print("Warning: No training data available!")

# Row counts per measurement period, training data
print('Train:')
print('Number of winter and annual samples:', len(data_train))
print('Number of annual samples:',
      len(data_train[data_train['PERIOD'] == 'annual']))
print('Number of winter samples:',
      len(data_train[data_train['PERIOD'] == 'winter']))

# Row counts per measurement period, test data
data_test_annual = data_test[data_test['PERIOD'] == 'annual']
data_test_winter = data_test[data_test['PERIOD'] == 'winter']

print('Test:')
print('Number of winter and annual samples:', len(data_test))
print('Number of annual samples:', len(data_test_annual))
print('Number of winter samples:', len(data_test_winter))

# Row counts per measurement period, whole dataset
print('Total:')
print('Number of monthly rows:', len(dataloader_gl.data))
print('Number of annual rows:',
      len(dataloader_gl.data[dataloader_gl.data['PERIOD'] == 'annual']))
print('Number of winter rows:',
      len(dataloader_gl.data[dataloader_gl.data['PERIOD'] == 'winter']))

In [None]:
# Build the CV splits, holding out `test_glaciers` by the GLACIER column
splits, test_set, train_set = get_CV_splits(dataloader_gl,
                                            test_split_on='GLACIER',
                                            test_splits=test_glaciers,
                                            random_state=cfg.seed)

# Summary of both sets; the percentage is relative to the train size
print(f"Test glaciers: ({len(test_set['splits_vals'])}) "
      f"{test_set['splits_vals']}")
test_perc = (len(test_set['df_X']) / len(train_set['df_X'])) * 100
print(f'Percentage of test size: {test_perc:.2f}%')
print('Size of test set:', len(test_set['df_X']))
print(f"Train glaciers: ({len(train_set['splits_vals'])}) "
      f"{train_set['splits_vals']}")
print('Size of train set:', len(train_set['df_X']))

visualiseSplits(test_set['y'], train_set['y'], splits)
visualiseInputs(train_set, test_set, vois_climate)

### Model:

In [None]:
# Base parameters: fit on the first CUDA device with the 'hist' tree method
param_init = {}
param_init['device'] = 'cuda:0'
param_init['tree_method'] = 'hist'
param_init["random_state"] = cfg.seed
param_init["n_jobs"] = cfg.numJobs

# Hyperparameters merged on top of the base parameters below
custom_params = {'learning_rate': 0.01, 'max_depth': 6, 'n_estimators': 800}

# Feature columns:
feature_columns = ['ELEVATION_DIFFERENCE'
                   ] + list(vois_climate) + list(vois_topographical)
# Model input = features plus the non-feature fields listed in the config
all_columns = feature_columns + cfg.fieldsNotFeatures
df_X_train_subset = train_set['df_X'][all_columns]
print('Shape of training dataset:', df_X_train_subset.shape)
print('Shape of testing dataset:', test_set['df_X'][all_columns].shape)
print('Running with features:', feature_columns)

# custom_params overrides param_init on duplicate keys
params = {**param_init, **custom_params}
print(params)
custom_model = mbm.models.CustomXGBoostRegressor(cfg, **params)

# Fit on train data:
custom_model.fit(train_set['df_X'][all_columns], train_set['y'])

# Make predictions on test
# The device is switched to CPU after fitting, before any prediction
custom_model = custom_model.set_params(device='cpu')
features_test, metadata_test = custom_model._create_features_metadata(
    test_set['df_X'][all_columns])
y_pred = custom_model.predict(features_test)
print('Shape of the test:', features_test.shape)

# Make predictions aggr to meas ID:
y_pred_agg = custom_model.aggrPredict(metadata_test, features_test)

# Calculate scores
score = custom_model.score(test_set['df_X'][all_columns],
                           test_set['y'])  # negative
print('Overall score:', np.abs(score))

grouped_ids = getDfAggregatePred(test_set, y_pred_agg, all_columns)
# PlotPredictions(grouped_ids, y_pred, metadata_test, test_set, custom_model)
# plt.suptitle(f'MBM tested on {test_glaciers}', fontsize=20)
# plt.tight_layout()

In [None]:
RUN = False

# Define paths
path_xr_grids = '../../../data/GLAMOS/topo/SGI2020/xr_masked_grids_sgi/'  # SGI DEMs

if RUN:
    years = range(2016, 2023)
    # years = [2016]

    # Feature columns do not depend on the year, so they are built once
    vois_climate = [
        't2m', 'tp', 'slhf', 'sshf', 'ssrd', 'fal', 'str', 'u10', 'v10'
    ]
    feature_columns = ['ELEVATION_DIFFERENCE', *vois_climate,
                       *vois_topographical]
    all_columns = feature_columns + cfg.fieldsNotFeatures

    for year in years:
        path_save_glw = f'../../../data/GLAMOS/distributed_MB_grids/MBM/swisswide/{year}/'
        path_monthly_grids = f'../../../data/GLAMOS/topo/gridded_topo_inputs/SGI_regional_preds/{year}/'
        # The SGI ID is the part of each file name before the first '_'
        sgi_id_list = [
            f.split('_')[0] for f in os.listdir(path_monthly_grids)
        ]

        # Start from an empty output folder for this year
        if os.path.exists(path_save_glw):
            emptyfolder(path_save_glw)
        else:
            os.makedirs(path_save_glw)

        print('Running for feature columns:', all_columns)

        for sgi_id in tqdm(sgi_id_list, desc='SGI Ids'):
            print(sgi_id)
            # Pre-processed monthly grid of this glacier and year
            grid_file = os.path.join(path_monthly_grids,
                                     f"{sgi_id}_grid_{year}.parquet")
            df_grid_monthly = pd.read_parquet(grid_file)

            df_grid_monthly.drop_duplicates(inplace=True)

            # Restrict to the model columns that are present in the file
            available_columns = [
                col for col in all_columns if col in df_grid_monthly.columns
            ]
            df_grid_monthly = df_grid_monthly[available_columns]

            # Create geodata object
            geoData = mbm.GeoData(df_grid_monthly)

            # Compute and save the gridded MB for this glacier and year
            path_glacier_dem = os.path.join(path_xr_grids, f"{sgi_id}.zarr")
            geoData.gridded_MB_pred(custom_model,
                                    sgi_id,
                                    year,
                                    all_columns,
                                    path_glacier_dem,
                                    path_save_glw,
                                    cfg,
                                    save_monthly_pred=True)

In [None]:
# Open one saved annual prediction and plot its masked values
sgi_id = 'B36-26'  # Aletsch
year = 2016
path_save_glw = f'../../../data/GLAMOS/distributed_MB_grids/MBM/swisswide/{year}/'
path = os.path.join(path_save_glw, f"{sgi_id}/{sgi_id}_{year}_annual.zarr")

xr.open_dataset(path)['pred_masked'].plot()

In [None]:
# Read the pre-processed monthly grid of one glacier and year
sgi_id = 'B36-26'  # Aletsch
year = 2016
path_monthly_grids = f'../../../data/GLAMOS/topo/gridded_topo_inputs/SGI_regional_preds/{year}/'
df = pd.read_parquet(
    os.path.join(path_monthly_grids, f"{sgi_id}_grid_{year}.parquet"))
fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(15, 5))

# Keep only the rows whose MONTHS value is 'sep'; one panel per variable
df = df[df['MONTHS'] == 'sep']
voi = ['t2m', 'tp', 'hugonnet_dhdt', 'consensus_ice_thickness']
for i, var in enumerate(voi):
    sns.scatterplot(data=df,
                    x='POINT_LON',
                    y='POINT_LAT',
                    hue=var,
                    palette='twilight_shifted',
                    alpha=0.5,
                    s=5,
                    ax=axs[i])

## Analyze results:

### Look at 2016:

#### Mean predicted MB:

In [None]:
# Entries of the 2016 prediction folder (one entry per SGI ID)
year = 2016
path_save_glw = f'../../../data/GLAMOS/distributed_MB_grids/MBM/swisswide/{year}/'
sgi_id_list = os.listdir(path_save_glw)

def get_mean_mb_year(year):
    """Return the mean predicted annual mass balance per glacier for `year`.

    The glacier folders are listed from the prediction directory of `year`
    itself, so a glacier missing in one year does not break another year.

    Parameters
    ----------
    year : int
        Year whose prediction folder is read.

    Returns
    -------
    pandas.DataFrame
        Columns 'sgi_id' and 'mean_mb', one row per entry of the folder.
        The columns are present even when the folder is empty.
    """
    path_save_glw = f'../../../data/GLAMOS/distributed_MB_grids/MBM/swisswide/{year}/'

    # Calculate mean predicted mb for each glacier
    rows = []
    for sgi_id in tqdm(os.listdir(path_save_glw)):
        annual_store = os.path.join(path_save_glw,
                                    f"{sgi_id}/{sgi_id}_{year}_annual.zarr")
        # The context manager closes the store once the mean is computed
        with xr.open_dataset(annual_store) as gridd_mb:
            mean_value = gridd_mb.pred_masked.mean().values.item()
        rows.append({'sgi_id': sgi_id, 'mean_mb': mean_value})

    mean_mb = pd.DataFrame(rows, columns=['sgi_id', 'mean_mb'])
    return mean_mb


# One frame per year; a later cell looks these names up with
# globals().get(f"mean_mb_{year}"), so the naming pattern matters.
mean_mb_2016 = get_mean_mb_year(2016)
mean_mb_2017 = get_mean_mb_year(2017)
mean_mb_2018 = get_mean_mb_year(2018)
mean_mb_2019 = get_mean_mb_year(2019)
mean_mb_2020 = get_mean_mb_year(2020)
mean_mb_2021 = get_mean_mb_year(2021)
mean_mb_2022 = get_mean_mb_year(2022)

In [None]:
# Compare the MBM mean mass balance with the GLAMOS reference.
# The first data row of the reference file is skipped.
df_reference = pd.read_csv(
    '../../../data/GLAMOS/massbalance_swisswide_2024_r2024_clean.csv').iloc[1:]

# GLAMOS value of the 'Switzerland' catchment for each year
# (the year column is compared as a string)
ref_MB_glamos = []
for year in range(2016, 2023):
    ref_CH_y = df_reference[(df_reference['catchment'] == 'Switzerland')
                            & (df_reference['year'] == str(year))]
    ref_MB_glamos.append(ref_CH_y['massbalance evolution'].values[0])

# MBM value: unweighted mean of the per-glacier means of each year
years = list(range(2016, 2023))
mbm_mb = [
    yearly['mean_mb'].mean()
    for yearly in (mean_mb_2016, mean_mb_2017, mean_mb_2018, mean_mb_2019,
                   mean_mb_2020, mean_mb_2021, mean_mb_2022)
]

# Both series in one float frame indexed by year
df = pd.DataFrame({
    'MBM MB': mbm_mb,
    'GLAMOS MB': ref_MB_glamos
}, index=years).astype({
    'MBM MB': float,
    'GLAMOS MB': float
})

# Grouped bar chart
df.plot.bar(figsize=(8, 5))

plt.xlabel('Year')
plt.ylabel('Mean Mass Balance (m w.e.)')
plt.title('Comparison of Mean Mass Balance: MBM vs GLAMOS')
plt.xticks(rotation=0)
plt.legend()
plt.tight_layout()
plt.show()

#### Get volumes and areas:

In [None]:
def convert_id(id_str):
    """Return `id_str` with every '/' replaced by '-'."""
    return '-'.join(id_str.split('/'))

# Paths
path_volumes = '../../../data/GLAMOS/volumes/'
path_areas = '../../../data/GLAMOS/topo/SGI2020/inventory_sgi2016_r2020'

# Load the shapefile of volumes
volgdf = gpd.read_file(os.path.join(path_volumes, 'Summary.shp'))
# 'pk_sgi' uses '/' where the inventory's 'sgi-id' uses '-'
volgdf['sgi-id'] = volgdf['pk_sgi'].apply(convert_id)
volgdf['V_2016'] = volgdf['V_2016'] * 10**9  # convert to m³

# Load the shapefile of areas from SGI 2016
areagdf = gpd.read_file(os.path.join(path_areas, 'SGI_2016_glaciers.shp'))
areagdf['area_2016'] = areagdf['area_km2'] * 10**6  # convert to m²

# Initialize glacier_info with volumes and areas
# (inner join: only glaciers present in both tables are kept)
glacier_info = volgdf[['sgi-id',
                       'V_2016']].merge(areagdf[['sgi-id', 'area_2016']],
                                        on='sgi-id',
                                        how='inner')

# List of years you want to process
years = range(2016, 2023)  # includes 2022

# Now loop over the years and merge mean mass balance year by year
for year in years:
    # The yearly frames are looked up by variable name (mean_mb_<year>)
    mean_mb_df = globals().get(f"mean_mb_{year}", None)
    if mean_mb_df is not None:
        mean_mb_df = mean_mb_df.copy()
        mean_mb_df['sgi-id'] = mean_mb_df['sgi_id'].apply(convert_id)
        glacier_info = glacier_info.merge(
            mean_mb_df[['sgi-id', 'mean_mb'
                        ]].rename(columns={'mean_mb': f'mean_mb_{year}'}),
            on='sgi-id',
            how=
            'left'  # use 'left' to avoid dropping glaciers if some years are missing
        )
    else:
        print(f"Warning: mean_mb_{year} not found in globals.")

# Rows with a NaN in any column are removed, so a glacier that lacks the
# mass balance of even one year is dropped here.
glacier_info.dropna(inplace=True)  # Drop rows with NaN values
glacier_info.set_index('sgi-id', inplace=True)
glacier_info.head()

#### Total vol change 2016:

In [None]:
density_ice = 916.7  # or 917 kg/m³
density_water = 1000  # kg/m³

# Calculate volume changes
# area * mean mass balance, scaled by the water/ice density ratio
glacier_info['vol_change_2016'] = (glacier_info['area_2016'] *
                                   glacier_info['mean_mb_2016']) * (
                                       density_water / density_ice)

vol_change_2016 = glacier_info['vol_change_2016'].sum(
) / 10**9  # convert to km3
volume_2016 = glacier_info['V_2016'].sum() / 10**9  # convert to km3
area_2016 = glacier_info['area_2016'].sum() / 10**6  # convert to km2
volume_change_2016_perc = vol_change_2016 / volume_2016 * 100
# Unweighted mean over glaciers (every glacier counts equally)
mb_2016 = glacier_info['mean_mb_2016'].mean()

# GLAMOS reference row for the 'Switzerland' catchment in 2016
ref_CH_2016 = df_reference[(df_reference.catchment == 'Switzerland')
                           & (df_reference.year == '2016')]

print('Volume change from GLAMOS:', ref_CH_2016['volume change'].values[0],
      '%')  # in %
print('Volume change from MBM:', np.round(volume_change_2016_perc, 2),
      '%')  # in %

print('Mean mass balance from GLAMOS:',
      ref_CH_2016['massbalance evolution'].values[0], 'm w.e.')
print('Mean mass balance from MBM:', np.round(mb_2016, 2), 'm w.e.')

### Volume area scaling:

In [None]:
# def volume_area_scaling(
#         glacier_info,
#         t1,
#         beta=1.36,
#         density_ice=916.7,  # or 917 kg/m³
#         density_water=1000  # kg/m³
# ):
#     # calculate c for every glacier
#     glacier_info['c'] = glacier_info[f'V_{t1}'] / (glacier_info[f'area_{t1}']**
#                                                    beta)
#     years = [t1, t1 + 1]

#     # Initialize arrays to store volume and area
#     volume_ev = pd.DataFrame(columns=years, index=glacier_info.index)
#     area_ev = pd.DataFrame(columns=years, index=glacier_info.index)
#     mb_ev = pd.DataFrame(columns=years, index=glacier_info.index)

#     # Set initial conditions
#     volume_ev[t1] = glacier_info[f'V_{t1}'].values
#     area_ev[t1] = glacier_info[f'area_{t1}'].values
#     mb_ev[t1] = glacier_info[f'mean_mb_{t1}'].values

#     # Add mb evolution
#     mb_ev[t1 + 1] = glacier_info[f'mean_mb_{t1+1}'].values

#     # Forward simulation
#     # Calculate volume change
#     volume_change = mb_ev[t1] * area_ev[t1] * (density_water / density_ice)

#     # Update volume
#     volume_ev[t1 + 1] = np.maximum(volume_ev[t1] + volume_change, 0)

#     # Update area using V-A scaling
#     area_ev[t1 + 1] = np.maximum(
#         (volume_ev[t1 + 1] / glacier_info['c'])**(1 / beta), 0)

#     glacier_info[f'area_{year+1}'] = area_ev[year + 1]
#     glacier_info[f'V_{year+1}'] = volume_ev[year + 1]


# end_year = 2022
# for year in range(2016, end_year):
#     volume_area_scaling(
#         glacier_info,
#         t1=year,
#         beta=1.36,
#     )
# glacier_info.head()

In [None]:
def volume_area_scaling(
    glacier_info,
    t1,
    beta=1.36,
    density_ice=916.7,  # kg/m³
    density_water=1000  # kg/m³
):
    """
    Step glacier volume and area from year t1 to t1+1, in place.

    Reads the columns V_{t1}, area_{t1} and mean_mb_{t1} and writes
    V_{t1+1} and area_{t1+1}. The scaling coefficient c = V / area**beta is
    stored in column 'c' the first time the function runs and is reused
    unchanged on later calls. Nothing is returned.
    """
    t2 = t1 + 1

    # Per-glacier coefficient, computed only when the column is missing
    if 'c' not in glacier_info.columns:
        glacier_info['c'] = glacier_info[f'V_{t1}'] / (
            glacier_info[f'area_{t1}'] ** beta)

    # Volume change driven by the mass balance of year t1 itself,
    # scaled by the water/ice density ratio
    delta_volume = (glacier_info[f'mean_mb_{t1}'] *
                    glacier_info[f'area_{t1}'] *
                    (density_water / density_ice))

    # New volume, floored at zero
    next_volume = (glacier_info[f'V_{t1}'] + delta_volume).clip(lower=0)

    # New area from inverting V = c * A**beta, floored at zero
    next_area = ((next_volume / glacier_info['c']) ** (1 / beta)).clip(lower=0)

    # Write the results for year t1+1 back into the frame
    glacier_info[f'V_{t2}'] = next_volume
    glacier_info[f'area_{t2}'] = next_area
    
# Step every year from 2016 up to (not including) end_year, so the last call
# writes the V_2022 and area_2022 columns.
end_year = 2022
for year in range(2016, end_year):
    volume_area_scaling(
        glacier_info,
        t1=year,
        beta=1.36
    )

glacier_info.head()

In [None]:
# Same calculation as for 2016, using the scaled 2022 area and volume
glacier_info['vol_change_2022'] = (glacier_info['area_2022'] *
                                   glacier_info['mean_mb_2022']) * (
                                       density_water / density_ice)

vol_change_2022 = glacier_info['vol_change_2022'].sum(
) / 10**9  # convert to km3

volume_2022 = glacier_info['V_2022'].sum() / 10**9  # convert to km3
# Unweighted mean over glaciers (every glacier counts equally)
mb_2022 = glacier_info['mean_mb_2022'].mean()

volume_change_2022_perc = vol_change_2022 / volume_2022 * 100

# GLAMOS reference row for the 'Switzerland' catchment in 2022
ref_CH_2022 = df_reference[(df_reference.catchment == 'Switzerland')
                           & (df_reference.year == '2022')]

print('Volume change from GLAMOS:', ref_CH_2022['volume change'].values[0],
      '%')  # in %
print('Volume change from MBM:', np.round(volume_change_2022_perc, 2),
      '%')  # in %

print('Mean mass balance from GLAMOS:',
      ref_CH_2022['massbalance evolution'].values[0], 'm w.e.')
print('Mean mass balance from MBM:', np.round(mb_2022, 2), 'm w.e.')

In [None]:
# Relative volume change per year: MBM (with scaled areas and volumes)
# against the GLAMOS reference for the 'Switzerland' catchment.
volume_change_y_perc, ref_V_glamos = [], []
# The years are defined here so the frame index always matches the loop,
# whatever `years` was set to by another cell.
years = list(range(2016, 2023))
for year in years:
    # area * mean mass balance, scaled by the water/ice density ratio
    glacier_info[f'vol_change_{year}'] = (glacier_info[f'area_{year}'] *
                                          glacier_info[f'mean_mb_{year}']) * (
                                              density_water / density_ice)

    vol_change_y = glacier_info[f'vol_change_{year}'].sum(
    ) / 10**9  # convert to km3

    volume_y = glacier_info[f'V_{year}'].sum() / 10**9  # convert to km3

    volume_change_y_perc.append(vol_change_y / volume_y * 100)

    ref_CH_y = df_reference[(df_reference.catchment == 'Switzerland')
                            & (df_reference.year == str(year))]

    ref_V_glamos.append(ref_CH_y['volume change'].values[0])

# Build DataFrame correctly
df = pd.DataFrame({
    'MBM V': volume_change_y_perc,
    'GLAMOS V': ref_V_glamos
}, index=years)

# give same type to columns
df['MBM V'] = df['MBM V'].astype(float)
df['GLAMOS V'] = df['GLAMOS V'].astype(float)

# Now plotting works
df.plot(kind='bar', figsize=(8, 5))

plt.xlabel('Year')
# The bars are percentages of total volume, not mass balance
plt.ylabel('Volume change (%)')
plt.title('Comparison of volume change: MBM vs GLAMOS')
plt.xticks(rotation=0)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# The 20 glaciers with the largest 2016 area
df_sub = glacier_info.sort_values(by='area_2016', ascending=False).head(20)
df_sub['area_2016'] = df_sub['area_2016'] / 10**6  # convert to km2
df_sub['area_2022'] = df_sub['area_2022'] / 10**6  # convert to km2

# Plotting
fig, ax = plt.subplots(figsize=(8, 5))
index = np.arange(len(df_sub))
bar_width = 0.35

# Bars for 2016 and 2022 areas
# (the 2022 bars are shifted by one bar width so each pair sits side by side)
ax.bar(index, df_sub['area_2016'], bar_width, label='Area 2016')
ax.bar(index + bar_width, df_sub['area_2022'], bar_width, label='Area 2022')

# Axis labels and title
ax.set_xlabel('Glacier Index')
ax.set_ylabel('Area (km²)')
ax.set_title('Glacier Area Comparison: 2016 vs 2022')
# Ticks are centred between the two bars and labelled with the frame index
ax.set_xticks(index + bar_width / 2)
ax.set_xticklabels(df_sub.index)

# rotate x labels
plt.xticks(rotation=45)
ax.legend()

# Layout optimization
plt.tight_layout()
plt.show()

In [None]:
# Distribution of the relative area loss since 2016, one panel per year
fig, axs = plt.subplots(2, 3, figsize=(12, 6))
ax = axs.flatten()
for i, year in enumerate(range(2017, 2023)):
    # Multiplied by 100 so the values match the '%' axis label
    # (they were plotted as fractions before)
    area_perc_loss_y = 100 * (
        glacier_info['area_2016'] -
        glacier_info[f'area_{year}']) / glacier_info['area_2016']

    sns.boxplot(area_perc_loss_y, ax=ax[i], color='blue')
    ax[i].set_title(f'Loss from 2016 to {year}')
    ax[i].set_ylabel('Area loss (%)')

plt.tight_layout()

In [None]:
# Glaciers whose scaled area is exactly zero in 2018
glacier_info[glacier_info['area_2018'] == 0]