# Glacier grids from SGI or GLAMOS:

Creates monthly grid files that the MassBalanceMachine (MBM) uses to make PMB predictions over the whole grid of each glacier. The grids are built from the SGI grid and use OGGM topography. The computation is slow because every glacier grid has to be converted to monthly format.
## Setting up:

In [None]:
import pandas as pd
import os
import warnings
from tqdm.notebook import tqdm
import re
import massbalancemachine as mbm
import geopandas as gpd
import matplotlib.pyplot as plt
import geopandas as gpd
import geopandas as gpd
import csv

# scripts
from scripts.helpers import *
from scripts.glamos_preprocess import *
from scripts.plots import *
from scripts.geodata import *
from scripts.xgb_helpers import *
from scripts.config_CH import *

# Suppress every Python warning for the rest of the notebook.
warnings.filterwarnings('ignore')
# IPython magics: load the autoreload extension and reload all modules
# before each cell execution (mode 2).
%load_ext autoreload
%autoreload 2

# Configuration object for Switzerland, provided by massbalancemachine.
cfg = mbm.SwitzerlandConfig()

In [None]:
# Seed with the configured seed, then free CUDA memory in case none is left.
seed_all(cfg.seed)
free_up_cuda()

# Matplotlib style sheet used for every figure in this notebook.
path_style_sheet = 'scripts/example.mplstyle'
plt.style.use(path_style_sheet)

# Climate feature columns.
vois_climate = [
    't2m',
    'tp',
    'slhf',
    'sshf',
    'ssrd',
    'fal',
    'str',
    'u10',
    'v10',
]

# Topographical feature columns.
voi_topographical = [
    "aspect", "slope", "hugonnet_dhdt", "consensus_ice_thickness", "millan_v",
    "topo"
]

In [None]:
# Entries of the GLAMOS 'lv95' topography folder.
glaciers_glamos_dem = os.listdir(os.path.join(path_GLAMOS_topo, 'lv95/'))

# Glacier outlines: SGI 2016 inventory shapefile and RGI outlines.
sgi_outline_path = os.path.join(path_SGI_topo, 'inventory_sgi2016_r2020',
                                'SGI_2016_glaciers_copy.shp')
glacier_outline_sgi = gpd.read_file(sgi_outline_path)
glacier_outline_rgi = gpd.read_file(path_rgi_outlines)

# Glacier areas; 'clariden' reuses the value stored under 'claridenL'.
gl_area = get_gl_area()
gl_area['clariden'] = gl_area['claridenL']

In [None]:
# Load the glacier ID table; column names are stripped of surrounding
# whitespace.
rgi_df = pd.read_csv(path_glacier_ids,
                     sep=',').rename(columns=lambda x: x.strip())

# Sort by short name. The index is set and reset again so that 'short_name'
# ends up as a regular (first) column on top of a fresh integer index.
rgi_df.sort_values(by='short_name', inplace=True)
rgi_df.set_index('short_name', inplace=True)

# Load geodetic mass balance data
geodeticMB = pd.read_csv(f"{path_geodetic_MB_glamos}dV_DOI2024_allcomb.csv")

rgi_df.reset_index(inplace=True)

# SGI IDs of the glaciers that have a GLAMOS DEM folder
sgi_gl = rgi_df.loc[rgi_df.short_name.isin(
    glaciers_glamos_dem)]['sgi-id'].unique()

# Add the SGI ID(s) of 'claridenL' explicitly
clariden_L_sgi_id = rgi_df[rgi_df.short_name == 'claridenL']['sgi-id'].unique()
sgi_gl = np.concatenate((sgi_gl, clariden_L_sgi_id))

# Filter geodeticMB for relevant SGI IDs. The explicit copy makes the column
# assignments below act on an independent frame instead of a slice of the
# original (avoids SettingWithCopy issues).
geodeticMB = geodeticMB[geodeticMB['SGI-ID'].isin(sgi_gl)].copy()

# Create a mapping dictionary SGI ID -> glacier short name
sgi_to_glacier_name = rgi_df[[
    'sgi-id', 'short_name'
]].drop_duplicates().set_index('sgi-id')['short_name'].to_dict()

# Add glacier names based on SGI-ID mapping
geodeticMB['glacier_name'] = geodeticMB['SGI-ID'].map(sgi_to_glacier_name)

# Standardize naming convention. The result is assigned back to the column:
# an in-place replace on `geodeticMB['glacier_name']` operates on a temporary
# Series and is not guaranteed to update the frame (it is a no-op under
# pandas Copy-on-Write).
# NOTE(review): only 'claridenU' is renamed; rows mapped to 'claridenL' keep
# that name and are kept by the filter below only if 'claridenL' is listed in
# glaciers_glamos_dem -- confirm this is intended.
geodeticMB['glacier_name'] = geodeticMB['glacier_name'].replace(
    {'claridenU': 'clariden'})

# Keep only glaciers that have a GLAMOS DEM folder
geodeticMB = geodeticMB[geodeticMB.glacier_name.isin(glaciers_glamos_dem)]

# Extract unique start and end years per glacier
years_start_per_gl = geodeticMB.groupby(
    'glacier_name')['Astart'].unique().apply(list).to_dict()
years_end_per_gl = geodeticMB.groupby('glacier_name')['A_end'].unique().apply(
    list).to_dict()

glacier_list_geod = years_start_per_gl.keys()
years_start_per_gl, years_end_per_gl

## Regional predictions (all CH glaciers)

In [None]:
# Derive the SGI ID from every file name in the 'aspect' folder: take the part
# before '.grid' and keep the second '_'-separated token.
# The dot is escaped so that only a literal '.grid' is used as separator
# (an unescaped '.' would match any character).
sgi_list = [
    re.split(r'\.grid', f)[0].split('_')[1]
    for f in os.listdir(os.path.join(path_SGI_topo, 'aspect'))
]

# Unique SGI IDs, sorted so that the processing order is the same on every run
# (plain set iteration order of strings changes between interpreter sessions).
sgi_list = sorted(set(sgi_list))
print('Number of unique SGI IDs:', len(sgi_list))

glaciers_glamos_dems = os.listdir(os.path.join(path_GLAMOS_topo, 'lv95'))

RUN = False
if RUN:
    # Create SGI topographical masks
    # Note: This function will take a while to run
    # It creates a mask for each glacier in the SGI list
    # and saves them in the specified directory.
    create_sgi_topo_masks(sgi_list,
                          type='sgi_id',
                          path_save=os.path.join(path_SGI_topo,
                                                 'xr_masked_grids_sgi/'))

In [None]:
# Visual check: plot the masked aspect of one example grid (A10g-02).
path = os.path.join(path_SGI_topo, 'xr_masked_grids_sgi/')
ds_example = xr.open_dataset(path + 'A10g-02.zarr')
ds_example.masked_aspect.plot()

## Data exploration:

In [None]:
data_glamos = pd.read_csv(path_PMB_GLAMOS_csv + 'CH_wgms_dataset_all.csv')
gl_area = get_gl_area()

# Collect the area of every glacier in the dataset for which an area is known.
areas_train_set = []
for gl in data_glamos['GLACIER'].unique():
    if gl in gl_area.keys():
        areas_train_set.append(gl_area[gl])

# Histogram of those areas
plt.hist(areas_train_set, bins=50)
plt.xlabel('Area (km2)')
plt.title('Histogram of glacier areas with stakes')

In [None]:
# Read the SGI 2016 glacier inventory.
shapefile_path = os.path.join(path_SGI_topo, 'inventory_sgi2016_r2020',
                              'SGI_2016_glaciers.shp')
gdf_shapefiles = gpd.read_file(shapefile_path)

# Left panel: histogram with KDE; right panel: boxplot of the same values
# (polygon area divided by 10**6).
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
ax_hist, ax_box = axs[0], axs[1]

sns.histplot(gdf_shapefiles.area / (10**6),
             color='blue',
             kde=True,
             bins=50,
             ax=ax_hist)
sns.boxplot(x=gdf_shapefiles.area / (10**6), color='blue', ax=ax_box)

# Same x label (km2) on both panels
for ax in (ax_hist, ax_box):
    ax.set_xlabel('Area (km2)')

plt.suptitle('Histogram and Boxplot of all glaciers in SGI 2016')

## Create grids:

### 2016 - 2022:

In [None]:
# === Set up logging ===
log_filename = f"process_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
logging.basicConfig(filename=log_filename,
                    level=logging.INFO,
                    format='%(asctime)s [%(levelname)s] %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')

# === Set up CSV progress log ===
csv_log_path = f"swiss_wide_progress_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
with open(csv_log_path, mode='w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["sgi_id", "year", "status", "message"])


def _log_progress(sgi_id, year, status, message):
    """Append one (sgi_id, year, status, message) row to the CSV progress log."""
    with open(csv_log_path, 'a', newline='') as f:
        csv.writer(f).writerow([sgi_id, year, status, message])


def _report_failure(sgi_id, year, msg, error):
    """Log `msg` as an error, print it and record `error` in the CSV log."""
    logging.error(msg)
    print(msg)
    _log_progress(sgi_id, year, "error", str(error))


years = range(2016, 2023)
RUN = False

if RUN:
    # Inputs that do not depend on the year or the glacier.
    path_masked_grids = os.path.join(path_SGI_topo, 'xr_masked_grids_sgi/')
    era5_climate_data = os.path.join(path_ERA5_raw,
                                     'era5_monthly_averaged_data.nc')
    geopotential_data = os.path.join(path_ERA5_raw,
                                     'era5_geopotential_pressure.nc')
    voi = ["hugonnet_dhdt", "consensus_ice_thickness", "millan_v"]

    for year in years:
        # One output folder per year; an existing folder is emptied first.
        path_save_monthly = f'../../../data/GLAMOS/topo/gridded_topo_inputs/SGI_regional_preds/{year}/'
        if not os.path.exists(path_save_monthly):
            os.makedirs(path_save_monthly)
            logging.info(f"Created directory {path_save_monthly}")
        else:
            emptyfolder(path_save_monthly)
            logging.info(f"Emptied directory {path_save_monthly}")

        for sgi_id in tqdm(sgi_list, desc='Processing glaciers'):
            # 1) Open the masked grid of this glacier.
            try:
                ds_coarsened = xr.open_dataset(
                    os.path.join(path_masked_grids, f"{sgi_id}.zarr"))
            except Exception as e:
                _report_failure(sgi_id, year,
                                f"Error loading dataset for {sgi_id}: {e}", e)
                continue

            # 2) Build the glacier grid and wrap it in an MBM dataset.
            try:
                rgi_id = None
                df_grid = create_glacier_grid_SGI(sgi_id, year, rgi_id,
                                                  ds_coarsened)
                df_grid.reset_index(drop=True, inplace=True)
                dataset_grid = mbm.Dataset(cfg=cfg,
                                           data=df_grid,
                                           region_name='CH',
                                           data_path=path_PMB_GLAMOS_csv)
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error creating glacier grid for {sgi_id} in {year}: {e}",
                    e)
                continue
            finally:
                # The xarray dataset is only needed to build the grid; close
                # it so handles do not pile up over all glaciers and years.
                ds_coarsened.close()

            # 3) Add climate features.
            try:
                dataset_grid.get_climate_features(
                    climate_data=era5_climate_data,
                    geopotential_data=geopotential_data,
                    change_units=True,
                    smoothing_vois={
                        'vois_climate': vois_climate,
                        'vois_other': ['ALTITUDE_CLIMATE']
                    })

                if dataset_grid.data.empty:
                    raise ValueError(
                        f"No climate data for glacier {sgi_id} in {year}")
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error adding climate data for {sgi_id} in {year}: {e}",
                    e)
                continue

            # 4) Attach RGI IDs. Real failures are reported as errors with
            # their message; only a genuinely empty result counts as
            # "No RGI intersection".
            try:
                df_y_gl = dataset_grid.data
                df_y_gl.rename(columns={'RGIId': 'RGIId_old'}, inplace=True)
                df_y_gl = mbm.data_processing.utils.get_rgi(
                    data=df_y_gl, glacier_outlines=glacier_outline_rgi)
                df_y_gl = df_y_gl.dropna(subset=['RGIId'])
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error computing RGI intersection for {sgi_id} in {year}: {e}",
                    e)
                continue

            if df_y_gl.empty:
                msg = f"No RGI intersection for {sgi_id} in {year}. Skipping..."
                logging.warning(msg)
                print(msg)
                _log_progress(sgi_id, year, "skipped", "No RGI intersection")
                continue

            # 5) Add OGGM variables and a per-row ID hashed from glacier+year.
            try:
                df_y_gl = add_OGGM_features(df_y_gl, voi, path_OGGM)
                df_y_gl['GLWD_ID'] = df_y_gl.apply(
                    lambda x: get_hash(f"{x.GLACIER}_{x.YEAR}"),
                    axis=1).astype(str)

                dataset_grid = mbm.Dataset(cfg=cfg,
                                           data=df_y_gl,
                                           region_name='CH',
                                           data_path=path_PMB_GLAMOS_csv)
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error adding OGGM data for {sgi_id} in {year}: {e}", e)
                continue

            # 6) Convert to monthly format.
            try:
                dataset_grid.convert_to_monthly(
                    meta_data_columns=cfg.metaData,
                    vois_climate=vois_climate,
                    vois_topographical=voi_topographical)
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error converting to monthly for {sgi_id} in {year}: {e}",
                    e)
                continue

            # 7) Final column tweaks and save. The column handling lives
            # inside the try block so that a missing column is logged for
            # this glacier instead of aborting the whole run.
            save_path = os.path.join(path_save_monthly,
                                     f"{sgi_id}_grid_{year}.parquet")
            try:
                df_oggm = dataset_grid.data
                df_oggm.rename(columns={
                    'aspect': 'aspect_sgi',
                    'slope': 'slope_sgi'
                },
                               inplace=True)
                df_oggm['POINT_ELEVATION'] = df_oggm['topo']

                dataset_grid.data.to_parquet(save_path,
                                             engine="pyarrow",
                                             compression="snappy")
                logging.info(f"Successfully saved {sgi_id} for {year}")
                _log_progress(sgi_id, year, "success", "")
            except Exception as e:
                _report_failure(
                    sgi_id, year,
                    f"Error saving dataset for {sgi_id} in {year}: {e}", e)

In [None]:
year = 2021
path_save_monthly = f'../../../data/GLAMOS/topo/gridded_topo_inputs/SGI_regional_preds/{year}/'

sgi_id = 'B36-26'  # Aletsch

# Read the monthly grid of this glacier and keep the 'sep' rows only.
df = pd.read_parquet(
    os.path.join(path_save_monthly, f"{sgi_id}_grid_{year}.parquet"))
df = df[df.MONTHS == 'sep']

# One scatter panel per variable, coloured by that variable.
voi = [
    't2m', 'tp', 'ALTITUDE_CLIMATE', 'ELEVATION_DIFFERENCE', 'hugonnet_dhdt',
    'consensus_ice_thickness'
]
fig, axs = plt.subplots(2, 3, figsize=(15, 10))
axs = axs.flatten()
for ax, var in zip(axs, voi):
    sns.scatterplot(data=df,
                    x='POINT_LON',
                    y='POINT_LAT',
                    hue=var,
                    s=5,
                    alpha=0.5,
                    palette='twilight_shifted',
                    ax=ax)

## Train ML model:

### Set up model:

In [None]:
# Feature groups used for training.
vois_climate = [
    't2m',
    'tp',
    'slhf',
    'sshf',
    'ssrd',
    'fal',
    'str',
    'u10',
    'v10',
]

vois_topographical = [
    "aspect_sgi", "slope_sgi", "hugonnet_dhdt", "consensus_ice_thickness",
    "millan_v"
]

data_glamos = pd.read_csv(path_PMB_GLAMOS_csv + 'CH_wgms_dataset_all.csv')

# Initialize logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Input/output locations handed to process_or_load_data.
era5_monthly_nc = path_ERA5_raw + 'era5_monthly_averaged_data.nc'
era5_geopot_nc = path_ERA5_raw + 'era5_geopotential_pressure.nc'
paths = {
    'csv_path': path_PMB_GLAMOS_csv,
    'era5_climate_data': era5_monthly_nc,
    'geopotential_data': era5_geopot_nc,
    'radiation_save_path': path_pcsr + 'zarr/'
}

# Transform data to monthly format (run or load data), depending on RUN.
RUN = False
dataloader_gl = process_or_load_data(
    run_flag=RUN,
    data_glamos=data_glamos,
    paths=paths,
    cfg=cfg,
    vois_climate=vois_climate,
    add_pcsr=False,
    vois_topographical=vois_topographical,
    output_file='CH_wgms_dataset_monthly_swisswide.csv',
)
data_monthly = dataloader_gl.data

### CV splits:

In [None]:
# test_glaciers = [
#     'tortin', 'plattalva', 'sanktanna', 'schwarzberg', 'hohlaub', 'pizol',
#     'corvatsch', 'tsanfleuron', 'forno'
# ]

test_glaciers = []

# Glaciers requested for testing that do not occur in the dataset
existing_glaciers = set(dataloader_gl.data.GLACIER.unique())
missing_glaciers = [g for g in test_glaciers if g not in existing_glaciers]

if missing_glaciers:
    print(
        f"Warning: The following test glaciers are not in the dataset: {missing_glaciers}"
    )

# Every glacier that is not a test glacier is used for training
train_glaciers = [gl for gl in existing_glaciers if gl not in test_glaciers]

is_test_row = dataloader_gl.data.GLACIER.isin(test_glaciers)
data_test = dataloader_gl.data[is_test_row]
print('Size of test data:', len(data_test))

is_train_row = dataloader_gl.data.GLACIER.isin(train_glaciers)
data_train = dataloader_gl.data[is_train_row]
print('Size of train data:', len(data_train))

if len(data_train) != 0:
    test_perc = (len(data_test) / len(data_train)) * 100
    print(f'Percentage of test size: {test_perc:.2f}%')
else:
    print("Warning: No training data available!")

# Number of annual versus winter measurements in the training data
data_train_annual = data_train[data_train.PERIOD == 'annual']
data_train_winter = data_train[data_train.PERIOD == 'winter']

print('Train:')
print('Number of winter and annual samples:', len(data_train))
print('Number of annual samples:', len(data_train_annual))
print('Number of winter samples:', len(data_train_winter))

# Same for the test data
data_test_annual = data_test[data_test.PERIOD == 'annual']
data_test_winter = data_test[data_test.PERIOD == 'winter']

print('Test:')
print('Number of winter and annual samples:', len(data_test))
print('Number of annual samples:', len(data_test_annual))
print('Number of winter samples:', len(data_test_winter))

# Whole dataset
all_rows = dataloader_gl.data
print('Total:')
print('Number of monthly rows:', len(all_rows))
print('Number of annual rows:', len(all_rows[all_rows.PERIOD == 'annual']))
print('Number of winter rows:', len(all_rows[all_rows.PERIOD == 'winter']))

In [None]:
# Build the cross-validation splits, holding out the test glaciers.
splits, test_set, train_set = get_CV_splits(dataloader_gl,
                                            test_split_on='GLACIER',
                                            test_splits=test_glaciers,
                                            random_state=cfg.seed)

n_test_rows = len(test_set['df_X'])
n_train_rows = len(train_set['df_X'])

print('Test glaciers: ({}) {}'.format(len(test_set['splits_vals']),
                                      test_set['splits_vals']))
test_perc = (n_test_rows / n_train_rows) * 100
print(f'Percentage of test size: {test_perc:.2f}%')
print('Size of test set:', n_test_rows)
print('Train glaciers: ({}) {}'.format(len(train_set['splits_vals']),
                                       train_set['splits_vals']))
print('Size of train set:', n_train_rows)

# Plots of the splits and of the climate inputs
visualiseSplits(test_set['y'], train_set['y'], splits)
visualiseInputs(train_set, test_set, vois_climate)

### Model:

In [None]:
# Fixed XGBoost settings and the chosen hyperparameters.
param_init = {
    'device': 'cuda:0',
    'tree_method': 'hist',
    "random_state": cfg.seed,
    "n_jobs": cfg.numJobs,
}
custom_params = {'learning_rate': 0.01, 'max_depth': 6, 'n_estimators': 800}

# Feature columns, plus the non-feature fields listed in the config
feature_columns = ['ELEVATION_DIFFERENCE', *vois_climate, *vois_topographical]
all_columns = feature_columns + cfg.fieldsNotFeatures

df_X_train_subset = train_set['df_X'][all_columns]
df_X_test_subset = test_set['df_X'][all_columns]
print('Shape of training dataset:', df_X_train_subset.shape)
print('Shape of testing dataset:', df_X_test_subset.shape)
print('Running with features:', feature_columns)

# custom_params override param_init on duplicate keys
params = {**param_init, **custom_params}
print(params)
custom_model = mbm.models.CustomXGBoostRegressor(cfg, **params)

# Fit on train data
custom_model.fit(train_set['df_X'][all_columns], train_set['y'])

# Switch the model to CPU before predicting on the test set
custom_model = custom_model.set_params(device='cpu')
features_test, metadata_test = custom_model._create_features_metadata(
    df_X_test_subset)
y_pred = custom_model.predict(features_test)
print('Shape of the test:', features_test.shape)

# Predictions aggregated to measurement ID
y_pred_agg = custom_model.aggrPredict(metadata_test, features_test)

# The score is negative, so its absolute value is reported
score = custom_model.score(test_set['df_X'][all_columns], test_set['y'])
print('Overall score:', np.abs(score))

grouped_ids = getDfAggregatePred(test_set, y_pred_agg, all_columns)
# PlotPredictions(grouped_ids, y_pred, metadata_test, test_set, custom_model)
# plt.suptitle(f'MBM tested on {test_glaciers}', fontsize=20)
# plt.tight_layout()

In [None]:
RUN = False

# Folder with the masked SGI grids (one .zarr per glacier)
path_xr_grids = '../../../data/GLAMOS/topo/SGI2020/xr_masked_grids_sgi/'  # SGI DEMs

if RUN:
    years = range(2016, 2023)
    # years = [2016]
    for year in years:
        path_save_glw = f'../../../data/GLAMOS/distributed_MB_grids/MBM/swisswide/{year}/'
        path_monthly_grids = f'../../../data/GLAMOS/topo/gridded_topo_inputs/SGI_regional_preds/{year}/'

        # SGI ID = part of each input file name before the first '_'
        sgi_id_list = [
            fname.split('_')[0] for fname in os.listdir(path_monthly_grids)
        ]

        # Start from an empty output folder for this year
        if os.path.exists(path_save_glw):
            emptyfolder(path_save_glw)
        else:
            os.makedirs(path_save_glw)

        # Columns handed to the model
        vois_climate = [
            't2m', 'tp', 'slhf', 'sshf', 'ssrd', 'fal', 'str', 'u10', 'v10'
        ]
        feature_columns = [
            'ELEVATION_DIFFERENCE', *vois_climate, *vois_topographical
        ]
        all_columns = feature_columns + cfg.fieldsNotFeatures
        print('Running for feature columns:', all_columns)

        for sgi_id in tqdm(sgi_id_list, desc='SGI Ids'):
            print(sgi_id)

            # Pre-processed monthly grid of this glacier
            grid_file = os.path.join(path_monthly_grids,
                                     f"{sgi_id}_grid_{year}.parquet")
            df_grid_monthly = pd.read_parquet(grid_file)
            df_grid_monthly.drop_duplicates(inplace=True)

            # Keep the wanted columns that are actually present
            present_columns = [
                col for col in all_columns if col in df_grid_monthly.columns
            ]
            df_grid_monthly = df_grid_monthly[present_columns]

            # Compute and save the gridded MB for this glacier and year
            geoData = mbm.GeoData(df_grid_monthly)
            path_glacier_dem = os.path.join(path_xr_grids, f"{sgi_id}.zarr")
            geoData.gridded_MB_pred(custom_model,
                                    sgi_id,
                                    year,
                                    all_columns,
                                    path_glacier_dem,
                                    path_save_glw,
                                    cfg,
                                    save_monthly_pred=True)

In [None]:
# Example: annual predicted MB grid of Aletsch for 2016
sgi_id = 'B36-26'  # Aletsch
year = 2016
path_save_glw = f'../../../data/GLAMOS/distributed_MB_grids/MBM/swisswide/{year}/'
path = os.path.join(path_save_glw, f"{sgi_id}/{sgi_id}_{year}_annual.zarr")

ds_annual = xr.open_dataset(path)
ds_annual.pred_masked.plot()

In [None]:
# Pre-processed monthly input grid of Aletsch for 2016
sgi_id = 'B36-26'  # Aletsch
year = 2016
path_monthly_grids = f'../../../data/GLAMOS/topo/gridded_topo_inputs/SGI_regional_preds/{year}/'
grid_file = os.path.join(path_monthly_grids, f"{sgi_id}_grid_{year}.parquet")
df = pd.read_parquet(grid_file)
fig, axs = plt.subplots(1, 4, figsize=(15, 5))

# 'sep' rows only; one scatter panel per variable
df = df[df.MONTHS == 'sep']
voi = ['t2m', 'tp', 'hugonnet_dhdt', 'consensus_ice_thickness']
for ax, var in zip(axs, voi):
    sns.scatterplot(data=df,
                    x='POINT_LON',
                    y='POINT_LAT',
                    hue=var,
                    s=5,
                    alpha=0.5,
                    palette='twilight_shifted',
                    ax=ax)

## Analyze results:

### Look at 2016:

#### Mean predicted MB:

In [None]:
year = 2016
path_save_glw = f'../../../data/GLAMOS/distributed_MB_grids/MBM/swisswide/{year}/'
sgi_id_list = os.listdir(path_save_glw)


def get_mean_mb_year(year, sgi_ids=None):
    """Return the mean predicted annual MB of every glacier for `year`.

    Parameters
    ----------
    year : int
        Year whose prediction folder is read.
    sgi_ids : iterable of str, optional
        Glacier folder names to process. Defaults to the entries of the
        prediction folder of `year` itself, so a year with a different set of
        glaciers than 2016 is handled correctly.

    Returns
    -------
    pandas.DataFrame
        One row per glacier with columns 'sgi_id' and 'mean_mb' (mean of
        `pred_masked` over the annual grid).
    """
    path_save_glw = f'../../../data/GLAMOS/distributed_MB_grids/MBM/swisswide/{year}/'

    if sgi_ids is None:
        sgi_ids = sorted(os.listdir(path_save_glw))

    # Calculate mean predicted mb for each glacier
    rows = []
    for sgi_id in tqdm(sgi_ids):
        annual_path = os.path.join(path_save_glw,
                                   f"{sgi_id}/{sgi_id}_{year}_annual.zarr")
        # Context manager closes the dataset once the mean has been read
        with xr.open_dataset(annual_path) as gridd_mb:
            mean_value = gridd_mb.pred_masked.mean().values.item()
        rows.append({'sgi_id': sgi_id, 'mean_mb': mean_value})

    # Explicit columns keep the schema stable even when no glacier was found
    return pd.DataFrame(rows, columns=['sgi_id', 'mean_mb'])

In [None]:
# Mean predicted MB per glacier for 2016 and 2022
mean_mb_2016, mean_mb_2022 = (get_mean_mb_year(yr) for yr in (2016, 2022))

#### Get volumes and areas:

In [None]:
def convert_id(id_str):
    """Return `id_str` with every '/' replaced by '-'."""
    return '-'.join(id_str.split('/'))

path_volumes = '../../../data/GLAMOS/volumes/'
path_areas = '../../../data/GLAMOS/topo/SGI2020/inventory_sgi2016_r2020'

# Volumes: build 'sgi-id' from 'pk_sgi' and scale V_2016 by 10**9 (to m3)
volgdf = gpd.read_file(os.path.join(path_volumes, 'Summary.shp'))
volgdf['sgi-id'] = volgdf['pk_sgi'].apply(convert_id)
volgdf['V_2016'] = volgdf['V_2016'] * 10**9

# Areas: km2 -> m2
areagdf = gpd.read_file(os.path.join(path_areas, 'SGI_2016_glaciers.shp'))
areagdf['area_2016'] = areagdf['area_km2'] * 10**6

# Give the mean-MB tables the same 'sgi-id' key (computed in an earlier cell)
mean_mb_2016['sgi-id'] = mean_mb_2016['sgi_id'].apply(convert_id)
mean_mb_2022['sgi-id'] = mean_mb_2022['sgi_id'].apply(convert_id)

# Inner-join volumes, areas and both mean-MB tables on 'sgi-id'
mb_2016_cols = mean_mb_2016[['sgi-id', 'mean_mb']].rename(
    columns={'mean_mb': 'mean_mb_2016'})
mb_2022_cols = mean_mb_2022[['sgi-id', 'mean_mb']].rename(
    columns={'mean_mb': 'mean_mb_2022'})

glacier_info = volgdf[['sgi-id', 'V_2016']]
for right in (areagdf[['sgi-id', 'area_2016']], mb_2016_cols, mb_2022_cols):
    glacier_info = glacier_info.merge(right, on='sgi-id', how='inner')

# Volume change = 2016 area times mean MB of the respective year
for yr in (2016, 2022):
    glacier_info[f'vol_change_{yr}'] = (glacier_info['area_2016'] *
                                        glacier_info[f'mean_mb_{yr}'])

glacier_info.head()

In [None]:
# Total area of the merged glaciers (area_2016 divided by 10**6)
glacier_info['area_2016'].sum() / 10**6

In [None]:
# Display the GLAMOS reference row for Switzerland, 2016.
# NOTE(review): in this file ref_CH_2016 is only assigned in a later cell
# ("Total vol change 2016"); run that cell first or this raises NameError.
ref_CH_2016

#### Total vol change 2016:

In [None]:
# Swiss-wide totals for 2016 (volumes scaled by 1e-9, area by 1e-6)
km3 = 10**9
km2 = 10**6
vol_change_2016 = glacier_info['vol_change_2016'].sum() / km3
volume_2016 = glacier_info['V_2016'].sum() / km3
area_2016 = glacier_info['area_2016'].sum() / km2
# Not displayed by the notebook because it is not the cell's last expression
volume_2016, area_2016, vol_change_2016

# GLAMOS reference table; the first data row is dropped
df_reference = pd.read_csv(
    '../../../data/GLAMOS/massbalance_swisswide_2024_r2024_clean.csv').iloc[1:]
is_switzerland = df_reference.catchment == 'Switzerland'
is_2016 = df_reference.year == '2016'
ref_CH_2016 = df_reference[is_switzerland & is_2016]

print('Volume change from GLAMOS:', ref_CH_2016['volume change'].values[0])
print('Volume change from MBM:', vol_change_2016)

In [None]:
# Swiss-wide totals for 2022. Volume and area are still the 2016 inventory
# values (columns 'V_2016' and 'area_2016'); only the volume change is 2022.
km3 = 10**9
km2 = 10**6
vol_change_2022 = glacier_info['vol_change_2022'].sum() / km3
volume_2022 = glacier_info['V_2016'].sum() / km3
area_2022 = glacier_info['area_2016'].sum() / km2
# Not displayed by the notebook because it is not the cell's last expression
volume_2022, area_2022, vol_change_2022

# GLAMOS reference row for Switzerland, 2022
is_switzerland = df_reference.catchment == 'Switzerland'
is_2022 = df_reference.year == '2022'
ref_CH_2022 = df_reference[is_switzerland & is_2022]

print('Volume change from GLAMOS:', ref_CH_2022['volume change'].values[0])
print('Volume change from MBM:', vol_change_2022)

### Volume area scaling:

In [None]:
# Volume-area scaling V = c * A**beta: calculate c for every glacier.
# Both operands are taken from glacier_info itself. Using volgdf / areagdf
# here would align on their own row indices, which do not correspond to the
# rows of the merged glacier_info table, and pair up the wrong glaciers.
beta = 1.36
glacier_info['c'] = glacier_info['V_2016'] / (glacier_info['area_2016']**beta)

# Initialisation:
# NOTE(review): density is in kg/m3 and is used as a divisor of
# mean_mb * area in the simulation cell -- confirm the units of mean_mb
# match (that expression is an ice volume only if mean_mb is in kg/m2).
density = 916.7  # kg/m3

t1, t2 = 2015, 2017
years = np.arange(t1, t2 + 1, 1)

# Per-glacier state, one column per year; float dtype so that unset years are
# numeric NaN rather than object-typed placeholders
volume_ev = pd.DataFrame(columns=years, index=glacier_info.index, dtype=float)
area_ev = pd.DataFrame(columns=years, index=glacier_info.index, dtype=float)
mb_ev = pd.DataFrame(columns=years, index=glacier_info.index, dtype=float)

# Set initial conditions (2016 inventory values and 2016 mean MB)
volume_ev[2016] = glacier_info['V_2016'].values
area_ev[2016] = glacier_info['area_2016'].values
mb_ev[2016] = glacier_info['mean_mb_2016'].values

idx_16 = 2016

In [None]:
# Forward simulation: step from 2016 up to t2, one year at a time
for step in range(t2 - 2016):
    cur = idx_16 + step
    nxt = cur + 1

    # Volume change of the current year
    volume_change = mb_ev[cur] * area_ev[cur] / density

    # New volume, clipped at zero
    volume_ev[nxt] = np.maximum(volume_ev[cur] + volume_change, 0)

    # New area from V-A scaling, A = (V / c)**(1 / beta)
    area_ev[nxt] = np.maximum(
        (volume_ev[nxt] / glacier_info['c'])**(1 / beta), 0)

# Backward simulation: step from 2016 down to t1, subtracting the change
for step in range(2016 - t1):
    cur = idx_16 - step
    prv = cur - 1

    # Volume change of the current year
    volume_change = mb_ev[cur] * area_ev[cur] / density

    # Previous volume, clipped at zero
    volume_ev[prv] = np.maximum(volume_ev[cur] - volume_change, 0)

    # Previous area from V-A scaling
    area_ev[prv] = np.maximum(
        (volume_ev[prv] / glacier_info['c'])**(1 / beta), 0)

# Changes relative to 2016: area as a fraction, volume as a difference
ref_area = area_ev[2016]
area_change = area_ev.sub(ref_area, axis=0).div(ref_area, axis=0)
volume_change = volume_ev.sub(volume_ev[2016], axis=0)

In [None]:
# Display the per-glacier area change relative to 2016 (fraction of the 2016
# area), one column per simulated year.
area_change

In [None]:
"""Parameters
----------
gdir: `py_class:crampon.GlacierDirectory`
    The GlacierDirectory to process the ice thickness for.

Returns
-------
None
"""
#input_cal = 'swisstopo_exact_date'
input_cal = 'marty'

if input_cal == 'marty':
    print('calibration marty gmbs')
    marty_df = pd.read_csv(
        '/scratch-fourth/acremona/crampon/data/geodetic_mb/geod_mb_ch_linear_1std_bin_50_t100_500_tband_01.csv',
        encoding="iso-8859-1")
else:
    marty_df = pd.read_csv(
        '/scratch-fourth/acremona/crampon/data/geodetic_mb/geodetic_mb_swisstopo_exact_dates.csv',
        encoding="iso-8859-1")

volgdf = gpd.read_file(
    '/scratch-fourth/acremona/crampon/data/volumes/Summary.shp')

if mb_model:
    if type(mb_model) == list:
        mb_models = mb_model
    else:
        mb_models = [mb_model]
else:
    mb_models = [eval(m) for m in cfg.MASSBALANCE_MODELS]

density = cfg.RHO / cfg.RHO_W

heights, widths = gdir.get_inversion_flowline_hw()

#iterate over marty df for gdir
marty_df_gdir = marty_df[marty_df.RGI_Id == gdir.rgi_id]
marty_df_gdir = marty_df_gdir.drop(
    marty_df_gdir[marty_df_gdir.geod_mb_mwey_1.isna()].index)

if marty_df_gdir.empty:
    print('No geodetic mass balance available for calibrations')
    return

df_indexes = marty_df_gdir.index.values

# For every geodetic mass-balance period of this glacier: calibrate (or reuse
# stored) model parameters, run the mass-balance ensemble twice (fixed
# geometry, then with the derived area changes) and store the resulting
# area-change series and parameters as new CSV columns.
for inde in df_indexes:
    skipcali = False

    # Remove any calibration file left over from a previous period.
    cali_path = gdir.get_filepath('calibration', filesuffix='_marty_multiple')
    if os.path.exists(cali_path):
        os.remove(cali_path)

    cal_data = marty_df_gdir.loc[inde]

    t1_date = pd.to_datetime(cal_data.date_1, dayfirst=True)
    t2_date = pd.to_datetime(cal_data.date_2, dayfirst=True)

    # Output columns are keyed by the period dates and the geodetic data set.
    period_tag = (f'{t1_date.day}_{t1_date.month}_{t1_date.year}_'
                  f'{t2_date.day}_{t2_date.month}_{t2_date.year}_{input_cal}')
    area_col = f'area_change_rate_perc_{period_tag}'
    param_col = f'params_{period_tag}'

    years = np.arange(t1_year_run, t2_year_run + 1, 1)

    # Inputs for the volume-area scaling V = c * A**beta, anchored at the
    # 2016 inventory.
    sgi_id = '/'.join(gdir.rgi_id.split('.')[1].split('-'))
    # V_2016 scaled by 1e9 (presumably km3 -> m3 — TODO confirm).
    volume_inventory_16 = volgdf[volgdf.pk_sgi ==
                                 sgi_id].V_2016.values[0] * 10**9
    # area_km2 scaled by 1e6, i.e. no longer in km2 (presumably m2).
    area_inventory_16 = gdir.area_km2 * 10**6

    # Glacier-specific scaling constant c.
    c = volume_inventory_16 / (area_inventory_16**beta)

    # Position of 2016 within `years`.
    idx_16 = 2016 - t1_year_run

    # Area-change table: reuse the existing file, skipping periods that were
    # already processed; otherwise start a fresh table.
    area_path = gdir.get_filepath('area_change')
    try:
        area_df = pd.read_csv(area_path)
    except FileNotFoundError:
        area_df = pd.DataFrame(data={'year': years})
    else:
        if area_col in area_df.drop(['year'], axis=1).columns:
            continue

    # Parameter table: reuse stored parameters for this period if present,
    # otherwise prepare an empty table with one row per calibrated parameter.
    param_path = gdir.get_filepath('initial_calibrated_parameters')
    try:
        param_df = pd.read_csv(param_path)
    except FileNotFoundError:
        to_calibrate_csv = [
            mo.prefix + name for mo in mb_models
            for name in mo.cali_params_guess.keys()
        ]
        print(to_calibrate_csv)
        param_df = pd.DataFrame(data={'params': to_calibrate_csv})
        print(param_df)
    else:
        if param_col in param_df.drop(['params'], axis=1).columns:
            skipcali = True
            print((param_df[param_df.params == 'HockModel_mu_hock']))
            mu_hock, a_ice, p_fh, tf, srf, p_fp = (
                param_df.loc[param_df.params == name, param_col].values[0]
                for name in ('HockModel_mu_hock', 'HockModel_a_ice',
                             'HockModel_prcp_fac', 'PellicciottiModel_tf',
                             'PellicciottiModel_srf',
                             'PellicciottiModel_prcp_fac'))

            # A stored NaN marks a calibration that failed earlier: record
            # the period as NaN in the area table and move on.
            if np.isnan([mu_hock, a_ice, p_fh, tf, srf, p_fp]).any():
                area_df.loc[:, area_col] = np.full(len(years), np.nan)
                area_df.to_csv(area_path, index=False)
                continue

    out_params = []
    braker = False  # set when a calibration yields no usable parameter
    for count in range(2):
        # Pass 0 runs with fixed geometry; pass 1 repeats the run with the
        # area changes derived from pass 0.
        evolve_geometry = count == 1
        mb_ens = []
        for m in mb_models:
            print(m)
            cfg.PARAMS['geometry_evolution'] = evolve_geometry
            geometry_kwargs = ({
                'area_change_dict': area_change_dict
            } if evolve_geometry else {})

            if skipcali:
                # Rebuild the model from the stored parameters.
                if m.prefix == 'HockModel_':
                    day_model = m(gdir,
                                  mu_hock=mu_hock,
                                  a_ice=a_ice,
                                  prcp_fac=p_fh,
                                  bias=0.,
                                  snow_redist=False,
                                  **geometry_kwargs)
                elif m.prefix == 'PellicciottiModel_':
                    day_model = m(gdir,
                                  tf=tf,
                                  srf=srf,
                                  prcp_fac=p_fp,
                                  bias=0.,
                                  snow_redist=False,
                                  **geometry_kwargs)
                else:
                    raise NotImplementedError(
                        'use Hock and Pellicciotti Models')
            else:
                if not evolve_geometry:
                    # Optionally override the precipitation-factor guess.
                    cali_kwargs = {}
                    if p_fac:
                        cali_kwargs['initial_param_guess'] = {
                            k: (p_fac if k == 'prcp_fac' else v)
                            for k, v in m.cali_params_guess.items()
                        }
                    calibrate_mb_model_on_geod_mb_one_paramset_snowlines(
                        gdir,
                        mb_model=m,
                        conv_thresh=0.005,
                        gmb_delta=0.,
                        it_thresh=5,
                        cali_suffix='_marty_multiple',
                        cal_data=cal_data,
                        **cali_kwargs)

                day_model = m(gdir,
                              bias=0.,
                              cali_suffix='_marty_multiple',
                              snow_redist=False,
                              **geometry_kwargs)

                if not evolve_geometry:
                    # Collect the calibrated values; a parameter without any
                    # non-NaN value means the calibration failed.
                    for name in day_model.cali_params_list:
                        calibrated = getattr(day_model, name).dropna().values
                        if len(calibrated) == 0:
                            braker = True
                            area_df.loc[:, area_col] = np.full(
                                len(years), np.nan)
                            area_df.to_csv(area_path, index=False)
                            # Scalar NaN fills the column whatever the
                            # number of parameter rows is.
                            param_df[param_col] = np.nan
                            param_df.to_csv(param_path, index=False)
                            break
                        out_params.append(np.unique(calibrated)[0])
                    if braker:
                        break

            mb_years = [
                day_model.get_specific_mb(heights, widths, year=y)
                for y in years
            ]
            mb_ens.append(mb_years)

        if braker:
            break

        # Median over the model ensemble, one value per year.
        mb_med = np.median(mb_ens, axis=0)
        print('mb_med', mb_med)

        # Float arrays: `years` is integer-typed, so np.zeros_like(years)
        # would truncate the values and reject NaN.
        volume = np.zeros(len(years), dtype=float)
        area = np.zeros(len(years), dtype=float)

        # Set initial conditions
        volume[idx_16] = volume_inventory_16
        area[idx_16] = area_inventory_16

        # Forward simulation from 2016 to the last year; volume and area are
        # clipped at zero.
        for k in range(idx_16, len(years) - 1):
            volume_change = mb_med[k] * area[k] / density
            volume[k + 1] = max(volume[k] + volume_change, 0)
            area[k + 1] = max((volume[k + 1] / c)**(1 / beta), 0)

        # Backward simulation from 2016 to the first year: the current year's
        # volume change is subtracted to obtain the previous year.
        for k in range(idx_16, 0, -1):
            volume_change = mb_med[k] * area[k] / density
            volume[k - 1] = max(volume[k] - volume_change, 0)
            area[k - 1] = max((volume[k - 1] / c)**(1 / beta), 0)

        # Changes relative to the 2016 inventory (area as a fraction).
        area_change = (area - area_inventory_16) / area_inventory_16
        volume_change = volume - volume_inventory_16

        area_change_dict = dict(zip(years, area_change))

    if braker:
        continue

    area_df.loc[:, area_col] = area_change
    area_df.to_csv(area_path, index=False)

    if not skipcali:
        param_df.loc[:, param_col] = out_params
        param_df.to_csv(param_path, index=False)
