# OGGM Glacier Modeling - HMA CMIP6 Scenarios

This notebook performs glacier state simulation using the OGGM model with CMIP6 climate scenarios for future projections. The workflow is refactored from the original regional analysis to provide a complete modeling pipeline including:

- Historical simulation with ERA5-Land data
- CMIP6 climate data processing
- Multiple SSP scenario projections (SSP126, SSP245, SSP370, SSP585)
- Comprehensive sensitivity analysis


## Configuration 

In [None]:
# Configuration for the High Mountain Asia (HMA) region.
# All paths below are machine-specific absolute paths; adapt them before running.
subregion = 'HMA'
boundary_shapefile = '/mnt/402D567E601BAE10/OGGM/tmp/HMA_one.shp'
working_dir = '/mnt/3FE827E84836B503/OGGM_results/climate_extreme_HMA_glacier_modeling_CMIP6_v3'

# ERA5-Land local data paths (adapt these paths as needed).
# Keys: 'tmp' -> 2 m temperature file, 'pre' -> precipitation file,
# 'inv' -> invariant file (as indicated by the file names).
era5_local_dir = {
        'tmp':'/mnt/3FE827E84836B503/ERA5_Land_monthly/era5_land_monthly_t2m_1950-2025_flat_HMA.nc',
        'pre':'/mnt/3FE827E84836B503/ERA5_Land_monthly/era5_land_monthly_prcp_1950-2025_flat_HMA.nc',
        'inv':'/mnt/3FE827E84836B503/ERA5_Land_monthly/era5_land_invariant_flat_HMA.nc'
}

# RGI regions covering High Mountain Asia
rgi_regions = [13, 14, 15]  # Central Asia, South Asia West, South Asia East
rgi_version = '62'

# Projection settings
projection_start_year = 2025
projection_end_year = 2100

# CMIP6 scenarios to analyze
cmip6_scenarios = ['ssp126', 'ssp245', 'ssp370', 'ssp585']
# CMIP6 models (GCMs) to analyze; every model is combined with every scenario
cmip6_models = ['BCC-CSM2-MR', 'CAMS-CSM1-0', 'CESM2', 'CESM2-WACCM', 
                'EC-Earth3', 'EC-Earth3-Veg', 'FGOALS-f3-L', 'GFDL-ESM4',
                'INM-CM4-8', 'INM-CM5-0', 'MPI-ESM1-2-HR', 'MRI-ESM2-0', 'NorESM2-MM']


## Import libraries

In [None]:
# Libraries
import os
from time import gmtime, strftime
import geopandas as gpd
import shapely.geometry as shpg
import xarray as xr
import numpy as np
import pandas as pd

# OGGM imports
from oggm import utils, workflow, tasks, graphics, global_tasks, shop
import oggm.cfg as cfg
from oggm.shop import gcm_climate, ecmwf
from oggm.core import massbalance, climate
from oggm.core.massbalance import MultipleFlowlineMassBalance
from oggm.shop import create_scieno

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import STL
import statsmodels.api as sm
from statsmodels.robust.robust_linear_model import RLM
from statsmodels.robust.norms import HuberT

import pickle
from datetime import datetime as dt

# Set plotting style: seaborn 'ticks' axes style with the 'notebook' context,
# applied globally to all matplotlib figures created below.
sns.set_style('ticks')
sns.set_context('notebook')


## Initialize OGGM


In [None]:
# Initialize OGGM (resets cfg.PARAMS / cfg.PATHS, so all overrides must come after this call)
cfg.initialize(logging_level='WARNING')

# Enable multiprocessing
cfg.PARAMS['use_multiprocessing'] = True
#cfg.PARAMS['mp_processes'] = 96

# Enable geometry storage for analysis
cfg.PARAMS['store_model_geometry'] = True
cfg.PARAMS['store_fl_diagnostics'] = True

# Set working directory
cfg.PATHS['working_dir'] = utils.mkdir(working_dir, reset=False)  # reset to True for first use
# NOTE(review): presumably the minimum time step accepted by the flowline model — confirm units in the OGGM docs
cfg.PARAMS['cfl_min_dt'] = 30
# Keep going when a task fails for an individual glacier instead of aborting the whole run
cfg.PARAMS['continue_on_error'] = True
# Relative path: resolved against the notebook's current working directory
cfg.PATHS['rgi_dir'] = './rgi'


## Select HMA glaciers

In [None]:
# Read HMA boundary shapefile
basin = gpd.read_file(boundary_shapefile)

# Only the first feature of the boundary file is used for the containment
# test (this matches the previous `[0]` lookup). It is resolved once here
# instead of once per glacier.
boundary_geom = basin.geometry.iloc[0]

# Get RGI glaciers within the boundary for all specified regions.
# A glacier is selected when its centre point (CenLon, CenLat) lies inside
# the boundary polygon.
selected_regions = []
for region in rgi_regions:
    fr = utils.get_rgi_region_file(region, version=rgi_version)
    gdf = gpd.read_file(fr)
    # Vectorised point-in-polygon test, index-aligned with `gdf`
    centres = gpd.GeoSeries(gpd.points_from_xy(gdf.CenLon, gdf.CenLat), index=gdf.index)
    in_bas = centres.within(boundary_geom)
    gdf_region = gdf.loc[in_bas]
    selected_regions.append(gdf_region)

# pd.concat is the public replacement for the private DataFrame._append;
# original row indices are kept, as before.
gdf_sel = pd.concat(selected_regions) if selected_regions else gpd.GeoDataFrame()

print(f"Selected {len(gdf_sel)} glaciers in HMA region")


In [None]:
# Initialize glacier directories from the list of selected RGI ids.
# NOTE(review): the next cell also assigns `gdirs` (from pre-processed data);
# presumably only one of the two cells is meant to be run — confirm.
gdirs = workflow.init_glacier_directories(gdf_sel['RGIId'].tolist())
print(f"Initialized {len(gdirs)} glacier directories")

In [None]:
# Initialize glacier directories from pre-processed data
# (level 2, map border 160, fetched from the OGGM v1.6 centerlines repository below).
# This overwrites any `gdirs` created by the previous cell.
base_url = 'https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L1-L2_files/centerlines/'
gdirs = workflow.init_glacier_directories(gdf_sel, from_prepro_level=2, prepro_border=160, 
                                          prepro_base_url=base_url)
print(f"Initialized {len(gdirs)} glacier directories")

In [None]:
# Cache the list of glacier directories in the working directory so that later
# sessions can skip the (slow) initialization step.
gdir_file = cfg.PATHS['working_dir'] + '/gdir_list.pkl'
with open(gdir_file, 'wb') as f:
    pickle.dump(gdirs, f, protocol=pickle.HIGHEST_PROTOCOL)
    

In [None]:
# Reload the cached list of glacier directories written by the previous cell.
# NOTE: pickle.load can execute arbitrary code — only load a file that this
# notebook wrote itself, never one obtained from an untrusted source.
gdir_file = cfg.PATHS['working_dir'] + '/gdir_list.pkl'
with open(gdir_file, 'rb') as f:
    gdirs = pickle.load(f)

# gdirs

In [None]:
# Filter out glaciers without complete preprocessing data.
# A glacier counts as incomplete when its directory under `per_glacier`
# does not contain an 'inversion_flowlines.pkl' file.
path = cfg.PATHS['working_dir'] + '/per_glacier'
incomplete_glaciers = []

if os.path.exists(path):
    for root, dirs, files in os.walk(path):
        parts = root.replace(path, '').strip(os.sep).split(os.sep)
        # Glacier directories sit three levels below `per_glacier` and are
        # named after the RGI id (third path component starts with 'RGI').
        if len(parts) == 3 and parts[2].startswith('RGI'):
            inversion_file = os.path.join(root, 'inversion_flowlines.pkl')
            if not os.path.exists(inversion_file):
                incomplete_glaciers.append(parts[2])
            # Nothing below a glacier directory can match the depth test
            # above, so do not descend any further.
            dirs[:] = []

# Remove incomplete glaciers
if incomplete_glaciers:
    print(f"Removing {len(incomplete_glaciers)} incomplete glaciers...")
    # Set lookup keeps the filter linear in the number of glaciers
    incomplete_ids = set(incomplete_glaciers)
    gdirs = [gdir for gdir in gdirs if gdir.rgi_id not in incomplete_ids]
    print(f"Remaining glaciers: {len(gdirs)}")


In [None]:
## SELECT 500 GLACIERS FOR TEST
# Keeps the contiguous slice gdirs[start_idx:end_idx]. start_idx / end_idx are
# also used further down to name the output NetCDF files.
# NOTE: slicing never raises — if fewer than `start_idx` glaciers are loaded,
# `gdirs` silently becomes an empty list.

start_idx, end_idx = 85400, 85900
gdirs = gdirs[start_idx:end_idx]
# gdirs

# Alternative (disabled): draw 500 random glaciers instead of a contiguous slice
# random_indices = np.random.choice(50000, 500, replace=False)
# random_indices = np.sort(random_indices) # 500 Random Numbers
# gdirs = [gdirs[i] for i in random_indices]


## Climate preprocessing

### Historical ERA5-Land

In [None]:
# Process ERA5-Land data for the HMA glaciers, reading the local files
# configured in `era5_local_dir` (no download).
print("Processing ERA5-Land data (this may take a while)...")
workflow.execute_entity_task(ecmwf.process_ecmwf_data, gdirs, dataset='ERA5L-LATEST',
                            download=False, local_path_dict=era5_local_dir)

# Remove glaciers without valid climate data: a glacier is kept only if its
# climate_historical.nc can be opened.
valid_gdirs = []
skipped_ids = []
for gdir in gdirs:
    fpath = gdir.dir + '/climate_historical.nc'
    try:
        # The context manager guarantees the file handle is released again
        with xr.open_dataset(fpath):
            pass
    except Exception:
        # Best effort: a missing or unreadable file only drops this glacier.
        # `Exception` (not a bare except) so Ctrl-C still interrupts the loop.
        skipped_ids.append(gdir.rgi_id)
    else:
        valid_gdirs.append(gdir)
gdirs = valid_gdirs
if skipped_ids:
    print(f"Dropped {len(skipped_ids)} glaciers without readable climate_historical.nc")
print(f"Valid glaciers: {len(gdirs)}")

# Compile historical climate data of all remaining glaciers into one dataset
# and store it next to the other results.
climate_hist = utils.compile_climate_input(gdirs, filename='climate_historical', input_filesuffix='', path=False)
climate_hist.to_netcdf(cfg.PATHS['working_dir']+'/ERA5_'+subregion+ f'_{start_idx}-{end_idx-1}.nc')

print("Historical climate data compiled")


### CMIP6 Processing

In [None]:
# Process the CMIP6 forcing of every (model, scenario) pair for all glaciers.
print("Processing CMIP6 data for future projections...")

# Path templates of the locally cached CMIP6 files (update as needed).
# Placeholders: model folder, model name, scenario.
base_url_temp = '/mnt/402D567E601BAE10/OGGM/download_cache/cluster.klima.uni-bremen.de/~oggm/cmip6/GCM/{}/{}_{}_r1i1p1f1_tas.nc'
base_url_precip = '/mnt/402D567E601BAE10/OGGM/download_cache/cluster.klima.uni-bremen.de/~oggm/cmip6/GCM/{}/{}_{}_r1i1p1f1_pr.nc'

for gcm in cmip6_models:
    print(f"Processing model: {gcm}")
    for ssp in cmip6_scenarios:
        # Temperature and precipitation files of this model/scenario
        ft, fp = (template.format(gcm, gcm, ssp)
                  for template in (base_url_temp, base_url_precip))

        # Output suffix identifies the model/scenario; ('2000', '2019') is
        # the year range handed to the CMIP processing task.
        task_kwargs = dict(
            filesuffix='_{}_{}'.format(gcm, ssp),
            year_range=('2000', '2019'),
            fpath_temp=ft,
            fpath_precip=fp,
        )

        try:
            workflow.execute_entity_task(gcm_climate.process_cmip_data, gdirs,
                                         **task_kwargs)
        except Exception as e:
            # A failing combination is reported and the loop moves on
            print(f"    Warning: Failed to process {gcm} {ssp}: {e}")

print("CMIP6 data processing complete")


### Plot corrected temp and prcp series in CMIP6 data

In [None]:
# Compile the per-glacier CMIP6 files into one dataset with the extra
# dimensions GCM and SSP, and store it as NetCDF in the working directory.
gcm_all = []  # all datasets are collected here, each tagged with its GCM and SSP coordinate
creation_date = strftime("%Y-%m-%d %H:%M:%S", gmtime())  # current UTC time, stored for info
for GCM in cmip6_models:  # loop through all GCMs
    for SSP in cmip6_scenarios:  # loop through all SSPs
        # rid = '_{}_{}_0019cor'.format(GCM, SSP)
        rid = '_{}_{}'.format(GCM, SSP)
        # Only the first glacier is probed to decide whether this GCM/SSP
        # combination was processed at all.
        fpath = gdirs[0].dir+'/gcm_data'+rid+'.nc'
        if not os.path.exists(fpath):
            continue

        # Aggregate the CMIP6 climate data of all glaciers
        gcm_tmp = utils.compile_climate_input(gdirs, filename='gcm_data', input_filesuffix=rid, path=False)
        # compile_climate_input: month to date float, 1950-1-15 -> 1950.0
        # gcm_tmp = xr.open_dataset(cfg.PATHS['working_dir']+'/climate_input'+rid+'.nc')

        gcm_tmp.coords['GCM'] = GCM  # add GCM as a coordinate
        gcm_tmp.coords['GCM'].attrs['description'] = 'used Global circulation Model'  # add a description for GCM
        gcm_tmp = gcm_tmp.expand_dims("GCM")  # add GCM as a dimension to all data variables

        gcm_tmp.coords['SSP'] = SSP  # add SSP as a coordinate
        # SSP = Shared Socioeconomic Pathway (the previous text described RCPs)
        gcm_tmp.coords['SSP'].attrs['description'] = 'used Shared Socioeconomic Pathway'
        gcm_tmp = gcm_tmp.expand_dims("SSP")  # add SSP as a dimension to all data variables

        gcm_tmp.attrs['creation_date'] = creation_date  # also add today's date for info
        # The dataset is not closed here: it is still needed by the merge below
        gcm_all.append(gcm_tmp)

# GCM/SSP combinations that are missing are filled with NaN
gcm_merged = xr.combine_by_coords(gcm_all, fill_value=np.nan)
gcm_merged.to_netcdf(cfg.PATHS['working_dir']+'/GCM_'+subregion+ f'_cmip6_{start_idx}-{end_idx-1}.nc')
gcm_merged

In [None]:
# Reload the compiled ERA5-Land dataset written by the historical climate cell
climate_hist = xr.open_dataset(cfg.PATHS['working_dir']+'/ERA5_'+subregion+ f'_{start_idx}-{end_idx-1}.nc')
# Calculate regional mean
climate_hist_region = climate_hist.mean(dim='rgi_id', skipna=True, keep_attrs=True)  # TODO: double-check this result

# Reload the compiled CMIP6 dataset (dimensions include rgi_id, GCM and SSP)
gcm_merged = xr.open_dataset(cfg.PATHS['working_dir']+'/GCM_'+subregion+ f'_cmip6_{start_idx}-{end_idx-1}.nc')

In [None]:
# Average first over all glaciers, then over all GCMs, leaving one series per SSP
mean_four_ssp = gcm_merged.mean(dim='rgi_id',  # average over all glacier ids
                            skipna=True,  # ignore nan values
                            keep_attrs=True  # keep the variable descriptions
                                    ).mean(dim='GCM',  # average over all GCMs (multi-model mean)
                            skipna=True,  # ignore nan values
                            keep_attrs=True # keep the variable descriptions
                                        ).sel(time=slice("1950", "2101"))# restrict the time axis to the 1950-2101 bounds
mean_four_ssp

In [None]:
# Monthly multi-model-mean precipitation and temperature per SSP, with STL trends.
# The time axis is replaced by month-start timestamps 1950-01 .. 2100-12; this
# requires mean_four_ssp to contain exactly that many time steps.
date_label = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")
mean_four_ssp['time'] = date_label

fig, axes = plt.subplots(1, 2 , figsize=(12, 5))
for i, ssp in enumerate(cmip6_scenarios):
    selected_ssp = mean_four_ssp.sel(SSP=ssp).to_dataframe()
    # Raw monthly series: 'Accent' colours 0-3
    axes[0].plot(selected_ssp.index, selected_ssp['prcp'], label=ssp, alpha=0.8, color=plt.get_cmap('Accent')(i))
    axes[1].plot(selected_ssp.index, selected_ssp['temp'], label=ssp, alpha=0.8, color=plt.get_cmap('Accent')(i))
    # STL decomposition (12-month period, 241-month trend window, 13-point seasonal window)
    prcp_trend = STL(selected_ssp['prcp'], period=12, trend=241, seasonal=13).fit().trend
    temp_trend = STL(selected_ssp['temp'], period=12, trend=241, seasonal=13).fit().trend
    
    # Trend lines: 'Accent' colours 4-7
    axes[0].plot(prcp_trend.index, prcp_trend.values, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))
    axes[1].plot(temp_trend.index, temp_trend.values, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))

axes[0].legend()
axes[1].legend()
axes[0].set_title('Precipitation')
axes[1].set_title('Temperature')
plt.show()


In [None]:
# Annual multi-model-mean precipitation (yearly sum) and temperature (yearly
# mean) per SSP, with the STL trend of the monthly series on top.
# The time axis is replaced by month-start timestamps 1950-01 .. 2100-12; this
# requires mean_four_ssp to contain exactly that many time steps.
date_label = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")
mean_four_ssp['time'] = date_label

fig, axes = plt.subplots(1, 2 , figsize=(12, 5))
for i, ssp in enumerate(cmip6_scenarios):
    selected_ssp = mean_four_ssp.sel(SSP=ssp).to_dataframe()
    # Select the column before resampling: the frame also carries the scalar
    # SSP coordinate as a string column, which cannot be averaged.
    # 'YS' (year start) replaces the deprecated alias 'AS'.
    annual_prcp = selected_ssp['prcp'].resample('YS').sum()
    annual_temp = selected_ssp['temp'].resample('YS').mean()
    axes[0].plot(annual_prcp.index, annual_prcp, label=ssp, color=plt.get_cmap('Accent')(i))
    axes[1].plot(annual_temp.index, annual_temp, label=ssp, color=plt.get_cmap('Accent')(i))
    # STL decomposition of the monthly series
    prcp_trend = STL(selected_ssp['prcp'], period=12, trend=241, seasonal=13).fit().trend
    temp_trend = STL(selected_ssp['temp'], period=12, trend=241, seasonal=13).fit().trend
    
    # The monthly precipitation trend is scaled by 12 to match the annual sums
    axes[0].plot(prcp_trend.index, prcp_trend.values*12, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))
    axes[1].plot(temp_trend.index, temp_trend.values, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))
    
axes[0].legend()
axes[1].legend()
axes[0].set_title('Precipitation (mm/yr)')
axes[1].set_title('Temperature (°C)')
plt.show()


### Compare corrected CMIP6 with ERA5-Land

In [None]:
# Regional comparison of the monthly CMIP6 multi-model mean (per SSP) with ERA5-Land.
mean_four_ssp = gcm_merged.mean(dim='rgi_id',
                                skipna=True,
                                keep_attrs=True  # keep the variable descriptions
                                    ).mean(dim='GCM',
                                skipna=True,  # ignore nan values
                                keep_attrs=True # keep the variable descriptions
                                ).sel(time=slice("1950", "2101")) # CMIP6 span; relabelled below as 1950-01 .. 2100-12

# ERA5-Land regional mean; relabelled below as 1950-01 .. 2024-12
compare_era5 = climate_hist_region.sel(time=slice("1950", "2025"))

# Replace both time axes by month-start timestamps (lengths must match the data)
date_label = pd.date_range(start="01/01/1950", end="12/01/2024", freq="MS")
mean_four_ssp['time'] = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")
compare_era5['time'] = date_label

fig, axes = plt.subplots(1, 2 , figsize=(12, 5))

for i, ssp in enumerate(cmip6_scenarios):
    selected_ssp = mean_four_ssp.sel(SSP=ssp).to_dataframe()
    axes[0].plot(selected_ssp.index, selected_ssp['prcp'], label=ssp, color=plt.get_cmap('Accent')(i)) # monthly precipitation (see axis title)
    axes[1].plot(selected_ssp.index, selected_ssp['temp'], label=ssp, color=plt.get_cmap('Accent')(i)) #°C
    # STL decomposition of the monthly series
    prcp_trend = STL(selected_ssp['prcp'], period=12, trend=241, seasonal=13).fit().trend
    temp_trend = STL(selected_ssp['temp'], period=12, trend=241, seasonal=13).fit().trend
    
    axes[0].plot(prcp_trend.index, prcp_trend.values, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))
    axes[1].plot(temp_trend.index, temp_trend.values, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))

# ERA5-Land reference: dashed raw series plus solid STL trend
era5_df = compare_era5.to_dataframe()
axes[0].plot(era5_df.index, era5_df['prcp'], label='ERA5-Land', color='black', alpha=0.5, linestyle='--')
axes[1].plot(era5_df.index, era5_df['temp'], label='ERA5-Land', color='black', alpha=0.5, linestyle='--')
prcp_trend = STL(era5_df['prcp'], period=12, trend=241, seasonal=13).fit().trend
temp_trend = STL(era5_df['temp'], period=12, trend=241, seasonal=13).fit().trend
axes[0].plot(prcp_trend.index, prcp_trend.values, label='ERA5_trend', color='k')
axes[1].plot(temp_trend.index, temp_trend.values, label='ERA5_trend', color='k')

# axes[0].legend()
# axes[1].legend()
axes[0].set_title('Precipitation (mm/month)')
axes[1].set_title('Temperature (°C)')
# NOTE(review): the two panels use different start years (2008 vs 2012) — confirm this is intended
axes[0].set_xlim(dt(2008,1,1), dt(2025,1,1))
axes[1].set_xlim(dt(2012,1,1), dt(2025,1,1))

# Hard-coded y-ranges; adjust when the glacier subset changes
axes[0].set_ylim(0,50)
axes[1].set_ylim(2,8)

plt.show()

In [None]:
# Regional comparison of annual CMIP6 multi-model means (per SSP) with
# ERA5-Land: yearly precipitation sums and yearly temperature means, plus the
# STL trends of the monthly series.
fig, axes = plt.subplots(1, 2 , figsize=(12, 5))
for i, ssp in enumerate(cmip6_scenarios):
    selected_ssp = mean_four_ssp.sel(SSP=ssp).to_dataframe()
    # Select the column before resampling: the frame also carries the scalar
    # SSP coordinate as a string column, which cannot be averaged.
    # 'YS' (year start) replaces the deprecated alias 'AS'.
    annual_prcp = selected_ssp['prcp'].resample('YS').sum()
    annual_temp = selected_ssp['temp'].resample('YS').mean()
    axes[0].plot(annual_prcp.index, annual_prcp, label=ssp
                , color=plt.get_cmap('Accent')(i))
    axes[1].plot(annual_temp.index, annual_temp, label=ssp
                , color=plt.get_cmap('Accent')(i))
    # STL decomposition of the monthly series
    prcp_trend = STL(selected_ssp['prcp'], period=12, trend=241, seasonal=13).fit().trend
    temp_trend = STL(selected_ssp['temp'], period=12, trend=241, seasonal=13).fit().trend
    
    # The monthly precipitation trend is scaled by 12 to match the annual sums
    axes[0].plot(prcp_trend.index, prcp_trend.values*12, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))
    axes[1].plot(temp_trend.index, temp_trend.values, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))

# ERA5-Land reference, aggregated the same way
era5_df = compare_era5.to_dataframe()
era5_annual_prcp = era5_df['prcp'].resample('YS').sum()
era5_annual_temp = era5_df['temp'].resample('YS').mean()
axes[0].plot(era5_annual_prcp.index, era5_annual_prcp, label='ERA5-Land', color='black')
axes[1].plot(era5_annual_temp.index, era5_annual_temp, label='ERA5-Land', color='black')

prcp_trend = STL(era5_df['prcp'], period=12, trend=241, seasonal=13).fit().trend
temp_trend = STL(era5_df['temp'], period=12, trend=241, seasonal=13).fit().trend
axes[0].plot(prcp_trend.index, prcp_trend.values*12, label='ERA5_trend', color='k')
axes[1].plot(temp_trend.index, temp_trend.values, label='ERA5_trend', color='k')

axes[0].legend()
axes[1].legend()
axes[0].set_title('Precipitation (mm/year)')
axes[1].set_title('Temperature (°C)')
axes[0].set_xlim(dt(2000,1,1), dt(2035,1,1))
axes[1].set_xlim(dt(2000,1,1), dt(2035,1,1))

# axes[1].set_ylim(-15, -12)

plt.show()

In [None]:
# Print the mean STL temperature trend over 2018-01 .. 2024-12 for every SSP
# and for ERA5-Land, so the levels can be compared directly.
target_time_range = pd.date_range(start='2018-01-01', end='2024-12-01', freq='MS')

for i, ssp in enumerate(cmip6_scenarios):
    selected_ssp = mean_four_ssp.sel(SSP=ssp).to_dataframe()
    stl_fit = STL(selected_ssp['temp'], period=12, trend=241, seasonal=13).fit()
    temp_trend = stl_fit.trend
    window_mean = temp_trend.loc[target_time_range].mean()

    print(f'{ssp}: {window_mean}')


# Same statistic for the ERA5-Land reference series
era5_df = compare_era5.to_dataframe()
stl_fit = STL(era5_df['temp'], period=12, trend=241, seasonal=13).fit()
temp_trend = stl_fit.trend
window_mean = temp_trend.loc[target_time_range].mean()
print(f'ERA5-Land: {window_mean}')

### Comparison for a single glacier

In [None]:
# Same monthly comparison as for the region, but for one glacier only.
# `idx` is the position of that glacier in `gdirs` (hard-coded).
idx = 21

# Multi-model mean per SSP for the selected glacier
mean_four_ssp = gcm_merged.sel(rgi_id=gdirs[idx].rgi_id
                            ).mean(dim='GCM',
                                skipna=True,  # ignore nan values
                                keep_attrs=True # keep the variable descriptions
                                ).sel(time=slice("1950", "2101")) # CMIP6 span; relabelled below as 1950-01 .. 2100-12

# ERA5-Land series of the same glacier; relabelled below as 1950-01 .. 2024-12
compare_era5 = climate_hist.sel(rgi_id=gdirs[idx].rgi_id).sel(time=slice("1950", "2025"))

# Replace both time axes by month-start timestamps (lengths must match the data)
date_label = pd.date_range(start="01/01/1950", end="12/01/2024", freq="MS")
mean_four_ssp['time'] = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")
compare_era5['time'] = date_label

fig, axes = plt.subplots(1, 2 , figsize=(12, 5))

for i, ssp in enumerate(cmip6_scenarios):
    selected_ssp = mean_four_ssp.sel(SSP=ssp).to_dataframe()
    axes[0].plot(selected_ssp.index, selected_ssp['prcp'], label=ssp, color=plt.get_cmap('Accent')(i)) # monthly precipitation (see axis title)
    axes[1].plot(selected_ssp.index, selected_ssp['temp'], label=ssp, color=plt.get_cmap('Accent')(i)) #°C
    # STL decomposition of the monthly series
    prcp_trend = STL(selected_ssp['prcp'], period=12, trend=241, seasonal=13).fit().trend
    temp_trend = STL(selected_ssp['temp'], period=12, trend=241, seasonal=13).fit().trend
    
    axes[0].plot(prcp_trend.index, prcp_trend.values, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))
    axes[1].plot(temp_trend.index, temp_trend.values, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))

# ERA5-Land reference: dashed raw series plus solid STL trend
era5_df = compare_era5.to_dataframe()
axes[0].plot(era5_df.index, era5_df['prcp'], label='ERA5-Land', color='black', alpha=0.5, linestyle='--')
axes[1].plot(era5_df.index, era5_df['temp'], label='ERA5-Land', color='black', alpha=0.5, linestyle='--')
prcp_trend = STL(era5_df['prcp'], period=12, trend=241, seasonal=13).fit().trend
temp_trend = STL(era5_df['temp'], period=12, trend=241, seasonal=13).fit().trend
axes[0].plot(prcp_trend.index, prcp_trend.values, label='ERA5_trend', color='k')
axes[1].plot(temp_trend.index, temp_trend.values, label='ERA5_trend', color='k')

axes[0].legend()
# axes[1].legend()
axes[0].set_title(f'Precipitation (mm/month) {gdirs[idx].rgi_id}')
axes[1].set_title(f'Temperature (°C) {gdirs[idx].rgi_id}')
axes[0].set_xlim(dt(2000,1,1), dt(2035,1,1))
axes[1].set_xlim(dt(2000,1,1), dt(2035,1,1))

# Hard-coded y-range; adjust when another glacier is selected
axes[1].set_ylim(2,8)

plt.show()

In [None]:
# Annual comparison for the single glacier gdirs[idx]: yearly precipitation
# sums and yearly temperature means of the CMIP6 multi-model mean (per SSP)
# against ERA5-Land, plus the STL trends of the monthly series.
fig, axes = plt.subplots(1, 2 , figsize=(12, 5))
for i, ssp in enumerate(cmip6_scenarios):
    selected_ssp = mean_four_ssp.sel(SSP=ssp).to_dataframe()
    # Select the column before resampling: the frame also carries scalar
    # coordinates (e.g. SSP) as string columns, which cannot be averaged.
    # 'YS' (year start) replaces the deprecated alias 'AS'.
    annual_prcp = selected_ssp['prcp'].resample('YS').sum()
    annual_temp = selected_ssp['temp'].resample('YS').mean()
    axes[0].plot(annual_prcp.index, annual_prcp, label=ssp
                , color=plt.get_cmap('Accent')(i))
    axes[1].plot(annual_temp.index, annual_temp, label=ssp
                , color=plt.get_cmap('Accent')(i))
    # STL decomposition of the monthly series
    prcp_trend = STL(selected_ssp['prcp'], period=12, trend=241, seasonal=13).fit().trend
    temp_trend = STL(selected_ssp['temp'], period=12, trend=241, seasonal=13).fit().trend
    
    # The monthly precipitation trend is scaled by 12 to match the annual sums
    axes[0].plot(prcp_trend.index, prcp_trend.values*12, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))
    axes[1].plot(temp_trend.index, temp_trend.values, label=ssp+' trend', color=plt.get_cmap('Accent')(i+4))

# ERA5-Land reference, aggregated the same way
era5_df = compare_era5.to_dataframe()
era5_annual_prcp = era5_df['prcp'].resample('YS').sum()
era5_annual_temp = era5_df['temp'].resample('YS').mean()
axes[0].plot(era5_annual_prcp.index, era5_annual_prcp, label='ERA5-Land', color='black')
axes[1].plot(era5_annual_temp.index, era5_annual_temp, label='ERA5-Land', color='black')

prcp_trend = STL(era5_df['prcp'], period=12, trend=241, seasonal=13).fit().trend
temp_trend = STL(era5_df['temp'], period=12, trend=241, seasonal=13).fit().trend
axes[0].plot(prcp_trend.index, prcp_trend.values*12, label='ERA5_trend', color='k')
axes[1].plot(temp_trend.index, temp_trend.values, label='ERA5_trend', color='k')

axes[0].legend()
axes[1].legend()
axes[0].set_title(f'Precipitation (mm/year) {gdirs[idx].rgi_id}')
axes[1].set_title(f'Temperature (°C) {gdirs[idx].rgi_id}')
axes[0].set_xlim(dt(2000,1,1), dt(2035,1,1))
axes[1].set_xlim(dt(2000,1,1), dt(2035,1,1))

# axes[1].set_ylim(-8, -2)

plt.show()

### Simulate extreme fluctuations for CMIP6

In [None]:
# Method 1: per-glacier, per-GCM, per-SSP, per-calendar-month quantile-based
# adjustment of the future (>= 2025) CMIP6 series against ERA5-Land.
# Temperature is corrected additively, precipitation multiplicatively.
# Values before 2025 are left unchanged. The result is merged and written to
# '..._hist_extreme_repli.nc' in the working directory.

# four ssps, 13 GCMs for all rgis
target_cmip = gcm_merged.sel(time=slice("1950", "2101")) #to 2100-12, because time in gcm_merged is float
# for all rgis
ref_era5 = climate_hist.sel(time=slice("1950", "2025"))
# Replace both time axes by month-start timestamps (lengths must match the data)
ref_era5['time'] = pd.date_range(start="01/01/1950", end="12/01/2024", freq="MS")
target_cmip['time'] = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")

# One adjusted dataset per (glacier, GCM, SSP); merged after the loops
adjust_cmip = []

for gdir in gdirs:
    print(f'Simulate extremes for {gdir.rgi_id}')
    
    # Reference (historical) window shared by ERA5-Land and CMIP6
    ref_date_range = pd.date_range(start=f"01/01/1990", end=f"12/01/2024", freq="MS")
    ref_era5_df = ref_era5.sel(rgi_id=gdir.rgi_id).to_dataframe().loc[ref_date_range] # 1990-2024

    for GCM in cmip6_models:  # loop through all GCMs
        print(f'    {GCM}:')
        for SSP in cmip6_scenarios:  # loop through all SSPs
            selected_cmip = target_cmip.sel(rgi_id=gdir.rgi_id, GCM=GCM, SSP=SSP)
            selected_cmip_df = selected_cmip.to_dataframe()
            selected_cmip_df['year'] = selected_cmip_df.index.year
            selected_cmip_df['month'] = selected_cmip_df.index.month
            # Diagnostics on the raw series before adjustment
            print(f"        {SSP}: {(~selected_cmip_df['prcp'].notna()).sum()} NaNs; {(selected_cmip_df['prcp']==0).sum()} zeros")

            # Last date with a valid temperature; later (all-NaN) months are not adjusted
            selected_cmip_df_nona = selected_cmip_df.loc[selected_cmip_df['temp'].notna()]
            max_date_nona = selected_cmip_df_nona.index.max()

            # score for future years
            future_cmip_df = selected_cmip_df.loc[(selected_cmip_df['year']>=2025) & (selected_cmip_df.index<=max_date_nona)] #monthly data
            hist_cmip_df = selected_cmip_df.loc[ref_date_range]

            # Start from the raw series; only future dates are overwritten below
            adjusted_temp = selected_cmip_df['temp'].copy()
            adjusted_prcp = selected_cmip_df['prcp'].copy()

            # future adjustment using QDM
            for m in range(1, 13): # Loop 1 to 12
                # Same calendar month only, so the seasonal cycle is respected
                m_hist_era5_df = ref_era5_df[ref_era5_df.index.month==m]
                m_hist_cmip_df = hist_cmip_df[hist_cmip_df.index.month==m]
                m_future_cmip_df = future_cmip_df[future_cmip_df.index.month==m]

                for date, date_info in m_future_cmip_df.iterrows():
                    cmip_temp = date_info['temp']
                    # Empirical non-exceedance probability within the future sample of this month
                    τ_temp = (m_future_cmip_df['temp']<=cmip_temp).sum()/len(m_future_cmip_df['temp']) # quantile of future temp
                    
                    hist_era5_τ_temp = np.nanpercentile(m_hist_era5_df['temp'], τ_temp*100)
                    hist_cmip6_τ_temp = np.nanpercentile(m_hist_cmip_df['temp'], τ_temp*100)
                    # Additive correction: ERA5 quantile plus the CMIP6 change at that quantile
                    corrected_cmip_temp = hist_era5_τ_temp + (cmip_temp - hist_cmip6_τ_temp)
                    # Precipitation: multiplicative correction
                    cmip_prcp = date_info['prcp']
                    τ_prcp = (m_future_cmip_df['prcp']<=cmip_prcp).sum()/len(m_future_cmip_df['prcp']) # quantile of future prcp

                    hist_era5_τ_prcp = np.nanpercentile(m_hist_era5_df['prcp'], τ_prcp*100)
                    hist_cmip6_τ_prcp = np.nanpercentile(m_hist_cmip_df['prcp'], τ_prcp*100)
                    scalling_factor = cmip_prcp / hist_cmip6_τ_prcp
                    # Guard against exploding or undefined ratios (factor > 4, inf, NaN):
                    # fall back to the ratio of medians for this month, then to the
                    # ratio of medians over all months.
                    if (scalling_factor > 4) or (np.isinf(scalling_factor)) or (np.isnan(scalling_factor)):
                        scalling_factor = np.nanpercentile(m_future_cmip_df['prcp'], 50) / np.nanpercentile(m_hist_cmip_df['prcp'], 50) # use general trend
                        if (scalling_factor > 4) or (np.isinf(scalling_factor)) or (np.isnan(scalling_factor)):
                            scalling_factor = np.nanpercentile(future_cmip_df['prcp'], 50) / np.nanpercentile(hist_cmip_df['prcp'], 50)

                    corrected_cmip_prcp = hist_era5_τ_prcp * scalling_factor

                    adjusted_temp.loc[date] = corrected_cmip_temp
                    adjusted_prcp.loc[date] = corrected_cmip_prcp

            # Precipitation cannot be negative
            adjusted_prcp = adjusted_prcp.mask(adjusted_prcp<0, 0)
            adjusted_df = pd.DataFrame({
                'prcp': adjusted_prcp,
                'temp': adjusted_temp
            })
            # Convert back to an xarray dataset (shallow copy keeps the other variables/coords)
            adjusted_ds = selected_cmip.copy(deep=False)
            
            # Update the prcp and temp values
            adjusted_ds['prcp'] = xr.DataArray(
                adjusted_df['prcp'].values,
                dims=selected_cmip.prcp.dims,
                coords={dim: selected_cmip.coords[dim] for dim in selected_cmip.prcp.dims},
                attrs=selected_cmip.prcp.attrs if hasattr(selected_cmip.prcp, 'attrs') else {}
            )
            adjusted_ds['temp'] = xr.DataArray(
                adjusted_df['temp'].values,
                dims=selected_cmip.temp.dims,
                coords={dim: selected_cmip.coords[dim] for dim in selected_cmip.temp.dims},
                attrs=selected_cmip.temp.attrs if hasattr(selected_cmip.temp, 'attrs') else {}
            )
            
            # The .sel() above turned rgi_id / GCM / SSP into scalar coordinates;
            # restore them as dimensions so the pieces can be combined by coordinates.
            dims_to_expand = []
            for coord_name in ['rgi_id', 'GCM', 'SSP']:
                if coord_name in adjusted_ds.coords and coord_name not in adjusted_ds.dims:
                    dims_to_expand.append(coord_name)
            
            if dims_to_expand:
                adjusted_ds = adjusted_ds.expand_dims(dims_to_expand)

            adjust_cmip.append(adjusted_ds)
            # Diagnostics on the adjusted series
            print(f"        {SSP}: {(~adjusted_df['prcp'].notna()).sum()} NaNs; {(adjusted_df['prcp']==0).sum()} zeros")
            

# Merge all pieces back into one dataset and store it
adjust_cmip_merged = xr.combine_by_coords(adjust_cmip, fill_value=np.nan)
adjust_cmip_merged.to_netcdf(cfg.PATHS['working_dir']+'/GCM_'+subregion+f'_cmip6_{start_idx}-{end_idx-1}_hist_extreme_repli.nc')
adjust_cmip_merged

In [None]:
# Sanity check of the adjusted precipitation of one hard-coded glacier:
# number of missing and of zero values for every GCM / SSP combination.
# for gdir in gdirs:
#     print(f'{gdir.rgi_id}:')

for GCM in cmip6_models:  # every GCM
    print(f'    {GCM}:')
    for SSP in cmip6_scenarios:  # every SSP
        temp_cmip = adjust_cmip_merged.sel(rgi_id='RGI60-13.16449', GCM=GCM, SSP=SSP)
        temp_df = temp_cmip.to_dataframe()

        prcp_series = temp_df['prcp']
        n_missing = (~prcp_series.notna()).sum()
        n_zero = (prcp_series==0).sum()

        print(f"        {SSP}: {n_missing} NaNs")
        print(f"        {SSP}: {n_zero} zeros")

In [None]:
# Method 2, DQM: trend-preserving replacement of the fluctuations.
# Every series is split with STL into a trend and a "leftover"
# (series minus trend). For each future month (>= 2025) the CMIP6 trend is
# kept and the CMIP6 leftover is replaced by the ERA5-Land leftover found at
# the same within-month quantile of the 1990-2024 reference window.
# Values before 2025 are left unchanged. The result is merged and written to
# '..._hist_extreme_repli2.nc' in the working directory.

# four ssps, 13 GCMs for all rgis
target_cmip = gcm_merged.sel(time=slice("1950", "2101")) #to 2100-12, because time in gcm_merged is float
# for all rgis
ref_era5 = climate_hist.sel(time=slice("1950", "2025"))
# Replace both time axes by month-start timestamps (lengths must match the data)
ref_era5['time'] = pd.date_range(start="01/01/1950", end="12/01/2024", freq="MS")
target_cmip['time'] = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")

# One adjusted dataset per (glacier, GCM, SSP); merged after the loops
adjust_cmip = []

for gdir in gdirs:
    print(f'Simulate extremes for {gdir.rgi_id}')
    
    # ERA5-Land decomposition is independent of GCM/SSP, so it is done once per glacier
    ref_date_range = pd.date_range(start=f"01/01/1990", end=f"12/01/2024", freq="MS")
    era5_df = ref_era5.sel(rgi_id=gdir.rgi_id).to_dataframe()
    era5_temp_trend = STL(era5_df['temp'], period=12, trend=241, seasonal=13).fit().trend
    era5_prcp_trend = STL(era5_df['prcp'], period=12, trend=241, seasonal=13).fit().trend
    era5_df['temp_trend'] = era5_temp_trend
    era5_df['temp_leftover'] = era5_df['temp'] - era5_df['temp_trend']
    era5_df['prcp_trend'] = era5_prcp_trend
    era5_df['prcp_leftover'] = era5_df['prcp'] - era5_df['prcp_trend'] # fluc
    ref_era5_df = era5_df.loc[ref_date_range] # 1990-2024

    for GCM in cmip6_models:  # loop through all GCMs
        for SSP in cmip6_scenarios:  # loop through all SSPs
            selected_cmip = target_cmip.sel(rgi_id=gdir.rgi_id, GCM=GCM, SSP=SSP)
            selected_cmip_df = selected_cmip.to_dataframe()
            selected_cmip_df['year'] = selected_cmip_df.index.year
            selected_cmip_df['month'] = selected_cmip_df.index.month

            # NOTE(review): STL is fitted on the full series; if a GCM/SSP
            # series contains NaN (e.g. ends before 2100-12) the trend may be
            # invalid — confirm how such series should be handled.
            cmip_temp_trend = STL(selected_cmip_df['temp'], period=12, trend=241, seasonal=13).fit().trend
            cmip_prcp_trend = STL(selected_cmip_df['prcp'], period=12, trend=241, seasonal=13).fit().trend
            selected_cmip_df['temp_trend'] = cmip_temp_trend
            selected_cmip_df['temp_leftover'] = selected_cmip_df['temp'] - selected_cmip_df['temp_trend']
            selected_cmip_df['prcp_trend'] = cmip_prcp_trend
            selected_cmip_df['prcp_leftover'] = selected_cmip_df['prcp'] - selected_cmip_df['prcp_trend'] # fluc

            # Last date with a valid temperature; later (all-NaN) months are not adjusted
            selected_cmip_df_nona = selected_cmip_df.loc[selected_cmip_df['temp'].notna()]
            max_date_nona = selected_cmip_df_nona.index.max()

            # score for future years
            future_cmip_df = selected_cmip_df.loc[(selected_cmip_df['year']>=2025) & (selected_cmip_df.index<=max_date_nona)] #monthly data
            hist_cmip_df = selected_cmip_df.loc[ref_date_range]

            # Start from the raw series; only future dates are overwritten below
            adjusted_temp = selected_cmip_df['temp'].copy()
            adjusted_prcp = selected_cmip_df['prcp'].copy()

            # Future adjustment: CMIP6 trend + ERA5-Land leftover at the same quantile
            for m in range(1, 13): # Loop 1 to 12
                # Same calendar month only, so the seasonal cycle is respected
                m_hist_era5_df = ref_era5_df[ref_era5_df.index.month==m]
                m_hist_cmip_df = hist_cmip_df[hist_cmip_df.index.month==m]
                # Explicit copy: new columns are added below, and writing to a
                # filtered slice triggers pandas' SettingWithCopyWarning.
                m_future_cmip_df = future_cmip_df[future_cmip_df.index.month==m].copy()

                # Percentile rank (0-1] of each future leftover within its month
                m_future_cmip_df['τ_temp'] = m_future_cmip_df['temp_leftover'].rank(method='average', pct=True)
                m_future_cmip_df['τ_prcp'] = m_future_cmip_df['prcp_leftover'].rank(method='average', pct=True)

                for date, date_info in m_future_cmip_df.iterrows():
                    cmip_temp_trend = date_info['temp_trend']
                    τ_temp = date_info['τ_temp']
                    hist_era5_τ_temp_flu = np.nanpercentile(m_hist_era5_df['temp_leftover'], τ_temp*100)
                    corrected_cmip_temp = cmip_temp_trend + hist_era5_τ_temp_flu
                    # for prcp
                    cmip_prcp_trend = date_info['prcp_trend']
                    τ_prcp = date_info['τ_prcp']
                    hist_era5_τ_prcp_flu = np.nanpercentile(m_hist_era5_df['prcp_leftover'], τ_prcp*100)
                    corrected_cmip_prcp = cmip_prcp_trend + hist_era5_τ_prcp_flu
                    # Precipitation cannot be negative
                    if corrected_cmip_prcp<0:
                        corrected_cmip_prcp = 0
                    
                    adjusted_temp.loc[date] = corrected_cmip_temp
                    adjusted_prcp.loc[date] = corrected_cmip_prcp

            # Also clips negative values in the unadjusted (pre-2025) part
            adjusted_prcp = adjusted_prcp.mask(adjusted_prcp<0, 0)
            adjusted_df = pd.DataFrame({
                'prcp': adjusted_prcp,
                'temp': adjusted_temp
            })
            # Convert back to an xarray dataset (shallow copy keeps the other variables/coords)
            adjusted_ds = selected_cmip.copy(deep=False)
            
            # Update the prcp and temp values
            adjusted_ds['prcp'] = xr.DataArray(
                adjusted_df['prcp'].values,
                dims=selected_cmip.prcp.dims,
                coords={dim: selected_cmip.coords[dim] for dim in selected_cmip.prcp.dims},
                attrs=selected_cmip.prcp.attrs if hasattr(selected_cmip.prcp, 'attrs') else {}
            )
            adjusted_ds['temp'] = xr.DataArray(
                adjusted_df['temp'].values,
                dims=selected_cmip.temp.dims,
                coords={dim: selected_cmip.coords[dim] for dim in selected_cmip.temp.dims},
                attrs=selected_cmip.temp.attrs if hasattr(selected_cmip.temp, 'attrs') else {}
            )
            
            # The .sel() above turned rgi_id / GCM / SSP into scalar coordinates;
            # restore them as dimensions so the pieces can be combined by coordinates.
            dims_to_expand = []
            for coord_name in ['rgi_id', 'GCM', 'SSP']:
                if coord_name in adjusted_ds.coords and coord_name not in adjusted_ds.dims:
                    dims_to_expand.append(coord_name)
            
            if dims_to_expand:
                adjusted_ds = adjusted_ds.expand_dims(dims_to_expand)

            adjust_cmip.append(adjusted_ds)

# Merge all pieces back into one dataset and store it
adjust_cmip_merged = xr.combine_by_coords(adjust_cmip, fill_value=np.nan)
adjust_cmip_merged.to_netcdf(cfg.PATHS['working_dir']+'/GCM_'+subregion+f'_cmip6_{start_idx}-{end_idx-1}_hist_extreme_repli2.nc')
adjust_cmip_merged

In [None]:
# Sanity check of the precipitation correction: adjusted series minus the raw
# CMIP6 series, from row 1500 onwards. `adjusted_prcp` and `selected_cmip_df`
# are whatever the adjustment loop assigned last.
# For reference, the raw frame can be rebuilt with:
#   selected_cmip = target_cmip.sel(rgi_id=gdir.rgi_id, GCM=GCM, SSP=SSP)
#   selected_cmip_df = selected_cmip.to_dataframe()
#   selected_cmip_df['year'] = selected_cmip_df.index.year
# NOTE(review): row 1500 is presumably 2075-01 if the index is monthly from 1950-01 - confirm.
prcp_correction_tail = (adjusted_prcp - selected_cmip_df['prcp']).iloc[1500:]
prcp_correction_tail.plot()
print(prcp_correction_tail.mean())

In [None]:
# Same sanity check for temperature: adjusted minus raw CMIP6, from row 1500
# onwards, plotted and then summarised by its mean.
temp_correction_tail = (adjusted_temp - selected_cmip_df['temp']).iloc[1500:]
temp_correction_tail.plot()
print(temp_correction_tail.mean())

In [None]:
# Raw CMIP6 forcing restricted to 1950-2100 (the original comment notes that
# the time axis of gcm_merged is stored as float), re-labelled with
# month-start timestamps.
target_cmip = gcm_merged.sel(time=slice("1950", "2101"))  # to 2100-12
target_cmip['time'] = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")

# Reload the adjusted forcing from disk.
# NOTE(review): this reads '..._hist_extreme_repli.nc', whereas the adjustment
# cell writes '..._hist_extreme_repli2.nc' - confirm which file is intended.
adjusted_forcing_file = (
    cfg.PATHS['working_dir']
    + '/GCM_'
    + subregion
    + f'_cmip6_{start_idx}-{end_idx-1}_hist_extreme_repli.nc'
)
adjust_cmip_merged = xr.open_dataset(adjusted_forcing_file)
adjust_cmip_merged

#### Average all rgis

In [None]:
# Regional comparison of the fluctuation-adjusted CMIP6 forcing against the raw
# CMIP6 forcing. Both are averaged over all glaciers first, then over all GCMs.
reduce_opts = dict(
    skipna=True,      # ignore NaN values
    keep_attrs=True,  # keep the variable descriptions
)

mean_four_ssp_adjusted = adjust_cmip_merged.mean(dim='rgi_id', **reduce_opts)
mean_four_ssp_adjusted = mean_four_ssp_adjusted.mean(dim='GCM', **reduce_opts)
mean_four_ssp_adjusted = mean_four_ssp_adjusted.sel(time=slice("1950", "2101"))  # to 2100-12

# ERA5 reference series; not plotted in this cell.
compare_era5 = climate_hist_region.sel(time=slice("1950", "2025"))

# Re-label the time axes with explicit month-start timestamps.
mean_four_ssp_adjusted['time'] = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")
compare_era5['time'] = pd.date_range(start="01/01/1950", end="12/01/2024", freq="MS")

mean_four_ssp = gcm_merged.mean(dim='rgi_id', **reduce_opts)
mean_four_ssp = mean_four_ssp.mean(dim='GCM', **reduce_opts)
mean_four_ssp = mean_four_ssp.sel(time=slice("1950", "2101"))  # to 2100-12
mean_four_ssp['time'] = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")

fig, axes = plt.subplots(1, 2 , figsize=(15, 5))
accent = plt.get_cmap('Accent')
stl_opts = dict(period=12, trend=241, seasonal=13)

# Only the fourth scenario of cmip6_scenarios is drawn here.
for i, ssp in enumerate(cmip6_scenarios[3:4]):
    # Adjusted forcing: thick, slightly transparent solid lines.
    selected_ssp_scaled = mean_four_ssp_adjusted.sel(SSP=ssp).to_dataframe()
    for ax, column in zip(axes, ('prcp', 'temp')):
        ax.plot(
            selected_ssp_scaled.index,
            selected_ssp_scaled[column],
            label=ssp,
            alpha=0.8,
            color=accent(i),
            linewidth=3,
        )
    # STL trend of the adjusted forcing.
    prcp_trend = STL(selected_ssp_scaled['prcp'], **stl_opts).fit().trend
    temp_trend = STL(selected_ssp_scaled['temp'], **stl_opts).fit().trend
    for ax, trend in zip(axes, (prcp_trend, temp_trend)):
        ax.plot(trend.index, trend.values, label=ssp+' trend', color=accent(i+4))

    # Raw CMIP6 forcing (fluctuations not rescaled): dashed, no legend entry.
    selected_ssp = mean_four_ssp.sel(SSP=ssp).to_dataframe()
    for ax, column in zip(axes, ('prcp', 'temp')):
        ax.plot(
            selected_ssp.index,
            selected_ssp[column],
            alpha=1,
            color=accent(i+2),
            linestyle='--',
        )
    # STL trend of the raw forcing.
    prcp_trend = STL(selected_ssp['prcp'], **stl_opts).fit().trend
    temp_trend = STL(selected_ssp['temp'], **stl_opts).fit().trend
    for ax, trend in zip(axes, (prcp_trend, temp_trend)):
        ax.plot(
            trend.index,
            trend.values,
            color=accent(i+4),
            linestyle='--',
            label=ssp+' trend (not adjusted)',
        )

for ax, panel_title in zip(axes, ('Precipitation (mm/month)', 'Temperature (°C)')):
    ax.legend()
    ax.set_title(panel_title)

# Zoomed windows; the full projection period would be dt(2025,1,1)..dt(2101,1,1).
axes[0].set_xlim(dt(2050,1,1), dt(2100,1,1))
axes[1].set_xlim(dt(2080,1,1), dt(2100,1,1))
axes[0].set_ylim(0, 50)
axes[1].set_ylim(4, 10)  # alternative range noted by the author: (-25, -18)
plt.show()


In [None]:
# Mean temperature, then mean precipitation, over the monthly steps from
# 2025-01 to 2100-12; for each variable the adjusted forcing is printed first
# and the raw CMIP6 forcing second.
future_months = pd.date_range('2025-1-1', '2100-12-1', freq='MS')
for column in ('temp', 'prcp'):
    for frame in (selected_ssp_scaled, selected_ssp):
        print(frame.loc[future_months, column].mean())

In [None]:
# Annual view of the adjusted vs. raw CMIP6 forcing: yearly precipitation sums
# and yearly mean temperature, overlaid with the STL trend of the monthly
# series (the precipitation trend is multiplied by 12 to put it on a yearly scale).
fig, axes = plt.subplots(1, 2 , figsize=(12, 5))
accent = plt.get_cmap('Accent')
stl_opts = dict(period=12, trend=241, seasonal=13)

# Only the second scenario of cmip6_scenarios is drawn here.
for i, ssp in enumerate(cmip6_scenarios[1:2]):
    selected_ssp_scaled = mean_four_ssp_adjusted.sel(SSP=ssp).to_dataframe()
    # Aggregate each variable once. Picking the column before resampling keeps
    # any non-numeric columns of the frame out of the aggregation. 'YS' is the
    # year-start alias that replaces the deprecated 'AS'.
    annual_prcp = selected_ssp_scaled['prcp'].resample('YS').sum()
    annual_temp = selected_ssp_scaled['temp'].resample('YS').mean()
    axes[0].plot(annual_prcp.index, annual_prcp, label=ssp, color=accent(i))
    axes[1].plot(annual_temp.index, annual_temp, label=ssp, color=accent(i))
    # STL trend of the adjusted forcing
    prcp_trend = STL(selected_ssp_scaled['prcp'], **stl_opts).fit().trend
    temp_trend = STL(selected_ssp_scaled['temp'], **stl_opts).fit().trend
    axes[0].plot(prcp_trend.index, prcp_trend.values*12, label=ssp+' trend', color=accent(i+4))
    axes[1].plot(temp_trend.index, temp_trend.values, label=ssp+' trend', color=accent(i+4))

    # Raw CMIP6 forcing (fluctuations not rescaled), dashed
    selected_ssp = mean_four_ssp.sel(SSP=ssp).to_dataframe()
    annual_prcp_raw = selected_ssp['prcp'].resample('YS').sum()
    annual_temp_raw = selected_ssp['temp'].resample('YS').mean()
    axes[0].plot(annual_prcp_raw.index, annual_prcp_raw, color=accent(i), linestyle='--')
    axes[1].plot(annual_temp_raw.index, annual_temp_raw, color=accent(i), linestyle='--')
    prcp_trend = STL(selected_ssp['prcp'], **stl_opts).fit().trend
    temp_trend = STL(selected_ssp['temp'], **stl_opts).fit().trend
    axes[0].plot(prcp_trend.index, prcp_trend.values*12, color=accent(i+4), linestyle='--', label=ssp+' trend (not adjusted)')
    axes[1].plot(temp_trend.index, temp_trend.values, color=accent(i+4), linestyle='--', label=ssp+' trend (not adjusted)')

axes[0].legend()
axes[1].legend()
axes[0].set_title('Precipitation (mm/yr)')
axes[1].set_title('Temperature (°C)')
axes[0].set_xlim(dt(2020,1,1), dt(2100,1,1))
axes[1].set_xlim(dt(2060,1,1), dt(2100,1,1))
plt.show()


#### single glacier

In [None]:
# Same adjusted-vs-raw comparison, but for a single glacier (the last entry of
# gdirs) and a single GCM (the last entry of cmip6_models).
single_selection = dict(
    rgi_id=gdirs[-1].rgi_id,
    GCM=cmip6_models[-1],
    time=slice("1950", "2101"),  # to 2100-12
)

mean_four_ssp_adjusted = adjust_cmip_merged.sel(**single_selection)
mean_four_ssp_adjusted['time'] = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")

mean_four_ssp = gcm_merged.sel(**single_selection)
mean_four_ssp['time'] = pd.date_range(start="01/01/1950", end="12/01/2100", freq="MS")

fig, axes = plt.subplots(1, 2 , figsize=(15, 5))
accent = plt.get_cmap('Accent')
stl_opts = dict(period=12, trend=241, seasonal=13)

# Only the third scenario of cmip6_scenarios is drawn here.
for i, ssp in enumerate(cmip6_scenarios[2:3]):
    # Adjusted forcing: thick, slightly transparent solid lines.
    selected_ssp_scaled = mean_four_ssp_adjusted.sel(SSP=ssp).to_dataframe()
    for ax, column in zip(axes, ('prcp', 'temp')):
        ax.plot(
            selected_ssp_scaled.index,
            selected_ssp_scaled[column],
            label=ssp,
            alpha=0.8,
            color=accent(i),
            linewidth=3,
        )
    # STL trend of the adjusted forcing; the precipitation trend is raised
    # above the monthly lines with zorder=5.
    prcp_trend = STL(selected_ssp_scaled['prcp'], **stl_opts).fit().trend
    temp_trend = STL(selected_ssp_scaled['temp'], **stl_opts).fit().trend
    axes[0].plot(prcp_trend.index, prcp_trend.values, label=ssp+' trend', color=accent(i+4), zorder=5)
    axes[1].plot(temp_trend.index, temp_trend.values, label=ssp+' trend', color=accent(i+4))

    # Raw CMIP6 forcing (fluctuations not rescaled): dashed, no legend entry.
    selected_ssp = mean_four_ssp.sel(SSP=ssp).to_dataframe()
    for ax, column in zip(axes, ('prcp', 'temp')):
        ax.plot(
            selected_ssp.index,
            selected_ssp[column],
            alpha=1,
            color=accent(i+2),
            linestyle='--',
        )
    # STL trend of the raw forcing.
    prcp_trend = STL(selected_ssp['prcp'], **stl_opts).fit().trend
    temp_trend = STL(selected_ssp['temp'], **stl_opts).fit().trend
    axes[0].plot(
        prcp_trend.index,
        prcp_trend.values,
        color=accent(i+4),
        linestyle='--',
        label=ssp+' trend (not adjusted)',
        zorder=5,
    )
    axes[1].plot(
        temp_trend.index,
        temp_trend.values,
        color=accent(i+4),
        linestyle='--',
        label=ssp+' trend (not adjusted)',
    )

for ax, panel_title in zip(axes, ('Precipitation (mm/month)', 'Temperature (°C)')):
    ax.legend()
    ax.set_title(panel_title)

# Zoomed windows; the full projection period would be dt(2025,1,1)..dt(2101,1,1).
axes[0].set_xlim(dt(2040,1,1), dt(2100,1,1))
axes[1].set_xlim(dt(2080,1,1), dt(2100,1,1))
axes[0].set_ylim(80, 150)
# The temperature y-range is left automatic (the author previously tried (4, 8) and (-25, -18)).
plt.show()


## Glacier calibration and inversion

In [None]:
# Calibrate every glacier against geodetic mass-balance observations, keep only
# the glaciers that end up with a usable mu*, then run the ice-thickness
# inversion and initialise the present-time glacier.

# Get geodetic mass balance reference data for calibration
altimetry_filepath = '/mnt/3FE827E84836B503/Altimetry_model_input/Altimetrymb_rgiregion_result_for_OGGM0024.csv'
df_ref_dmdtda = utils.get_geodetic_mb_dataframe(file_path=altimetry_filepath)

# Reference period passed to the calibration; identical for every glacier, so
# it is defined once instead of inside the loop.
ref_period = '2000-01-01_2024-01-01'

# Calibrate each glacier using geodetic mass balance
print("Calibrating glaciers using geodetic mass balance (this may take a while)...")
for gdir in gdirs:
    try:
        # Get reference mass balance for this glacier
        df_ref_dmdtda0 = df_ref_dmdtda.loc[gdir.rgi_id]
        # NOTE(review): the per-period filter below is disabled, so this relies
        # on the CSV holding a single row per glacier - confirm.
        # df_ref_dmdtda0 = df_ref_dmdtda0.loc[df_ref_dmdtda0['period'] == ref_period]
        dmdtda_reference = df_ref_dmdtda0['dmdtda'] * 1000  # m/yr -> mm/yr

        # Perform mu* calibration
        climate.mu_star_calibration_from_geodetic_mb(
            gdir,
            ignore_hydro_months=True,
            ref_mb=dmdtda_reference,
            ref_period=ref_period,
            step_height_for_corr=10,
            max_height_change_for_corr=5000,
            min_mu_star=20,
            max_mu_star=500
        )
    except Exception as e:
        # Best effort: report the failure and carry on with the next glacier.
        print(f"Calibration failed for {gdir.rgi_id}: {e}")

# Remove glaciers without valid calibration
valid_gdirs = []
for gdir in gdirs:
    try:
        df = gdir.read_json('local_mustar')
        mu_star = df['mu_star_glacierwide']
    except Exception:
        # No readable calibration result for this glacier: drop it. Catching
        # Exception rather than using a bare `except` keeps Ctrl-C
        # (KeyboardInterrupt) and SystemExit working.
        continue
    if not pd.isna(mu_star):
        valid_gdirs.append(gdir)

gdirs = valid_gdirs
print(f"Successfully calibrated {len(gdirs)} glaciers")

# Compute apparent mass balance and run inversion
print("Computing ice thickness through inversion...")
for gdir in gdirs:
    climate.apparent_mb_from_any_mb(gdir)

# Run inversion tasks
global_tasks.inversion_tasks(gdirs, glen_a=None, fs=None, filter_inversion_output=True)

# Initialize present-time glacier
workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs, filesuffix='')

print("Ice thickness computation complete")


In [None]:
# Drop every glacier whose directory holds no inversion output
# ('inversion_output.pkl'), i.e. glaciers for which the inversion did not finish.
# Each glacier directory is asked for its own file path instead of walking the
# whole per_glacier tree, so only glaciers still in `gdirs` are inspected.
gdf_sel2 = [
    gdir.rgi_id
    for gdir in gdirs
    if not os.path.exists(gdir.get_filepath('inversion_output'))
]

# Remove incomplete glaciers
if gdf_sel2:
    print(f"Removing {len(gdf_sel2)} incomplete glaciers...")
    incomplete_ids = set(gdf_sel2)  # set lookup instead of scanning a list per glacier
    gdirs[:] = [gdir for gdir in gdirs if gdir.rgi_id not in incomplete_ids]
    print(f"Remaining glaciers: {len(gdirs)}")

## CMIP projection

### Normal

In [None]:
# Generate the future forcing for the "normal" experiment, 1950-2100.
# NOTE(review): create_scieno.repulicate_cmip6 is defined outside this cell; it
# presumably writes per-glacier gcm_data files suffixed '_{gcm}_{ssp}_normal',
# which is what the projection cells read - confirm.
normal_forcing_kwargs = dict(
    start_year=1950,
    end_year=2100,
    output_filesuffix='_normal',
)
workflow.execute_entity_task(create_scieno.repulicate_cmip6, gdirs, **normal_forcing_kwargs)


In [None]:
# Run the "normal" projection for every CMIP6 scenario/model pair, compile the
# per-glacier outputs into one dataset per pair, and save the combined result.
print("Running CMIP6 scenario projections (2025-2100)...")

projection_results = []

for ssp in cmip6_scenarios:
    print(f"\nProcessing scenario: {ssp}")

    for gcm in cmip6_models:
        print(f"  - Model: {gcm}")

        # Suffix of the gcm_data forcing file to read, and suffix under which
        # this run's per-glacier outputs are stored.
        extreme_filesuffix = f'_{gcm}_{ssp}_normal'
        projection_id = f'_proj_{gcm}_{ssp}_normal_{start_idx}-{end_idx-1}'

        try:
            # Run projection simulation
            workflow.execute_entity_task(
                tasks.run_with_hydro, gdirs,
                run_task=tasks.run_from_climate_data,
                climate_filename='gcm_data',
                climate_input_filesuffix=extreme_filesuffix,
                fixed_geometry_spinup_yr=2000,
                ref_area_from_y0=True,
                output_filesuffix=projection_id,
                store_monthly_hydro=False
            )

            # Glaciers whose run left no diagnostics file are treated as failed.
            # Asking each glacier directory for its own file replaces a walk of
            # the whole per_glacier tree for every GCM/SSP pair.
            gdf_sel2 = [
                gdir.rgi_id
                for gdir in gdirs
                if not os.path.exists(
                    gdir.get_filepath('model_diagnostics', filesuffix=projection_id)
                )
            ]
            if gdf_sel2:
                # NOTE(review): a glacier dropped here stays dropped for all
                # later pairs, so pairs processed earlier keep more glaciers
                # than later ones (the gaps are NaN-filled when combining).
                failed_ids = set(gdf_sel2)
                gdirs[:] = [gdir for gdir in gdirs if gdir.rgi_id not in failed_ids]
                print(f"        Remaining glaciers: {len(gdirs)}")

            # Compile results for this model-scenario and tag them with
            # GCM/SSP dimensions so the pairs can be combined afterwards.
            ds_projection = utils.compile_run_output(gdirs, input_filesuffix=projection_id, path=False)
            ds_projection = ds_projection.assign_coords(GCM=gcm, SSP=ssp)
            ds_projection = ds_projection.expand_dims(['GCM', 'SSP'])

            projection_results.append(ds_projection)

        except Exception as e:
            # Best effort: report the failing pair and move on to the next one.
            print(f"    Error running projection for {gcm} {ssp}: {e}")

if not projection_results:
    # Fail with a clear message rather than an obscure error further down.
    raise RuntimeError("No CMIP6 projection succeeded; there is nothing to combine.")

ds_all_projections = xr.combine_by_coords(projection_results, fill_value=np.nan, combine_attrs='override')
ds_all_projections = ds_all_projections.sortby('rgi_id')
output_path = cfg.PATHS['working_dir'] + f'/run_output_cmip6_normal_{start_idx}-{end_idx-1}.nc'
ds_all_projections.to_netcdf(output_path)

print("\nCMIP6 scenario projections complete!")

#### Dynamic spinup

In [None]:
# Dynamic spinup of every glacier, starting in SPINUP_START_YR and matching on
# glacier area. The resulting model state is stored under `spinup_filesuffix`.
SPINUP_START_YR = 1979
# File suffix of the spinup run outputs
spinup_filesuffix = '_spinup'
print("Running CMIP6 scenario projections (2025-2100) with Dynamic Spinup...")

# NOTE(review): climate_input_filesuffix=None is passed explicitly - confirm
# that the installed OGGM version accepts None here.
dynamic_spinup_kwargs = dict(
    climate_input_filesuffix=None,
    spinup_start_yr=SPINUP_START_YR,
    minimise_for='area',
    precision_percent=3,
    precision_absolute=3,
    add_fixed_geometry_spinup=True,
    output_filesuffix=spinup_filesuffix,
)

# spinning-up
workflow.execute_entity_task(tasks.run_dynamic_spinup, gdirs, **dynamic_spinup_kwargs)


In [None]:
# Keep only the glaciers for which the dynamic spinup left a diagnostics file
# ('model_diagnostics_spinup.nc'). Each glacier directory is asked for its own
# file path instead of walking the whole per_glacier tree.
spinup_filesuffix = '_spinup'
gdf_sel2 = [
    gdir.rgi_id
    for gdir in gdirs
    if not os.path.exists(
        gdir.get_filepath('model_diagnostics', filesuffix=spinup_filesuffix)
    )
]
if gdf_sel2:
    missing_ids = set(gdf_sel2)  # set lookup instead of scanning a list per glacier
    gdirs[:] = [gdir for gdir in gdirs if gdir.rgi_id not in missing_ids]

print(f"Dynamic spinup complete. Glaciers ready: {len(gdirs)}")

In [None]:
# Run the "normal" projection for every CMIP6 scenario/model pair, starting
# from the dynamic-spinup model state (year 2000), then compile and save.
projection_results_sp = []

for ssp in cmip6_scenarios:
    print(f"\nProcessing scenario: {ssp}")

    for gcm in cmip6_models:
        print(f"  - Model: {gcm}")
        # Suffix of the gcm_data forcing file to read, and suffix under which
        # this run's per-glacier outputs are stored.
        extreme_filesuffix = f'_{gcm}_{ssp}_normal'
        projection_id = f'_proj_{gcm}_{ssp}_normal_spinup_{start_idx}-{end_idx-1}'

        try:
            # Run projection simulation
            workflow.execute_entity_task(
                tasks.run_with_hydro, gdirs,
                run_task=tasks.run_from_climate_data,
                climate_filename='gcm_data',
                climate_input_filesuffix=extreme_filesuffix,
                init_model_filesuffix=spinup_filesuffix,
                init_model_yr=2000,
                ref_area_from_y0=True,
                output_filesuffix=projection_id,
                store_monthly_hydro=False
            )

            # Glaciers whose run left no diagnostics file are treated as failed.
            # Asking each glacier directory for its own file replaces a walk of
            # the whole per_glacier tree for every GCM/SSP pair.
            gdf_sel2 = [
                gdir.rgi_id
                for gdir in gdirs
                if not os.path.exists(
                    gdir.get_filepath('model_diagnostics', filesuffix=projection_id)
                )
            ]
            if gdf_sel2:
                # NOTE(review): a glacier dropped here stays dropped for all
                # later pairs, so pairs processed earlier keep more glaciers
                # than later ones (the gaps are NaN-filled when combining).
                failed_ids = set(gdf_sel2)
                gdirs[:] = [gdir for gdir in gdirs if gdir.rgi_id not in failed_ids]
                print(f"        Remaining glaciers: {len(gdirs)}")

            # Compile results for this model-scenario and tag them with
            # GCM/SSP dimensions so the pairs can be combined afterwards.
            ds_projection = utils.compile_run_output(gdirs, input_filesuffix=projection_id, path=False)
            ds_projection = ds_projection.assign_coords(GCM=gcm, SSP=ssp)
            ds_projection = ds_projection.expand_dims(['GCM', 'SSP'])

            projection_results_sp.append(ds_projection)

        except Exception as e:
            # Best effort: report the failing pair and move on to the next one.
            print(f"    Error running projection for {gcm} {ssp}: {e}")

if not projection_results_sp:
    # Fail with a clear message rather than an obscure error further down.
    raise RuntimeError("No CMIP6 spinup projection succeeded; there is nothing to combine.")

# NOTE(review): the spinup result is stored in `ds_all_projections`, replacing
# any no-spinup dataset held under that name until it is reloaded from disk.
ds_all_projections = xr.combine_by_coords(projection_results_sp, fill_value=np.nan, combine_attrs='override')
ds_all_projections = ds_all_projections.sortby('rgi_id')
output_path = cfg.PATHS['working_dir'] + f'/run_output_cmip6_normal_spinup_{start_idx}-{end_idx-1}.nc'
ds_all_projections.to_netcdf(output_path)

print("\nCMIP6 scenario projections complete!")

In [None]:
# Reload the "normal" projections (without and with dynamic spinup) and compare
# the total volume over all glaciers for SSP5-8.5 / CAMS-CSM1-0.
normal_output_file = cfg.PATHS['working_dir'] + f'/run_output_cmip6_normal_{start_idx}-{end_idx-1}.nc'
normal_spinup_output_file = cfg.PATHS['working_dir'] + f'/run_output_cmip6_normal_spinup_{start_idx}-{end_idx-1}.nc'
ds_all_projections = xr.open_dataset(normal_output_file)
ds_all_projections_sp = xr.open_dataset(normal_spinup_output_file)

for run_ds, run_label in (
    (ds_all_projections, 'no spinup'),
    (ds_all_projections_sp, 'spinup'),
):
    regional_total = run_ds.sum(dim='rgi_id', skipna=True, keep_attrs=True)
    single_member = regional_total.sel(SSP='ssp585').sel(GCM='CAMS-CSM1-0')
    single_member.volume.plot(label=run_label)  # to 2100

plt.legend()

In [None]:
# GCM-mean total volume for SSP5-8.5, without and with dynamic spinup, plus a
# red marker at (mean RGI date, summed inversion volume) of the glaciers.
ref_date = np.mean([g.rgi_date for g in gdirs])
ref_volume = np.sum([tasks.get_inversion_volume(g) for g in gdirs])

for run_ds, run_label in (
    (ds_all_projections, 'no spinup'),
    (ds_all_projections_sp, 'spinup'),
):
    regional_total = run_ds.sum(dim='rgi_id', skipna=True, keep_attrs=True)
    ensemble_mean = regional_total.mean(dim='GCM', skipna=True, keep_attrs=True)
    ensemble_mean.sel(SSP='ssp585').volume.plot(label=run_label)  # to 2100

plt.scatter(ref_date, ref_volume, c='red')
plt.legend()

In [None]:
# GCM-mean total area for SSP5-8.5, without and with dynamic spinup, plus a
# red marker at (mean RGI date, summed RGI area) of the glaciers.
ref_date = np.mean([g.rgi_date for g in gdirs])
ref_area = np.sum([g.rgi_area_m2 for g in gdirs])

for run_ds, run_label in (
    (ds_all_projections, 'no spinup'),
    (ds_all_projections_sp, 'spinup'),
):
    regional_total = run_ds.sum(dim='rgi_id', skipna=True, keep_attrs=True)
    ensemble_mean = regional_total.mean(dim='GCM', skipna=True, keep_attrs=True)
    ensemble_mean.sel(SSP='ssp585').area.plot(label=run_label)  # to 2100

plt.scatter(ref_date, ref_area, c='red')
plt.legend()

In [None]:
# Merge the per-chunk "normal" (no-spinup) projection files found in the
# working directory into a single 'run_output_cmip6_normal_all.nc'.
processed_file_list = []

for item in os.listdir(cfg.PATHS['working_dir']):
    # The prefix also matches 'run_output_cmip6_normal_spinup_*.nc' (the spinup
    # experiment) and the merged '..._all.nc' file itself; both are excluded so
    # only no-spinup chunk files are combined.
    is_normal_chunk = (
        item.startswith('run_output_cmip6_normal_')
        and item.endswith('.nc')
        and 'spinup' not in item
        and "all" not in item
    )
    if not is_normal_chunk:
        continue
    file_path = os.path.join(cfg.PATHS['working_dir'], item)
    if os.path.isfile(file_path):  # make sure it is a file
        processed_file_list.append(file_path)

if processed_file_list:
    print(f"\nFound {len(processed_file_list)} files to merge...")
    compile_result = xr.open_mfdataset(processed_file_list, combine='by_coords')
    # sort by rgi_id
    compile_result = compile_result.sortby('rgi_id')
    compile_result.to_netcdf(os.path.join(cfg.PATHS['working_dir'], 'run_output_cmip6_normal_all.nc'))

### Replicate extremes using QDM

In [None]:
# Generate the future forcing for the QDM "replicated extremes" experiment,
# 1950-2100 with the future period starting in 2025 and no cooling factor.
# NOTE(review): create_scieno.simulate_future_extremes_QDM is defined outside
# this cell; it presumably writes per-glacier gcm_data files suffixed
# '_{gcm}_{ssp}_repu_his_extremes_QDM', which the projection cells read - confirm.
qdm_forcing_kwargs = dict(
    start_year=1950,
    end_year=2100,
    future_start_year=2025,
    future_cooling_factor=None,
    output_filesuffix='_repu_his_extremes_QDM',
)
workflow.execute_entity_task(create_scieno.simulate_future_extremes_QDM, gdirs, **qdm_forcing_kwargs)


In [None]:
# Run the QDM "replicated extremes" projection for every CMIP6 scenario/model
# pair, compile the per-glacier outputs into one dataset per pair, and save
# the combined result.
print("Running CMIP6 scenario projections (2025-2100)...")

projection_results = []

for ssp in cmip6_scenarios:
    print(f"\nProcessing scenario: {ssp}")

    for gcm in cmip6_models:
        print(f"  - Model: {gcm}")

        # Suffix of the gcm_data forcing file to read, and suffix under which
        # this run's per-glacier outputs are stored.
        extreme_filesuffix = f'_{gcm}_{ssp}_repu_his_extremes_QDM'
        projection_id = f'_proj_{gcm}_{ssp}_repu_his_extremes_QDM_{start_idx}-{end_idx-1}'

        try:
            # Run projection simulation
            workflow.execute_entity_task(
                tasks.run_with_hydro, gdirs,
                run_task=tasks.run_from_climate_data,
                climate_filename='gcm_data',
                climate_input_filesuffix=extreme_filesuffix,
                fixed_geometry_spinup_yr=2000,
                ref_area_from_y0=True,
                output_filesuffix=projection_id,
                store_monthly_hydro=False
            )

            # Glaciers whose run left no diagnostics file are treated as failed.
            # Asking each glacier directory for its own file replaces a walk of
            # the whole per_glacier tree for every GCM/SSP pair.
            gdf_sel2 = [
                gdir.rgi_id
                for gdir in gdirs
                if not os.path.exists(
                    gdir.get_filepath('model_diagnostics', filesuffix=projection_id)
                )
            ]
            if gdf_sel2:
                # NOTE(review): a glacier dropped here stays dropped for all
                # later pairs, so pairs processed earlier keep more glaciers
                # than later ones (the gaps are NaN-filled when combining).
                failed_ids = set(gdf_sel2)
                gdirs[:] = [gdir for gdir in gdirs if gdir.rgi_id not in failed_ids]
                print(f"        Remaining glaciers: {len(gdirs)}")

            # Compile results for this model-scenario and tag them with
            # GCM/SSP dimensions so the pairs can be combined afterwards.
            ds_projection = utils.compile_run_output(gdirs, input_filesuffix=projection_id, path=False)
            ds_projection = ds_projection.assign_coords(GCM=gcm, SSP=ssp)
            ds_projection = ds_projection.expand_dims(['GCM', 'SSP'])

            projection_results.append(ds_projection)

        except Exception as e:
            # Best effort: report the failing pair and move on to the next one.
            print(f"    Error running projection for {gcm} {ssp}: {e}")

if not projection_results:
    # Fail with a clear message rather than an obscure error further down.
    raise RuntimeError("No CMIP6 QDM projection succeeded; there is nothing to combine.")

ds_all_projections = xr.combine_by_coords(projection_results, fill_value=np.nan, combine_attrs='override')
ds_all_projections = ds_all_projections.sortby('rgi_id')
output_path = cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QDM_{start_idx}-{end_idx-1}.nc'
ds_all_projections.to_netcdf(output_path)

print("\nCMIP6 scenario projections complete!")

In [None]:
# Run the QDM "replicated extremes" projection for every CMIP6 scenario/model
# pair, starting from the dynamic-spinup model state (year 2000), then compile
# and save. `spinup_filesuffix` comes from the dynamic-spinup cells.
projection_results_sp = []

for ssp in cmip6_scenarios:
    print(f"\nProcessing scenario: {ssp}")

    for gcm in cmip6_models:
        print(f"  - Model: {gcm}")
        # Suffix of the gcm_data forcing file to read, and suffix under which
        # this run's per-glacier outputs are stored.
        extreme_filesuffix = f'_{gcm}_{ssp}_repu_his_extremes_QDM'
        projection_id = f'_proj_{gcm}_{ssp}_repu_his_extremes_QDM_spinup_{start_idx}-{end_idx-1}'

        try:
            # Run projection simulation
            workflow.execute_entity_task(
                tasks.run_with_hydro, gdirs,
                run_task=tasks.run_from_climate_data,
                climate_filename='gcm_data',
                climate_input_filesuffix=extreme_filesuffix,
                init_model_filesuffix=spinup_filesuffix,
                init_model_yr=2000,
                ref_area_from_y0=True,
                output_filesuffix=projection_id,
                store_monthly_hydro=False
            )

            # Glaciers whose run left no diagnostics file are treated as failed.
            # Asking each glacier directory for its own file replaces a walk of
            # the whole per_glacier tree for every GCM/SSP pair.
            gdf_sel2 = [
                gdir.rgi_id
                for gdir in gdirs
                if not os.path.exists(
                    gdir.get_filepath('model_diagnostics', filesuffix=projection_id)
                )
            ]
            if gdf_sel2:
                # NOTE(review): a glacier dropped here stays dropped for all
                # later pairs, so pairs processed earlier keep more glaciers
                # than later ones (the gaps are NaN-filled when combining).
                failed_ids = set(gdf_sel2)
                gdirs[:] = [gdir for gdir in gdirs if gdir.rgi_id not in failed_ids]
                print(f"        Remaining glaciers: {len(gdirs)}")

            # Compile results for this model-scenario and tag them with
            # GCM/SSP dimensions so the pairs can be combined afterwards.
            ds_projection = utils.compile_run_output(gdirs, input_filesuffix=projection_id, path=False)
            ds_projection = ds_projection.assign_coords(GCM=gcm, SSP=ssp)
            ds_projection = ds_projection.expand_dims(['GCM', 'SSP'])

            projection_results_sp.append(ds_projection)

        except Exception as e:
            # Best effort: report the failing pair and move on to the next one.
            print(f"    Error running projection for {gcm} {ssp}: {e}")

if not projection_results_sp:
    # Fail with a clear message rather than an obscure error further down.
    raise RuntimeError("No CMIP6 QDM spinup projection succeeded; there is nothing to combine.")

# NOTE(review): the spinup result is stored in `ds_all_projections`, replacing
# any no-spinup dataset held under that name until it is reloaded from disk.
ds_all_projections = xr.combine_by_coords(projection_results_sp, fill_value=np.nan, combine_attrs='override')
ds_all_projections = ds_all_projections.sortby('rgi_id')
output_path = cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QDM_spinup_{start_idx}-{end_idx-1}.nc'
ds_all_projections.to_netcdf(output_path)

print("\nCMIP6 scenario projections complete!")

In [None]:
# Reload the QDM projections (without and with dynamic spinup) and compare the
# total volume over all glaciers for SSP5-8.5 / CAMS-CSM1-0.
qdm_output_file = cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QDM_{start_idx}-{end_idx-1}.nc'
qdm_spinup_output_file = cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QDM_spinup_{start_idx}-{end_idx-1}.nc'
ds_all_projections = xr.open_dataset(qdm_output_file)
ds_all_projections_sp = xr.open_dataset(qdm_spinup_output_file)

for run_ds, run_label in (
    (ds_all_projections, 'no spinup'),
    (ds_all_projections_sp, 'spinup'),
):
    regional_total = run_ds.sum(dim='rgi_id', skipna=True, keep_attrs=True)
    single_member = regional_total.sel(SSP='ssp585').sel(GCM='CAMS-CSM1-0')
    single_member.volume.plot(label=run_label)  # to 2100

plt.legend()

In [None]:
# GCM-mean total volume for SSP5-8.5 under the QDM forcing, without and with
# dynamic spinup, plus a red marker at (mean RGI date, summed inversion volume).
ref_date = np.mean([g.rgi_date for g in gdirs])
ref_volume = np.sum([tasks.get_inversion_volume(g) for g in gdirs])

for run_ds, run_label in (
    (ds_all_projections, 'no spinup'),
    (ds_all_projections_sp, 'spinup'),
):
    regional_total = run_ds.sum(dim='rgi_id', skipna=True, keep_attrs=True)
    ensemble_mean = regional_total.mean(dim='GCM', skipna=True, keep_attrs=True)
    ensemble_mean.sel(SSP='ssp585').volume.plot(label=run_label)  # to 2100

plt.scatter(ref_date, ref_volume, c='red')
plt.legend()

In [None]:
# GCM-mean total area for SSP5-8.5 under the QDM forcing, without and with
# dynamic spinup, plus a red marker at (mean RGI date, summed RGI area).
ref_date = np.mean([g.rgi_date for g in gdirs])
ref_area = np.sum([g.rgi_area_m2 for g in gdirs])

for run_ds, run_label in (
    (ds_all_projections, 'no spinup'),
    (ds_all_projections_sp, 'spinup'),
):
    regional_total = run_ds.sum(dim='rgi_id', skipna=True, keep_attrs=True)
    ensemble_mean = regional_total.mean(dim='GCM', skipna=True, keep_attrs=True)
    ensemble_mean.sel(SSP='ssp585').area.plot(label=run_label)  # to 2100

plt.scatter(ref_date, ref_area, c='red')
plt.legend()

### Replicate extremes using QDM (cooling factor: 1.2)

In [None]:
# Generate the future forcing for the cooled QDM experiment: same settings as
# the plain QDM run, but with future_cooling_factor=1.2 and its own file suffix.
# NOTE(review): how the factor is applied is decided inside
# create_scieno.simulate_future_extremes_QDM, which is not visible here.
qdm_cool_forcing_kwargs = dict(
    start_year=1950,
    end_year=2100,
    future_start_year=2025,
    future_cooling_factor=1.2,
    output_filesuffix='_repu_his_extremes_QDM_cool',
)
workflow.execute_entity_task(create_scieno.simulate_future_extremes_QDM, gdirs, **qdm_cool_forcing_kwargs)


In [None]:
# Run projections for each CMIP6 scenario and model, forced with the
# '_repu_his_extremes_QDM_cool' climate files, then combine every compiled
# run into one dataset with GCM and SSP dimensions and save it to disk.
print("Running CMIP6 scenario projections (2025-2100)...")

projection_results = []

for ssp in cmip6_scenarios:
    print(f"\nProcessing scenario: {ssp}")

    for gcm in cmip6_models:
        print(f"  - Model: {gcm}")

        # File suffixes for this model-scenario combination
        extreme_filesuffix = f'_{gcm}_{ssp}_repu_his_extremes_QDM_cool'
        projection_id = f'_proj_{gcm}_{ssp}_repu_his_extremes_QDM_cool_{start_idx}-{end_idx-1}'

        try:
            # Run projection simulation
            workflow.execute_entity_task(
                tasks.run_with_hydro, gdirs,
                run_task=tasks.run_from_climate_data,
                climate_filename='gcm_data',
                climate_input_filesuffix=extreme_filesuffix,
                fixed_geometry_spinup_yr=2000,
                ref_area_from_y0=True,
                output_filesuffix=projection_id,
                store_monthly_hydro=False
            )

            # Keep only the glaciers whose run wrote its model_diagnostics
            # file. Asking each glacier directory directly avoids walking the
            # whole per_glacier tree for every model-scenario combination.
            gdirs_done = [
                gdir for gdir in gdirs
                if gdir.has_file('model_diagnostics', filesuffix=projection_id)
            ]
            if not gdirs_done:
                # The combination produced nothing at all: skip it, but do
                # not empty gdirs, otherwise every later run would have no
                # glacier left to simulate.
                print(f"    No output for {gcm} {ssp}, skipping this combination")
                continue
            if len(gdirs_done) < len(gdirs):
                gdirs[:] = gdirs_done
                print(f"        Remaining glaciers: {len(gdirs)}")

            # Compile results for this model-scenario
            ds_projection = utils.compile_run_output(gdirs, input_filesuffix=projection_id, path=False)
            ds_projection = ds_projection.assign_coords(GCM=gcm, SSP=ssp)
            ds_projection = ds_projection.expand_dims(['GCM', 'SSP'])

            projection_results.append(ds_projection)

        except Exception as e:
            # Best effort: report the failure and go on with the next model
            print(f"    Error running projection for {gcm} {ssp}: {e}")
            continue

if not projection_results:
    raise RuntimeError('No CMIP6 projection could be compiled, nothing to combine.')

# Combinations that are missing for a GCM/SSP/glacier are filled with NaN
ds_all_projections = xr.combine_by_coords(projection_results, fill_value=np.nan, combine_attrs='override')
ds_all_projections = ds_all_projections.sortby('rgi_id')
output_path = cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QDM_cool_{start_idx}-{end_idx-1}.nc'
ds_all_projections.to_netcdf(output_path)

print("\nCMIP6 scenario projections complete!")

In [None]:
# Reload the saved QDM (cooling factor) projections and plot the regional
# volume of one GCM under ssp585.
ds_all_projections = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QDM_cool_{start_idx}-{end_idx-1}.nc')

# Select first so only this run's volume is read and summed; min_count=1
# leaves a run without any valid glacier as NaN rather than a zero volume.
ds_all_projections.volume.sel(SSP='ssp585', GCM='CAMS-CSM1-0').sum(
    dim='rgi_id',
    skipna=True,
    min_count=1,
    keep_attrs=True,
).plot()  # to 2100

In [None]:
# GCM-mean regional volume under ssp585.
# The variable and scenario are selected before reducing so only the plotted
# data is summed. min_count=1 keeps a GCM without any valid glacier as NaN,
# so the GCM mean skips it instead of averaging in a spurious zero.
ds_all_projections.volume.sel(SSP='ssp585').sum(
    dim='rgi_id',
    skipna=True,
    min_count=1,
    keep_attrs=True,
).mean(
    dim='GCM',
    skipna=True,
    keep_attrs=True,
).plot()  # to 2100

### Replicate extremes upon QM

In [None]:
# Generate the future forcing files: run
# create_scieno.simulate_future_extremes_Detrend_QM on every glacier
# directory with the settings below (no cooling factor is passed here).
qm_settings = {
    'start_year': 1950,
    'end_year': 2100,
    'future_start_year': 2025,
    'future_cooling_factor': None,
    'output_filesuffix': '_repu_his_extremes_QM',
}
workflow.execute_entity_task(
    create_scieno.simulate_future_extremes_Detrend_QM,
    gdirs,
    **qm_settings,
)


In [None]:
# Run projections for each CMIP6 scenario and model, forced with the
# '_repu_his_extremes_QM' climate files, then combine every compiled run
# into one dataset with GCM and SSP dimensions and save it to disk.
print("Running CMIP6 scenario projections (2025-2100)...")

projection_results = []

for ssp in cmip6_scenarios:
    print(f"\nProcessing scenario: {ssp}")

    for gcm in cmip6_models:
        print(f"  - Model: {gcm}")

        # File suffixes for this model-scenario combination
        extreme_filesuffix = f'_{gcm}_{ssp}_repu_his_extremes_QM'
        projection_id = f'_proj_{gcm}_{ssp}_repu_his_extremes_QM_{start_idx}-{end_idx-1}'

        try:
            # Run projection simulation
            workflow.execute_entity_task(
                tasks.run_with_hydro, gdirs,
                run_task=tasks.run_from_climate_data,
                climate_filename='gcm_data',
                climate_input_filesuffix=extreme_filesuffix,
                fixed_geometry_spinup_yr=2000,
                ref_area_from_y0=True,
                output_filesuffix=projection_id,
                store_monthly_hydro=False
            )

            # Keep only the glaciers whose run wrote its model_diagnostics
            # file. Asking each glacier directory directly avoids walking the
            # whole per_glacier tree for every model-scenario combination.
            gdirs_done = [
                gdir for gdir in gdirs
                if gdir.has_file('model_diagnostics', filesuffix=projection_id)
            ]
            if not gdirs_done:
                # The combination produced nothing at all: skip it, but do
                # not empty gdirs, otherwise every later run would have no
                # glacier left to simulate.
                print(f"    No output for {gcm} {ssp}, skipping this combination")
                continue
            if len(gdirs_done) < len(gdirs):
                gdirs[:] = gdirs_done
                print(f"        Remaining glaciers: {len(gdirs)}")

            # Compile results for this model-scenario
            ds_projection = utils.compile_run_output(gdirs, input_filesuffix=projection_id, path=False)
            ds_projection = ds_projection.assign_coords(GCM=gcm, SSP=ssp)
            ds_projection = ds_projection.expand_dims(['GCM', 'SSP'])

            projection_results.append(ds_projection)

        except Exception as e:
            # Best effort: report the failure and go on with the next model
            print(f"    Error running projection for {gcm} {ssp}: {e}")
            continue

if not projection_results:
    raise RuntimeError('No CMIP6 projection could be compiled, nothing to combine.')

# Combinations that are missing for a GCM/SSP/glacier are filled with NaN
ds_all_projections = xr.combine_by_coords(projection_results, fill_value=np.nan, combine_attrs='override')
ds_all_projections = ds_all_projections.sortby('rgi_id')
output_path = cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QM_{start_idx}-{end_idx-1}.nc'
ds_all_projections.to_netcdf(output_path)

print("\nCMIP6 scenario projections complete!")

In [None]:
# Same QM-forced projections as the non-spinup run, but each glacier is
# initialised from the model file with suffix `spinup_filesuffix`
# (init_model_yr=2000) instead of using fixed_geometry_spinup_yr.
projection_results_sp = []

for ssp in cmip6_scenarios:
    print(f"\nProcessing scenario: {ssp}")

    for gcm in cmip6_models:
        print(f"  - Model: {gcm}")
        # File suffixes for this model-scenario combination
        extreme_filesuffix = f'_{gcm}_{ssp}_repu_his_extremes_QM'
        projection_id = f'_proj_{gcm}_{ssp}_repu_his_extremes_QM_spinup_{start_idx}-{end_idx-1}'

        try:
            # Run projection simulation
            workflow.execute_entity_task(
                tasks.run_with_hydro, gdirs,
                run_task=tasks.run_from_climate_data,
                climate_filename='gcm_data',
                climate_input_filesuffix=extreme_filesuffix,
                init_model_filesuffix=spinup_filesuffix,
                init_model_yr=2000,
                ref_area_from_y0=True,
                output_filesuffix=projection_id,
                store_monthly_hydro=False
            )

            # Keep only the glaciers whose run wrote its model_diagnostics
            # file. Asking each glacier directory directly avoids walking the
            # whole per_glacier tree for every model-scenario combination.
            gdirs_done = [
                gdir for gdir in gdirs
                if gdir.has_file('model_diagnostics', filesuffix=projection_id)
            ]
            if not gdirs_done:
                # The combination produced nothing at all: skip it, but do
                # not empty gdirs, otherwise every later run would have no
                # glacier left to simulate.
                print(f"    No output for {gcm} {ssp}, skipping this combination")
                continue
            if len(gdirs_done) < len(gdirs):
                gdirs[:] = gdirs_done
                print(f"        Remaining glaciers: {len(gdirs)}")

            # Compile results for this model-scenario
            ds_projection = utils.compile_run_output(gdirs, input_filesuffix=projection_id, path=False)
            ds_projection = ds_projection.assign_coords(GCM=gcm, SSP=ssp)
            ds_projection = ds_projection.expand_dims(['GCM', 'SSP'])

            projection_results_sp.append(ds_projection)

        except Exception as e:
            # Best effort: report the failure and go on with the next model
            print(f"    Error running projection for {gcm} {ssp}: {e}")
            continue

if not projection_results_sp:
    raise RuntimeError('No CMIP6 spinup projection could be compiled, nothing to combine.')

# NOTE(review): this rebinds ds_all_projections to the *spinup* results;
# the name is kept unchanged so existing cells keep working.
ds_all_projections = xr.combine_by_coords(projection_results_sp, fill_value=np.nan, combine_attrs='override')
ds_all_projections = ds_all_projections.sortby('rgi_id')
output_path = cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QM_spinup_{start_idx}-{end_idx-1}.nc'
ds_all_projections.to_netcdf(output_path)

print("\nCMIP6 scenario projections complete!")

In [None]:
# Reload the saved QM projections (file without the spinup suffix) and plot
# the regional volume of one GCM under ssp585.
ds_all_projections = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QM_{start_idx}-{end_idx-1}.nc')

# Select first so only this run's volume is read and summed; min_count=1
# leaves a run without any valid glacier as NaN rather than a zero volume.
ds_all_projections.volume.sel(SSP='ssp585', GCM='CAMS-CSM1-0').sum(
    dim='rgi_id',
    skipna=True,
    min_count=1,
    keep_attrs=True,
).plot()  # to 2100

In [None]:
# GCM-mean regional volume under ssp585.
# The variable and scenario are selected before reducing so only the plotted
# data is summed. min_count=1 keeps a GCM without any valid glacier as NaN,
# so the GCM mean skips it instead of averaging in a spurious zero.
ds_all_projections.volume.sel(SSP='ssp585').sum(
    dim='rgi_id',
    skipna=True,
    min_count=1,
    keep_attrs=True,
).mean(
    dim='GCM',
    skipna=True,
    keep_attrs=True,
).plot()  # to 2100

### Plot

In [None]:
# Load the compiled runs to compare: the 'normal' CMIP6 run and the QDM / QM
# extreme variants, each from the file without and with the spinup suffix.
ds_normal = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_cmip6_normal_{start_idx}-{end_idx-1}.nc')
ds_extreme1 = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QDM_{start_idx}-{end_idx-1}.nc')
# ds_extreme2= xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QDM_cool_{start_idx}-{end_idx-1}.nc')
ds_extreme3 = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QM_{start_idx}-{end_idx-1}.nc')

ds_normal_sp = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_cmip6_normal_spinup_{start_idx}-{end_idx-1}.nc')
ds_extreme1_sp = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QDM_spinup_{start_idx}-{end_idx-1}.nc')
ds_extreme3_sp = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_cmip6_repu_his_extremes_QM_spinup_{start_idx}-{end_idx-1}.nc')

# Plot styling: one colour per forcing, solid without / dashed with spinup.
# These lists are index-aligned with ds_collect.
ds_collect = [ds_normal, ds_extreme1, ds_extreme3,
              ds_normal_sp, ds_extreme1_sp, ds_extreme3_sp]
labels = ['CMIP6', 'CMIP6 + QDM extremes', 'CMIP6 + QM extremes',
          'CMIP6(spinup)', 'CMIP6 + QDM extremes(spinup)', 'CMIP6 + QM extremes(spinup)']
accent = plt.get_cmap('Accent')
colors = [accent(0), accent(5), accent(6)] * 2
styles = ['-'] * 3 + ['--'] * 3
widths = [2] * 6

# Regional GCM-mean volume, computed once per dataset for all scenarios
# instead of once per panel. Only `volume` is reduced. min_count=1 keeps a
# GCM/SSP combination without any valid glacier as NaN so the GCM mean skips
# it; a plain skipna sum would turn it into a zero volume and bias the mean.
regional_volumes = [
    ds_item.volume.sum(dim='rgi_id', skipna=True, min_count=1).mean(dim='GCM', skipna=True)
    for ds_item in ds_collect
]

fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

for i, ssp in enumerate(cmip6_scenarios):
    ax = axes[i]
    for ds_i, regional_volume in enumerate(regional_volumes):
        series = regional_volume.sel(SSP=ssp)
        ax.plot(series.time.values, series.values, linewidth=widths[ds_i], color=colors[ds_i],
                label=labels[ds_i], linestyle=styles[ds_i], alpha=0.7)

    # Axis formatting is the same for every line, so set it once per panel
    ax.set_xlabel('Year', fontsize=12)
    ax.set_ylabel('Volume (m$^3$)', fontsize=12)
    ax.set_title(ssp.upper(), fontsize=14)
    ax.tick_params(labelsize=10)
    ax.set_xlim([2000, 2100])

    # Single legend, drawn on the fourth panel only
    if i == 3:
        ax.legend(fontsize=12, loc='best')

plt.show()

In [None]:

# One panel per scenario showing the regional GCM-mean area of every dataset
# in ds_collect (styled through the index-aligned labels / colors / styles /
# widths lists).
# The aggregation is computed once per dataset for all scenarios and only for
# `area`. min_count=1 keeps a GCM/SSP combination without any valid glacier
# as NaN so the GCM mean skips it; a plain skipna sum would turn it into a
# zero area and bias the mean.
regional_areas = [
    ds_item.area.sum(dim='rgi_id', skipna=True, min_count=1).mean(dim='GCM', skipna=True)
    for ds_item in ds_collect
]

fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

for i, ssp in enumerate(cmip6_scenarios):
    ax = axes[i]
    for ds_i, regional_area in enumerate(regional_areas):
        series = regional_area.sel(SSP=ssp)
        ax.plot(series.time.values, series.values, linewidth=widths[ds_i], color=colors[ds_i],
                label=labels[ds_i], linestyle=styles[ds_i], alpha=0.7)

    # Axis formatting is the same for every line, so set it once per panel
    ax.set_xlabel('Year', fontsize=12)
    ax.set_ylabel('Area (m$^2$)', fontsize=12)
    ax.set_title(ssp.upper(), fontsize=14)
    ax.tick_params(labelsize=10)
    ax.set_xlim([2000, 2100])

    # Single legend, drawn on the fourth panel only
    if i == 3:
        ax.legend(fontsize=12, loc='best')

plt.show()

# Historical modelling

### Normal

In [None]:
# Run historical simulation from the 'climate_historical' file, initialised
# from the model file with suffix `spinup_filesuffix` (init_model_yr=2000).
file_id = f'_hist_spinup_{start_idx}-{end_idx-1}'
workflow.execute_entity_task(
                            tasks.run_with_hydro, gdirs,
                            run_task=tasks.run_from_climate_data,
                            climate_filename='climate_historical',
                            init_model_filesuffix=spinup_filesuffix,
                            init_model_yr=2000,
                            ref_area_from_y0=True,
                            output_filesuffix=file_id,
                            store_monthly_hydro=False
                            )

# Drop the glaciers whose run did not write its model_diagnostics file.
# Each glacier directory is asked directly, so glaciers already removed from
# gdirs earlier are not counted again and no walk of the whole per_glacier
# tree is needed.
gdirs_done = [
    gdir for gdir in gdirs
    if gdir.has_file('model_diagnostics', filesuffix=file_id)
]
if not gdirs_done:
    # Fail before emptying gdirs so the glacier list survives for a re-run
    raise RuntimeError(f'No glacier produced model_diagnostics{file_id}.nc')
n_incomplete = len(gdirs) - len(gdirs_done)
if n_incomplete > 0:
    print(f"Removing {n_incomplete} incomplete glaciers...")
    gdirs[:] = gdirs_done
    print(f"Remaining glaciers: {len(gdirs)}")

# Compile output
ds_hist = utils.compile_run_output(gdirs, input_filesuffix=file_id)
print("Historical simulation complete")


### Remove warming (2015-2024)

In [None]:
# Write modified climate files with create_scieno.remove_warming_trend for
# the 2015-2024 window. `climate_filesuffix` is the suffix of the files
# written here.
climate_filesuffix = '_warm_rm'
warm_rm_window = {'ys_start': 2015, 'ys_end': 2024}

workflow.execute_entity_task(
    create_scieno.remove_warming_trend,
    gdirs,
    output_filesuffix=climate_filesuffix,
    **warm_rm_window,
)


In [None]:
# Run historical simulation from the 'climate_historical' file with suffix
# `climate_filesuffix`, initialised from the model file with suffix
# `spinup_filesuffix` (init_model_yr=2000).
file_id = f'_hist_warm_rm_spinup_{start_idx}-{end_idx-1}'
workflow.execute_entity_task(
                            tasks.run_with_hydro, gdirs,
                            run_task=tasks.run_from_climate_data,
                            climate_filename='climate_historical',
                            climate_input_filesuffix=climate_filesuffix,
                            init_model_filesuffix=spinup_filesuffix,
                            init_model_yr=2000,
                            ref_area_from_y0=True,
                            output_filesuffix=file_id,
                            store_monthly_hydro=False
                            )

# Drop the glaciers whose run did not write its model_diagnostics file.
# Each glacier directory is asked directly, so glaciers already removed from
# gdirs earlier are not counted again and no walk of the whole per_glacier
# tree is needed.
gdirs_done = [
    gdir for gdir in gdirs
    if gdir.has_file('model_diagnostics', filesuffix=file_id)
]
if not gdirs_done:
    # Fail before emptying gdirs so the glacier list survives for a re-run
    raise RuntimeError(f'No glacier produced model_diagnostics{file_id}.nc')
n_incomplete = len(gdirs) - len(gdirs_done)
if n_incomplete > 0:
    print(f"Removing {n_incomplete} incomplete glaciers...")
    gdirs[:] = gdirs_done
    print(f"Remaining glaciers: {len(gdirs)}")

# Compile output
ds_hist = utils.compile_run_output(gdirs, input_filesuffix=file_id)
print("Historical simulation complete")


### Add warming (2005-2014)

In [None]:
# Write modified climate files with create_scieno.add_warming_trend for the
# 2005-2014 window. `climate_filesuffix` is the suffix of the files written
# here.
climate_filesuffix = '_warm_add'
warm_add_window = {'ys_start': 2005, 'ys_end': 2014}

workflow.execute_entity_task(
    create_scieno.add_warming_trend,
    gdirs,
    output_filesuffix=climate_filesuffix,
    **warm_add_window,
)


In [None]:
# Run historical simulation from the 'climate_historical' file with suffix
# `climate_filesuffix`, initialised from the model file with suffix
# `spinup_filesuffix` (init_model_yr=2000).
file_id = f'_hist_warm_add_spinup_{start_idx}-{end_idx-1}'
workflow.execute_entity_task(
                            tasks.run_with_hydro, gdirs,
                            run_task=tasks.run_from_climate_data,
                            climate_filename='climate_historical',
                            climate_input_filesuffix=climate_filesuffix,
                            init_model_filesuffix=spinup_filesuffix,
                            init_model_yr=2000,
                            ref_area_from_y0=True,
                            output_filesuffix=file_id,
                            store_monthly_hydro=False
                            )

# Drop the glaciers whose run did not write its model_diagnostics file.
# Each glacier directory is asked directly, so glaciers already removed from
# gdirs earlier are not counted again and no walk of the whole per_glacier
# tree is needed.
gdirs_done = [
    gdir for gdir in gdirs
    if gdir.has_file('model_diagnostics', filesuffix=file_id)
]
if not gdirs_done:
    # Fail before emptying gdirs so the glacier list survives for a re-run
    raise RuntimeError(f'No glacier produced model_diagnostics{file_id}.nc')
n_incomplete = len(gdirs) - len(gdirs_done)
if n_incomplete > 0:
    print(f"Removing {n_incomplete} incomplete glaciers...")
    gdirs[:] = gdirs_done
    print(f"Remaining glaciers: {len(gdirs)}")

# Compile output
ds_hist = utils.compile_run_output(gdirs, input_filesuffix=file_id)
print("Historical simulation complete")


### Plot

In [None]:
# Load the three compiled historical runs (unchanged forcing, warming
# removed, warming added) and plot their regional volume on one axis.
ds_normal_sp = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_hist_spinup_{start_idx}-{end_idx-1}.nc')
ds_extreme1_sp = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_hist_warm_rm_spinup_{start_idx}-{end_idx-1}.nc')
ds_extreme3_sp = xr.open_dataset(cfg.PATHS['working_dir'] + f'/run_output_hist_warm_add_spinup_{start_idx}-{end_idx-1}.nc')

# Plot styling, index-aligned with ds_collect
ds_collect = [ds_normal_sp, ds_extreme1_sp, ds_extreme3_sp]
labels = ['ERA5(spinup)', 'ERA5 + 2015-2024 warming removed(spinup)', 'ERA5 + 2005-2014 warming added (spinup)']
accent = plt.get_cmap('Accent')
colors = [accent(5), accent(0), accent(6)]
styles = ['-'] * 3
widths = [2] * 3

fig, ax = plt.subplots(figsize=(10, 6))

for ds_i, ds_item in enumerate(ds_collect):
    # Reduce only `volume`; min_count=1 keeps a time step without any valid
    # glacier as NaN (a gap in the line) instead of plotting it as zero.
    regional_volume = ds_item.volume.sum(dim='rgi_id', skipna=True, min_count=1)
    ax.plot(regional_volume.time.values, regional_volume.values, linewidth=widths[ds_i], color=colors[ds_i],
            label=labels[ds_i], linestyle=styles[ds_i], alpha=0.7)

# Axis formatting is the same for every line, so set it once
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Volume (m$^3$)', fontsize=12)
ax.tick_params(labelsize=10)
ax.set_xlim([2000, 2026])

ax.legend()
plt.show()

In [None]:
# Regional area of every dataset in ds_collect on one axis, styled through
# the index-aligned labels / colors / styles / widths lists.
fig, ax = plt.subplots(figsize=(10, 6))

for ds_i, ds_item in enumerate(ds_collect):
    # Reduce only `area`; min_count=1 keeps a time step without any valid
    # glacier as NaN (a gap in the line) instead of plotting it as zero.
    regional_area = ds_item.area.sum(dim='rgi_id', skipna=True, min_count=1)
    ax.plot(regional_area.time.values, regional_area.values, linewidth=widths[ds_i], color=colors[ds_i],
            label=labels[ds_i], linestyle=styles[ds_i], alpha=0.7)

# Axis formatting is the same for every line, so set it once
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Area (m$^2$)', fontsize=12)
ax.tick_params(labelsize=10)
ax.set_xlim([2000, 2026])

ax.legend()
plt.show()