# Compare remotely sensed time series to new PyGEM runs

In [None]:
import os
import glob
import xarray as xr
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Define file paths
# Root folder of the snow cover mapping project data (absolute, machine-specific path)
data_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'
# Per-glacier study-site folders; the observed snow cover statistics CSVs are read from here
sites_path = os.path.join(data_path, 'study-sites')
# Folder of PyGEM runs: one sub-directory per glacier, each holding that glacier's *.nc run files
model_path = os.path.join(data_path, 'Brandon_new_PyGEM_runs')
# Output folder for the comparison figures (absolute, machine-specific path)
figures_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/glacier-snow-cover-analysis/figures'

In [None]:
# Glacier IDs are the names of the sub-directories of the model run folder
# (plain files sitting next to them are ignored), in sorted order
rgi_ids = [
    entry
    for entry in sorted(os.listdir(model_path))
    if os.path.isdir(os.path.join(model_path, entry))
]
# Display names, paired with rgi_ids by position: names[i] labels rgi_ids[i]
names = ['Gulkana', 'Wolverine', 'Lemon Creek', 'Sperry', 'South Cascade']
print('RGI IDs for glaciers with PyGEM runs:', rgi_ids)

# Define function to find closest SLA within a time tolerance (default 7 days)
def find_closest_sla(time_values, obs_df, max_days=7, column='SLA_from_AAR_m'):
    """Sample the observation nearest in time to each requested time.

    Parameters
    ----------
    time_values : iterable of datetime-like
        Times at which an observation is wanted. Null entries (NaT/None/NaN)
        yield NaN.
    obs_df : pandas.DataFrame
        Observations indexed by datetime; must contain ``column``.
    max_days : int or float, optional
        Largest allowed gap, in days, between a requested time and its nearest
        observation (inclusive). Default is 7.
    column : str, optional
        Name of the column of ``obs_df`` to sample. Default is
        ``'SLA_from_AAR_m'``.

    Returns
    -------
    list
        One value per entry of ``time_values``: the value of ``column`` at the
        nearest observation time, or NaN when the requested time is null, when
        ``obs_df`` has no rows, or when the nearest observation is more than
        ``max_days`` away. The nearest observation is used as-is, so a NaN
        stored there is returned even if a valid one lies slightly further away.

    Raises
    ------
    KeyError
        If ``column`` is not a column of ``obs_df``.
    """
    # Hoist loop invariants: the tolerance, the observation times, and the
    # sampled column as a flat array (avoids building a row Series per lookup)
    tolerance = pd.Timedelta(days=max_days)
    obs_times = obs_df.index
    obs_values = obs_df[column].to_numpy()
    # argmin() raises on an empty sequence, so an empty table is handled up front
    has_obs = len(obs_times) > 0

    sla_obs = []
    for t in time_values:
        if not has_obs or pd.isnull(t):
            sla_obs.append(np.nan)
            continue
        # absolute time gap between this time and every observation
        diffs = np.abs(obs_times - t)
        closest_idx = diffs.argmin()
        if diffs[closest_idx] <= tolerance:
            sla_obs.append(obs_values[closest_idx])
        else:
            sla_obs.append(np.nan)
    return sla_obs

# Iterate over RGI IDs: for each glacier, build (or reload) the dataset that
# pairs modeled snowlines with observed SLAs, then plot the snowline RMSE of
# every model run as a function of its tbias and ddfsnow parameters
for i, rgi_id in enumerate(rgi_ids):
    # names is paired with rgi_ids purely by position; fall back to the ID so
    # an extra model directory does not abort the loop with an IndexError
    name = names[i] if i < len(names) else rgi_id
    print(name, rgi_id)
    run_fns = sorted(glob.glob(os.path.join(model_path, rgi_id, '*.nc')))

    # Define output file
    out_fn = os.path.join(data_path, 'analysis', f"{rgi_id}_PyGEM_comparison.nc")
    if not os.path.exists(out_fn):
        # xr.concat cannot combine an empty list, so skip glaciers without runs
        if not run_fns:
            print('No model run files found, skipping:', os.path.join(model_path, rgi_id))
            continue

        # Compile modeled snowline altitudes and ELAs
        model_runs_list = []
        # iterate over model runs
        for fn in tqdm(run_fns):
            # open each run in a context manager so its file handle is released;
            # the variables that are kept are loaded into memory before it closes
            with xr.open_dataset(fn) as ds:
                ds['time'] = ds.indexes['time'].to_datetimeindex()
                # load model parameters (stored as a JSON string attribute)
                params = json.loads(ds.model_parameters)
                # create a new dataset with the snowline/ELA variables and the
                # run's parameters as scalars
                run_ds = xr.Dataset({
                    'glac_snowline_monthly': ds['glac_snowline_monthly'],
                    'glac_ELA_annual': ds['glac_ELA_annual'],
                    'kp': xr.DataArray(params["kp"], dims=()),
                    'tbias': xr.DataArray(params["tbias"], dims=()),
                    'ddfsnow': xr.DataArray(params["ddfsnow"], dims=()),
                    'ddfice': xr.DataArray(params["ddfice"], dims=()),
                }).load()
            model_runs_list.append(run_ds)
        # combine all runs into a single dataset
        combined_ds = xr.concat(model_runs_list, dim='run')
        # trim to 2013 onward, May to October (months 5-10)
        # NOTE(review): this comment previously said "May to November", but the
        # month filter has always stopped at October - confirm which is intended
        combined_ds = combined_ds.sel(time=slice('2013-01-01', None))
        combined_ds = combined_ds.sel(time=combined_ds['time.month'].isin([5, 6, 7, 8, 9, 10]))
        # add glacier ID
        combined_ds['rgi_id'] = xr.DataArray(rgi_id, dims=())

        # Load observed snow cover data
        # NOTE(review): the "RGI60-0" prefix presumably assumes IDs with a
        # single-digit region (e.g. "1.00570") - confirm
        scs_fn = os.path.join(sites_path, f"RGI60-0{rgi_id}", f"RGI60-0{rgi_id}_snow_cover_stats.csv")
        scs = pd.read_csv(scs_fn)
        scs['datetime'] = pd.to_datetime(scs['datetime'])
        scs = scs.set_index('datetime')

        # Sample SLA for each model time
        model_times = combined_ds['time'].values
        sla_obs = find_closest_sla(model_times, scs)

        # Create DataArray of observed SLAs with just time dimension
        sla_obs_da = xr.DataArray(
            data=sla_obs,
            dims='time',
            coords={'time': combined_ds['time']},
            name='observed_SLA'
        )

        # Merge with modeled dataset
        combined_ds['observed_SLA'] = sla_obs_da

        # Calculate modeled - observed SLAs
        combined_ds['mod-obs_SLA'] = combined_ds['glac_snowline_monthly'] - combined_ds['observed_SLA']

        # Save to file (create the output folder on first use)
        os.makedirs(os.path.dirname(out_fn), exist_ok=True)
        combined_ds.to_netcdf(out_fn)
        print('Combined dataset saved to file:', out_fn)
    else:
        # Load existing dataset fully into memory so the file handle is closed
        with xr.open_dataset(out_fn) as cached_ds:
            combined_ds = cached_ds.load()
        print('Combined dataset loaded from file:', out_fn)

    # Load original model parameters
    # NOTE: unpickling can execute arbitrary code; only read parameter files
    # that come from a trusted source
    modelprms_fn = os.path.join(model_path, '..', 'Rounce_et_al_2023', 'modelprms', f"{rgi_id}-modelprms_dict.pkl")
    modelprms = pd.read_pickle(modelprms_fn)

    # Calculate RMSE for each run
    diff = combined_ds['mod-obs_SLA']
    rmse_by_run = np.sqrt((diff**2).mean(dim='time'))
    combined_ds['rmse'] = rmse_by_run

    # Plot RMSE as a function of tbias and ddfsnow
    # create DataFrame for plotting
    df_plot = combined_ds[['tbias', 'ddfsnow', 'rmse']].to_dataframe().reset_index()
    # drop rows with NaN RMSE (i.e., where obs SLA missing for all times)
    df_plot = df_plot.dropna(subset=['rmse'])
    # idxmin() raises on an empty frame, and there is nothing to plot anyway
    if df_plot.empty:
        print('No finite RMSE values (no matching observed SLAs), skipping figure for:', rgi_id)
        continue
    # identify row with lowest RMSE
    df_plot_best = df_plot.loc[df_plot['rmse'].idxmin()]
    # scatter plot
    fig = plt.figure(figsize=(8, 6))
    scatter = plt.scatter(
        df_plot['tbias'], df_plot['ddfsnow'],
        c=df_plot['rmse'], cmap='viridis_r', s=80, edgecolor='gray', linewidth=0.5
    )
    # plot the original parameter combination
    plt.plot(modelprms['emulator']['tbias'], modelprms['emulator']['ddfsnow'], 's',
             markersize=15, markeredgecolor='m', markerfacecolor='None', markeredgewidth=2, label='Original')
    # plot the optimal parameter combination
    plt.plot(df_plot_best['tbias'], df_plot_best['ddfsnow'], '*',
             markersize=15, markeredgecolor='m', markerfacecolor='None', markeredgewidth=2, label='Lowest RMSE')
    plt.legend()
    plt.xlabel('tbias')
    plt.ylabel('ddfsnow')
    plt.title(f'{name} ({rgi_id})')
    cbar = plt.colorbar(scatter)
    cbar.set_label('Snowline altitude RMSE (m)')
    plt.grid(True)
    plt.tight_layout()

    # Save figure to file before showing it, so the saved image does not depend
    # on what the active backend does to the figure in show()
    os.makedirs(figures_path, exist_ok=True)
    fig_fn = os.path.join(figures_path, f"{rgi_id}_PyGEM_comparison.png")
    fig.savefig(fig_fn, dpi=300, bbox_inches='tight')
    print('Figure saved to file:', fig_fn)
    plt.show()
    plt.close(fig)

