# OGGM - data pulling

## Setting up:

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import geopandas as gpd
import pyproj
from pyproj import Transformer
import salem
from tqdm.notebook import tqdm
import logging

from oggm import cfg, utils, workflow, tasks

cfg.initialize(logging_level='WARNING')
cfg.PARAMS['border'] = 10
cfg.PARAMS['use_multiprocessing'] = True
cfg.PARAMS['continue_on_error'] = True
# Module logger
log = logging.getLogger('.'.join(__name__.split('.')[:-1]))

2024-08-20 11:34:47: oggm.cfg: Reading default parameters from the OGGM `params.cfg` configuration file.
2024-08-20 11:34:47: oggm.cfg: Multiprocessing switched OFF according to the parameter file.
2024-08-20 11:34:47: oggm.cfg: Multiprocessing: using all available processors (N=32)
2024-08-20 11:34:47: oggm.cfg: PARAMS['border'] changed from `80` to `10`.
2024-08-20 11:34:47: oggm.cfg: Multiprocessing switched ON after user settings.
2024-08-20 11:34:47: oggm.cfg: PARAMS['continue_on_error'] changed from `False` to `True`.


## Download OGGM data:

In [2]:
# set working directory
working_dir = '../../../data/OGGM/'
cfg.PATHS['working_dir'] = working_dir

# Set RGI version and region:
rgi_region = "11"  # Central Europe
rgi_version = "6"
rgi_dir = utils.get_rgi_dir(version=rgi_version)

In [3]:
path = utils.get_rgi_region_file(region=rgi_region, version=rgi_version)
rgidf = gpd.read_file(path)

# We use the directories with the shop data in it: "W5E5_w_data"
base_url = "https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L3-L5_files/2023.1/elev_bands/W5E5_w_data/"
gdirs = workflow.init_glacier_directories(
    rgidf,
    from_prepro_level=3,
    prepro_base_url=base_url,
    prepro_border=10,
    reset=True,
    force=True,
)

2024-08-20 11:34:48: oggm.workflow: init_glacier_directories from prepro level 3 on 3927 glaciers.
2024-08-20 11:34:49: oggm.workflow: Execute entity tasks [gdir_from_prepro] on 3927 glaciers


In [4]:
# Tested tasks
task_list = [
    tasks.gridded_attributes,
    tasks.gridded_mb_attributes,
    # get_gridded_features,
]
for task in task_list:
    workflow.execute_entity_task(task, gdirs, print_log=False)

2024-08-20 11:35:14: oggm.workflow: Execute entity tasks [gridded_attributes] on 3927 glaciers
2024-08-20 11:35:31: oggm.workflow: Execute entity tasks [gridded_mb_attributes] on 3927 glaciers


In [34]:
# Load PMB data:
path_PMB_GLAMOS_csv = '../../../data/GLAMOS/point/csv/'
df_pmb = pd.read_csv(path_PMB_GLAMOS_csv + 'df_pmb_60s_clean.csv')
df_pmb.head(3)

Unnamed: 0,YEAR,POINT_ID,GLACIER,FROM_DATE,TO_DATE,POINT_LAT,POINT_LON,POINT_ELEVATION,POINT_BALANCE,PERIOD,RGIId
0,2006,100,adler,20051017,20061011,46.010637,7.855896,3096.507742,-2592,annual,RGI60-11.02764
1,2006,200,adler,20051017,20061011,46.010052,7.858628,3141.50652,-2502,annual,RGI60-11.02764
2,2006,300,adler,20051017,20061011,46.010646,7.860957,3191.502735,-2592,annual,RGI60-11.02764


In [35]:
# variables of interest from oggm
voi = ["aspect", "slope", "dis_from_border", "topo"]

# Initialise empty:
for var in voi:
    df_pmb[var] = np.nan
    
for i, row in tqdm(enumerate(df_pmb.iterrows()), desc='rows'):
    # Get info of that sample:
    RGI = df_pmb.iloc[i].RGIId
    POINT_LAT, POINT_LON = df_pmb.iloc[i].POINT_LAT, df_pmb.iloc[i].POINT_LON

    # Get oggm data for that RGI ID
    for gdir in gdirs:
        if gdir.rgi_id == RGI:
            break
    # gdir = find_gdir(gdirs, RGI)
        
    with xr.open_dataset(gdir.get_filepath("gridded_data")) as ds:
        ds = ds.load()

    # Transform stake coord to glacier system:
    transf = pyproj.Transformer.from_proj(salem.wgs84,
                                          gdir.grid.proj,
                                          always_xy=True)
    x_stake, y_stake = transf.transform(POINT_LON, POINT_LAT)  # x,y stake

    # Get glacier variables closest to these coordinates:
    stake = ds.sel(x=x_stake, y=y_stake, method="nearest")

    # Calculate min, max and median topography of glacier:
    # min_glacier = ds.where(ds.glacier_mask == 1).topo.min().values
    # max_glacier = ds.where(ds.glacier_mask == 1).topo.max().values
    # med_glacier = ds.where(ds.glacier_mask == 1).topo.median().values
    
    # Select variables of interest:
    stake_var = stake[voi]
    stake_var_df = stake_var.to_pandas()

    for var in stake_var_df.index:
        df_pmb.at[i, var] = stake_var_df.loc[var]
        
df_pmb.head()


rows: 9809it [02:03, 79.23it/s]


Unnamed: 0,YEAR,POINT_ID,GLACIER,FROM_DATE,TO_DATE,POINT_LAT,POINT_LON,POINT_ELEVATION,POINT_BALANCE,PERIOD,RGIId,aspect,slope,dis_from_border,topo
0,2006,100,adler,20051017,20061011,46.010637,7.855896,3096.507742,-2592,annual,RGI60-11.02764,4.441578,0.340053,120.0,3039.0
1,2006,200,adler,20051017,20061011,46.010052,7.858628,3141.50652,-2502,annual,RGI60-11.02764,4.772892,0.219036,30.0,3091.0
2,2006,300,adler,20051017,20061011,46.010646,7.860957,3191.502735,-2592,annual,RGI60-11.02764,4.480725,0.306125,150.0,3143.0
3,2007,100,adler,20061011,20071009,46.010602,7.855728,3093.507997,-2250,annual,RGI60-11.02764,4.399832,0.355558,108.166542,3026.0
4,2007,200,adler,20061011,20071009,46.010169,7.858461,3124.50639,-1836,annual,RGI60-11.02764,4.717781,0.213309,60.0,3084.0


In [39]:
# Save to csv:
df_pmb.to_csv(path_PMB_GLAMOS_csv + 'CH_wgms_dataset.csv', index=False)
df_pmb.head(3)

Unnamed: 0,YEAR,POINT_ID,GLACIER,FROM_DATE,TO_DATE,POINT_LAT,POINT_LON,POINT_ELEVATION,POINT_BALANCE,PERIOD,RGIId,aspect,slope,dis_from_border,topo
0,2006,100,adler,20051017,20061011,46.010637,7.855896,3096.507742,-2592,annual,RGI60-11.02764,4.441578,0.340053,120.0,3039.0
1,2006,200,adler,20051017,20061011,46.010052,7.858628,3141.50652,-2502,annual,RGI60-11.02764,4.772892,0.219036,30.0,3091.0
2,2006,300,adler,20051017,20061011,46.010646,7.860957,3191.502735,-2592,annual,RGI60-11.02764,4.480725,0.306125,150.0,3143.0
