# OGGM - data pulling

## Setting up:

In [3]:
# Shell escape: recursively delete the `per_glacier` directory (relative to the
# current directory) if it exists.
# NOTE(review): presumably this is OGGM output left by a previous run — confirm.
! rm -rf per_glacier 

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import oggm

from oggm import cfg, utils, workflow, tasks, graphics
from oggm import entity_task, global_tasks
from oggm.utils import compile_climate_input
from oggm.core import gis
from oggm.utils import DEM_SOURCES
from pathlib import Path
from os import listdir
from os.path import isfile, join
import pyproj
from pyproj import Transformer
import salem
from tqdm import tqdm 

import os
import logging
from datetime import datetime, timedelta

In [None]:
from oggm import cfg, utils, workflow, tasks, graphics
from oggm import entity_task, global_tasks
from oggm.utils import compile_climate_input
from oggm.core import gis
from oggm.utils import DEM_SOURCES

from pathlib import Path
import os
import logging
from datetime import datetime, timedelta

# Set up OGGM's configuration with the logging level set to WARNING.
cfg.initialize(logging_level='WARNING')
# Same value (10) as the `prepro_border` argument passed to
# `workflow.init_glacier_directories` further down in this notebook.
cfg.PARAMS['border'] = 10
cfg.PARAMS['use_multiprocessing'] = True 
# NOTE(review): presumably lets a task that fails on one glacier be skipped
# rather than aborting the whole run — confirm against the OGGM docs.
cfg.PARAMS['continue_on_error'] = True
# Module logger
# The logger name is `__name__` with its last dotted component removed. When
# `__name__` contains no dot (e.g. '__main__'), the name is the empty string,
# for which `logging.getLogger` returns the root logger.
log = logging.getLogger('.'.join(__name__.split('.')[:-1]))

## Download OGGM data:

In [None]:
# set working directory
# '.' is a relative path, so it refers to whatever directory the notebook is
# run from.
cfg.PATHS['working_dir'] = '.'

In [None]:
def decimal_to_datetime(ds):
    """Replace the decimal-year ``time`` values of ``ds`` with datetimes.

    Each value of ``ds.time.data`` is read as a decimal year (e.g. ``2000.5``).
    The integer part selects the calendar year and the fractional part is
    applied to the actual length of that year, so leap years are handled.

    Parameters
    ----------
    ds :
        Object exposing ``ds.time.data`` (iterable of decimal years) and
        supporting ``ds['time'] = ...``.

    Returns
    -------
    The same ``ds`` object, with ``ds['time']`` set to a list of
    ``datetime.datetime`` values (``ds`` is modified in place).
    """
    datetimes = []
    for dec_year in ds.time.data:
        # Plain Python float, so `timedelta` does not depend on the numpy
        # scalar type of the input.
        dec_year = float(dec_year)
        year = int(dec_year)
        rem = dec_year - year

        base = datetime(year, 1, 1)
        # Length of this particular year (365 or 366 days) in seconds.
        year_seconds = (datetime(year + 1, 1, 1) - base).total_seconds()
        datetimes.append(base + timedelta(seconds=year_seconds * rem))

    # Write to (and return) the dataset that was passed in, not an unrelated
    # global name.
    ds['time'] = datetimes

    return ds

In [None]:
@entity_task(log)
def get_gridded_features(gdir):
    """Assemble the per-glacier feature dictionary fed to machine learning models.

    Reads the ``gridded_data`` and ``climate_historical`` files of ``gdir``,
    restricts the climate series to 2000-01-01 .. 2019-12-01, spreads the
    temperature and precipitation series over the glacier grid and returns
    them together with a selection of gridded fields.

    Parameters
    ----------
    gdir :
        Glacier directory; this function uses ``gdir.get_filepath`` and
        ``gdir.rgi_id``.

    Returns
    -------
    dict
        Keys ``PDD_2D``, ``rain_2D``, ``snow_2D``, ``topo``, ``aspect``,
        ``slope``, ``dis_from_border``, ``glacier_mask``,
        ``millan_ice_thickness``, ``hugonnet_dhdt`` (arrays) and ``ID``
        (``gdir.rgi_id``).
    """
    # Load both files fully so nothing depends on the open file handles.
    with xr.open_dataset(gdir.get_filepath("gridded_data")) as handle:
        gridded_ds = handle.load()
    with xr.open_dataset(gdir.get_filepath("climate_historical")) as handle:
        climate_ds = handle.load()

    #### Climate data ####
    # Keep only the period of the Hugonnet et al. (2021) dataset.
    climate_ds = climate_ds.sel(time=slice("2000-01-01", "2019-12-01"))

    # --- Temperature: one grid per time step ---
    n_temp_steps = climate_ds.temp.data.size
    n_rows = gridded_ds.topo.data.shape[0]
    n_cols = gridded_ds.topo.data.shape[1]
    temps_2D = np.empty((n_temp_steps, n_rows, n_cols))
    for step, temp in enumerate(climate_ds.temp.data):
        uniform_temp = np.tile(temp, gridded_ds.topo.data.shape)
        # NOTE(review): the elevation term is ADDED, i.e. cells above ref_hgt
        # get a higher temperature (+6.0 per 1000 units of topo) — confirm the
        # sign is intended.
        elevation_term = 6.0 / 1000.0 * (
            gridded_ds.topo.data - climate_ds.ref_hgt.data
        )
        temps_2D[step, :, :] = uniform_temp + elevation_term

    # Sum of the positive temperatures over the time axis.
    positive_temps = np.where(temps_2D > 0.0, temps_2D, 0.0)
    PDD_2D = positive_temps.sum(axis=0)

    # --- Precipitation: the same value on every cell of a time step ---
    n_prcp_steps = climate_ds.prcp.data.size
    prcp_stack = np.empty((n_prcp_steps, n_rows, n_cols))
    for step, prcp in enumerate(climate_ds.prcp.data):
        prcp_stack[step, :, :] = np.tile(prcp, gridded_ds.topo.data.shape)

    rain_2D = prcp_stack.sum(axis=0)

    # --- Snow: precipitation wherever the cell temperature is below zero ---
    # NOTE(review): unlike PDD_2D and rain_2D this is not summed over time, so
    # it keeps the (time, y, x) shape despite its name — confirm this is wanted.
    snow_2D = np.where(temps_2D < 0.0, prcp_stack, 0.0)

    # Full dataset: the climate-derived grids first, then the gridded fields.
    training_data = {
        "PDD_2D": PDD_2D,
        "rain_2D": rain_2D,
        "snow_2D": snow_2D,
    }
    gridded_fields = (
        "topo",
        "aspect",
        "slope",
        "dis_from_border",
        "glacier_mask",
        "millan_ice_thickness",
        "hugonnet_dhdt",
    )
    for field in gridded_fields:
        training_data[field] = getattr(gridded_ds, field).data
    training_data["ID"] = gdir.rgi_id

    print(gridded_ds.keys())

    return training_data

### Set RGI version and region:

In [None]:
# RGI region and version used for every lookup below.
rgi_region = "11"  # Central Europe
rgi_version = "6"
# NOTE(review): `rgi_dir` is not used anywhere else in the visible notebook.
rgi_dir = utils.get_rgi_dir(version=rgi_version)

In [7]:

# Locate the RGI file for the chosen region/version and read it with geopandas.
path = utils.get_rgi_region_file(region=rgi_region, version=rgi_version)
rgidf = gpd.read_file(path)

# We use the directories with the shop data in it: "W5E5_w_data"
base_url = "https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L3-L5_files/2023.1/elev_bands/W5E5_w_data/"
# Glacier directories for all entries of `rgidf`, started from the
# pre-processed level 3 data at `base_url`. `prepro_border=10` matches
# cfg.PARAMS['border'] set earlier in this notebook.
# NOTE(review): `reset=True, force=True` presumably wipes any existing
# directories without asking — confirm before running on valuable data.
gdirs = workflow.init_glacier_directories(
    rgidf,
    from_prepro_level=3,
    prepro_base_url=base_url,
    prepro_border=10,
    reset=True,
    force=True,
)

KeysView(<xarray.Dataset>
Dimensions:                  (x: 69, y: 95)
Coordinates:
  * x                        (x) float32 -1.026e+03 -1.002e+03 ... 581.8 605.8
  * y                        (y) float32 5.031e+06 5.031e+06 ... 5.029e+06
Data variables: (12/20)
    topo                     (y, x) float32 2.228e+03 2.225e+03 ... 3.35e+03
    topo_smoothed            (y, x) float32 2.235e+03 2.235e+03 ... 3.317e+03
    topo_valid_mask          (y, x) int8 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1
    glacier_mask             (y, x) int8 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0
    glacier_ext              (y, x) int8 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0
    consensus_ice_thickness  (y, x) float32 nan nan nan nan ... nan nan nan nan
    ...                       ...
    aspect                   (y, x) float32 0.1924 0.234 0.2315 ... 5.341 5.51
    slope_factor             (y, x) float32 3.872 3.872 3.872 ... 2.172 2.438
    dis_from_border          (y, x) float32 687.7 666.8 646.2 ... 596.

In [None]:
# Tested tasks
# The tasks are applied in list order, each one to all glacier directories in
# `gdirs`; `get_gridded_features` is the entity task defined in this notebook.
task_list = [
    tasks.gridded_attributes,
    tasks.gridded_mb_attributes,
    get_gridded_features,
]
for task in task_list:
    # NOTE(review): the value returned by execute_entity_task is not stored, so
    # the dictionaries returned by get_gridded_features are discarded here.
    workflow.execute_entity_task(task, gdirs, print_log=False)

## Get attributes from all stakes in Switzerland:

### Add OGGM data to Swiss stakes:

In [None]:
# variables of interest from oggm
# These names are used below to subset the grid cell nearest to each stake
# (`stake[voi]`).
voi = ["aspect", "slope", "dis_from_border", "topo"]

#### Single stakes:

In [None]:
# Add additional information to all stakes:
# For every stake file, look up the glacier directory with the stake's RGI ID,
# take the gridded values of the cell nearest to the stake, add the glacier's
# min/max/median elevation and write the enriched table to `path_save`.
path_latloncoord = (
    "../../../data/MB_modeling/GLAMOS/index/csv_files/massbalance/WGSlatloncoord/"
)
path_save = "../../../data/MB_modeling/GLAMOS/index/csv_files/massbalance/glacierattr/"
# First empty folder
emptyfolder(path_save)

# Columns of interest read from every stake file; identical for all files, so
# defined once rather than on every iteration.
coi = [
    "vaw_id",
    "sgi_id",
    "rgi_id",
    "glims_id",
    "date_fix0",
    "date_fix1",
    "date0",
    "date1",
    "date_smeas",
    "lat",
    "lon",
    "height",
    "b_a_fix",
    "b_w_fix",
]

# Index the glacier directories by RGI ID (first occurrence wins) so that a
# missing ID is detected instead of silently falling back to the last entry
# of `gdirs`.
gdir_by_rgi_id = {}
for candidate in gdirs:
    gdir_by_rgi_id.setdefault(candidate.rgi_id, candidate)

for key, stake_files in tqdm(glStakes.items()):
    for fileName in stake_files:
        df_stake = read_stake_csv(path_latloncoord, fileName, coi)

        # coordinates of stake:
        lat_stake = df_stake.lat.unique()[0]
        lon_stake = df_stake.lon.unique()[0]

        # RGI ID of stake
        rgi_id_stake = df_stake.rgi_id.iloc[0]

        # get oggm data for that RGI ID
        matching_gdir = gdir_by_rgi_id.get(rgi_id_stake)
        if matching_gdir is None:
            # Writing another glacier's attributes would corrupt the dataset,
            # so the file is skipped and reported instead.
            log.warning(
                "No glacier directory found for RGI ID %s; skipping %s",
                rgi_id_stake,
                fileName,
            )
            continue
        gdir = matching_gdir
        with xr.open_dataset(gdir.get_filepath("gridded_data")) as ds:
            ds = ds.load()

        # transform stake coord to glacier system:
        transf = pyproj.Transformer.from_proj(salem.wgs84,
                                              gdir.grid.proj,
                                              always_xy=True)
        x_stake, y_stake = transf.transform(lon_stake, lat_stake)  # x,y stake

        # Get glacier variables closest to these coordinates:
        stake = ds.sel(x=x_stake, y=y_stake, method="nearest")

        # Calculate min, max and median topography of glacier
        # (cells where glacier_mask == 1); the masked field is built once.
        glacier_topo = ds.where(ds.glacier_mask == 1).topo
        min_glacier = glacier_topo.min().values
        max_glacier = glacier_topo.max().values
        med_glacier = glacier_topo.median().values

        # Select variables of interest:
        stake_var = stake[voi]
        stake_var_df = stake_var.to_pandas()

        stake_var_df['min_el_gl'] = min_glacier
        stake_var_df['max_el_gl'] = max_glacier
        stake_var_df['med_el_gl'] = med_glacier

        # Every row of the stake table gets the same glacier attributes.
        n_rows = len(df_stake)
        for var in stake_var_df.index:
            df_stake[var] = [stake_var_df.loc[var]] * n_rows
        df_stake.to_csv(path_save + fileName)

#### Multi stakes:

In [None]:
# Add additional information to all stakes:
# For every stake file, look up the glacier directory with the stake's RGI ID,
# take the gridded values of the cell nearest to the stake, add the glacier's
# min/max/median elevation and write the enriched table to `path_save`.
path_latloncoord = (
    "../../../data/MB_modeling_multi/GLAMOS/index_time/csv_files/WGSlatloncoord/"
)
path_save = "../../../data/MB_modeling_multi/GLAMOS/index_time/csv_files/glacierattr/"
# First empty folder
emptyfolder(path_save)

# Columns of interest read from every stake file; identical for all files, so
# defined once rather than on every iteration.
coi = [
    "vaw_id",
    "sgi_id",
    "rgi_id",
    "glims_id",
    "date_fix0",
    "date_fix1",
    "date0",
    "date1",
    "date_smeas",
    "lat",
    "lon",
    "height",
    "b_a_fix",
    "b_w_fix",
]

# Index the glacier directories by RGI ID (first occurrence wins) so that a
# missing ID is detected instead of silently falling back to the last entry
# of `gdirs`.
gdir_by_rgi_id = {}
for candidate in gdirs:
    gdir_by_rgi_id.setdefault(candidate.rgi_id, candidate)

for key, stake_files in tqdm(glStakes.items()):
    for fileName in stake_files:
        df_stake = read_stake_csv(path_latloncoord, fileName, coi)

        # coordinates of stake:
        lat_stake = df_stake.lat.unique()[0]
        lon_stake = df_stake.lon.unique()[0]

        # RGI ID of stake
        rgi_id_stake = df_stake.rgi_id.iloc[0]

        # get oggm data for that RGI ID
        matching_gdir = gdir_by_rgi_id.get(rgi_id_stake)
        if matching_gdir is None:
            # Writing another glacier's attributes would corrupt the dataset,
            # so the file is skipped and reported instead.
            log.warning(
                "No glacier directory found for RGI ID %s; skipping %s",
                rgi_id_stake,
                fileName,
            )
            continue
        gdir = matching_gdir
        with xr.open_dataset(gdir.get_filepath("gridded_data")) as ds:
            ds = ds.load()

        # transform stake coord to glacier system:
        transf = pyproj.Transformer.from_proj(
            salem.wgs84, gdir.grid.proj, always_xy=True
        )
        x_stake, y_stake = transf.transform(lon_stake, lat_stake)  # x,y stake

        # Get glacier variables closest to these coordinates:
        stake = ds.sel(x=x_stake, y=y_stake, method="nearest")

        # Calculate min, max and median topography of glacier
        # (cells where glacier_mask == 1); the masked field is built once.
        glacier_topo = ds.where(ds.glacier_mask == 1).topo
        min_glacier = glacier_topo.min().values
        max_glacier = glacier_topo.max().values
        med_glacier = glacier_topo.median().values

        # Select variables of interest:
        stake_var = stake[voi]
        stake_var_df = stake_var.to_pandas()

        stake_var_df['min_el_gl'] = min_glacier
        stake_var_df['max_el_gl'] = max_glacier
        stake_var_df['med_el_gl'] = med_glacier

        # Every row of the stake table gets the same glacier attributes.
        n_rows = len(df_stake)
        for var in stake_var_df.index:
            df_stake[var] = [stake_var_df.loc[var]] * n_rows
        df_stake.to_csv(path_save + fileName)

In [None]:
# Display the attribute table most recently assigned to `stake_var_df` by the
# stake loops above.
stake_var_df

In [None]:
# Plot `topo` of the dataset currently held in `ds`, keeping only the cells
# where glacier_mask == 1.
ds.where(ds.glacier_mask == 1).topo.plot()

In [None]:
# Load one stake file and show the gridded dataset of its glacier.
fileName = 'pers_P24_mb.csv'
df_stake = read_stake_csv(path_latloncoord, fileName, coi)
rgi_id_stake = df_stake.rgi_id.iloc[0]
print(rgi_id_stake)

# First glacier directory with this RGI ID. A missing ID raises instead of
# silently falling back to the last entry of `gdirs`.
matching_gdir = next(
    (candidate for candidate in gdirs if candidate.rgi_id == rgi_id_stake), None
)
if matching_gdir is None:
    raise ValueError(f"No glacier directory found for RGI ID {rgi_id_stake}")
gdir = matching_gdir
with xr.open_dataset(gdir.get_filepath("gridded_data")) as ds:
    ds = ds.load()
ds

In [None]:
# Preview the first rows of the stake table currently held in `df_stake`.
df_stake.head()

#### Example of Aletsch:

In [None]:
# Example Aletsch
# Get coordinates and time of file for this stake:
fileName = "aletsch_P0_mb.csv"
# The first entry is "vaw_id", as in the column lists of the batch cells; no
# column name appears twice.
coi = [
    "vaw_id",
    "sgi_id",
    "rgi_id",
    "glims_id",
    "date_fix0",
    "date_fix1",
    "date0",
    "date1",
    "lat",
    "lon",
    "height",
    "b_a_fix",
    "b_w_fix"
]
df_stake = read_stake_csv(path_latloncoord, fileName, coi)

# Derive position and RGI ID from THIS stake file; otherwise the values left
# over from whichever cell ran before would be used.
lat_stake = df_stake.lat.unique()[0]
lon_stake = df_stake.lon.unique()[0]
rgi_id_stake = df_stake.rgi_id.iloc[0]

print(lat_stake, lon_stake, rgi_id_stake)

# First glacier directory with this RGI ID. A missing ID raises instead of
# silently falling back to the last entry of `gdirs`.
matching_gdir = next(
    (candidate for candidate in gdirs if candidate.rgi_id == rgi_id_stake), None
)
if matching_gdir is None:
    raise ValueError(f"No glacier directory found for RGI ID {rgi_id_stake}")
gdir = matching_gdir
with xr.open_dataset(gdir.get_filepath("gridded_data")) as ds:
    ds = ds.load()

# transform stake coord to glacier system:
transf = pyproj.Transformer.from_proj(salem.wgs84, gdir.grid.proj, always_xy=True)
x_stake, y_stake = transf.transform(lon_stake, lat_stake)

# Get glacier variables at these coordinates:
stake = ds.sel(x=x_stake, y=y_stake, method="nearest")

# variables of interest:
voi = ["aspect", "slope", "dis_from_border", "topo"]
stake_var = stake[voi]
stake_var_df = stake_var.to_pandas()
stake_var_df