## Setting Up:

In [4]:
import pandas as pd
import os
import warnings
from tqdm.notebook import tqdm
import re
import massbalancemachine as mbm
import geopandas as gpd
from shapely.geometry import Polygon, LineString, Point

from scripts.helpers import *
from scripts.glamos_preprocess import *

# Silence every Python warning for the rest of the session. Note that this is
# a blanket filter: it also hides pandas warnings (e.g. SettingWithCopyWarning)
# and deprecation notices raised by any later cell.
warnings.filterwarnings('ignore')
# IPython autoreload in mode 2: re-import modules before each cell executes,
# so edits to the local `scripts` modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
# Read the GLAMOS point mass-balance table (CSV with topographic columns).
pmb_csv_file = path_PMB_GLAMOS_csv + 'CH_wgms_dataset_topo.csv'
df_pm = pd.read_csv(pmb_csv_file)

# Keep measurements from 1961 onwards only, ordered by glacier and then year.
is_after_1960 = df_pm.YEAR > 1960
df_pm = df_pm[is_after_1960].sort_values(by=['GLACIER', 'YEAR'],
                                         ascending=[True, True])

# Summarise how many rows belong to each measurement period.
n_total = len(df_pm)
n_annual = int((df_pm.PERIOD == 'annual').sum())
n_winter = int((df_pm.PERIOD == 'winter').sum())
print('Number of winter and annual samples:', n_total)
print('Number of annual samples:', n_annual)
print('Number of winter samples:', n_winter)
df_pm.head()

Number of winter and annual samples: 10918
Number of annual samples: 9063
Number of winter samples: 1855


Unnamed: 0,YEAR,POINT_ID,GLACIER,FROM_DATE,TO_DATE,POINT_LAT,POINT_LON,POINT_ELEVATION,POINT_BALANCE,PERIOD,RGIId,aspect,slope,dis_from_border,topo
5524,2006,100,adler,2005-10-17,2006-10-11,46.010637,7.855896,3096.507742,-2592,annual,RGI60-11.02764,,,,
5525,2006,200,adler,2005-10-17,2006-10-11,46.010052,7.858628,3141.50652,-2502,annual,RGI60-11.02764,,,,
5526,2006,300,adler,2005-10-17,2006-10-11,46.010646,7.860957,3191.502735,-2592,annual,RGI60-11.02764,,,,
5527,2007,100,adler,2006-10-11,2007-10-09,46.010602,7.855728,3093.507997,-2250,annual,RGI60-11.02764,,,,
5528,2007,200,adler,2006-10-11,2007-10-09,46.010169,7.858461,3124.50639,-1836,annual,RGI60-11.02764,,,,


In [26]:
# Map every glacier to the array of its stake IDs (POINT_ID).
# Rows per (glacier, stake) pair are counted first and the table is sorted by
# that count in descending order, so each glacier's stakes come out ordered
# from the most to the fewest rows.
rows_per_stake = df_pm.groupby(['GLACIER', 'POINT_ID']).size()
groupedByGL = rows_per_stake.reset_index().sort_values(by=0, ascending=False)

glStakes = {}
for gl in pd.unique(df_pm['GLACIER']):
    belongs_to_gl = groupedByGL['GLACIER'] == gl
    glStakes[gl] = groupedByGL.loc[belongs_to_gl, 'POINT_ID'].unique()
glStakes

{'adler': array(['200', '400', '300', '600', '100', 's3-037', '600o', 'Ag-200',
        'Ag-400', 'Ag-600', 'FS193', 's3-256', '071', '059', '051', '038',
        '037', 'gpr1433', '200o', '079', 's3-25', 's4-23', 's4-32',
        's3-017', 's3-11', 's3-18', 's4-42'], dtype=object),
 'aletsch': array(['P3', 'P5', 'P9', 'P10', 'P11', 'PXX', 'EGIG', 'F', '76253', 'X',
        'G', '76353', 'AAA', 'P', '72254', 'AT', 'AR', 'BBB', '76454', 'J',
        'CCC', '77651', '76453', '72354', '77650', '6927', '75652',
        '72454', 'Z_61', 'R_64', 'AA', 'P9_59', 'W', 'T_61', 'DD', 'PP',
        'LL_69', 'G_64', 'EEE', 'CC', 'AV', 'Z', 'T', 'QQ_61', 'AS',
        '76452', 'R', '72253', 'V_61', 'Y_64', 'DDD', '6408', 'C', 'K',
        'AQ', 'W_61', 'U_61', '75651', '73650', '72453', '73653', 'J_57',
        '76352', 'H_IV', '72452', '73651', '73652', 'S_61', '6926',
        '75650', 'O_63', '73751', '77751', 'UU', 'V', 'D', 'SS', 'HH_69',
        '72252', '72251', 'Y', 'RR', 'P_64', 'V_69', 'C_6

In [50]:
# Get stakes per elevation bands:
N_bands = 10

# Work on an explicit copy of the Aletsch rows. Assigning a new column to a
# boolean-mask slice of df_pm is chained assignment (SettingWithCopyWarning);
# copying makes df_aletsch an independent frame and leaves df_pm untouched.
df_aletsch = df_pm[df_pm.GLACIER == 'aletsch'].copy()

# Split the stake elevations into N_bands equal-width intervals spanning the
# observed elevation range, labelled EL_0 (lowest) ... EL_{N_bands-1} (highest).
band_labels = [f'EL_{i}' for i in range(N_bands)]
df_aletsch['EL_BIN'] = pd.cut(df_aletsch.POINT_ELEVATION,
                              N_bands,
                              precision=0,
                              labels=band_labels)

# save aletsch coordinates
# (the DataFrame index is written as the first, unnamed CSV column)
aletsch_columns = ['GLACIER', 'POINT_ID', 'POINT_LAT', 'POINT_LON',
                   'PERIOD', 'YEAR', 'EL_BIN']
df_aletsch[aletsch_columns].to_csv(path_PMB_GLAMOS_csv + 'coordinates_aletsch.csv')