# Convert the Norway dataset to the standard MassBalanceMachine (MBM) format

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import datetime as dt
import massbalancemachine as mbm

In [None]:
# Directory and file name of the raw Norway stake dataset
filepath = 'C:/Users/kasj/MassBalanceMachine/regions/Norway/data/'
filename = '2023-08-28_stake_mb_norway_cleaned_ids_latlon_wattributes_climate.csv'

# Load the raw CSV into a DataFrame
data = pd.read_csv(f'{filepath}{filename}')

In [None]:
# Columns of the raw dataset that are not needed for MBM
drop_cols = [
    'GLIMSID', 'BREID',
    'utm_zone', 'utm_east_approx', 'utm_north_approx', 'altitude_approx',
    'location_description', 'location_id', 'stake_no',
    'utm_east', 'utm_north',
    'stake_remark', 'flag_correction',
    'approx_loc', 'approx_altitude',
    'diff_north', 'diff_east', 'diff_altitude', 'diff_netto',
    'lat_approx', 'lon_approx',
    'topo', 'slope_factor', 'dis_from_border',
]

# Remove them; every listed column must exist, otherwise drop() raises
data = data.drop(labels=drop_cols, axis='columns')

In [None]:
# Mapping from the raw column names to the WGMS-format names
wgms_column_names = {
    'RGIID': 'RGIId',
    'altitude': 'POINT_ELEVATION',
    'lat': 'POINT_LAT',
    'lon': 'POINT_LON',
    'altitude_climate': 'ALTITUDE_CLIMATE',
}
data = data.rename(columns=wgms_column_names)

In [None]:
# Get the hydrological year as the calendar year of 'dt_curr_year_max_date'.
# The dates are parsed from strings formatted as DD.MM.YYYY HH:MM and the
# year is stored with the nullable integer dtype 'Int64'.
curr_year_max_date = pd.to_datetime(
    data['dt_curr_year_max_date'].astype('string'),
    format="%d.%m.%Y %H:%M",
)
data['YEAR'] = curr_year_max_date.dt.year.astype('Int64')

In [None]:
# Number the rows with a running measurement id, so that the annual, summer
# and winter balances taken from the same row can be matched up later
data['MEASUREMENT_ID'] = np.arange(len(data))

In [None]:
# Difference between the climate-data altitude and the point elevation
data["ELEVATION_DIFFERENCE"] = data["ALTITUDE_CLIMATE"].sub(data["POINT_ELEVATION"])
# Show the DataFrame as the cell output
data

In [None]:
# Build one DataFrame per balance type (annual, winter, summer), each keeping
# only the rows where that balance is not missing
data_annual = data.dropna(subset=['balance_netto'])
data_winter = data.dropna(subset=['balance_winter'])
data_summer = data.dropna(subset=['balance_summer'])

In [None]:
# Align dataset names: for each balance type, drop the two other balance
# columns and the one date column that is not used for its period, then
# rename the remaining balance/date columns to POINT_BALANCE, FROM_DATE, TO_DATE.

# Annual: previous-year minimum date -> current-year minimum date
data_annual = (
    data_annual
    .drop(columns=['balance_winter', 'balance_summer', 'dt_curr_year_max_date'])
    .rename(columns={'balance_netto':'POINT_BALANCE','dt_prev_year_min_date':'FROM_DATE','dt_curr_year_min_date':'TO_DATE'})
)

# Winter: previous-year minimum date -> current-year maximum date
data_winter = (
    data_winter
    .drop(columns=['balance_summer', 'balance_netto', 'dt_curr_year_min_date'])
    .rename(columns={'balance_winter':'POINT_BALANCE','dt_prev_year_min_date':'FROM_DATE','dt_curr_year_max_date':'TO_DATE'})
)

# Summer: current-year maximum date -> current-year minimum date
data_summer = (
    data_summer
    .drop(columns=['balance_winter', 'balance_netto', 'dt_prev_year_min_date'])
    .rename(columns={'balance_summer':'POINT_BALANCE', 'dt_curr_year_max_date':'FROM_DATE', 'dt_curr_year_min_date':'TO_DATE'})
)

In [None]:
# Show the annual mass balance DataFrame as the cell output
data_annual


In [None]:
# Convert FROM_DATE and TO_DATE in all three datasets from day-first date
# strings (DD.MM.YYYY HH:MM) to 'YYYYMMDD' strings.
# Each column is first parsed to datetime (dayfirst=True so that the leading
# number is read as the day) and then formatted back to a compact string.
for stake_df in (data_annual, data_summer, data_winter):
    for date_col in ('FROM_DATE', 'TO_DATE'):
        parsed_dates = pd.to_datetime(stake_df[date_col], dayfirst=True)
        stake_df[date_col] = parsed_dates.dt.strftime('%Y%m%d')

In [None]:
#data_annual['POINT_ELEVATION'] = data_annual['POINT_ELEVATION'].astype(int)
#data_winter['POINT_ELEVATION'] = data_winter['POINT_ELEVATION'].astype(int)
#data_summer['POINT_ELEVATION'] = data_summer['POINT_ELEVATION'].astype(int)

In [None]:
# Label every row with the balance code of its dataset:
# 'BW' for winter, 'BS' for summer, 'BA' for annual
data_winter = data_winter.assign(BALANCE_CODE='BW')
data_summer = data_summer.assign(BALANCE_CODE='BS')
data_annual = data_annual.assign(BALANCE_CODE='BA')

In [None]:
# Stack the annual, winter and summer rows (in that order) into one DataFrame;
# each row keeps the index label it had in its source DataFrame
balance_frames = [data_annual, data_winter, data_summer]
data_cleaned = pd.concat(balance_frames)

In [None]:
# Give every row of the combined dataset a running point id (0, 1, 2, ...)
data_cleaned['POINT_ID'] = np.arange(len(data_cleaned))

In [None]:
# Replace the index with a fresh 0..n-1 range; the previous index labels are
# not dropped but kept as a regular column
data_cleaned = data_cleaned.reset_index()
# Show the combined DataFrame as the cell output
data_cleaned

In [None]:
# Output directory and file name for the converted dataset
filepath = 'C:/Users/kasj/MassBalanceMachine/regions/Norway/data/'
filename_save = '2024-09-11_stake_dataset_Norway.csv'

# Write the converted DataFrame to CSV (the DataFrame index is written as well)
data_cleaned.to_csv(f'{filepath}{filename_save}')