# Create dictionaries for SUMup and SNOWPACK

#### Author: Megan Thompson-Munson
#### Date created: 20 September 2021

This script reads in SUMup observation data and SNOWPACK output, reformats the data, and creates dictionaries that are saved as pickle files.

TO DO:
- Add CFM capability
- Add ```.smet``` capability
- Add more comments about input/output

## User input

In [11]:
# BEGIN

# Select ice sheet: 'AIS' or 'GrIS' (the only two values the cells below branch on)
icesheet = 'AIS'

# Give path of SNOWPACK output data
# Keep the trailing slash: file names are appended to this string by plain concatenation
path = '/projects/metm9666/snowpack/Scripts/Spinup/output/'

# END

In [12]:
import numpy as np
import pandas as pd
import xarray as xr
from datetime import datetime
import pickle
from scipy.interpolate import griddata
from os.path import exists

## 1. Read in SUMup data

In [13]:
# Load the SUMup observations pickled for the selected ice sheet.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open(icesheet+'_SUMup.p','rb') as f:
    sumup = pickle.load(f)

In [14]:
# SUMup metadata for finding closest MERRA-2 point:
# one row per SUMup entry, columns in the order ID, timestamp, latitude, longitude
suKeys = ['CoreID','Timestamp','Latitude','Longitude']
suMeta = np.empty((len(sumup),len(suKeys)))

for i in range(len(sumup)):

    suData = sumup[i]

    # Copy the four fields into row i, in column order
    for j,key in enumerate(suKeys):
        suMeta[i,j] = suData[key]

df_suMeta = pd.DataFrame(suMeta,columns=['suID','suTimestamp','suLatitude','suLongitude'])

# The timestamps were stored in a float array above; turn them back into datetimes
df_suMeta['suTimestamp'] = pd.to_datetime(df_suMeta.suTimestamp)

In [15]:
df_suMeta

Unnamed: 0,suID,suTimestamp,suLatitude,suLongitude
0,0.0,2011-12-28,-79.446800,-117.963501
1,1.0,2011-12-12,-79.347900,-116.290497
2,2.0,2011-12-24,-78.836899,-116.307098
3,3.0,2011-12-14,-78.727997,-114.732201
4,4.0,2011-12-20,-78.424301,-115.292000
...,...,...,...,...
889,1533.0,2009-02-04,-70.888000,133.285004
890,1534.0,2009-02-06,-69.827003,134.201996
891,1535.0,2009-02-08,-68.014999,136.464005
892,1536.0,2009-02-08,-67.415001,138.602005


## 2. Collect SUMup and SNOWPACK metadata

In [29]:
# Read in MERRA-2 location data (station name, latitude, longitude) for the selected ice sheet.
# The list file is named <icesheet>_station_list.lst; its first line is skipped.
df_merra2locs = pd.read_table(icesheet+'_station_list.lst',skiprows=1,sep=r'\s+',usecols=[0,3,4],names=['Station','Latitude','Longitude'])

# Extract VIRs: everything after the first 8 characters of the station name
df_merra2locs['VIR'] = df_merra2locs.Station.str[8:]

# drop() returns a new dataframe, so the result must be assigned back to take effect
df_merra2locs = df_merra2locs.drop(columns=['Station'])

# Haversine formula for calculating distance between two points on Earth
def haversine(lat1,lon1,lat2,lon2):
    """Return the great-circle distance in meters between two points.

    All four arguments are in degrees. Because only NumPy functions are
    used, scalars and array-likes are both accepted and follow NumPy
    broadcasting.
    """
    earth_radius = 6371e3 # Earth's radius in meters

    # Work in radians
    lat_a = np.deg2rad(lat1)
    lat_b = np.deg2rad(lat2)
    lon_a = np.deg2rad(lon1)
    lon_b = np.deg2rad(lon2)

    half_dlat = (lat_b-lat_a)/2
    half_dlon = (lon_b-lon_a)/2

    # Haversine term, then the central angle between the two points
    a = np.sin(half_dlat)**2 + (np.cos(lat_a)*np.cos(lat_b)*np.sin(half_dlon)**2)
    central_angle = 2*np.arctan2(np.sqrt(a),np.sqrt(1-a))

    return earth_radius*central_angle # Meters

# Function for finding closest MERRA-2 location to given SUMup location
def closest_location(sumuplat,sumuplon):
    """Return the row of df_merra2locs that is closest to a SUMup point.

    Parameters
    ----------
    sumuplat, sumuplon : float
        Latitude and longitude of the SUMup point, in degrees.

    Returns
    -------
    pandas.DataFrame
        One-row slice of the module-level ``df_merra2locs`` holding the
        location with the smallest haversine distance to the point. If
        several locations are equally close, the first one is returned.

    Raises
    ------
    ValueError
        If ``df_merra2locs`` has no rows.
    """
    # haversine only uses NumPy functions, so all distances are computed in one call
    distance = haversine(sumuplat,sumuplon,
                         df_merra2locs.Latitude.to_numpy(),
                         df_merra2locs.Longitude.to_numpy())

    # argmin is a position, so select with iloc; the list keeps the result a dataframe
    return df_merra2locs.iloc[[np.argmin(distance)]]

# Build one metadata row per SUMup entry: its ID, timestamp and location, followed by
# the ID and location of the closest MERRA-2 point (the 'sp' columns)
suTimes = np.array(df_suMeta.suTimestamp) # Converted once here rather than on every loop iteration

metadata = np.zeros((len(df_suMeta),7))
for i in range(len(df_suMeta)):
    metadata[i,0] = df_suMeta.suID[i]
    metadata[i,1] = suTimes[i] # Held as a float in this array; converted back below
    metadata[i,2] = df_suMeta.suLatitude[i]
    metadata[i,3] = df_suMeta.suLongitude[i]
    merra2 = closest_location(df_suMeta.suLatitude[i],df_suMeta.suLongitude[i])
    metadata[i,4] = merra2.VIR.values[0]
    metadata[i,5] = merra2.Latitude.values[0]
    metadata[i,6] = merra2.Longitude.values[0]

# Create dataframe of metadata and turn float date back into timestamp
df_meta = pd.DataFrame(metadata,columns=['suID','suTimestamp','suLatitude','suLongitude',
                                         'spID','spLatitude','spLongitude'])

df_meta['suTimestamp'] = pd.to_datetime(df_meta.suTimestamp)

# Ignore any missing files
# if icesheet == 'GrIS':
#     df_meta = df_meta[(df_meta.spID!=107)]
# if icesheet == 'AIS':
#     df_meta = df_meta[(df_meta.spID!=1378)&(df_meta.spID!=1382)&(df_meta.spID!=1395)&(df_meta.spID!=1450)&(df_meta.spID!=1463)&(df_meta.spID!=1567)&(df_meta.spID!=1568)]
df_meta.reset_index(drop=True,inplace=True)
df_meta

Unnamed: 0,suID,suTimestamp,suLatitude,suLongitude,spID,spLatitude,spLongitude
0,0.0,2011-12-28,-79.446800,-117.963501,1405.0,-79.5,-118.125
1,1.0,2011-12-12,-79.347900,-116.290497,1406.0,-79.5,-116.250
2,2.0,2011-12-24,-78.836899,-116.307098,1417.0,-79.0,-116.250
3,3.0,2011-12-14,-78.727997,-114.732201,1432.0,-78.5,-115.000
4,4.0,2011-12-20,-78.424301,-115.292000,1432.0,-78.5,-115.000
...,...,...,...,...,...,...,...
889,1533.0,2009-02-04,-70.888000,133.285004,1453.0,-78.0,153.125
890,1534.0,2009-02-06,-69.827003,134.201996,1453.0,-78.0,153.125
891,1535.0,2009-02-08,-68.014999,136.464005,1453.0,-78.0,153.125
892,1536.0,2009-02-08,-67.415001,138.602005,1453.0,-78.0,153.125


In [32]:
# Some files don't exist (model errors), so check to see which ones don't and exclude them in the dataframe

# Empty list for non-existent files
nonfiles = []

# Loop through all SNOWPACK output
for vir in df_merra2locs.VIR.values:

    # Output files are named VIR<id>_<icesheet>_SUMup.pro
    file = path+'VIR'+vir+'_'+icesheet+'_SUMup.pro'

    # If the file does not exist, add to list
    if not exists(file):
        nonfiles.append(int(vir))

# Exclude rows whose closest SNOWPACK point has no output file, so that the
# reading loop in section 3 does not fail when it tries to open a missing file.
# The index is reset because section 3 addresses rows with df_meta.loc[i].
df_meta = df_meta[~df_meta.spID.isin(nonfiles)].reset_index(drop=True)

In [33]:
nonfiles

[1378, 1382, 1395, 1450, 1463, 1567, 1568]

## 3. Read in SNOWPACK data and create raw dictionaries

In [8]:
def _pro_values(line):
    """Parse one *.pro data line into a list of floats.

    The first five characters (the 4-digit line code and the character after
    it) and the last character (the newline) are stripped, the remainder is
    split on commas, and the first value is dropped (presumably the element
    count -- confirm against the *.pro format description).
    """
    return list(map(float,line[5:-1].split(',')))[1:]


dict_list = []

for i in range(len(df_meta)):

    meta = df_meta.loc[i]

    # Both output files share the stem VIR<id>_<icesheet>_SUMup
    stem = path+'VIR'+str(int(meta.spID))+'_'+icesheet+'_SUMup'
    file = stem+'.pro'
    smet = stem+'.smet'

    ### SNOWPACK pro file ###

    # Open *.pro file; the with-block closes it even if parsing fails part-way
    with open(file,'r') as f:

        # Read in header (44 lines in length)
        for j in range(44):
            header = f.readline()
            if j == 1:
                VIR = int(header[29:-1])
            if j == 2:
                spLatitude = float(header[10:-1])
            if j == 3:
                spLongitude = float(header[11:-1])
            if j == 4:
                spElevation = float(header[9:-1])

        # Everything after the header is data
        data = f.readlines()

    spTimestamps = [] # Empty list for storing SNOWPACK timestamps

    # Read data line by line
    for line in data:
        linecode = line[0:4] # SNOWPACK gives each data type a 4-digit code

        # Extract timestamps and save in a list
        if linecode == '0500':
            spRawDate = line[5:24]
            spDate = datetime.strptime(spRawDate,'%d.%m.%Y %H:%M:%S')
            spTimestamp = pd.to_datetime(spDate)
            spTimestamps.append(spTimestamp)

    # Find SNOWPACK timestamp that's closest to the desired SUMup one (first one wins on ties)
    closest = min(spTimestamps, key=lambda sub: abs(sub - meta.suTimestamp))
    k = spTimestamps.index(closest)

    # Each timestamp has 27 elements, so this allows us to get to the start of the closest timestamp
    index = k*27

    # Extract variables of interest by splitting the lines and creating lists of the data
    height = _pro_values(data[index+1]) # Height (cm) (converted to m in dictionary)
    h = np.array(height) # Create array of height for conversion to depth
    depth = (h-h[-1])*-1 # Depth sets surface as 0
    density = _pro_values(data[index+2]) # Density (kg/m^3)
    temperature = _pro_values(data[index+3]) # Temperature (deg C)
    water = _pro_values(data[index+6]) # Water content (%)
    ice = _pro_values(data[index+14]) # Ice content (%)
    air = _pro_values(data[index+15]) # Air content (%)
    t = closest

    # Heights/depths are divided by 100 (cm to m) and contents by 100 (% to fraction)
    spDict = {'ID':meta.spID,'Timestamp':t,'Elevation':spElevation,'Latitude':spLatitude,'Longitude':spLongitude,'Height':np.array(height)/100,
              'Depth':depth/100,'Density':np.array(density),'Temperature':np.array(temperature),
              'Ice':np.array(ice)/100,'Air':np.array(air)/100,'Water':np.array(water)/100}

    ### SNOWPACK smet file ###

    # Get smet info from header
    df_smetinfo = pd.read_table(smet,skiprows=9,nrows=8,sep=r'\s+',header=None)
    df_smetinfo.reset_index(drop=True,inplace=True)
    df_smetinfo.columns = np.array(df_smetinfo.iloc[5])
    df_smetinfo = df_smetinfo.drop([2,3,5,6])
    df_smetinfo = df_smetinfo.drop(columns=['='])
    df_smetinfo.set_index('plot_description',inplace=True)

    # Read in smet file (column names come from the header info) and create arrays of relevant data
    df_smet = pd.read_table(smet,skiprows=18,sep=r'\s+',names=np.array(df_smetinfo.iloc[3]))
    smetTimestamp = np.array(pd.to_datetime(df_smet.timestamp))
    smetSnow = np.array(df_smet.MS_Snow)
    smetWind = np.array(df_smet.MS_Wind)
    smetRain = np.array(df_smet.MS_Rain)
    smetTemp = np.array(df_smet.TA)
    smetMelt = np.array(df_smet.MS_melt)
    smetEvap = np.array(df_smet.MS_Evap)
    smetSubl = np.array(df_smet.MS_Sublimation)
    smetRunoff = np.array(df_smet.MS_SN_Runoff)
    smetSWE = np.array(df_smet.SWE)

    # Create smet dictionary
    smetDict = {'ID':meta.spID,'Latitude':meta.spLatitude,'Longitude':meta.spLongitude,'Timestamp':smetTimestamp,'Temperature':smetTemp,'Snow':smetSnow,'Rain':smetRain,
                'Melt':smetMelt,'Wind':smetWind,'Evaporation':smetEvap,'Sublimation':smetSubl,'Runoff':smetRunoff,'SWE':smetSWE}

    ### SUMup ###

    # Extract SUMup dictionary for corresponding SNOWPACK data
    suDictIndex = df_suMeta[df_suMeta.suID==meta.suID].index[0]
    suDict = sumup[suDictIndex]

    # Create dictionary and append list
    dictionaries = {'MERRA-2':smetDict,'SNOWPACK':spDict,'SUMup':suDict}
    dict_list.append(dictionaries)

In [9]:
# Save the raw dictionaries; the with-block guarantees the file is flushed and closed
with open(icesheet+'_data.p','wb') as f:
    pickle.dump(dict_list, f)

## 4. Create interpolated dictionaries

In [10]:
# Reload the raw dictionaries written in section 3
with open(icesheet+'_data.p','rb') as f:
    data = pickle.load(f)

In [11]:
# List of dictionaries
dict_list = []

# Interpolate modeled density onto the depths of the observations
for i in range(len(data)):

    # Select SUMup and SNOWPACK keys
    su = data[i]['SUMup']
    sp = data[i]['SNOWPACK']

    # Define relevant variables
    sp_depth = sp['Depth']
    sp_density = sp['Density']
    sp_air = sp['Air']
    sp_water = sp['Water']
    sp_ice = sp['Ice']
    su_density = su['Density']
    su_depth = su['Midpoint']

    # Many SUMup measurements are just single point, so just regrid datasets with n > 1
    if len(su_density) > 1:

        # Interpolate modeled density onto observations (griddata interpolates linearly by default)
        su_densityInt = np.asarray(su_density)
        sp_densityInt = griddata(sp_depth,sp_density,su_depth)

        # Filter out nans; the same mask is applied to the depths so that
        # every stored Depth array lines up element-by-element with Density
        nanfilter = (~np.isnan(su_densityInt)) & (~np.isnan(sp_densityInt))
        su_densityFilt = su_densityInt[nanfilter]
        sp_densityFilt = sp_densityInt[nanfilter]
        su_depthFilt = np.asarray(su_depth)[nanfilter]

        sp_out = {'Depth':su_depthFilt,'Density':sp_densityFilt}
        su_out = {'Depth':su_depthFilt,'Density':su_densityFilt}

    else:
        # Single-point observations are passed through without interpolation
        sp_out = {'Depth':sp_depth,'Density':sp_density}
        su_out = {'Depth':su_depth,'Density':su_density}

    # Metadata is copied unchanged; Depth and Density come from the branch above
    dictionaries = {'SNOWPACK':{'ID':sp['ID'],'Timestamp':sp['Timestamp'],
                                'Latitude':sp['Latitude'],'Longitude':sp['Longitude'],
                                'Elevation':sp['Elevation'],**sp_out},
                    'SUMup':{'CoreID':su['CoreID'],'Citation':su['Citation'],'Timestamp':su['Timestamp'],
                             'Latitude':su['Latitude'],'Longitude':su['Longitude'],
                             'Elevation':su['Elevation'],**su_out}}

    dict_list.append(dictionaries)

In [12]:
# Save the interpolated dictionaries; the with-block guarantees the file is flushed and closed
with open(icesheet+'_data_interpolated.p','wb') as f:
    pickle.dump(dict_list, f)