# Create dictionaries for SUMup and SNOWPACK

#### Author: Megan Thompson-Munson
#### Date created: 20 September 2021

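This script reads in SUMup observation data together with SNOWPACK and CFM model output, reformats the data, and creates dictionaries (both on the native model grids and interpolated onto the observation depths) that are saved as pickle files.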

In [1]:
# BEGIN

# Ice sheet to process (the cells below branch on 'AIS' and 'GrIS')
icesheet = 'GrIS'

# Directory containing the SNOWPACK output
pathSP = '/projects/metm9666/snowpack/Scripts/Spinup/'

# Directory containing the CFM output
pathCFM = '/scratch/summit/metm9666/CFMresults/'

# END

In [2]:
import numpy as np
import pandas as pd
import xarray as xr
import datetime
import pickle
from scipy.interpolate import griddata
from os.path import exists
import h5py as h5

## 1. Read in SUMup data

In [3]:
# Load the pickled SUMup cores; the context manager guarantees the file handle is closed.
# NOTE: pickle can execute arbitrary code while loading, so only read files from a trusted source.
with open(icesheet+'_SUMup.p','rb') as sumupFile:
    sumup = pickle.load(sumupFile)

In [4]:
# Collect the ID, timestamp, and coordinates of every SUMup core so that the
# closest MERRA-2 point can be looked up later
suKeys = ('CoreID','Timestamp','Latitude','Longitude')
suMeta = np.empty((len(sumup),4))

# One row per core, one column per metadata key
for row in range(len(sumup)):
    core = sumup[row]
    for col,key in enumerate(suKeys):
        suMeta[row,col] = core[key]

# Wrap the array in a dataframe and turn the numeric timestamps back into dates
suColumns = ['SU_ID','SU_Timestamp','SU_Latitude','SU_Longitude']
df_suMeta = pd.DataFrame(suMeta,columns=suColumns)
df_suMeta['SU_Timestamp'] = pd.to_datetime(df_suMeta['SU_Timestamp'])
df_suMeta

Unnamed: 0,SU_ID,SU_Timestamp,SU_Latitude,SU_Longitude
0,6.0,2011-05-12,73.647102,-38.677200
1,7.0,2011-05-24,74.507965,-41.339031
2,8.0,2011-05-12,75.129303,-40.541832
3,9.0,2010-05-20,75.408035,-41.068619
4,10.0,2010-05-20,75.949409,-42.160610
...,...,...,...,...
677,1571.0,2008-06-08,72.549934,-38.309067
678,1572.0,2008-06-16,72.579552,-38.505466
679,1573.0,2008-06-20,72.549934,-38.309067
680,1574.0,2013-06-04,72.635132,-38.514919


## 2. Collect SUMup and MERRA-2 metadata

In [5]:
# Read in MERRA-2 location data from the station list of the selected ice sheet
if icesheet not in ('AIS','GrIS'):
    # Fail here with a clear message instead of a NameError on df_M2 further down
    raise ValueError("icesheet must be 'AIS' or 'GrIS', got {!r}".format(icesheet))

# The first line of the list is skipped; columns 0, 3, and 4 are read as the
# station name, latitude, and longitude.
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
df_M2 = pd.read_table(icesheet+'_station_list.lst',
                      skiprows=1,sep=r'\s+',usecols=[0,3,4],
                      names=['Station','Latitude','Longitude'])

# Extract VIRs (SNOWPACK IDs): the last three characters of each station name
df_M2['VIR'] = df_M2['Station'].str[-3:]

# The station name is no longer needed; drop() returns a new frame, so keep the result
df_M2 = df_M2.drop(columns=['Station'])

# Haversine formula for calculating distance between two points on Earth
def haversine(lat1,lon1,lat2,lon2):
    """Return the great-circle distance in meters between two points.

    All four arguments are in degrees. Only NumPy functions are applied, so
    scalars and arrays are both accepted.
    """
    earthRadius = 6371e3 # Meters

    # Convert every coordinate to radians
    phi1,theta1 = np.deg2rad(lat1),np.deg2rad(lon1)
    phi2,theta2 = np.deg2rad(lat2),np.deg2rad(lon2)

    # Haversine of the central angle between the two points
    latTerm = np.sin((phi2-phi1)/2)**2
    lonTerm = np.cos(phi1)*np.cos(phi2)*np.sin((theta2-theta1)/2)**2
    hav = latTerm+(lonTerm)

    # Central angle (radians), scaled by Earth's radius
    centralAngle = 2*np.arctan2(np.sqrt(hav),np.sqrt(1-hav))
    return (earthRadius)*centralAngle

# Function for finding closest MERRA-2 location to given SUMup location
def closest_location(sumuplat,sumuplon):
    """Return the row(s) of df_M2 closest to the given SUMup coordinates.

    Parameters
    ----------
    sumuplat, sumuplon : float
        Latitude and longitude of the SUMup core in degrees.

    Returns
    -------
    pandas.DataFrame
        The df_M2 row with the smallest haversine distance. If several
        stations are exactly equidistant, all of them are returned in their
        df_M2 order (callers take the first one).

    Raises
    ------
    ValueError
        If df_M2 is empty.
    """
    # haversine only uses NumPy functions, so one call evaluates every station at once
    distance = np.asarray(haversine(sumuplat,sumuplon,
                                    df_M2.Latitude.values,df_M2.Longitude.values))

    # Positions of the minimum distance; iloc makes the positional lookup explicit
    p = np.flatnonzero(distance == distance.min())
    return df_M2.iloc[p]

# Build one row per SUMup core holding the core's own metadata followed by the
# metadata of its closest MERRA-2 point
metaColumns = ['SU_ID','SU_Timestamp','SU_Latitude','SU_Longitude',
               'VIR','M2_Latitude','M2_Longitude']
suTimes = np.array(df_suMeta.SU_Timestamp) # Stored as floats inside the numeric array
metadata = np.zeros((len(df_suMeta),7))

for row in range(len(df_suMeta)):
    coreLat = df_suMeta.SU_Latitude[row]
    coreLon = df_suMeta.SU_Longitude[row]
    nearest = closest_location(coreLat,coreLon)

    # SUMup columns
    metadata[row,0] = df_suMeta.SU_ID[row]
    metadata[row,1] = suTimes[row]
    metadata[row,2] = coreLat
    metadata[row,3] = coreLon

    # MERRA-2 columns (first match if several points are returned)
    metadata[row,4] = nearest.VIR.values[0]
    metadata[row,5] = nearest.Latitude.values[0]
    metadata[row,6] = nearest.Longitude.values[0]

# Dataframe of SUMup and MERRA-2 metadata
df_meta = pd.DataFrame(metadata,columns=metaColumns)

# Turn the float timestamps back into dates and reset the index
df_meta['SU_Timestamp'] = pd.to_datetime(df_meta['SU_Timestamp'])
df_meta.reset_index(drop=True,inplace=True)
df_meta

Unnamed: 0,SU_ID,SU_Timestamp,SU_Latitude,SU_Longitude,VIR,M2_Latitude,M2_Longitude
0,6.0,2011-05-12,73.647102,-38.677200,347.0,73.5,-38.750
1,7.0,2011-05-24,74.507965,-41.339031,351.0,74.5,-41.250
2,8.0,2011-05-12,75.129303,-40.541832,355.0,75.0,-40.625
3,9.0,2010-05-20,75.408035,-41.068619,359.0,75.5,-41.250
4,10.0,2010-05-20,75.949409,-42.160610,365.0,76.0,-41.875
...,...,...,...,...,...,...,...
677,1571.0,2008-06-08,72.549934,-38.309067,339.0,72.5,-38.125
678,1572.0,2008-06-16,72.579552,-38.505466,338.0,72.5,-38.750
679,1573.0,2008-06-20,72.549934,-38.309067,339.0,72.5,-38.125
680,1574.0,2013-06-04,72.635132,-38.514919,338.0,72.5,-38.750


In [6]:
# Some SNOWPACK files don't exist (model errors), so check to see which ones don't and exclude them in the dataframe

if icesheet not in ('AIS','GrIS'):
    # Without this guard every file would silently be reported as missing
    raise ValueError("icesheet must be 'AIS' or 'GrIS', got {!r}".format(icesheet))

# Directory holding the SNOWPACK output of the selected ice sheet
outDir = pathSP+icesheet+'_SUMup_output/'

# VIRs (as integers) whose .pro file does not exist
nonfiles = [int(vir) for vir in df_M2.VIR.values
            if not exists(outDir+'VIR'+vir+'_'+icesheet+'_SUMup.pro')]

# Ignore any missing files; chaining reset_index avoids modifying a filtered slice in place
df_meta = df_meta[~df_meta['VIR'].isin(nonfiles)].reset_index(drop=True)
df_meta

Unnamed: 0,SU_ID,SU_Timestamp,SU_Latitude,SU_Longitude,VIR,M2_Latitude,M2_Longitude
0,6.0,2011-05-12,73.647102,-38.677200,347.0,73.5,-38.750
1,7.0,2011-05-24,74.507965,-41.339031,351.0,74.5,-41.250
2,8.0,2011-05-12,75.129303,-40.541832,355.0,75.0,-40.625
3,9.0,2010-05-20,75.408035,-41.068619,359.0,75.5,-41.250
4,10.0,2010-05-20,75.949409,-42.160610,365.0,76.0,-41.875
...,...,...,...,...,...,...,...
602,1571.0,2008-06-08,72.549934,-38.309067,339.0,72.5,-38.125
603,1572.0,2008-06-16,72.579552,-38.505466,338.0,72.5,-38.750
604,1573.0,2008-06-20,72.549934,-38.309067,339.0,72.5,-38.125
605,1574.0,2013-06-04,72.635132,-38.514919,338.0,72.5,-38.750


## 3. Create dictionaries with SNOWPACK, CFM, and SUMup

In [None]:
# Function to convert CFM dates to timestamps (e.g., 1980.0027 --> 1980-1-1)
def decimal2datetime(decimalYear):
    """Convert a sequence of decimal years into a list of pandas Timestamps.

    The integer part of each value gives the year. The fractional part is
    multiplied by 365.25 days and added to 1 January of that year.
    """
    def convert(value):
        wholeYear = int(value)
        elapsed = datetime.timedelta((value - wholeYear)*365.25)
        return pd.to_datetime(datetime.datetime(wholeYear,1,1)+elapsed)

    return [convert(decimalYear[n]) for n in range(len(decimalYear))]

# List for storing dictionaries
dict_list = []

# Format of the timestamps on the '0500' lines of a SNOWPACK .pro file
proDateFormat = '%d.%m.%Y %H:%M:%S'

# Parse one .pro data line into a list of floats. The 4-digit code and comma at the
# start and the final character are stripped, and the first value is skipped
# (presumably the element count -- verify against the .pro format).
def _pro_values(line):
    return list(map(float,line[5:-1].split(',')))[1:]

# NOTE(review): only the first 200 rows of df_meta are processed, as in the original
# loop; confirm whether this cap is intentional. The min() keeps the loop from
# raising a KeyError when df_meta has fewer rows than the cap.
maxCores = 200

# Loop through each SUMup core
for i in range(min(maxCores,len(df_meta))):

    # Meta information about SUMup and MERRA-2
    meta = df_meta.loc[i]

    # VIR as a zero-padded, three-character string for the file names
    strvir = str(int(meta.VIR)).zfill(3)

    # SNOWPACK .pro and .smet files (same names as before for 'AIS' and 'GrIS')
    runName = pathSP+icesheet+'_SUMup_output/VIR'+strvir+'_'+icesheet+'_SUMup'
    proFile = runName+'.pro'
    smetFile = runName+'.smet'

    ### SNOWPACK pro file ###

    # The context manager closes the file even if parsing fails
    with open(proFile,'r') as proF:

        # Read in header (44 lines in length)
        for j in range(44):
            header = proF.readline()
            if j == 1:
                VIR = int(header[29:-1]) # Should be same as meta data
            elif j == 2:
                SP_Latitude = float(header[10:-1]) # Should be same as meta data
            elif j == 3:
                SP_Longitude = float(header[11:-1]) # Should be same as meta data
            elif j == 4:
                SP_Elevation = float(header[9:-1])

        # Everything after the header is data
        SP_data = proF.readlines()

    # SNOWPACK gives each data type a 4-digit code; '0500' lines carry the timestamps
    SP_Timestamps = [pd.to_datetime(datetime.datetime.strptime(line[5:24],proDateFormat))
                     for line in SP_data if line[0:4] == '0500']

    # Find SNOWPACK timestamp that's closest to the desired SUMup one
    SP_closest = min(SP_Timestamps, key=lambda sub: abs(sub - meta.SU_Timestamp))
    k = SP_Timestamps.index(SP_closest) # First occurrence

    # Each timestamp has 27 elements, so this gets us to the start of the closest one.
    # The position is known from k, so the file does not need to be scanned a second time.
    index = k*27

    # Extract variables of interest by splitting the lines and creating lists of the data
    SP_height = _pro_values(SP_data[index+1]) # Height (cm) (converted to m in dictionary)
    SP_h = np.array(SP_height) # Create array of height for conversion to depth
    SP_depth = (SP_h-SP_h[-1])*-1 # Depth sets surface as 0
    SP_density = _pro_values(SP_data[index+2]) # Density (kg/m^3)
    SP_temperature = _pro_values(SP_data[index+3]) # Temperature (deg C)
    SP_water = _pro_values(SP_data[index+6]) # Water content (%)
    SP_ice = _pro_values(SP_data[index+14]) # Ice content (%)
    SP_air = _pro_values(SP_data[index+15]) # Air content (%)
    t = SP_closest # Timestamp of the extracted profile

    SP_dict = {'VIR':meta.VIR,'Timestamp':t,'Elevation':SP_Elevation,'Latitude':SP_Latitude,'Longitude':SP_Longitude,'Height':np.array(SP_height)/100,
              'Depth':SP_depth/100,'Density':np.array(SP_density),'Temperature':np.array(SP_temperature),
              'Ice':np.array(SP_ice)/100,'Air':np.array(SP_air)/100,'Water':np.array(SP_water)/100}

    ### SNOWPACK smet file ###

    # Get smet info from header
    # (sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True)
    df_smetinfo = pd.read_table(smetFile,skiprows=9,nrows=8,sep=r'\s+',header=None)
    df_smetinfo.reset_index(drop=True,inplace=True)
    df_smetinfo.columns = np.array(df_smetinfo.iloc[5])
    df_smetinfo = df_smetinfo.drop([2,3,5,6])
    df_smetinfo = df_smetinfo.drop(columns=['='])
    df_smetinfo.set_index('plot_description',inplace=True)

    # Read in smet file and create arrays of relevant data
    df_smet = pd.read_table(smetFile,skiprows=18,sep=r'\s+',names=np.array(df_smetinfo.iloc[3]))
    smetTimestamp = np.array(pd.to_datetime(df_smet.timestamp))
    smetSnow = np.array(df_smet.MS_Snow)
    smetWind = np.array(df_smet.MS_Wind)
    smetRain = np.array(df_smet.MS_Rain)
    smetTemp = np.array(df_smet.TA)
    smetMelt = np.array(df_smet.MS_melt)
    smetEvap = np.array(df_smet.MS_Evap)
    smetSubl = np.array(df_smet.MS_Sublimation)
    smetRunoff = np.array(df_smet.MS_SN_Runoff)
    smetSWE = np.array(df_smet.SWE)

    # Create smet dictionary
    smet_dict = {'VIR':meta.VIR,'Latitude':meta.M2_Latitude,'Longitude':meta.M2_Longitude,'Timestamp':smetTimestamp,'Temperature':smetTemp,'Snow':smetSnow,
                'Rain':smetRain,'Melt':smetMelt,'Wind':smetWind,'Evaporation':smetEvap,'Sublimation':smetSubl,'Runoff':smetRunoff,'SWE':smetSWE}

    ### CFM ###

    # Get MERRA-2 coordinates from metadata for CFM file name
    M2_lat = meta.M2_Latitude
    M2_lon = meta.M2_Longitude

    # This longitude was manually changed to 0 for SNOWPACK runs, but needs to be reverted back to e-13 for CFM
    if (M2_lat==-75.0) and (M2_lon==0.0):
        M2_lon = -5.920304394294029e-13

    # Get CFM file for corresponding coordinate and read it in
    CFM_file = pathCFM + 'IDS_baseline_{}_{}_1D_mean'.format(M2_lat,M2_lon) + '/CFMresults.hdf5'

    # Slicing copies the data into memory, so the file can be closed right after reading
    with h5.File(CFM_file,'r') as CFM_f:
        CFM_timesteps = CFM_f['density'][1:,0] # First column holds the decimal-year time steps
        CFM_depth = CFM_f['depth'][1:,1:]
        CFM_density = CFM_f['density'][1:,1:]
        CFM_dip = CFM_f['DIP'][1:,1:]

    # Convert decimal years to timestamp
    CFM_timestamps = decimal2datetime(CFM_timesteps)

    # Find closest CFM timestamp to SUMup timestamp
    CFM_closest = min(CFM_timestamps, key=lambda sub: abs(sub - meta.SU_Timestamp))
    l = CFM_timestamps.index(CFM_closest) # First occurrence

    CFM_dict = {'Latitude':M2_lat,'Longitude':M2_lon,'Timestamp':CFM_timestamps[l],
                'Depth':CFM_depth[l],'Density':CFM_density[l],'DIP':CFM_dip[l]}

    ### SUMup ###

    # Extract SUMup dictionary for corresponding SNOWPACK data
    SU_DictIndex = df_suMeta[df_suMeta.SU_ID==meta.SU_ID].index[0]
    SU_dict = sumup[SU_DictIndex]

    # Create dictionary and append list
    dictionaries = {'MERRA-2':smet_dict, 'SNOWPACK':SP_dict,
                    'CFM':CFM_dict, 'SUMup':SU_dict}

    dict_list.append(dictionaries)

In [None]:
# Save the list of dictionaries; the context manager flushes and closes the file
with open(icesheet+'_data.p','wb') as dataFile:
    pickle.dump(dict_list, dataFile)

## 4. Create interpolated dictionaries

In [None]:
# Load the saved list of dictionaries; the context manager closes the file handle.
# NOTE: pickle can execute arbitrary code while loading, so only read files from a trusted source.
with open(icesheet+'_data.p','rb') as dataFile:
    data = pickle.load(dataFile)

In [None]:
# List of dictionaries
dict_list = []

# Regrid models on to observations' vertical scales
for entry in data:

    # Select SUMup, CFM, and SNOWPACK keys
    SU = entry['SUMup']
    SP = entry['SNOWPACK']
    CFM = entry['CFM']

    # Observed density and the depths it was measured at
    SU_density = SU['Density']
    SU_depth = SU['Midpoint']

    # Many SUMup measurements are just single point, so just regrid datasets with n > 1
    if len(SU_density) > 1:

        # Interpolate modeled density onto observations
        SP_densityInt = griddata(SP['Depth'],SP['Density'],SU_depth)
        CFM_densityInt = griddata(CFM['Depth'],CFM['Density'],SU_depth)

        # Keep only the depths where the observation and both models have a value
        nanfilter = (~np.isnan(SU_density)) & (~np.isnan(SP_densityInt)) & (~np.isnan(CFM_densityInt))

        # Filter the depths with the same mask as the densities so that the
        # 'Depth' and 'Density' arrays stay the same length and aligned
        commonDepth = np.asarray(SU_depth)[nanfilter]
        SP_depthOut,SP_densityOut = commonDepth,SP_densityInt[nanfilter]
        CFM_depthOut,CFM_densityOut = commonDepth,CFM_densityInt[nanfilter]
        SU_depthOut,SU_densityOut = commonDepth,np.asarray(SU_density)[nanfilter]

    else:
        # Nothing to regrid: keep every dataset on its own vertical grid
        SP_depthOut,SP_densityOut = SP['Depth'],SP['Density']
        CFM_depthOut,CFM_densityOut = CFM['Depth'],CFM['Density']
        SU_depthOut,SU_densityOut = SU_depth,SU_density

    # The CFM entry takes its 'Elevation' from the SNOWPACK dictionary
    dictionaries = {'SNOWPACK':{'VIR':SP['VIR'],'Timestamp':SP['Timestamp'],
                                'Latitude':SP['Latitude'],'Longitude':SP['Longitude'],
                                'Elevation':SP['Elevation'],'Depth':SP_depthOut,'Density':SP_densityOut},
                    'CFM':{'Timestamp':CFM['Timestamp'],'Latitude':CFM['Latitude'],'Longitude':CFM['Longitude'],
                           'Elevation':SP['Elevation'],'Depth':CFM_depthOut,'Density':CFM_densityOut},
                    'SUMup':{'CoreID':SU['CoreID'],'Citation':SU['Citation'],'Timestamp':SU['Timestamp'],
                             'Latitude':SU['Latitude'],'Longitude':SU['Longitude'],
                             'Elevation':SU['Elevation'],'Depth':SU_depthOut,'Density':SU_densityOut}}

    dict_list.append(dictionaries)

In [None]:
# Save the interpolated dictionaries; the context manager flushes and closes the file
with open(icesheet+'_data_interpolated.p','wb') as interpFile:
    pickle.dump(dict_list, interpFile)