# Create dictionaries for SUMup and SNOWPACK

#### Author: Megan Thompson-Munson
#### Date created: 20 September 2021

In [104]:
import os
import numpy as np
import pandas as pd
import xarray as xr
from datetime import datetime
import pickle

## Read in SUMup data

In [102]:
# Open latest SUMup dataset
sumup = xr.open_dataset('sumup_density_2020_v060121.nc')

# Elevation and latitude decide which records are kept, so read them unfiltered first.
su_elev = sumup['Elevation'].values
su_lat = sumup['Latitude'].values

# Keep only records with a positive elevation and a latitude above -91
# (presumably anything else is a no-data fill value -- TODO confirm against the SUMup docs).
# NaN compares False on both tests, so NaN records are dropped as well.
condition = (su_elev>0) & (su_lat>-91)


def _masked(variable):
    """Return the values of one SUMup variable restricted to the records kept by `condition`."""
    return sumup[variable].values[condition]


# Extract data and remove no data
su_elev = su_elev[condition]
su_lat = su_lat[condition]
su_lon = _masked('Longitude')
su_depth0 = _masked('Start_Depth')
su_depth1 = _masked('Stop_Depth')
su_midpoint = _masked('Midpoint')
su_density = _masked('Density')
su_citation = _masked('Citation')
su_date = _masked('Date')

# Normalise the raw SUMup dates into numeric YYYYMMDD values that can be parsed as real dates.
# Some dates are just a year (e.g., 'YYYY0000'); those become Jan 1 of that year.

# These particular dates appear to be very incorrect; map each one to its replacement
date_fixes = {'19999000.0': '19990000.0', '20089620.0': '20080620.0'}

# Day '00' means "unknown" and becomes the 1st; a day of '32' is changed to '31'
day_fixes = {'00': '01', '32': '31'}

su_timestamp = []
for d in su_date:
    # The date is sliced by character position from its string form (e.g. '20111228.0')
    date_str = str(d)
    date_str = date_fixes.get(date_str, date_str)

    year, month, day = date_str[0:4], date_str[4:6], date_str[6:8]

    # Month '00' means "unknown" and becomes January
    month = '01' if month == '00' else month
    day = day_fixes.get(day, day)

    d = float(year+month+day)
    su_timestamp.append(d)

su_timestamp = np.array(su_timestamp)

# Create SUMup dataframe: one row per density measurement.
# Thickness is the stop depth minus the start depth of each measurement; Density is scaled by 1000
# (presumably a unit conversion, e.g. g/cm^3 -> kg/m^3 -- TODO confirm).
su_data = {'Citation':su_citation,'Timestamp':su_timestamp,'Latitude':su_lat,'Longitude':su_lon,
              'Elevation':su_elev,'Midpoint':su_midpoint,'StartDepth':su_depth0,'StopDepth':su_depth1,
              'Thickness':su_depth1-su_depth0,'Density':su_density*1000}

df = pd.DataFrame(data=su_data)

# Turn date into timestamp.
# The dates are numeric YYYYMMDD values (e.g. 20111228.0). Newer pandas versions convert such a float
# to the text '20111228.0' before applying the format, and the trailing '.0' then fails to match
# '%Y%m%d'. Rendering each value as an 8-digit integer string first parses the same way on every
# pandas version. Missing values are passed through untouched and become NaT.
date_strings = df['Timestamp'].map(lambda v: v if pd.isna(v) else '{:08d}'.format(int(v)))
df['Timestamp'] = pd.to_datetime(date_strings, format='%Y%m%d')

# Create a unique index for each core.
# A row belongs to the same core as the previous row when its citation, latitude and longitude
# all match; otherwise it starts the next core. IDs count up from 0 in row order.
n = -1
id0 = []
for i in range(len(su_citation)):
    # The first row has no predecessor, so it always opens a new core. (Comparing it with
    # index i-1 == -1 would wrap around to the last row of the dataset.)
    same_core = (i > 0 and su_citation[i]==su_citation[i-1]
                 and su_lat[i]==su_lat[i-1] and su_lon[i]==su_lon[i-1])
    if not same_core:
        n += 1
    # Every row, including the last one, gets its ID from the comparison above
    id0.append(n)

# Give each datapoint within a core index its own index.
# The counter restarts at 0 on the first row of every core and increases by 1 for each
# following row of the same core.
m = -1
id1 = []
for i in range(len(id0)):
    # Row 0 has no predecessor (index i-1 == -1 would wrap around to the last row),
    # so it always starts a fresh count.
    if i > 0 and id0[i] == id0[i-1]:
        m += 1
    else:
        m = 0
    id1.append(m)

# Set indices in dataframe.
# The core ID and the within-core position are stored twice: as ordinary columns...
df['CoreID'] = id0
df['CoreIdx'] = id1

# ...and as a two-level row index with levels named 'Core' and 'Index'
idx0 = pd.Series(id0, name='Core')
idx1 = pd.Series(id1, name='Index')
idx_arrays = [idx0, idx1]
df.index = pd.MultiIndex.from_arrays(idx_arrays, names=['Core', 'Index'])

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Citation,Timestamp,Latitude,Longitude,Elevation,Midpoint,StartDepth,StopDepth,Thickness,Density,CoreID,CoreIdx
Core,Index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,0,3.0,2011-12-28,-79.446800,-117.963501,1619.0,0.050,0.00,0.10,0.10,368.000000,0,0
0,1,3.0,2011-12-28,-79.446800,-117.963501,1619.0,0.150,0.10,0.20,0.10,381.000000,0,1
0,2,3.0,2011-12-28,-79.446800,-117.963501,1619.0,0.250,0.20,0.30,0.10,368.000000,0,2
0,3,3.0,2011-12-28,-79.446800,-117.963501,1619.0,0.350,0.30,0.40,0.10,363.000000,0,3
0,4,3.0,2011-12-28,-79.446800,-117.963501,1619.0,0.450,0.40,0.50,0.10,389.000000,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1178,5,187.0,2013-06-04,72.579781,-38.458630,3210.0,0.765,0.75,0.78,0.03,274.000000,1178,5
1178,6,187.0,2013-06-04,72.579781,-38.458630,3210.0,0.795,0.78,0.81,0.03,297.699982,1178,6
1178,7,187.0,2013-06-04,72.579781,-38.458630,3210.0,0.825,0.81,0.84,0.03,308.000000,1178,7
1178,8,187.0,2013-06-04,72.579781,-38.458630,3210.0,0.855,0.84,0.87,0.03,305.599976,1178,8


## Metadata about both datasets

In [105]:
# Extract the location of each SUMup core (the row with CoreIdx 0) and save as new dataframe
is_first_row = df['CoreIdx'] == 0
location_columns = ['CoreID','Citation','Timestamp','Latitude','Longitude']
df_sumuplocs = df.loc[is_first_row, location_columns].reset_index(drop=True)

# Read in MERRA-2 location data
df_merra2locs = pd.read_table('GrIS_full_station_list.lst')

In [106]:
# Show the station list as read: a single '[INPUTEDITING]' column whose rows are still
# unparsed text of the form 'VSTATION<n> = latlon <lat> <lon> 0'
df_merra2locs

Unnamed: 0,[INPUTEDITING]
0,VSTATION1 = latlon 62.0 -46.875 0
1,VSTATION2 = latlon 62.0 -46.25 0
2,VSTATION3 = latlon 62.0 -45.625 0
3,VSTATION4 = latlon 62.0 -45.0 0
4,VSTATION5 = latlon 62.0 -44.375 0
...,...
1307,VSTATION1308 = latlon 82.0 -36.875 0
1308,VSTATION1309 = latlon 82.5 -34.375 0
1309,VSTATION1310 = latlon 82.5 -33.75 0
1310,VSTATION1311 = latlon 82.5 -33.125 0


## Read in SNOWPACK data

## Create dictionary with SUMup and SNOWPACK data

In [103]:
# Create empty list for dictionaries (one dictionary per core, in order of CoreID)
dict_list = []

# Loop through each core.
# Grouping on the CoreID column yields every core that is actually present, so the loop does not
# depend on the IDs being exactly 0..N-1.
for _, core in df.groupby('CoreID', sort=True):

    # Core-level metadata is taken from the core's first row; .iloc[0] picks that row by
    # position, so it does not rely on a row labelled 0 existing.
    coreid = core['CoreID'].iloc[0]
    citation = core['Citation'].iloc[0]
    timestamp = core['Timestamp'].iloc[0]
    latitude = core['Latitude'].iloc[0]
    longitude = core['Longitude'].iloc[0]
    elevation = core['Elevation'].iloc[0]

    # Per-measurement profiles are stored as arrays covering every row of the core
    midpoint = np.array(core['Midpoint'])
    startdepth = np.array(core['StartDepth'])
    stopdepth = np.array(core['StopDepth'])
    thickness = np.array(core['Thickness'])
    density = np.array(core['Density'])

    dict_data = {'CoreID':coreid,'Citation':citation,'Timestamp':timestamp,'Latitude':latitude,'Longitude':longitude,
                'Elevation':elevation,'Midpoint':midpoint,'StartDepth':startdepth,'StopDepth':stopdepth,
                'Thickness':thickness,'Density':density}

    dict_list.append(dict_data)