# Convert .smet files to .nc files

#### Created by Megan Thompson-Munson (2023)


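**Input:** `/scratch/alpine/metm9666/project-2_output-4/*DEC*.smet` (note: the glob in the code below currently reads `/pl/active/metm-greenland/project-2_output/*INC*.smet`; update whichever of the two is stale)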

**Output:** `/scratch/alpine/metm9666/project-2_processed-output-4/smet-*.nc`

In [11]:
import xarray as xr
import pandas as pd
import numpy as np
import glob
import datetime
import netCDF4 as nc

In [12]:
# Gather every .smet file matching the active glob pattern, ordered by path.
# Swap which of the two pattern lines is commented out to switch between runs.
# smetPattern = '/pl/active/metm-greenland/project-2_output/*DEC*.smet'
smetPattern = '/pl/active/metm-greenland/project-2_output/*INC*.smet'
files = glob.glob(smetPattern)
files.sort()

In [13]:
# Use the first .smet file as the template for the header layout.
# Assumes every file shares the same column layout -- TODO confirm.
fileVariables = files[0]

# Read eight whitespace-separated header lines (after skipping the first nine)
# into a table of variable descriptions, variable codes, and units.
# sep=r'\s+' is the documented replacement for delim_whitespace=True, which is
# deprecated since pandas 2.2.
dfVariables = pd.read_table(fileVariables, skiprows=9, nrows=8, sep=r'\s+', header=None)
# Row 5 of the header table supplies the column labels ('plot_description', '=', ...).
dfVariables.columns = np.array(dfVariables.iloc[5])
# Keep header rows 0, 1, 4 and 7 only, and discard the literal '=' column.
dfVariables = dfVariables.drop([2,3,5,6])
dfVariables = dfVariables.drop(columns=['='])
# The first token of each header line becomes the row label (e.g. 'fields', 'plot_unit').
dfVariables.set_index('plot_description', inplace=True)

# One "description: code (unit)" line per variable
# (note that temperature is actually degrees C).
variableSummary = [
    f'{variable}: {abbreviation} ({unit})'
    for variable, abbreviation, unit in zip(dfVariables.columns,
                                            dfVariables.loc['fields'].values,
                                            dfVariables.loc['plot_unit'].values)
]
# Uncomment to list the variables and their abbreviations.
# print('\n'.join(variableSummary))

timestamp: timestamp (-)
sensible_heat: Qs (W/m2)
latent_heat: Ql (W/m2)
ground_heat: Qg (W/m2)
ground_temperature: TSG (K)
ground_heat_at_soil_interface: Qg0 (W/m2)
rain_energy: Qr (W/m2)
melt_freeze_energy: Qmf (kJ/m2)
cold_content_snow: CCsnow (kJ/m2)
outgoing_long_wave_radiation: OLWR (W/m2)
incoming_long_wave_radiation: ILWR (W/m2)
net_long_wave_radiation: LWR_net (W/m2)
reflected_short_wave_radiation: OSWR (W/m2)
incoming_short_wave_radiation: ISWR (W/m2)
net_short_wave_radiation: Qw (W/m2)
parametrized_albedo: pAlbedo (-)
measured_albedo: mAlbedo (-)
incoming_short_wave_on_horizontal: ISWR_h (W/m2)
direct_incoming_short_wave: ISWR_dir (W/m2)
diffuse_incoming_short_wave: ISWR_diff (W/m2)
air_temperature: TA (K)
surface_temperature(mod): TSS_mod (K)
surface_temperature(meas): TSS_meas (K)
bottom_temperature: T_bottom (K)
relative_humidity: RH (-)
wind_velocity: VW (m/s)
wind_velocity_drift: VW_drift (m/s)
wind_direction: DW (°)
solid_precipitation_rate: MS_Snow (kg/m2/h)
snow_heig

In [14]:
# Accumulators: each list receives one entry per .smet file, in the order of `files`.
# latList/lonList hold scalars; every other list holds one array (or list) per file.
dateList = []
latList = []
lonList = []
temperatureList = []
snowList = []
rainList = []
evapList = []
sublList = []
runoffList = []
refreezeList = []
meltList = []
smbList = []
windList = []
iswrList = []
ilwrList = []
rhList = []

# Column names for the data section come from the fourth retained header row;
# they are identical for every file, so look them up once.
smetColumns = np.array(dfVariables.iloc[3])
keepColumns = ['Timestamp','MS_Snow','MS_Wind','MS_Rain','MS_Evap','MS_Sublimation',
               'MS_SN_Runoff','MS_freeze','MS_melt','TA','ISWR','ILWR','RH']

# Read in SNOWPACK .smet files
for file in files:

    # The 5th and 6th header lines carry latitude and longitude as "key = value".
    # Splitting on '=' avoids depending on a fixed column offset or on a trailing newline.
    with open(file) as f:
        headerLines = [f.readline() for _ in range(6)]
    latList.append(float(headerLines[4].split('=', 1)[1]))
    lonList.append(float(headerLines[5].split('=', 1)[1]))

    # Read the data section (everything after the first 18 lines).
    # sep=r'\s+' replaces delim_whitespace=True, which is deprecated since pandas 2.2.
    dfSmet = pd.read_table(file, skiprows=18, sep=r'\s+', names=smetColumns)

    # Keep the timestamps both as plain datetimes (for the time axis built later)
    # and as a pandas datetime column (for the time-step arithmetic below).
    ts = [datetime.datetime.strptime(t,'%Y-%m-%dT%H:%M:%S') for t in dfSmet.timestamp.values]
    dfSmet['Timestamp'] = pd.to_datetime(dfSmet.timestamp)

    # .copy() makes the subset an independent frame so that the column assignments
    # below do not trigger SettingWithCopyWarning.
    dfSmet = dfSmet[keepColumns].copy()

    # Some variables (e.g., MS_Snow) need to be converted from rates so get time change in hours.
    # The first record has no predecessor, so dt (and therefore Snowfall, Rainfall
    # and SMB) is NaN for the first row of every file.
    dfSmet['dt'] = dfSmet.Timestamp.diff().dt.total_seconds()/3600

    # Calculate snowfall and rainfall as kg/m^2 rather than kg/m^2/hr. Need to subtract wind from snow.
    dfSmet['Snowfall'] = (dfSmet.MS_Snow-dfSmet.MS_Wind)*dfSmet.dt
    dfSmet['Rainfall'] = dfSmet.MS_Rain*dfSmet.dt

    # Calculate SMB. Evaporation and sublimation are added rather than subtracted, so
    # they are presumably negative for mass loss -- TODO confirm the sign convention.
    dfSmet['SMB'] = (dfSmet.Snowfall + dfSmet.Rainfall + dfSmet.MS_Evap
                     + dfSmet.MS_Sublimation - dfSmet.MS_SN_Runoff)

    # Append this file's series to the accumulators
    dateList.append(ts)
    temperatureList.append(dfSmet['TA'].values)
    snowList.append(dfSmet['Snowfall'].values)
    rainList.append(dfSmet['Rainfall'].values)
    evapList.append(dfSmet['MS_Evap'].values)
    sublList.append(dfSmet['MS_Sublimation'].values)
    runoffList.append(dfSmet['MS_SN_Runoff'].values)
    refreezeList.append(dfSmet['MS_freeze'].values)
    meltList.append(dfSmet['MS_melt'].values)
    smbList.append(dfSmet['SMB'].values)
    windList.append(dfSmet['MS_Wind'].values)
    iswrList.append(dfSmet['ISWR'].values)
    ilwrList.append(dfSmet['ILWR'].values)
    rhList.append(dfSmet['RH'].values)

In [15]:
# Build one row per .smet file: scalar latitude/longitude plus, for every
# variable, a single cell holding that file's whole time series.
columnData = {
    'Latitude': latList,
    'Longitude': lonList,
    'Temperature': temperatureList,
    'Snow': snowList,
    'Rain': rainList,
    'Evaporation': evapList,
    'Sublimation': sublList,
    'Runoff': runoffList,
    'Refreeze': refreezeList,
    'Melt': meltList,
    'SMB': smbList,
    'Wind': windList,
    'ISWR': iswrList,
    'ILWR': ilwrList,
    'RH': rhList,
}
df = pd.DataFrame(columnData)

In [16]:
# Loop through dataframe and replace short arrays with NaNs.
# Every series is compared against the length of the first file's time axis.
expectedLength = len(dateList[0])

# Columns 0 and 1 are Latitude and Longitude (scalars), so start at column 2.
for i in range(2,len(df.columns)):
    for j in range(len(df.index)):
        cell = df.iloc[j,i]
        # Only arrays have a length to check; cells already replaced by NaN are
        # skipped so that this cell can be re-run without raising TypeError.
        if isinstance(cell, np.ndarray) and len(cell)!=expectedLength:
            df.iloc[j,i] = np.nan

In [None]:
# Columns of `df` to grid, and the unit string attached to each resulting DataArray.
# NOTE(review): 'Wind' holds the raw MS_Wind column, which the read step subtracts
# from MS_Snow (a kg/m2/h rate according to the header listing), so 'm/s' may be
# the wrong unit -- confirm before relying on it.
values = ['Temperature','Snow','Rain','Evaporation','Sublimation','Runoff','Refreeze','Melt','SMB','Wind','ISWR','ILWR','RH']
units = ['deg C','kg/m^2','kg/m^2','kg/m^2','kg/m^2','kg/m^2','kg/m^2','kg/m^2','kg/m^2','m/s','W/m^2','W/m^2','%']

# Shared time axis, taken from the first file.
timeIndex = pd.Index(dateList[0],name='time')
nTimes = len(timeIndex)

# Build one (time, lat, lon) DataArray per variable.
dataArrays = {}
for value, unit in zip(values, units):
    # Rows are latitudes, columns are longitudes; each cell is either that
    # location's full time series (ndarray) or NaN.
    dfPivot = df.pivot(index='Latitude',columns='Longitude',values=value)

    # Fill the whole cube one location at a time instead of calling applymap once
    # per time step (applymap is deprecated since pandas 2.1 and costs one Python
    # call per cell per time step). Locations without a series stay NaN.
    cube = np.full((nTimes,)+dfPivot.shape, np.nan)
    for row in range(dfPivot.shape[0]):
        for col in range(dfPivot.shape[1]):
            cell = dfPivot.iat[row,col]
            if isinstance(cell, np.ndarray):
                cube[:,row,col] = cell

    # Coordinates are given as a list so that each one is attached to the matching
    # entry of `dims` regardless of the pandas index names.
    da = xr.DataArray(data=cube,
                      dims=['time','lat','lon'],
                      coords=[timeIndex,dfPivot.index,dfPivot.columns])
    da.attrs['units'] = unit
    dataArrays[value] = da

# Expose the arrays under the individual names used when the dataset is assembled.
(temp, snow, rain, evap, subl, runoff, refreeze,
 melt, smb, wind, iswr, ilwr, rh) = (dataArrays[value] for value in values)

In [None]:
# Merge the per-variable DataArrays into one Dataset, keyed by output variable name.
variableNames = ['temperature','snow','rain','evaporation','sublimation','runoff',
                 'refreeze','melt','smb','wind','iswr','ilwr','rh']
variableArrays = [temp, snow, rain, evap, subl, runoff,
                  refreeze, melt, smb, wind, iswr, ilwr, rh]
ds = xr.Dataset(dict(zip(variableNames, variableArrays)))

In [None]:
# Write the combined dataset to disk, overwriting any existing file.
# Swap which of the two path lines is commented out to match the runs globbed at the top.
# outputPath = '/scratch/alpine/metm9666/project-2_processed-output-4/smet-DEC-all-vars.nc'
outputPath = '/scratch/alpine/metm9666/project-2_processed-output-4/smet-INC-all-vars.nc'
ds.to_netcdf(path=outputPath, mode='w', format='NETCDF4')