# Convert FAC and mean firn temperature from SNOWPACK output .pro files to .nc files

#### Created by Megan Thompson-Munson (2023)


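**Input:** `/scratch/alpine/metm9666/project-2_output-4/*DEC*.pro` (or `*INC*.pro`). Note: the code cells below currently read from `/pl/active/metm-greenland/project-2_output/` instead.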

**Output:** `/scratch/alpine/metm9666/project-2_processed-output-4/FAC-*.nc` and `/scratch/alpine/metm9666/project-2_processed-output-4/firn-temperature-mean-*.nc`

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import datetime
import netCDF4 as nc
import glob

In [5]:
# Count every .pro file in the output directory and report the totals.
# The location count is simply half the file count (presumably one DEC and
# one INC file per location -- confirm against the directory contents).
pro_paths = glob.glob('/pl/active/metm-greenland/project-2_output/*.pro')
nfiles = len(pro_paths)
print('# of files: {}'.format(nfiles))
print('# of locations: {:.0f}'.format(nfiles/2))

# of files: 3450
# of locations: 1725


In [6]:
# Sorted list of the .pro files to process. The DEC set is active; swap in
# the commented-out line to process the INC set instead.
dec_pattern = '/pl/active/metm-greenland/project-2_output/*DEC*.pro'
files = sorted(glob.glob(dec_pattern))
# files = sorted(glob.glob('/pl/active/metm-greenland/project-2_output/*INC*.pro'))

#### FAC

In [7]:
# Echo the first 39 lines of the first file to inspect the header layout
# (station parameters followed by the legend of record codes).
with open(files[0]) as pro:
    for _ in range(39):
        header = pro.readline()
        print(header)

[STATION_PARAMETERS]

StationName= 0

Latitude= 79.00000000

Longitude= -62.50000000

Altitude= 1197

SlopeAngle= 0.00

SlopeAzi= 0.00



[HEADER]

#2023-06-23T17:51:25, Snowpack POLAR version 3.0.0 run by "metm9666" (research mode)

0500,Date

0501,nElems,height [> 0: top, < 0: bottom of elem.] (cm)

0502,nElems,element density (kg m-3)

0503,nElems,element temperature (degC)

0504,nElems,element mk (1)

0505,nElems,element age (days)

0506,nElems,liquid water content by volume (%)

0508,nElems,dendricity (1)

0509,nElems,sphericity (1)

0510,nElems,coordination number (1)

0511,nElems,bond size (mm)

0512,nElems,grain size (mm)

0513,nElems,grain type (Swiss Code F1F2F3)

0514,3,grain type, grain size (mm), and density (kg m-3) of SH at surface

0515,nElems,ice volume fraction (%)

0516,nElems,air volume fraction (%)

0517,nElems,stress in (kPa)

0518,nElems,viscosity (GPa s)

0519,nElems,soil volume fraction (%)

0520,nElems,temperature gradient (K m-1)

0521,nElems,thermal conducti

In [16]:
# Number of header lines at the top of each .pro file. They must be skipped
# before parsing records because the header's field legend also contains
# lines that begin with record codes (e.g. '0500,Date').
N_HEADER_LINES = 39


def _header_value(header_line):
    """Return the number after '=' in a 'Key= value' header line."""
    return float(header_line.split('=', 1)[1])


def _read_fac(pro_file, n_header=N_HEADER_LINES):
    """Read one SNOWPACK .pro file and compute firn air content per timestamp.

    Parameters
    ----------
    pro_file : str
        Path to the .pro file.
    n_header : int
        Number of leading header lines to skip before the data records.

    Returns
    -------
    latitude, longitude : float
        Values of header lines 2 and 3 (0-based), i.e. 'Latitude=' and
        'Longitude='.
    dates : list of datetime.datetime
        One entry per '0500' (date) record.
    fac : list of numpy.float64
        Sum over elements of thickness (m) times air volume fraction,
        one entry per complete timestamp.
    """
    dates = []
    thicknesses = []
    air_fractions = []

    with open(pro_file) as f:
        header = [f.readline() for _ in range(n_header)]
        latitude = _header_value(header[2])
        longitude = _header_value(header[3])

        # Each record is 'code,nElems,v1,v2,...'; the first two items are
        # dropped because they are the code and the number of elements.
        for line in f:
            if line.startswith('0500'):  # Date
                dates.append(
                    datetime.datetime.strptime(line[5:24], '%d.%m.%Y %H:%M:%S'))
            elif line.startswith('0501'):  # Height (cm -> m)
                height = np.array([float(v) for v in line.split(',')[2:]])/100
                # Element thickness is the difference between consecutive
                # heights; the first element's thickness is its own height
                # (assumes the column base sits at height 0).
                thicknesses.append(
                    np.concatenate(([height[0]], np.diff(height))))
            elif line.startswith('0516'):  # Air volume fraction (% -> 1)
                air = np.array([float(v) for v in line.split(',')[2:]])/100
                air_fractions.append(air)

    # zip() pairs the records by timestamp and stops at the shortest list,
    # so a file truncated mid-record yields a shorter series instead of
    # raising IndexError.
    fac = [np.sum(thickness*air)
           for thickness, air in zip(thicknesses, air_fractions)]
    return latitude, longitude, dates, fac


# One entry per file (i.e. per location)
dateList = []
latList = []
lonList = []
facList = []

# Read in SNOWPACK data
for pro_file in files:
    latitude, longitude, dateLoc, facLoc = _read_fac(pro_file)
    latList.append(latitude)
    lonList.append(longitude)
    dateList.append(dateLoc)
    facList.append(facLoc)

In [19]:
# One row per location: its coordinates and its full FAC time series (a list)
fac_columns = {'lat': latList, 'lon': lonList, 'FAC': facList}
df = pd.DataFrame(data=fac_columns)

# # Optional check: print the row index of any series that is too short
# for i in range(len(df)):
#     nfac = len(df.FAC[i])
#     if nfac != 5219:
#         print(i)

68


In [22]:
# Replace any FAC series whose length differs from the reference time axis
# (dateList[0]) with NaN. The mask is computed from the data rather than
# hard-coding the row index of the one short file, so it stays correct when
# a different set of files is processed.
df_copy = df.copy()
n_times = len(dateList[0])
is_incomplete = df_copy['FAC'].map(len) != n_times
df_copy.loc[is_incomplete, 'FAC'] = np.nan

In [23]:
# Arrange the per-location FAC series on a lat x lon grid; grid cells with
# no location (or with a series replaced by NaN) hold NaN instead of a list
dfPivot = df_copy.pivot(index='lat',columns='lon',values='FAC')

# Time axis taken from the first file
times = pd.Index(dateList[0],name='time')
n_times = len(times)

# Fill a (time, lat, lon) array in one pass over the grid instead of running
# applymap and building a separate DataArray for every timestamp
cube = np.full((n_times,)+dfPivot.shape, np.nan)
for r, row in enumerate(dfPivot.values):
    for c, series in enumerate(row):
        if isinstance(series, list):
            # A series shorter than the time axis is padded with NaN
            n = min(len(series), n_times)
            cube[:n, r, c] = series[:n]

# Wrap the array with its coordinates and units
daConcat = xr.DataArray(data=cube,
                        dims=['time','lat','lon'],
                        coords=[times,dfPivot.index,dfPivot.columns])
daConcat.attrs['units'] = 'm'
ds = xr.Dataset({'fac':daConcat})

In [24]:
# `files` is globbed from the *DEC* runs, so the result is written to the
# DEC file. When the glob is switched to *INC*, switch to the commented-out
# FAC-INC.nc target as well: mode='w' overwrites whatever is at the path.
ds.to_netcdf(path='/scratch/alpine/metm9666/project-2_processed-output-4/FAC-DEC.nc',
             mode='w',format='NETCDF4')
# ds.to_netcdf(path='/scratch/alpine/metm9666/project-2_processed-output-4/FAC-INC.nc',
#              mode='w',format='NETCDF4')

#### Firn temperature

In [5]:
# Number of header lines at the top of each .pro file. They must be skipped
# before parsing records because the header's field legend also contains
# lines that begin with record codes (e.g. '0500,Date').
N_HEADER_LINES = 39


def _header_value(header_line):
    """Return the number after '=' in a 'Key= value' header line."""
    return float(header_line.split('=', 1)[1])


def _read_mean_temperature(pro_file, n_header=N_HEADER_LINES):
    """Read one SNOWPACK .pro file and return its mean firn temperature series.

    Parameters
    ----------
    pro_file : str
        Path to the .pro file.
    n_header : int
        Number of leading header lines to skip before the data records.

    Returns
    -------
    latitude, longitude : float
        Values of header lines 2 and 3 (0-based), i.e. 'Latitude=' and
        'Longitude='.
    dates : list of datetime.datetime
        One entry per '0500' (date) record.
    means : list of numpy.float64
        Unweighted mean of the element temperatures (degC) in each '0503'
        record.
    """
    dates = []
    means = []

    with open(pro_file) as f:
        header = [f.readline() for _ in range(n_header)]
        latitude = _header_value(header[2])
        longitude = _header_value(header[3])

        # Each record is 'code,nElems,v1,v2,...'; the first two items are
        # dropped because they are the code and the number of elements.
        for line in f:
            if line.startswith('0500'):  # Date
                dates.append(
                    datetime.datetime.strptime(line[5:24], '%d.%m.%Y %H:%M:%S'))
            elif line.startswith('0503'):  # Temperature
                temperature = [float(v) for v in line.split(',')[2:]]
                means.append(np.mean(temperature))

    # NOTE: an earlier variant took the temperature of the element closest to
    # 20 m depth instead of the mean. That needs the full temperature profile
    # and the depths from the '0501' record:
    #     height = <0501 values>/100; depth = (height-height[-1])*-1
    #     idx = element minimising abs(depth-20)
    return latitude, longitude, dates, means


# One entry per file (i.e. per location)
dateList = []
latList = []
lonList = []
temperatureList = []

# Read in SNOWPACK data
for pro_file in files:
    latitude, longitude, dateLoc, temperatureLoc = _read_mean_temperature(pro_file)
    latList.append(latitude)
    lonList.append(longitude)
    dateList.append(dateLoc)
    temperatureList.append(temperatureLoc)

In [6]:
# One row per location: its coordinates and its mean-temperature time series
temperature_columns = {'lat': latList, 'lon': lonList,
                       'temperature': temperatureList}
df = pd.DataFrame(data=temperature_columns)

# # Optional check: print the row index of any series that is too short
# for i in range(len(df)):
#     ntemp = len(df.temperature[i])
#     if ntemp != 5219:
#         print(i)

In [7]:
# Replace any temperature series whose length differs from the reference
# time axis (dateList[0]) with NaN. The mask is computed from the data rather
# than hard-coding the row index of the one short file, so it stays correct
# when a different set of files is processed.
df_copy = df.copy()
n_times = len(dateList[0])
is_incomplete = df_copy['temperature'].map(len) != n_times
df_copy.loc[is_incomplete, 'temperature'] = np.nan

In [8]:
# Arrange the per-location temperature series on a lat x lon grid; grid cells
# with no location (or with a series replaced by NaN) hold NaN instead of a
# list
dfPivot = df_copy.pivot(index='lat',columns='lon',values='temperature')

# Time axis taken from the first file
times = pd.Index(dateList[0],name='time')
n_times = len(times)

# Fill a (time, lat, lon) array in one pass over the grid instead of running
# applymap and building a separate DataArray for every timestamp
cube = np.full((n_times,)+dfPivot.shape, np.nan)
for r, row in enumerate(dfPivot.values):
    for c, series in enumerate(row):
        if isinstance(series, list):
            # A series shorter than the time axis is padded with NaN
            n = min(len(series), n_times)
            cube[:n, r, c] = series[:n]

# Wrap the array with its coordinates and units
daConcat = xr.DataArray(data=cube,
                        dims=['time','lat','lon'],
                        coords=[times,dfPivot.index,dfPivot.columns])
daConcat.attrs['units'] = 'deg C'
ds = xr.Dataset({'temperature':daConcat})

In [10]:
# Write the mean firn temperature dataset for the DEC runs as NETCDF4,
# overwriting any existing file (mode='w'). Use the commented-out call
# instead when the INC runs have been processed.
dec_output = '/scratch/alpine/metm9666/project-2_processed-output-4/firn-temperature-mean-DEC.nc'
ds.to_netcdf(path=dec_output, mode='w', format='NETCDF4')
# ds.to_netcdf(path='/scratch/alpine/metm9666/project-2_processed-output-4/firn-temperature-mean-INC.nc',
#              mode='w',format='NETCDF4')