In [8]:
# written with e3sm_unified
from scipy.io import loadmat
import numpy as np
import xarray
import pandas as pd
from mpas_analysis.shared.io import write_netcdf
from datetime import date

In [11]:
# Input/output configuration for the OSNAP preprocessing run.
# The raw data file (.mat) was downloaded from
# https://duke.app.box.com/s/4qbi9ocpvmxdv44wnx2g5rgoyy85jmz6
# The data is made available by the OSNAP project, see https://www.o-snap.org/data-access/
# NOTE(review): the paths below are absolute, machine-specific locations;
# edit them before running this notebook anywhere else.

input_datafile = '/lcrc/group/e3sm/ac.abarthel/scratch/osnap_transect_data/OSNAP_Gridded_TSV_201408_201805_2021.mat'
years = '201408-201805'
output_path = '/lcrc/group/e3sm/ac.abarthel/scratch/osnap_transect_data/new'

In [12]:
# Step 1: convert the .mat product into a 30-day-mean netCDF file (returns its path).
# Step 2: derive the JFM, JAS and ANN climatology files from that netCDF file.
# NOTE: generate_netcdf_fromOSNAPmat and generate_climatologies_from30day are
# defined in a cell that sits further down this notebook; run that cell first
# (or move it above this one), otherwise a fresh "Restart & Run All" fails here
# with a NameError.
ncfile = generate_netcdf_fromOSNAPmat(input_datafile, output_path, period=years)
generate_climatologies_from30day(ncfile, output_path, period=years)

In [10]:
def generate_netcdf_fromOSNAPmat(input_datafile, output_path, period='201408-201805'):
    """
    Load the gridded OSNAP .mat product and write its fields to a netCDF file.

    Parameters
    ----------
    input_datafile : str
        path to local .mat datafile containing 30-day mean OSNAP data
        (downloaded from https://www.o-snap.org/data-access/)
    output_path : str
        path (directory) in which to write the output netcdf file
    period : str, optional
        years of the original OSNAP data; only used here to build the name of
        the output file

    Returns
    -------
    outfile : str
        path to netcdf file containing 30-day mean OSNAP T, S, normal
        velocities, named 'OSNAP_30day_{period}.{generation_date}.nc'

    Raises
    ------
    IOError
        if ``input_datafile`` cannot be read; the error is reported and then
        re-raised instead of letting the function continue without data
    """
    # Authors
    # -------
    # Alice Barthel

    # DOCUMENTATION from 'OSNAP Gridded Products 2021.doc'
    # -----------------------------------------------------
    # Gridded property and velocity fields
    # The file “OSNAP_Gridded_TSV_201408_201805_2021.mat” contains 30-day mean
    # potential temperature and salinity fields at OSNAP for the period
    # 31-Jul-2014 to 09-Jun-2018 (Table 1).
    #
    # Data variable   Content
    # TIME            Start date of each 30-day period [days since 1950-01-01]
    # LONGITUDE       Longitude of the OSNAP grid [deg East]
    # LATITUDE        Latitude of the OSNAP grid [deg North]
    # DEPTH           Depth level of the OSNAP grid [m]
    # THETA           Potential temperature referenced to pressure of 0 dbar [deg C]
    # PSAL            Practical salinity
    # VELO            Velocity normal to the section [meter/second]
    # Table 1.  File contents for the gridded data product.
    #
    # NOTE(review): the code below reads fields named 'LON', 'LAT' and 'AREA',
    # whereas the table above lists LONGITUDE/LATITUDE and no AREA entry --
    # confirm the field names against the actual .mat file.

    # Navigating the newly loaded data structure from .mat
    # -----------------------------------------------------
    # iterating over var: raw['osnap'][0][0][-1][0][0][-1][0][ii][0][0]
    # ii = 0
    # old var name is raw['osnap'][0][0][-1][0][0][-1][0][ii][0][0]
    # long name for a var is raw['osnap'][0][0][-1][0][0][-1][0][ii][1][0]
    # units is raw['osnap'][0][0][-1][0][0][-1][0][ii][2][0]
    # -----------------------------------------------------

    # Load
    try:
        raw = loadmat(input_datafile)
    except IOError:
        # Report which file failed, then propagate: without `raw` nothing
        # below can work, and continuing would only raise a confusing
        # NameError further down.
        print("could not read", input_datafile)
        raise

    # All fields live in the single record stored under the 'osnap' key
    osnap = raw['osnap'][0][0]

    def _field(dims, key, long_name, units, squeeze=False):
        """Wrap one field of the OSNAP record in a DataArray with metadata."""
        data = osnap[key]
        if squeeze:
            # 1D fields are squeezed to drop singleton dimensions
            data = data.squeeze()
        return xarray.DataArray.from_dict({'dims': dims,
                                           'data': data,
                                           'attrs': {'long_name': long_name,
                                                     'units': units}})

    # Generate data arrays
    transect_dims = ('nPoints', 'nz', 'time')
    dsTransect = xarray.Dataset({
        'lon': _field(('nPoints',), 'LON', 'longitude', 'degrees',
                      squeeze=True),
        'lat': _field(('nPoints',), 'LAT', 'latitude', 'degrees',
                      squeeze=True),
        'area': _field(('nPoints', 'nz'), 'AREA', 'area of each grid cell',
                       'm2'),
        'startTime': _field(('time',), 'TIME',
                            'start date of each 30-day period',
                            'days since 1950-01-01 00:00:00', squeeze=True),
        'z': _field(('nz',), 'DEPTH', 'depth', 'm', squeeze=True),
        'potentialTemperature': _field(transect_dims, 'THETA',
                                       'potential temperature', 'deg C'),
        'salinity': _field(transect_dims, 'PSAL', 'practical salinity',
                           'PSU'),
        'normalVelocity': _field(
            transect_dims, 'VELO',
            'velocity normal to the section (positive poleward)', 'm/s')})

    # The generation date is part of the file name
    formatted_date = date.today().strftime('%Y%m%d')
    outfile = f'{output_path}/OSNAP_30day_{period}.{formatted_date}.nc'
    write_netcdf(dsTransect, outfile)

    return outfile


def generate_climatologies_from30day(infile, output_path, period='201408-201805', transect_split = 81):
    """
    Preprocess the 30-day OSNAP data into seasonal (JFM, JAS) and annual (ANN)
    climatologies, each written for the full, West and East transect.

    Parameters
    ----------
    infile : str
        full path to .nc datafile containing 30-day mean OSNAP data
    output_path : str
        full path (directory) in which to write the output netcdf files
    period : str, optional
        years of the original OSNAP data from which the climatology was
        calculated; only used here to build the output file names
    transect_split : int, optional
        index along the transect at which it is split: points
        [:transect_split] go to the West files and [transect_split:] to the
        East files (default = 81)

    Output
    ------
    9 netcdf files containing OSNAP transects (full, East, West) for each
    climatology (JFM, JAS, ANN); naming convention is e.g.
    'OSNAP_East_JFM_{period}.{generation_date}.nc'.  The ANN fields are the
    plain mean over all 30-day records (time axis), the seasonal ones come
    from calc_JFM_mean / calc_JAS_mean.

    Raises
    ------
    IOError
        if ``infile`` cannot be opened; the error is reported and re-raised
    """
    # Authors
    # -------
    # Alice Barthel

    try:
        tr = xarray.open_dataset(infile)
    except IOError:
        # Report which file failed, then propagate: nothing below can run
        # without the dataset.
        print("could not read", infile)
        raise

    formatted_date = date.today().strftime('%Y%m%d')

    # (variable name, long_name, units) of the three climatology fields
    field_specs = (
        ('potentialTemperature', 'potential temperature', 'deg C'),
        ('salinity', 'practical salinity', 'PSU'),
        ('normalVelocity',
         'velocity normal to the section (positive poleward)', 'm/s'),
    )

    # (file-name prefix, selection along nPoints) for full, West and East
    sections = (
        ('', slice(None)),
        ('West_', slice(None, transect_split)),
        ('East_', slice(transect_split, None)),
    )

    def _climatology_fields(compute):
        """Build the three (nPoints, nz) fields from compute(variable name)."""
        return {name: xarray.DataArray.from_dict(
                    {'dims': ('nPoints', 'nz'),
                     'data': compute(name),
                     'attrs': {'long_name': long_name, 'units': units}})
                for name, long_name, units in field_specs}

    # The context manager closes the input file once all outputs are written
    with tr:
        # sign of z is flipped relative to the input file
        z = -1.0*tr.variables['z']
        area = tr.variables['area']
        lon = tr.variables['lon']
        lat = tr.variables['lat']

        def _write_transects(label, fields):
            """Write the full, West and East files for one climatology."""
            for prefix, pts in sections:
                contents = {'lon': lon[pts],
                            'lat': lat[pts],
                            'area': area[pts, :],
                            'z': z}
                for name, field in fields.items():
                    contents[name] = field[pts, :]
                write_netcdf(
                    xarray.Dataset(contents),
                    f'{output_path}/OSNAP_{prefix}{label}_{period}.{formatted_date}.nc')

        ### Preprocessing the 30day data into seasonal JFM, JAS climatologies
        for label, calc_mean in (('JFM', calc_JFM_mean),
                                 ('JAS', calc_JAS_mean)):
            _write_transects(
                label,
                _climatology_fields(lambda name: calc_mean(tr, name)))

        ### Preprocessing the 30day data into annual climatologies
        # Mean over the time axis (axis 2).  The fields go through the same
        # constructor as the seasonal ones so that the ANN files carry the
        # same long_name/units attributes.
        _write_transects(
            'ANN',
            _climatology_fields(
                lambda name: tr.variables[name].mean(axis=2).values))

####### Functions
# JAS
def calc_JAS_mean(tr, varname):
    """
    Day-weighted July-August-September (JAS) mean of a 30-day-mean field.

    tr : dataset-like object
        its ``variables`` mapping must hold 'startTime' (start date of each
        30-day record, convertible to a pandas DatetimeIndex) and ``varname``
    varname : str
        name of the variable to average; it is indexed as [:, :, record]

    returns the 2D weighted mean over the first two dimensions of the
    variable.  Each record is weighted by the number of its 30 days that are
    counted as falling in JAS.  A message is printed when the total number of
    days is not a multiple of 92 (one JAS season).
    """
    start_times = tr.variables['startTime'][:]
    dates = pd.DatetimeIndex(start_times)
    field = tr.variables[varname][:]
    weighted_sum = np.zeros([field.shape[0], field.shape[1]])
    n_days = 0

    # skipping the July-Nov 2014 records (first five) because not full JAS
    for idx in range(5, len(start_times)):
        stamp = dates[idx]
        if stamp.month in (7, 8):
            # a July or Aug start always counts for 30 days
            weight = 30
        elif stamp.month == 6:
            # a June start counts only for the number of July days (/30)
            weight = stamp.day - 1
        elif stamp.month == 9:
            # a Sept start counts only for the remaining Sept days (/30)
            weight = 30 - stamp.day + 1
        else:
            # record starts outside of the months handled above
            continue
        weighted_sum += weight * field[:, :, idx]
        n_days += weight

    if n_days % 92 != 0:
        print('number of days accounted for is: %s days when the expected number for JAS should be divisible by 92 (1 JAS season)'%(n_days))

    return weighted_sum * 1./n_days

# JFM
def calc_JFM_mean(tr, varname):
    """
    Day-weighted January-February-March (JFM) mean of a 30-day-mean field.

    tr : dataset-like object
        its ``variables`` mapping must hold 'startTime' (start date of each
        30-day record, convertible to a pandas DatetimeIndex) and ``varname``
    varname : str
        name of the variable to average; it is indexed as [:, :, record]

    returns the 2D weighted mean over the first two dimensions of the
    variable.  Each record is weighted by the number of its 30 days that are
    counted as falling in JFM.  A message is printed when the total number of
    days differs from 361 (the expected number for JFM over 4 years).
    """
    start_times = tr.variables['startTime'][:]
    dates = pd.DatetimeIndex(start_times)
    field = tr.variables[varname][:]
    weighted_sum = np.zeros([field.shape[0], field.shape[1]])
    n_days = 0

    # the first five records are skipped, as in the JAS calculation
    for idx in range(5, len(start_times)):
        month = dates[idx].month
        day = dates[idx].day
        if month in (1, 2):
            # a Jan or Feb start always counts for full time period (30 days)
            weight = 30
        elif month == 12 and day >= 3:
            # a Dec start only counts if it starts 3rd Dec or later
            # (ie includes Jan days)
            weight = day - 2
        elif month == 3:
            if day <= 2:
                # an early March start counts for 30 days
                weight = 30
            else:
                # a later March start counts for however many days are left
                # in March
                weight = 30 - day + 2
        else:
            # record does not contribute to JFM
            continue
        weighted_sum += weight * field[:, :, idx]
        n_days += weight

    if n_days != 361:
        print('number of days accounted for is: %s days when the expected number for JFM over 4 years is 361'%(n_days))

    return weighted_sum * 1./n_days