# Mooring Data from the Rockall Trough
EB1, WB1 & WB2

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import datetime
import xarray as xr
import pandas as pd
import os
import gsw
from pathlib import Path
import re

In [2]:
import warnings
# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation warnings raised by the libraries used below.
# Consider narrowing the filter to a specific category/module.
warnings.filterwarnings("ignore")

## Load data

In [3]:
# Root directory that load_moor_data() searches for the deployment folders.
# Alternative location: Path('./data/')
data_path =  Path('data/SAMS_moorings/')

Mooring              = rteb1_01_2014    

SerialNumber         = 11324      

WaterDepth           = 1800      

InstrDepth           = 730      

Start_Date           = 2014/07/18

Start_Time           = 12:30

End_Date             = 2015/06/20

End_Time             = 06:30

Latitude             = 57 05.960 N

Longitude            = 009 32.880 W

Columns              = YY:MM:DD:HH:T:C:P

In [4]:
def load_moor_data(data_path, moor, year,
                   nominal_depths=(50, 75, 100, 250, 500, 750, 1000, 1250, 1500, 1750)):
    """Load every MicroCAT record of one mooring deployment into one dataset.

    Files are looked up below ``data_path`` with the pattern
    ``{moor}_??_{year}*/microcat/{moor}_*.microcat``.  Each file starts with
    ``key = value`` header lines followed by whitespace-separated columns
    (year, month, day, decimal hour, temperature, conductivity, pressure).
    One dataset per file is built and all of them are concatenated along a
    ``depth`` dimension.

    Parameters
    ----------
    data_path : str or pathlib.Path
        Directory containing the deployment folders.
    moor : str
        Mooring identifier used in folder and file names (e.g. ``'rtwb1'``).
    year : int
        Deployment year used in the folder name.
    nominal_depths : sequence of numbers, optional
        Depth levels (m) to which the ``InstrDepth`` header value is snapped;
        the closest level is used as the ``depth`` coordinate.  On ties the
        first entry wins.

    Returns
    -------
    xarray.Dataset
        Variables ``temp``, ``pres`` and ``sal`` on dimensions
        ``(depth, time)``.  Global attributes come from the header of the
        first file (without ``SerialNumber``), with ``Mooring`` set to
        ``moor``.

    Raises
    ------
    FileNotFoundError
        If no file matches the search pattern.

    Notes
    -----
    NOTE(review): two instruments whose ``InstrDepth`` snaps to the same
    nominal level yield duplicate ``depth`` values.  xarray cannot align such
    datasets, which is the likely cause of "cannot reindex or align along
    dimension 'depth'" when deployments are later concatenated along
    ``time`` -- confirm against the instrument depths of the deployment.
    """
    header_field_lines = 10  # leading "key = value" lines that are parsed
    data_start_row = 11      # lines skipped before the numeric table starts

    def _dm_to_decimal(degrees, minutes, hemisphere):
        """Convert degrees and decimal minutes to signed decimal degrees."""
        decimal = float(degrees) + float(minutes) / 60
        # West and south are negative in the degrees_east/north convention.
        return -decimal if hemisphere in ('W', 'S') else decimal

    def _nearest_nominal_depth(instr_depth):
        """Return the entry of ``nominal_depths`` closest to ``instr_depth``."""
        return min(nominal_depths, key=lambda level: abs(level - instr_depth))

    def _read_header(file):
        """Parse the header into (global attributes, depth coordinate spec)."""
        attrs = {}
        coords = {}
        with open(file) as handle:
            # zip() stops after header_field_lines lines or at end of file,
            # so a truncated file does not raise StopIteration here.
            for _, line in zip(range(header_field_lines), handle):
                key, separator, value = line.partition('=')
                key = key.strip()
                if not separator or not key:
                    continue  # blank or malformed line: nothing to record
                tokens = value.split()
                if key in ('Latitude', 'Longitude'):
                    # e.g. "57 05.960 N" -> degrees, decimal minutes, hemisphere
                    attrs[key] = _dm_to_decimal(*tokens[:3])
                elif key == 'InstrDepth':
                    coords['depth'] = (
                        'depth', [_nearest_nominal_depth(float(tokens[0]))])
                else:
                    attrs[key] = tokens[0] if tokens else ''
        attrs['Latitude_units'] = 'degrees_north'
        attrs['Longitude_units'] = 'degrees_east'
        return attrs, coords

    def _load_microcat(file):
        """Read one MicroCAT file into a dataset with a length-1 depth axis."""
        attrs, coords = _read_header(file)
        columns = ['yyyy', 'mm', 'dd', 'hh', 'temp', 'cond', 'pres']
        table = pd.read_csv(file, sep=r'\s+', header=None,
                            skiprows=data_start_row, names=columns,
                            engine='python')

        # Build the timestamps in one vectorised step: calendar day plus the
        # decimal hour, rounded to microseconds (the resolution of
        # datetime.timedelta) so equal hour values map to equal timestamps.
        day = pd.to_datetime(
            table[['yyyy', 'mm', 'dd']].rename(
                columns={'yyyy': 'year', 'mm': 'month', 'dd': 'day'}))
        hour_offset = pd.to_timedelta(table['hh'], unit='h').dt.round('us')
        table.index = pd.DatetimeIndex(day + hour_offset, name='time')

        ds = xr.Dataset.from_dataframe(table[['temp', 'cond', 'pres']])
        ds.attrs = attrs
        ds = ds.expand_dims('depth').assign_coords(coords)
        # NOTE(review): gsw.SP_from_C expects conductivity in mS/cm --
        # confirm the unit used in the data files.
        ds['sal'] = gsw.SP_from_C(ds.cond, ds.temp, ds.pres)
        return ds.drop_vars('cond')

    files = sorted(
        Path(data_path).glob(f"{moor}_??_{year}*/microcat/{moor}_*.microcat"))
    if not files:
        raise FileNotFoundError(
            f"no MicroCAT files found for mooring {moor!r}, year {year} "
            f"under {data_path}")

    # A single concat avoids re-copying the accumulated data for every file.
    ds = xr.concat([_load_microcat(file) for file in files], dim='depth')

    ds.depth.attrs['units'] = 'm'

    ds.temp.attrs['standard_name'] = "sea_water_temperature"
    ds.temp.attrs['long_name'] = "In Situ Temperature of Sea Water"
    ds.temp.attrs['units'] = "degree_C"

    ds.sal.attrs['standard_name'] = "sea_water_salinity"
    ds.sal.attrs['long_name'] = "Practical Salinity on the PSS-78 scale"
    ds.sal.attrs['units'] = "unitless"

    ds.pres.attrs['standard_name'] = 'sea_water_pressure'
    ds.pres.attrs['long_name'] = 'Pressure of Sea Water'
    ds.pres.attrs['units'] = 'dbar'

    # For datetime64 coordinates the on-disk time units belong in `encoding`;
    # a 'units' key in `attrs` makes xarray refuse to serialise the variable.
    ds.time.encoding['units'] = 'days since 1950-01-01 00:00:00'

    ds.attrs['Mooring'] = f'{moor}'
    # The serial number of the first instrument does not describe the merged
    # dataset; tolerate headers that do not provide it.
    ds.attrs.pop('SerialNumber', None)

    return ds

In [12]:
%%time
moor = 'rtwb1'
ds = load_moor_data(data_path,moor,2014)
   
for year in np.arange(2015,2018):
    ds = xr.concat([ds,load_moor_data(data_path,moor,year)],dim='time')
    


ValueError: cannot reindex or align along dimension 'depth' because the index has duplicate values

In [10]:
# Reduce the merged record to daily means. The keep_attrs option is switched
# on for this block only, so attributes are carried through the reduction.
with xr.set_options(keep_attrs=True):
    ds1D = ds.resample(time = "1D").mean()

# Alternative spelling, currently disabled:
#ds1D = ds.resample(time = "1D").mean(keep_attrs=True);
#ds1D.time.attrs = ds.time.attrs

In [11]:
# Write the daily means to data/<moor>_all_merged.
# NOTE(review): the file name has no extension (e.g. '.nc') and the folder is
# hard-coded rather than derived from data_path -- confirm this is intended.
ds1D.to_netcdf(f'data/{moor}_all_merged')

In [None]:
# Quick look at the pressure records, with time on the horizontal axis.
ds.pres.plot.line(x='time')