This notebook downloads and preprocesses ocean-current data from the IMOS ANMN WATR20 mooring.


See:
- https://catalogue-imos.aodn.org.au/geonetwork/srv/api/records/ae86e2f5-eaaf-459e-a405-e654d85adb9c
- http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/WA/WATR20/Velocity/catalog.html

In [1]:
from pathlib import Path
import xarray as xr
import pandas as pd
import numpy as np
from urllib import request
import os, shutil
from matplotlib import pyplot as plt

In [2]:
from torchvision.datasets.utils import download_url, extract_archive, download_and_extract_archive

In [None]:
import uptide

# Constituent names used by default when generating tidal features.
# Background: https://en.wikipedia.org/wiki/Theory_of_tides#Harmonic_analysis
default_tidal_constituents = [
    # Semi-diurnal
    'M2', 'S2', 'N2', 'K2',
    # Diurnal
    'K1', 'O1', 'P1', 'Q1',
    # Short period
    'M4', 'M6', 'S4', 'MK3',
    # Long period
    'MM', 'SSA', 'SA',
]

def generate_tidal_periods(t:pd.Series, constituents:list=default_tidal_constituents):
    """
    Build one unit-amplitude, zero-phase harmonic signal per tidal constituent.

    For every constituent the signal is ``f * cos(omega * dt + phi + u)``,
    where ``dt`` is the whole number of seconds elapsed since the first
    timestamp in ``t``, and ``f``, ``omega``, ``phi`` and ``u`` are read from
    an ``uptide.Tides`` object whose initial time is set to that first
    timestamp.

    Args:
        t: Non-empty series of timestamps; its first element is used as the
            reference time.
        constituents: Constituent names handed to ``uptide.Tides``. The list
            is only read, never modified.

    Returns:
        A DataFrame indexed by ``t`` with one column per constituent.
    """
    tide = uptide.Tides(constituents)

    # Take the first timestamp by position. ``t`` is typically indexed by its
    # own timestamps, and integer keys on such an index (``t[0]``) are a
    # deprecated positional fallback in recent pandas.
    t0 = t.iloc[0]
    tide.set_initial_time(t0)

    # Elapsed time since t0, truncated to whole seconds.
    elapsed = (t - t0).dt.total_seconds().to_numpy().astype(int)

    # Amplitude is fixed at 1 and phase at 0, so they drop out of the formula.
    # Zipping only per-constituent sequences guarantees that every constituent
    # gets a column, even when ``t`` has fewer samples than constituents.
    eta = {
        name: f * np.cos(omega * elapsed + phi + u)
        for name, f, omega, phi, u in zip(
            tide.constituents, tide.f, tide.omega, tide.phi, tide.u
        )
    }
    return pd.DataFrame(eta, index=t)

In [30]:
# 'ANMN Two Rocks, WA, 204m mooring, Jul2009 - Dec2009. Preprocessed with DepthPP.'

def get_current_timeseries(
    cache_folder=Path("../data/raw/IMOS_ANMN/"),
    outfile=Path('../data/processed/currents/MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc')
    ):
    """
    Download the WATR20 velocity files, merge them and cache the result as netCDF.

    If ``outfile`` already exists nothing is downloaded or recomputed.
    Otherwise each listed file is fetched into ``cache_folder``, a fixed set of
    variables is taken at ``HEIGHT_ABOVE_SENSOR`` index 18, the deployments are
    concatenated along ``TIME``, samples with ``DEPTH <= 150`` are masked,
    timestamps are rounded to 10 minutes, rows without a full velocity vector
    are dropped, and the tidal columns from ``generate_tidal_periods`` are
    merged in before writing.

    Args:
        cache_folder: Directory the raw ``.nc`` files are downloaded into
            (``str`` or ``Path``).
        outfile: Path of the processed netCDF file (``str`` or ``Path``).

    Returns:
        ``outfile`` as a ``Path``.
    """
    cache_folder = Path(cache_folder)
    outfile = Path(outfile)

    # The processed file acts as the cache: reuse it when present.
    if outfile.exists():
        return outfile

    files = [
        "IMOS_ANMN-WA_AETVZ_20090715T080000Z_WATR20_FV01_WATR20-0907-Continental-194_END-20090716T181317Z_C-20191122T052830Z.nc",
        "IMOS_ANMN-WA_AETVZ_20100409T080000Z_WATR20_FV01_WATR20-1004-Continental-194_END-20100430T084500Z_C-20191122T053845Z.nc",
        "IMOS_ANMN-WA_AETVZ_20101222T080000Z_WATR20_FV01_WATR20-1012-Continental-194_END-20110518T051500Z_C-20200916T020035Z.nc",
        "IMOS_ANMN-WA_AETVZ_20110608T080000Z_WATR20_FV01_WATR20-1106-Continental-194_END-20111122T035000Z_C-20200916T025619Z.nc",
        "IMOS_ANMN-WA_AETVZ_20111221T060300Z_WATR20_FV01_WATR20-1112-Continental-194_END-20120704T050500Z_C-20200916T043212Z.nc",
        "IMOS_ANMN-WA_AETVZ_20120726T044000Z_WATR20_FV01_WATR20-1207-Continental-194_END-20130204T044000Z_C-20200916T032027Z.nc",

        "IMOS_ANMN-WA_AETVZ_20130221T080000Z_WATR20_FV01_WATR20-1302-Continental-194_END-20131003T035000Z_C-20180529T020609Z.nc",
        "IMOS_ANMN-WA_AETVZ_20131111T080000Z_WATR20_FV01_WATR20-1311-Continental-194_END-20140519T035000Z_C-20200114T033335Z.nc",
        "IMOS_ANMN-WA_AETVZ_20140710T080000Z_WATR20_FV01_WATR20-1407-Continental-194_END-20150121T021500Z_C-20180529T055902Z.nc",
        "IMOS_ANMN-WA_AETVZ_20150213T080000Z_WATR20_FV01_WATR20-1502-Continental-194_END-20150424T134002Z_C-20200114T035347Z.nc",
        "IMOS_ANMN-WA_AETVZ_20150914T080000Z_WATR20_FV01_WATR20-1509-Continental-194_END-20160331T043000Z_C-20180601T013623Z.nc",
        "IMOS_ANMN-WA_AETVZ_20160427T080000Z_WATR20_FV01_WATR20-1604-Continental-194_END-20160531T021800Z_C-20180531T071709Z.nc",
        # Disabled in the original list (not downloaded):
        #     "IMOS_ANMN-WA_AETVZ_20170512T080000Z_WATR20_FV01_WATR20-1705-Continental-194_END-20170717T014558Z_C-20190805T004647Z.nc",
        "IMOS_ANMN-WA_AETVZ_20171204T080000Z_WATR20_FV01_WATR20-1712-Continental-194_END-20180618T030000Z_C-20180620T233149Z.nc",
        "IMOS_ANMN-WA_AETVZ_20180802T080000Z_WATR20_FV01_WATR20-1807-Continental-194_END-20190225T054500Z_C-20190227T001343Z.nc",
        "IMOS_ANMN-WA_AETVZ_20190307T080000Z_WATR20_FV01_WATR20-1903-Continental-194_END-20190911T003144Z_C-20200114T045053Z.nc",
        "IMOS_ANMN-WA_AETVZ_20190926T080000Z_WATR20_FV01_WATR20-1909-Continental-194_END-20200326T030000Z_C-20200420T064334Z.nc",
    ]
    base = "http://thredds.aodn.org.au/thredds/fileServer/IMOS/ANMN/WA/WATR20/Velocity/"

    # Download files
    for f in files:
        download_url(base + f, cache_folder)

    # Load and merge. The source datasets are closed again once the merged
    # data has been pulled into memory, so no file handles are left open.
    keep_vars = ['VCUR', 'UCUR', 'WCUR', 'TEMP', 'PRES_REL', 'DEPTH', 'ROLL', 'PITCH']
    opened = []
    try:
        for f in files:
            opened.append(xr.open_dataset(cache_folder / f))
        selected = [x[keep_vars].isel(HEIGHT_ABOVE_SENSOR=18) for x in opened]
        xd = xr.concat(selected, dim='TIME').load()
    finally:
        for x in opened:
            x.close()

    xd = xd.where(xd.DEPTH > 150)  # remove outliers

    # '10min' is the non-deprecated spelling of the old '10T' alias.
    xd['TIME'] = xd['TIME'].dt.round('10min')
    xd = xd.dropna(dim='TIME', subset=['VCUR', 'UCUR', 'WCUR'])
    # xd = xd.resample(TIME='30T').first()

    # Add tides, these are features that can be forecast
    t = xd.TIME.to_series()
    df_eta = generate_tidal_periods(t)
    xd = xd.merge(df_eta)

    # Cache to nc; make sure the target directory exists first.
    outfile.parent.mkdir(parents=True, exist_ok=True)
    xd.to_netcdf(outfile)
    print(f'wrote "{outfile}" with size {outfile.stat().st_size*1e-6:2.2f} MB')
    return outfile

In [40]:
# Show the dataset as a table without the HEIGHT_ABOVE_SENSOR and NOMINAL_DEPTH columns.
# NOTE(review): in the cells shown here `xd` is only assigned inside
# get_current_timeseries(), so this cell presumably relies on leftover
# interactive state — confirm before running top to bottom.
xd.to_dataframe().drop(columns=['HEIGHT_ABOVE_SENSOR', 'NOMINAL_DEPTH'])#.columns#[['VCUR', 'UCUR', 'WCUR', 'TEMP', 'PRES_REL', 'DEPTH', 'ROLL', 'PITCH']]

Unnamed: 0_level_0,VCUR,UCUR,WCUR,TEMP,PRES_REL,DEPTH,ROLL,PITCH,LATITUDE,LONGITUDE,...,O1,P1,Q1,M4,M6,S4,MK3,MM,SSA,SA
TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-07-15 08:00:00,-0.396391,0.089687,-0.009671,18.549999,205.076004,203.550812,4.6,-3.4,-31.728650,115.037217,...,0.286288,0.116457,-1.014973,-0.146817,-0.801534,-0.500000,0.370082,0.132683,-0.686775,-0.395743
2009-07-15 08:10:00,-0.407620,0.085398,-0.019875,18.650000,205.078003,203.552795,4.6,-2.4,-31.728650,115.037217,...,0.242810,0.159551,-1.031149,-0.304345,-0.900573,-0.642788,0.494417,0.134147,-0.686601,-0.395853
2009-07-15 08:20:00,-0.365314,0.104038,0.000991,18.730000,205.076996,203.551788,4.8,-2.7,-31.728650,115.037217,...,0.198932,0.202343,-1.045759,-0.453239,-0.942304,-0.766044,0.610654,0.135610,-0.686427,-0.395963
2009-07-15 08:30:00,-0.406632,0.119376,-0.003729,18.799999,205.067001,203.541901,4.7,-2.4,-31.728650,115.037217,...,0.154727,0.244751,-1.058780,-0.589276,-0.924071,-0.866025,0.716890,0.137073,-0.686253,-0.396072
2009-07-15 08:40:00,-0.383744,0.090066,-0.008921,18.860001,205.065994,203.540894,4.9,-2.9,-31.728650,115.037217,...,0.110268,0.286697,-1.070194,-0.708598,-0.847034,-0.939693,0.811384,0.138535,-0.686080,-0.396182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-03-26 01:00:00,-0.436635,-0.784922,-0.012147,16.610001,197.384003,195.919662,-2.9,3.0,-31.728717,115.042133,...,-0.734741,0.190139,0.964792,0.882484,0.770444,0.505439,1.028587,-0.881951,0.990514,0.997626
2020-03-26 01:30:00,-0.355067,-0.845100,-0.005201,16.629999,197.408005,195.943497,-2.7,3.0,-31.728717,115.042133,...,-0.629257,0.316317,0.895545,0.957914,0.933774,0.006292,0.851981,-0.880483,0.990416,0.997601
2020-03-26 02:00:00,-0.568277,-0.816935,-0.024944,16.660000,197.412994,195.948425,-2.6,2.9,-31.728717,115.042133,...,-0.514470,0.437113,0.814067,0.793395,0.584762,-0.494541,0.551159,-0.878996,0.990316,0.997576
2020-03-26 02:30:00,-0.306141,-0.773147,-0.028096,16.719999,197.419006,195.954407,-2.6,2.7,-31.728717,115.042133,...,-0.392074,0.550470,0.721473,0.430136,-0.085096,-0.862862,0.169980,-0.877489,0.990217,0.997551


In [7]:
# for x in xds:
#     x.DEPTH.plot()
#     plt.ylim(190, 210)

# plt.show()
# for x in xds:
#     x.plot.scatter('LONGITUDE', 'LONGITUDE')
# plt.show()

# xd['VCUR'].plot(alpha=0.5)
# xd['UCUR'].plot(alpha=0.5)
# xd['WCUR'].plot(alpha=0.5)

PosixPath('../data/processed/currents/MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc')

43.107293