In [1]:
from datetime import datetime
import dateutil
import os
import numpy as np
import shutil
import requests
import time
import re

## Setup Request Conditions

In [2]:
# Substrings used to pick reference designators out of the catalog.
site, node = 'CE05', 'GL382'
instrument, stream = 'CTD', 'ctd'

# Requested time window as naive datetimes; the omitted time fields of the
# start default to zero, i.e. midnight.
bdt = datetime(2020, 1, 1)
edt = datetime(2024, 12, 31, 23, 59, 59, 999999)

## Get Complete Catalog

In [3]:
def get_base_catalog(timeout=60):
    """Fetch the dataset names listed on the OOI gold-copy THREDDS catalog page.

    Downloads the top-level catalog HTML and collects the text that precedes
    ``/catalog.html`` in every ``a href`` link.

    Parameters
    ----------
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled server cannot hang the
        notebook indefinitely. Defaults to 60 seconds.

    Returns
    -------
    list of str
        Sorted, de-duplicated link names, excluding any that contain
        ``'thredds'``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or when the timeout expires.
    """
    base_catalog_url = "https://thredds.dataexplorer.oceanobservatories.org/thredds/catalog/ooigoldcopy/public/catalog.html"
    response = requests.get(base_catalog_url, timeout=timeout)
    # Fail loudly on an error page instead of silently returning an empty catalog.
    response.raise_for_status()
    pattern = r'a href=\"(.*?)/catalog.html"'
    # sorted(set(...)) yields the same ordered, unique strings as np.unique.
    link_names = sorted(set(re.findall(pattern, response.text)))
    # Links containing 'thredds' are dropped (presumably navigation links
    # rather than datasets; confirm against the live page).
    base_catalog = [str(name) for name in link_names if 'thredds' not in name]
    return base_catalog

In [4]:
# Download the catalog once and keep it for the matching step below.
catalog = get_base_catalog()
catalog

['CE01ISSM-MFD35-01-VEL3DD000-recovered_host-vel3d_cd_dcl_velocity_data_recovered',
 'CE01ISSM-MFD35-01-VEL3DD000-telemetered-vel3d_cd_dcl_velocity_data',
 'CE01ISSM-MFD35-02-PRESFA000-recovered_host-presf_abc_dcl_tide_measurement_recovered',
 'CE01ISSM-MFD35-02-PRESFA000-recovered_host-presf_abc_dcl_wave_burst_recovered',
 'CE01ISSM-MFD35-02-PRESFA000-recovered_inst-presf_abc_tide_measurement_recovered',
 'CE01ISSM-MFD35-02-PRESFA000-recovered_inst-presf_abc_wave_burst_recovered',
 'CE01ISSM-MFD35-02-PRESFA000-telemetered-presf_abc_dcl_tide_measurement',
 'CE01ISSM-MFD35-04-ADCPTM000-recovered_host-adcp_velocity_earth',
 'CE01ISSM-MFD35-04-ADCPTM000-recovered_inst-adcp_velocity_earth',
 'CE01ISSM-MFD35-04-ADCPTM000-recovered_inst-adcpt_m_instrument_dspec_recovered',
 'CE01ISSM-MFD35-04-ADCPTM000-recovered_inst-adcpt_m_instrument_log9_recovered',
 'CE01ISSM-MFD35-04-ADCPTM000-recovered_inst-adcpt_m_wvs_recovered',
 'CE01ISSM-MFD35-04-ADCPTM000-telemetered-adcp_velocity_earth',
 'CE01IS

## Find Matching Reference Designators

In [5]:
def find_matching_refdes(base_catalog: list, *, site_key=None, node_key=None,
                         instrument_key=None, stream_key=None):
    """Return the catalog entries that contain the requested keys in order.

    An entry matches when the site, node, instrument and stream keys all
    occur in it, in that order, with any text in between. The keys are
    inserted into a regular expression unescaped, so regex metacharacters in
    them keep their regex meaning.

    Parameters
    ----------
    base_catalog : list
        Candidate reference-designator strings. Non-string entries are skipped.
    site_key, node_key, instrument_key, stream_key : str, optional
        Keys to search for. Any key left as ``None`` falls back to the
        notebook-level ``site``, ``node``, ``instrument`` or ``stream``
        variable respectively.

    Returns
    -------
    list of str
        The matching entries, in catalog order.

    Raises
    ------
    re.error
        If the keys do not form a valid regular expression.
    NameError
        If a key is omitted and the corresponding notebook variable is not defined.
    """
    # Only touch the notebook-level settings for keys the caller did not supply.
    site_key = site if site_key is None else site_key
    node_key = node if node_key is None else node_key
    instrument_key = instrument if instrument_key is None else instrument_key
    stream_key = stream if stream_key is None else stream_key

    # Compile once, outside the loop; a malformed pattern now raises here
    # instead of being swallowed and silently producing an empty result.
    matcher = re.compile(
        f'(.*?{site_key}.*?{node_key}.*?{instrument_key}.*?{stream_key}.*)'
    )

    matching_refdes = []
    for refdes in base_catalog:
        # Preserve the original best-effort behavior for non-string entries.
        if not isinstance(refdes, str):
            continue
        found = matcher.search(refdes)
        if found:
            matching_refdes.append(found.group(1))
    return matching_refdes

In [6]:
# Narrow the catalog to entries matching the request settings
# (site, node, instrument, stream).
matching_refdes = find_matching_refdes(catalog)
matching_refdes

['CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered',
 'CE05MOAS-GL382-05-CTDGVM000-telemetered-ctdgv_m_glider_instrument']

## Find Files Associated With a Reference Designator

In [7]:
def get_remote_file_locations(refdes, timeout=60):
    """List the download URLs of the NetCDF files for one reference designator.

    Reads the THREDDS catalog page of ``refdes``, collects every
    ``dataset=ooigoldcopy...nc`` path on it, and keeps the ones whose file
    name contains the third and fourth dash-separated fields of ``refdes``
    joined by a dash (e.g. ``05-CTDGVM000``).

    Parameters
    ----------
    refdes : str
        Dash-separated reference designator as listed in the base catalog.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Defaults to 60 seconds.

    Returns
    -------
    list of str
        Sorted, de-duplicated ``fileServer`` URLs.

    Raises
    ------
    IndexError
        If ``refdes`` has fewer than four dash-separated fields.
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or when the timeout expires.
    """
    rd = refdes.split('-')
    # Named distinctly so it cannot be confused with the notebook-level
    # `instrument` setting.
    instrument_code = '-'.join((rd[2], rd[3]))

    refdes_catalog_url = f'https://thredds.dataexplorer.oceanobservatories.org/thredds/catalog/ooigoldcopy/public/{refdes}/catalog.html'
    response = requests.get(refdes_catalog_url, timeout=timeout)
    # Fail loudly on an error page instead of silently returning no files.
    response.raise_for_status()

    pattern = r'dataset=(ooigoldcopy.*?\.nc)\"'
    dataset_paths = sorted(set(re.findall(pattern, response.text)))

    # Keep only files named after this instrument; other .nc files in the
    # folder are dropped (presumably data from co-located instruments; confirm).
    matching_refdes_catalog = [
        path for path in dataset_paths
        if instrument_code in path.split('/')[-1]
    ]

    thredds_base_url = 'https://thredds.dataexplorer.oceanobservatories.org/thredds/fileServer/'
    return [requests.compat.urljoin(thredds_base_url, nc) for nc in matching_refdes_catalog]

In [8]:
# Continue with the first match only.
refdes = matching_refdes[0]

In [9]:
# List the download URLs of the NetCDF files under the chosen reference designator.
remote_locs = get_remote_file_locations(refdes)
remote_locs

['https://thredds.dataexplorer.oceanobservatories.org/thredds/fileServer/ooigoldcopy/public/CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0001_CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20150121T220412.590480-20150422T235653.527590.nc',
 'https://thredds.dataexplorer.oceanobservatories.org/thredds/fileServer/ooigoldcopy/public/CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0001_CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20150423T001906.635310-20150428T235959.338440.nc',
 'https://thredds.dataexplorer.oceanobservatories.org/thredds/fileServer/ooigoldcopy/public/CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0002_CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20160928T210412.939240-20161218T235958.602080.nc',
 'https://thredds.dataexplorer.oceanobservatories.or

## Sift By Time

In [10]:
def find_deployment_files(refdes_catalog, bdt, edt):
    """Select the files whose time range overlaps the window ``[bdt, edt]``.

    Each entry's time range is read from its file name: the text after the
    last ``_``, with ``.nc`` removed, is split on ``-`` into a start and an
    end timestamp (e.g. ``20210325T205917.143950-20210404T062658.304230``).

    Parameters
    ----------
    refdes_catalog : iterable of str
        File paths or URLs; only the part after the last ``/`` is inspected.
    bdt, edt : datetime.datetime
        Start and end of the requested window, compared inclusively. They
        must be comparable with the parsed file timestamps.

    Returns
    -------
    list of str
        The entries of ``refdes_catalog`` that overlap the window, in input order.

    Raises
    ------
    ValueError
        If a file name's trailing token does not split into exactly two parts
        on ``-``.
    """
    # Import the submodule explicitly: a bare `import dateutil` does not
    # guarantee that `dateutil.parser` is loaded on every dateutil release.
    from dateutil import parser as date_parser

    matching_deployments = []
    for rds in refdes_catalog:
        file_name = rds.split('/')[-1]
        time_range = file_name.split('_')[-1].replace('.nc', '')
        file_bdt_str, file_edt_str = time_range.split('-')
        file_bdt = date_parser.parse(file_bdt_str)
        file_edt = date_parser.parse(file_edt_str)
        # Overlap: either file endpoint lies inside the window, or either
        # window endpoint lies inside the file's range.
        if (bdt <= file_bdt <= edt or bdt <= file_edt <= edt
                or file_bdt <= bdt <= file_edt or file_bdt <= edt <= file_edt):
            matching_deployments.append(rds)

    return matching_deployments

In [11]:
# Keep only the files whose time range overlaps the requested [bdt, edt] window.
matching_files = find_deployment_files(remote_locs, bdt, edt)
matching_files

['https://thredds.dataexplorer.oceanobservatories.org/thredds/fileServer/ooigoldcopy/public/CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0007_CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20210325T205917.143950-20210404T062658.304230.nc',
 'https://thredds.dataexplorer.oceanobservatories.org/thredds/fileServer/ooigoldcopy/public/CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0007_CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20210404T062658.304230-20210404T062658.304230.nc',
 'https://thredds.dataexplorer.oceanobservatories.org/thredds/fileServer/ooigoldcopy/public/CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0008_CE05MOAS-GL382-05-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20210429T165207.665770-20210710T235958.151580.nc',
 'https://thredds.dataexplorer.oceanobservatories.or