In [1]:
from datetime import datetime
import fsspec
import numpy as np
import os
import requests
from requests.compat import urljoin
import shutil
import urllib.request

from ormhw.core import DATA_DIR
from ormhw.blobtracker import get_blobtracker

## Download CUTI-BEUTI

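Note: The cell below only locates the most recent CUTI and BEUTI uploads and prints their URLs. You will need to download the printed files manually into the `cuti_beuti` data directory.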

In [None]:
base = 'https://www.mjacox.com/wp-content/uploads/'

# The upload location on the mjacox website is not guaranteed to stay the same,
# so look through every <year>/<month>/ upload folder of the current year
# for NetCDF files rather than hard-coding a single URL.
remote = fsspec.filesystem('http')
year = datetime.now().year
months = [str(month).zfill(2) for month in range(1, 13)]
all_files = []
for month in months:
    url = urljoin(urljoin(base, str(year)) + '/', month + '/')
    files = remote.glob(url + '*.nc')
    # Add each month's listing exactly once (the listing used to be appended
    # once per file it contained, which only np.unique was hiding).
    all_files.extend(files)

all_files = np.unique(all_files)
cuti_matches = [file for file in all_files if 'CUTI_daily' in file]
beuti_matches = [file for file in all_files if 'BEUTI_daily' in file]

# Fail with an actionable message instead of a bare IndexError when nothing was found.
if not cuti_matches or not beuti_matches:
    raise FileNotFoundError(
        f'Could not find both CUTI_daily and BEUTI_daily NetCDF files under {base}{year}/. '
        'The files may have been uploaded in a different year or moved.'
    )
cuti_file = cuti_matches[0]
beuti_file = beuti_matches[0]

# Directory where the manually downloaded files are expected to be placed.
save_directory = os.path.join(DATA_DIR, 'cuti_beuti')
os.makedirs(save_directory, exist_ok = True)

print(cuti_file)
print(beuti_file)

## Download GEBCO

In [2]:
%%time
url = 'https://www.bodc.ac.uk/data/open_download/gebco/gebco_2023/zip/' # GEBCO zip remote location.
save_directory = os.path.join(DATA_DIR,'gebco')
os.makedirs(save_directory, exist_ok = True) # Make the save directory.
save_zip = os.path.join(save_directory,'gebco_2023.zip') # Define save filepath.
gebco_nc = os.path.join(save_directory,'GEBCO_2023.nc') # Presence of this file means the archive was already unpacked.
if not os.path.isfile(save_zip) and not os.path.isfile(gebco_nc):
    # Stream into a temporary '.part' file and only rename it to the real zip
    # name once the transfer finished, so an interrupted download can never be
    # mistaken for a complete archive on the next run.
    partial_zip = save_zip + '.part'
    try:
        with requests.get(url, stream = True) as req: # Download zip file.
            if not req.ok:
                # Do not write an HTTP error page to disk as if it were the archive.
                raise ConnectionError(f'HTTP {req.status_code} while downloading {url}')
            req.raw.decode_content = True # Undo any transfer encoding (gzip/deflate) on the raw stream.
            with open(partial_zip, 'wb') as fileobj:
                shutil.copyfileobj(req.raw, fileobj)
        os.replace(partial_zip, save_zip)
    finally:
        # Only still present when the download failed or was interrupted.
        if os.path.isfile(partial_zip):
            os.remove(partial_zip)

if os.path.isfile(save_zip):
    shutil.unpack_archive(save_zip, save_directory) # Unpack zip file in gebco directory.
    os.remove(save_zip)

CPU times: user 458 µs, sys: 1.26 ms, total: 1.71 ms
Wall time: 1.11 ms


## Download Blobtracker

In [4]:
%%time
# Keep a local CSV copy of the blobtracker table; the table is only fetched
# when no copy exists yet.
save_directory = os.path.join(DATA_DIR, 'blobtracker')
save_filepath = os.path.join(save_directory, 'blobtracker.csv')
os.makedirs(save_directory, exist_ok=True)  # Ensure the destination folder is there.
csv_is_cached = os.path.isfile(save_filepath)
if not csv_is_cached:
    data = get_blobtracker()
    data.to_csv(save_filepath)

CPU times: user 516 µs, sys: 1.4 ms, total: 1.92 ms
Wall time: 1.17 ms


## Download OISSTV2

In [3]:
def find_remote_files(years: list = range(1981, 2100), ignore_ltm: bool = True):
    """
    Find OISSTV2 daily mean sst files.

    The remote directory is listed once, and a file is kept when the text of a
    requested year appears in its file name. Each matching file is returned
    only once, even if its name contains several of the requested years
    (e.g. a long-term mean file named after a year span).

    :param years: An iterable of years to seek associated files.
        Default is range(1981, 2100), i.e. 1981 through 2099.
    :param ignore_ltm: Boolean to filter out long-term mean files. Default is True.
    :return: A list of urls for downloading remote files, ordered by the
        requested years and without duplicates.
    """

    url = "https://downloads.psl.noaa.gov/Datasets/noaa.oisst.v2.highres/"
    remote = fsspec.filesystem('https')
    files = remote.glob(url + "*sst.day.mean*")

    # Match against the file name only, so text in the host or directory part
    # of the url can never be mistaken for a year or the 'ltm' marker.
    candidates = [(os.path.basename(f), f) for f in files]
    if ignore_ltm:
        candidates = [(name, f) for name, f in candidates if 'ltm' not in name]

    files_of_interest = []
    already_selected = set()
    for year in years:
        year_text = str(year)
        for name, f in candidates:
            if year_text in name and f not in already_selected:
                already_selected.add(f)
                files_of_interest.append(f)
    return files_of_interest


def download_files(remote_files: list, save_directory: str, overwrite: bool = True, verbose: bool = False):

    """
    Download remote OISSTV2 files from a supplied list.

    Each file is streamed into a temporary '<name>.part' file and renamed to
    its final name only after the transfer completed, so a failed or
    interrupted download never leaves a truncated file that a later call with
    overwrite=False would skip.

    :param remote_files: A list of urls that indicated remote file locations.
    :param save_directory: A user defined local directory for saving copies of remote files.
        It is created if it does not exist.
    :param overwrite: Indicates whether or not to overwrite previously downloaded files. Default is True.
    :param verbose: Indicates whether to print messages. Default is False.
    :return: A list of local filepaths containing downloaded data. Files that
        were skipped because they already exist are not included.
    :raises ConnectionError: If the server answers a request with an HTTP error status.
    """

    downloaded_files = []
    os.makedirs(save_directory, exist_ok=True)
    for remote_file in remote_files:
        filename = os.path.basename(remote_file)
        filepath = os.path.join(save_directory, filename)
        if not overwrite and os.path.isfile(filepath):
            continue
        partial_path = filepath + '.part'
        try:
            with requests.get(remote_file, stream = True) as req:
                if not req.ok:
                    # Never store an HTTP error page under the data file's name.
                    raise ConnectionError(f'HTTP {req.status_code} while downloading {remote_file}')
                # Undo any transfer encoding (gzip/deflate) on the raw stream.
                req.raw.decode_content = True
                with open(partial_path, 'wb') as fileobj:
                    shutil.copyfileobj(req.raw, fileobj)
            os.replace(partial_path, filepath)
        finally:
            # Only still present when the download failed or was interrupted.
            if os.path.isfile(partial_path):
                os.remove(partial_path)
        downloaded_files.append(filepath)
        if verbose:
            print(f'Downloaded OISSTV2: {filename}')
    return downloaded_files

In [4]:
%%time
years = range(1981, 2100)
overwrite = False
remote_files = find_remote_files(years)
save_directory = os.path.join(DATA_DIR, 'oisstv2')
os.makedirs(save_directory, exist_ok = True)
downloaded_files = download_files(remote_files, save_directory, overwrite = overwrite)

CPU times: user 7.43 s, sys: 6.26 s, total: 13.7 s
Wall time: 3min 53s
