## Setup

In [None]:
# Uncomment and run once if the packages below are missing.
# %pip (rather than !pip) installs into the environment of the running kernel.
#%pip install fsspec
#%pip install requests
#%pip install netCDF4
#%pip install xarray

In [1]:
from datetime import datetime
import fsspec
import os
import requests
from requests.compat import urljoin
import shutil
import xarray as xr

## Helper Functions

In [2]:
def find_oisstv2_files(years: list = range(1981, datetime.now().year + 1)) -> list:
    """
    Find the remote filepaths for OISSTV2 SST Day Mean files for the supplied years of interest.

    The remote directory is globbed for 'sst.day.mean.*.nc'. Long-term mean ('ltm') files are
    discarded, and a remaining file is kept only when the field directly before the '.nc'
    extension is exactly one of the requested years. Partial digits (for example 202) therefore
    no longer select every file whose path happens to contain them.

    :param years: An iterable of years between 1981 to present. The default argument is 1981 to the present year.
        The default range is built once, when the function is defined, not on every call.
    :return: A list of filepaths containing data of interest, in the order returned by the remote listing.
    """

    remote = fsspec.filesystem('https')
    base_url = 'https://downloads.psl.noaa.gov/Datasets/noaa.oisst.v2.highres/'
    all_files = remote.glob(urljoin(base_url, 'sst.day.mean.*.nc'))

    # A set gives exact, constant-time membership tests instead of substring scans per year.
    wanted_years = {str(year) for year in years}

    files_of_interest = []
    for file in all_files:
        if 'ltm' in file:  # Ditch long-term mean and only return day mean files.
            continue
        # 'sst.day.mean.2021.nc' -> ['sst', 'day', 'mean', '2021', 'nc']; the year is the second-to-last field.
        name_parts = file.rsplit('/', 1)[-1].split('.')
        if len(name_parts) >= 2 and name_parts[-2] in wanted_years:
            files_of_interest.append(file)
    return files_of_interest


def download_oisstv2_files(remote_files: list, save_dir: str, overwrite: bool = False, verbose: bool = False) -> list:
    """
    Download OISSTV2 SST Day Mean files.

    A file is fetched when no local copy exists yet, or when overwrite is set. The data is
    streamed into a temporary '<name>.part' file and renamed into place only after the transfer
    has finished, so an interrupted download never leaves a truncated file that a later call
    would skip as "already downloaded".

    :param remote_files: A list of remote filepaths generated from the find_oisstv2_files function.
    :param save_dir: The directory to save data to. It is created if it does not exist.
    :param overwrite: Set to True if you want to overwrite previously downloaded files.
    :param verbose: Set to True if you want to print progress messages during the download process.
    :return: A list of local filepaths containing downloaded data. Files that already existed and
        were skipped are included.
    :raises ConnectionError: If the server answers with an HTTP error status.
    """

    os.makedirs(save_dir, exist_ok=True)
    downloaded_files = []
    for remote_file in remote_files:
        fn = os.path.basename(remote_file)
        save_fp = os.path.normpath(os.path.join(save_dir, fn))

        # Only skip when a local copy is present AND the caller did not ask to overwrite it.
        # Every other combination (missing file, or overwrite requested) falls through to the download.
        if not overwrite and os.path.isfile(save_fp):
            downloaded_files.append(save_fp)
            if verbose:
                msg = f"File containing {fn} already exists. Skipping download."
                print(msg)
            continue

        partial_fp = save_fp + '.part'
        try:
            with requests.get(remote_file, stream=True) as response:
                # Refuse to store an HTTP error page as if it were data.
                if not response.ok:
                    raise ConnectionError(f'Error downloading {fn}.')
                with open(partial_fp, 'wb') as fileobj:
                    shutil.copyfileobj(response.raw, fileobj)
            # The file handle is closed here, so the rename is safe on every platform.
            os.replace(partial_fp, save_fp)
        finally:
            # Reached with a leftover partial file only when the transfer failed part-way.
            if os.path.isfile(partial_fp):
                os.remove(partial_fp)

        downloaded_files.append(save_fp)
        if verbose:
            msg = f'Downloaded {fn}.'
            print(msg)
    return downloaded_files


def open_oisstv2_file(filepath: str, convert_lon: bool = True) -> xr.Dataset:
    """
    Load a single OISSTV2 file as an xarray.Dataset, optionally re-expressing longitude.

    :param filepath: The filepath of the file to open.
    :param convert_lon: If True, lon is converted from [0,360] to [-180,180], the 'actual_range'
        attribute of lon is refreshed to match, and the dataset is sorted by lon and lat.
    :return: An xarray.Dataset.
    """
    dataset = xr.open_dataset(filepath)
    if convert_lon is not True:
        return dataset

    # Hold on to the coordinate's attributes so they can be re-attached after lon is replaced.
    saved_attrs = dataset['lon'].attrs
    dataset['lon'] = ((dataset.lon + 180) % 360) - 180  # Wrap [0 to 360] onto [-180 to 180].
    lon_min = float(dataset.lon.min().values)
    lon_max = float(dataset.lon.max().values)
    saved_attrs['actual_range'] = [lon_min, lon_max]
    dataset['lon'].attrs = saved_attrs
    return dataset.sortby([dataset.lon, dataset.lat])


def combine_oisstv2_files(filepaths: list, convert_lon: bool = True) -> xr.Dataset:
    """
    Open several OISSTV2 files and merge them into one dataset by their coordinates.

    :param filepaths: A list of local filepaths.
    :param convert_lon: Passed to open_oisstv2_file for every file; if True, lon is converted
        from [0,360] to [-180,180].
    :return: An xarray.Dataset. Attributes whose values conflict between files are dropped.
    """
    opened = [open_oisstv2_file(path, convert_lon) for path in filepaths]
    return xr.combine_by_coords(opened, combine_attrs='drop_conflicts')

## Download OISSTV2

In [3]:
%%time
years = range(2021,2023)
#years = range(1981,2025)  # Uncomment this to download all OISSTV2 SST Day Mean Files to date.
save_dir = 'C:/Users/Ian/projects/scratch/test_data'
overwrite = True
verbose = True

remote_files = find_oisstv2_files(years = years)
downloaded_files = download_oisstv2_files(remote_files,save_dir, overwrite,verbose)

Downloaded sst.day.mean.2021.nc.
Downloaded sst.day.mean.2022.nc.
CPU times: total: 375 ms
Wall time: 6min 35s


## Combine Multiple Files Into A Single Dataset

In [4]:
%%time
# Open every downloaded file and merge them into one dataset, with longitude converted to [-180, 180].
ds = combine_oisstv2_files(downloaded_files, convert_lon=True)

CPU times: total: 18.4 s
Wall time: 19.5 s


In [5]:
# Leave the combined dataset as the cell's last expression so the notebook displays it.
ds