# ESSE NOTEBOOK NÃO DEVE SER RODADO! ELE EXISTE APENAS PARA DOCUMENTAR COMO OBTIVEMOS OS DADOS DO HYCOM!!

O site [1] é o portal do HYCOM, que descreve as diversas implementações do modelo ao longo dos anos.

O acesso por THREDDS/OPeNDAP ou FTP é super lento e falha bastante.
Nossa melhor chance é baixar os arquivos por HTTPS mesmo, a partir da URL em [2].
A descrição do experimento `GLBu0.08` 91.2 é: _Uniform (1/12° resolution) grid between 80.48°S and 80.48°N_

Os arquivos são nomeados no formato "`hycom_grade_experimento_data_hora_campo`", por exemplo `hycom_glb_912_2017010100_t000_uv3z.nc`. Os campos ice, ssh, sur, ts3z, uv3z, etc. são descritos em [1].

> **Note**
> Note que nem todos os experimentos têm todos os formatos!

1. https://www.hycom.org/data/glbu0pt08/expt-91pt2
2. https://data.hycom.org/datasets/GLBu0.08/expt_91.2/data/hindcasts
 

> **Warning**
> Existem experimentos mais novos que o GLBu0.08, mas esse é o único que tem dados de 2016/2017.

In [None]:
from pathlib import Path
import requests
from tqdm import tqdm
import lxml.html
import subprocess


def url_lister(url: str) -> list:
    """Return every link target found on the HTML page at *url*.

    Args:
        url: Address of the page to scrape (for example an HTTP directory
            listing).

    Returns:
        A list with the ``href`` value of each ``<a>`` element, exactly as
        written in the page (relative links are not resolved).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = requests.get(url, allow_redirects=True)
    # Fail loudly on an HTTP error instead of scraping links from an error page.
    r.raise_for_status()
    dom = lxml.html.fromstring(r.text)
    return list(dom.xpath("//a/@href"))


def compress_netcdf(fname: Path):
    """Compress a netCDF file into a sibling ``.nc4`` file using ``ncks``.

    The output path is *fname* (made absolute) with its suffix replaced by
    ``.nc4``. If that file already exists nothing is executed and the
    function returns normally.

    Args:
        fname: Path of the netCDF file to compress.

    Raises:
        Exception: If ``ncks`` exits with a non-zero return code.
    """
    fname = fname.absolute()
    outfile = fname.with_suffix(".nc4")

    if outfile.is_file():
        # An existing output is not a failure; without this guard the
        # function would report "could not compress" for finished files.
        print(f"File {outfile} already exists, skipping compression.")
        return

    # "-L" and its value are passed as separate argv entries, the same way a
    # shell would split `ncks -4 -L 1 in.nc out.nc`.
    call = ["ncks", "-4", "-L", "1", str(fname), str(outfile)]
    ret = subprocess.run(call, capture_output=True)
    print(ret.stdout.decode(errors="replace"))

    if ret.returncode != 0:
        # Surface the tool's own diagnostics before raising.
        print(ret.stderr.decode(errors="replace"))
        raise Exception(f"Could no compress file {fname}")

    print(f"Converted {fname} to {outfile}.")


def download(url: str, outfile: Path, block_size: int = 1024 * 1024):
    """Stream the resource at *url* into *outfile*, showing a progress bar.

    Args:
        url: Address of the file to download.
        outfile: Destination path; it is opened in binary write mode, so any
            existing content is overwritten.
        block_size: Number of bytes requested per chunk from the response
            stream. Defaults to 1 MiB, which keeps per-chunk overhead low for
            large files.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status. This
            is checked before *outfile* is opened, so no file is written for
            a failed request.
    """
    bytes_written = 0
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, allow_redirects=True) as response:
        # Do not save an HTML error page under a ".nc" name.
        response.raise_for_status()
        total_size_in_bytes = int(response.headers.get("content-length", 0))
        with outfile.open(mode="wb") as f, tqdm(
            total=total_size_in_bytes, unit="iB", unit_scale=True
        ) as progress_bar:
            for data in response.iter_content(block_size):
                progress_bar.update(len(data))
                f.write(data)
                bytes_written += len(data)

    # A missing content-length header is read as 0, in which case there is
    # nothing to compare against.
    if total_size_in_bytes != 0 and bytes_written != total_size_in_bytes:
        print(f"Something went wrong while writing {outfile}.")

In [None]:
# Restrict the selection to one year and month so that we never try to
# download the whole archive in a single run.
year = "2016"
month = "12"

url = f"http://data.hycom.org/datasets/GLBu0.08/expt_91.2/data/hindcasts/{year}"
urls = url_lister(url)

# Keep only the velocity files; change the suffix to "t000_ts3z.nc" to get
# temperature and salinity instead.
fnames = [link for link in urls if link.endswith("t000_uv3z.nc")]

# Narrow the list down to the chosen month and drop duplicated links.
name_list = {
    link for link in fnames if link.startswith(f"hycom_glb_912_{year}{month}")
}

In [None]:
# Download, compress and clean up every selected file, skipping the ones whose
# compressed ".nc4" version is already on disk. Sorting makes the processing
# order reproducible (a set has no defined iteration order).
for fname in sorted(name_list):
    fname = Path(fname)
    local_name = fname.parent.joinpath("hycom", fname.name)
    final_name = local_name.with_suffix(".nc4")

    if not final_name.is_file():
        remote = f"{url}/{fname.name}"
        # The target directory must exist before the file can be opened for writing.
        local_name.parent.mkdir(parents=True, exist_ok=True)
        # Announce the download before it starts, not after it has finished.
        print(f"Downloading file from {remote}\n")
        download(remote, local_name)
        compress_netcdf(local_name)
        # Remove the uncompressed original once the ".nc4" copy exists.
        local_name.unlink()
    else:
        print(f"File {final_name} already exits.")