# Notebook to aggregate reference files (JSON) into monthly or yearly zarrs

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. intake_aodn) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Step 0: Import the library of code

In [None]:
import sys
import os
sys.path.append('/home/jovyan/intake-aodn/')

import intake_aodn
import intake

from intake_aodn.utils import get_local_cluster, get_distributed_cluster
from intake_aodn.indexing import process_aggregate
from intake_aodn.indexing import keep_fields  

In [None]:
# Create the Dask client used by the rest of the notebook.
# To run on a local cluster instead, use the commented line and drop the call below.
# client = get_local_cluster()
client = get_distributed_cluster(
    worker_cores=1,
    worker_memory=2.0,
    min_workers=1,
    max_workers=1,
)

In [None]:
# kerchunk must be installed both in the notebook environment and, when a
# distributed cluster is used, on every worker. The plugin below asks each
# worker to pip-install (and upgrade) kerchunk.
from dask.distributed import PipInstall

plugin = PipInstall(
    packages=["kerchunk"],
    pip_options=["--upgrade"],
)
client.register_worker_plugin(plugin)

# On a distributed cluster on EASI the workers won't have the intake_aodn code
# for imports unless it is shipped to them: build an egg with
# "python setup.py bdist_egg" and upload it.
# NOTE(review): the file name below is specific to one build; it presumably
# has to be updated after every rebuild - confirm.
egg_path = '/home/jovyan/intake-aodn/dist/intake_aodn-0+untagged.62.gcd7d4df.dirty-py3.8.egg'
client.upload_file(egg_path)

# Unzip existing references

In [None]:
!cd ../../intake_aodn/catalogs/ && unzip -q aodn_refs.zip

# SST Data

In [None]:
# Names of the fields passed to keep_fields() for the SST product.
# Each name appears exactly once.
variables = [
    'time',
    'dt_analysis',
    'l2p_flags',
    'quality_level',
    'satellite_zenith_angle',
    'sea_surface_temperature',
    'sses_bias',
    'sses_count',
    'sses_standard_deviation',
    'sst_dtime',
]


In [None]:
# Scratch check: is 'quality_level1' one of the names in `variables`?
# NOTE(review): `variables` contains 'quality_level' (no trailing 1), so this
# evaluates to False - confirm whether this cell is still needed.
'quality_level1' in variables

In [None]:
%%time
# Example file name for this product:
# s3://imos-data-pixeldrill/IMOS/SRS/SST/ghrsst/L3S-1d/ngt/2016/20161001152000-ABOM-L3S_GHRSST-SSTskin-AVHRR_D-1d_night.nc
#
# Keyword arguments for process_aggregate(). The 'year' and 'month' values are
# placeholders: the monthly loop further down overrides them on a per-month copy.
kwargs = {
    'root': 'imos-data/IMOS/SRS/SST/ghrsst/L3S-1d/ngt/',
    'year': '2021',
    'month': '01',
    'mask': '{year}/{year}{month}',
    'suffix': '-ABOM-L3S_GHRSST-SSTskin-AVHRR_D-1d_night',
    'extension': 'nc',
    'check_chunking': 'sea_surface_temperature',
    'preprocess': keep_fields(variables),
    'storage_options': {'anon': True},
    'dest': '../../intake_aodn/catalogs/',
    'dask': True,
}
# Uncomment to process only the single year/month configured above:
# process_aggregate(**kwargs)

In [None]:
import pandas as pd

# Month-end timestamps starting with January 1988; the range end is one month
# past the current time.
dt = pd.date_range(
    start='1988-01-01',
    end=pd.Timestamp.now() + pd.DateOffset(months=1),
    freq='M',
)
# Alternative for a shorter run starting from a recent month:
# dt = pd.date_range('2022-02-01', pd.Timestamp.now() + pd.DateOffset(months=1), freq='M')
print(dt)

In [None]:
# Call process_aggregate() once per entry in `dt`, using a per-month copy of
# `kwargs` whose 'year' and 'month' are taken from that entry, and collect
# whatever each call returns.
results = []
for month_end in dt:
    month_kwargs = dict(
        kwargs,
        year=month_end.strftime('%Y'),
        month=month_end.strftime('%m'),
    )
    results.append(process_aggregate(**month_kwargs))

# MODIS Ocean Colour

In [None]:
# Base keyword arguments for process_aggregate() for the MODIS ocean colour
# files; year, month, suffix and check_chunking are filled in per iteration.
# NOTE: this rebinds `kwargs`, replacing the SST configuration defined earlier.
kwargs = dict(root='imos-data/IMOS/SRS/OC/gridded/aqua/P1D/',
              mask='{year}/{month}/A.P1D.{year}{month}',
              dest='../../intake_aodn/catalogs/',
              dask=True
             )

# Each variable is listed once so that every (month, variable) pair is
# aggregated exactly once.
modis_variables = ['K_490', 'chl_oc3', 'chl_gsm']

results = []

for d in dt:
    for var in modis_variables:
        kws = kwargs.copy()
        kws['year'] = d.strftime('%Y')
        kws['month'] = d.strftime('%m')
        # The variable name is part of the file name and is also the field
        # passed as check_chunking.
        kws['suffix'] = f'.aust.{var}'
        kws['check_chunking'] = var
        results.append(process_aggregate(**kws))

## Zip references

In [None]:
!cd ../../intake_aodn/catalogs/ && rm aodn_refs.zip  && zip -r -q aodn_refs.zip imos-data && rm -rf ../../intake_aodn/catalogs/imos-data/

Exception in callback None()
handle: <Handle cancelled>
Traceback (most recent call last):
  File "/env/lib/python3.8/site-packages/tornado/iostream.py", line 1391, in _do_ssl_handshake
    self.socket.do_handshake()
  File "/usr/lib/python3.8/ssl.py", line 1309, in do_handshake
    self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1131)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.8/asyncio/events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "/env/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 189, in _handle_events
    handler_func(fileobj, events)
  File "/env/lib/python3.8/site-packages/tornado/iostream.py", line 696, in _handle_events
    self._handle_read()
  File "/env/lib/python3.8/site-packages/tornado/iostream.py", line 1478, in 

In [None]:
# Finished: shut down via the Dask `client` created earlier in the notebook.
client.shutdown()