In [3]:
from importlib import reload
import os 
import pathlib
import sys
sys.path.append('..')

from kerchunk.hdf import SingleHdf5ToZarr 
from kerchunk.combine import MultiZarrToZarr

import cartopy.crs as ccrs
import cartopy.feature as cfeature 
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import dask
from dask.distributed import Client
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from mpl_toolkits.axes_grid1 import make_axes_locatable
import s3fs
import datetime as dt

import logging
import fsspec
import ujson
from tqdm import tqdm
from glob import glob
import seaborn as sb

from src import processing as pr, parallel_plotting as pp
# Re-execute the local `src` helper modules so that edits made to them on
# disk take effect in this kernel without restarting it.
reload(pr)
reload(pp)

<module 'src.parallel_plotting' from '/Users/geet/Documents/Repositories/Owned/goesdata/notebooks/../src/parallel_plotting.py'>

In [2]:
# Anonymous (unsigned) S3 filesystem handle; no credentials are supplied.
fs = fsspec.filesystem('s3', anon=True)

# Collect the full s3:// URL of every object whose key matches *C13*.nc
# under the 2021/210 .. 2021/239 prefixes of the noaa-goes16 bucket.
# NOTE(review): the numeric path component is presumably the day-of-year
# directory of the bucket layout — confirm against the bucket listing.
urls = []
for day_dir in tqdm(range(210, 240)):
    pattern = f"s3://noaa-goes16/ABI-L2-CMIPF/2021/{day_dir}/*/*C13*.nc"
    # The glob results are re-prefixed with the scheme before being stored.
    urls.extend('s3://' + key for key in fs.glob(pattern))

100%|██████████| 30/30 [01:54<00:00,  3.83s/it]


In [4]:
# Sanity check: how many object URLs the S3 listing above collected.
len(urls)

4321

In [5]:
# Start a dask.distributed client with 8 workers; called without a scheduler
# address, so it spins up its own cluster on this machine.
client = Client(n_workers=8)
# Bare last expression: renders the client/cluster summary as cell output.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 8
Total threads: 8,Total memory: 16.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:52607,Workers: 8
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 16.00 GiB

0,1
Comm: tcp://127.0.0.1:52632,Total threads: 1
Dashboard: http://127.0.0.1:52633/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52610,
Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-m5y6oswk,Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-m5y6oswk

0,1
Comm: tcp://127.0.0.1:52626,Total threads: 1
Dashboard: http://127.0.0.1:52627/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52611,
Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-ce_kigle,Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-ce_kigle

0,1
Comm: tcp://127.0.0.1:52629,Total threads: 1
Dashboard: http://127.0.0.1:52630/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52612,
Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-uzg32dj8,Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-uzg32dj8

0,1
Comm: tcp://127.0.0.1:52638,Total threads: 1
Dashboard: http://127.0.0.1:52639/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52613,
Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-67f4smi_,Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-67f4smi_

0,1
Comm: tcp://127.0.0.1:52635,Total threads: 1
Dashboard: http://127.0.0.1:52636/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52614,
Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-u0dqwgu8,Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-u0dqwgu8

0,1
Comm: tcp://127.0.0.1:52641,Total threads: 1
Dashboard: http://127.0.0.1:52642/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52615,
Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-bo59k1qq,Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-bo59k1qq

0,1
Comm: tcp://127.0.0.1:52644,Total threads: 1
Dashboard: http://127.0.0.1:52645/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52616,
Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-3xj_6usd,Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-3xj_6usd

0,1
Comm: tcp://127.0.0.1:52647,Total threads: 1
Dashboard: http://127.0.0.1:52648/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52617,
Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-dv5c8303,Local directory: /var/folders/fb/jvrfb4xx59g6_79sjdwd8pzh0000gn/T/dask-scratch-space/worker-dv5c8303


In [6]:
def gen_json(u):
    """Write a kerchunk reference JSON for one remote HDF5/NetCDF file.

    The references are stored at ``jsons/<basename of u>.json`` relative to
    the current working directory. If that file already exists the function
    returns immediately, *before* touching the remote object, so an
    interrupted batch can be resumed without re-reading files that are
    already done.

    Parameters
    ----------
    u : str
        URL of the source file (e.g. ``s3://bucket/path/file.nc``). It is
        opened anonymously through fsspec and is also passed to
        ``SingleHdf5ToZarr`` as the location recorded in the references.

    Returns
    -------
    None

    Notes
    -----
    The ``jsons/`` directory must already exist. The JSON is written to a
    ``.tmp`` sibling first and then renamed into place, so a run that is
    killed mid-write never leaves a truncated ``.json`` that a later run
    would mistake for a finished one.
    """
    out_path = f"jsons/{u.split('/')[-1]}.json"

    # Check first: skipping is only worthwhile if it also avoids the S3 open
    # and the HDF5 metadata scan below.
    if os.path.isfile(out_path):
        return

    so = dict(
        mode="rb", anon=True, default_fill_cache=False,
        default_cache_type="none"
    )
    with fsspec.open(u, **so) as inf:
        h5chunks = SingleHdf5ToZarr(inf, u, inline_threshold=300)
        # translate() reads from the open file, so it must run inside the
        # `with`; serialise here and write once the remote handle is closed.
        payload = ujson.dumps(h5chunks.translate()).encode()

    tmp_path = f"{out_path}.tmp"
    with open(tmp_path, 'wb') as outf:
        outf.write(payload)
    # Atomic rename: the final name only ever refers to a complete file.
    os.replace(tmp_path, out_path)

In [None]:
# gen_json writes into ./jsons/, so make sure the directory is there
# (no error if it already exists).
json_dir = pathlib.Path('./jsons/')
json_dir.mkdir(exist_ok=True)

# One lazy task per URL, then evaluate them all in a single compute call.
# The trailing `;` suppresses the tuple of return values in the cell output.
tasks = [dask.delayed(gen_json)(u) for u in urls]
dask.compute(*tasks);

KeyboardInterrupt: 

2023-06-11 02:17:17,510 - tornado.application - ERROR - Exception in callback <bound method SystemMonitor.update of <SystemMonitor: cpu: 0 memory: 52 MB fds: 257>>
Traceback (most recent call last):
  File "/Users/geet/Documents/Repositories/Owned/goesdata/goesdata_env/lib/python3.10/site-packages/tornado/ioloop.py", line 919, in _run
    val = self.callback()
  File "/Users/geet/Documents/Repositories/Owned/goesdata/goesdata_env/lib/python3.10/site-packages/distributed/system_monitor.py", line 160, in update
    net_ioc = psutil.net_io_counters()
  File "/Users/geet/Documents/Repositories/Owned/goesdata/goesdata_env/lib/python3.10/site-packages/psutil/__init__.py", line 2119, in net_io_counters
    rawdict = _psplatform.net_io_counters()
OSError: [Errno 12] Cannot allocate memory
2023-06-11 03:37:26,249 - tornado.application - ERROR - Exception in callback <bound method SystemMonitor.update of <SystemMonitor: cpu: 0 memory: 34 MB fds: 33>>
Traceback (most recent call last):
  File "/U