In [3]:
import concurrent.futures
import json
import xarray as xr

import sys
sys.path.append("..")
import helpers
from create_stac_item import create_stac_item

In [4]:
# Size of the thread pool used when STAC items are generated in parallel.
num_workers = 16

# Location of the input files. The time-chunk size is kept as a string
# because it is interpolated directly into paths and filenames below.
timechunks = '24'
bucket = 'nasa-eodc-scratch'
directory = f'NLDAS/netcdf/.timechunk{timechunks}'

In [5]:
# Build the S3 filesystem handle from credentials supplied by the local
# `helpers` module; every later cell talks to S3 through `s3fsfs`.
credentials = helpers.get_credentials()
s3fsfs = helpers.create_s3filesystem(credentials)
# Bare expression: display the filesystem object to confirm it was created.
s3fsfs

<s3fs.core.S3FileSystem at 0x7fa59c394290>

In [6]:
# List every `.nc` object directly under the configured bucket/prefix.
# NOTE(review): the paths are re-prefixed with 's3://' when they are opened
# later, so glob presumably returns them without the scheme -- confirm.
files = s3fsfs.glob(f's3://{bucket}/{directory}/*.nc')

In [7]:
# Sanity check: how many input files the glob matched.
len(files)

31

In [8]:
%%time
# Placeholder value: the thread-pool run later in this cell rebinds
# `stac_items`, so this empty list only survives if that run raises or is
# interrupted.
stac_items = []

def open_and_generate_stac(file: str):
    """Open one NetCDF object on S3 and build a STAC item describing it.

    Args:
        file: Object path without the ``s3://`` scheme (the scheme is added
            here), e.g. an entry from the ``files`` list.

    Returns:
        Whatever ``create_stac_item`` returns for the opened dataset and its
        ``s3://`` URL.
    """
    url = f's3://{file}'
    # Both the remote file handle and the dataset are closed as soon as the
    # item has been built; otherwise every call leaks an open S3 handle, which
    # adds up quickly when this runs across a thread pool.
    # NOTE(review): assumes create_stac_item reads everything it needs from
    # `ds` before returning (i.e. the item holds no lazy arrays) -- confirm.
    with s3fsfs.open(url) as remote_file:
        with xr.open_dataset(remote_file, chunks={}) as ds:
            return create_stac_item(ds, url)

# Fan the per-file work out over a thread pool. `map` yields results in the
# same order as `files`, so stac_items[i] corresponds to files[i].
with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as pool:
    stac_items = [item for item in pool.map(open_and_generate_stac, files)]

KeyboardInterrupt: 

In [7]:
# Sanity check: one item is produced per input file, so this should equal len(files).
len(stac_items)

31

In [8]:
def _as_plain_dict(item):
    """Return ``item.to_dict()`` with 'properties' passed through helpers.convert_numpy."""
    as_dict = item.to_dict()
    # NOTE(review): convert_numpy presumably replaces NumPy values with plain
    # Python ones so the dict can be JSON-serialised -- confirm in helpers.
    as_dict['properties'] = helpers.convert_numpy(as_dict['properties'])
    return as_dict


stac_items_as_dicts = [_as_plain_dict(item) for item in stac_items]

In [9]:
# Wrap the item dicts in a GeoJSON-style FeatureCollection envelope.
stac_collection = dict(type="FeatureCollection", features=stac_items_as_dicts)

In [10]:
# Local filename for the serialised collection; it embeds the time-chunk size.
stac_json_filename = f'stac_item_collection_{timechunks}.json'
# json.dump streams straight into the file instead of first building the whole
# document as one string. The encoding is pinned so the file does not depend on
# the platform's locale default (the output is ASCII-only anyway, because
# ensure_ascii defaults to True).
with open(stac_json_filename, 'w', encoding='utf-8') as f:
    json.dump(stac_collection, f, indent=2)

In [11]:
# Upload the local JSON file into the same bucket/prefix the input files were listed from.
s3fsfs.put(stac_json_filename, f's3://{bucket}/{directory}/{stac_json_filename}')

[None]

In [12]:
# Display the first generated item as a spot check.
stac_items[0]