In [3]:
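#!/usr/bin/env python
"""
Index BARRA_SY forecast NetCDF files into a datacube index.

Defines one product per BARRA variable, finds the matching ``*.sub.nc``
files under each variable's path, and adds one dataset document per hourly
part of every file.
"""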
import json
import logging
import re
import os
import uuid
from osgeo import ogr
from osgeo import osr


from collections import defaultdict
from datetime import datetime, timedelta
from pathlib import Path

import netCDF4
import numpy as np
import rasterio

from datacube import Datacube
from datacube.index.hl import Doc2Dataset
from datacube.utils import changes

#os.environ['GDAL_NETCDF_BOTTOMUP'] = 'NO'

# Storage origin and resolution shared (same objects) by every variable below.
origin = dict(longitude=146.983743780177917, latitude=-27.998249160776897)

# A disabled alternative used the opposite sign for the latitude step:
# -0.01349999997940784166
resolution = dict(
    latitude=0.01349999997940784166,
    longitude=0.01349998847228077621,
)

# BARRA var types are one of: an | fc
# BARRA streams are one of: spec | slv | prs
# BARRA domains are one of: BARRA_R | BARRA_SY | BARRA_AD | BARRA_PH | BARRA_TA
_SPEC_FORECAST_ROOT = '/g/data/ma05/BARRA_SY/v1/forecast/spec/'
_PRODUCT_PREFIX = 'barra_sy_fc_spec_'

_SURFACE_TEMPERATURE = (
    'Temperature of the land or sea/sea-ice surface. '
    'On land points this is the surface "skin" temperature. '
    'On ice-free sea points it is the temperature of the sea surface, '
    'and on sea points with ice it is a gridbox mean given by: '
    '[(ice fraction)*(temperature of top ice layer computed by the '
    'atmosphere surface/boundary layer scheme)] + '
    '[(1 - ice fraction)*(freezing point of sea water)].'
)
_GUST_WINDSPEED = 'Gust windspeed at 10m above-ground-level.'


def _barra_sy_spec_forecast(var_name, description, unit, product_name=None):
    """Build the settings dict for one BARRA_SY forecast ('fc') 'spec' variable.

    ``product_name`` defaults to ``'barra_sy_fc_spec_' + var_name``; the file
    path is always the spec forecast root followed by ``var_name``.
    """
    if product_name is None:
        product_name = _PRODUCT_PREFIX + var_name
    return {
        'var_name': var_name,
        'type': 'fc',
        'stream': 'spec',
        'domain': 'BARRA_SY',
        'description': description,
        'unit': unit,
        'path': _SPEC_FORECAST_ROOT + var_name,
        'product_name': product_name,
        'origin': origin,
        'resolution': resolution,
    }


variables = [
    _barra_sy_spec_forecast(
        'accum_ls_prcp',
        'Total large-scale rain amount at the surface. '
        'It does not include a convective contribution.',
        'kg m-2',
        # NOTE(review): this product name carries an 'av_' that the variable
        # name does not - kept exactly as it was; confirm it is intended.
        product_name='barra_sy_fc_spec_av_accum_ls_prcp',
    ),
    _barra_sy_spec_forecast('av_sfc_temp', _SURFACE_TEMPERATURE, 'K'),
    _barra_sy_spec_forecast('sfc_temp', _SURFACE_TEMPERATURE, 'K'),
    _barra_sy_spec_forecast('max_wndgust10m', _GUST_WINDSPEED, 'm s-1'),
    _barra_sy_spec_forecast('min_wndgust10m', _GUST_WINDSPEED, 'm s-1'),
    _barra_sy_spec_forecast(
        'uwnd10m',
        'Zonal (U) component of the wind velocity at 10m above-ground-level.',
        'm s-1',
    ),
    _barra_sy_spec_forecast(
        'vwnd10m',
        'Meridional (V) component of the wind velocity at 10m above-ground-level.',
        'm s-1',
    ),
]

LOG = logging.getLogger(__name__)

class NumpySafeEncoder(json.JSONEncoder):
    """JSON encoder that turns numpy scalars and arrays into plain Python values.

    print_dict referenced this name without it being defined alongside, so a
    call on a fresh kernel raised NameError.
    NOTE(review): if another definition exists elsewhere in the notebook,
    keep only one of them.
    """

    def default(self, obj):
        """Convert numpy values; defer everything else to the base encoder."""
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # The base implementation raises TypeError for unsupported types.
        return super().default(obj)


def print_dict(doc):
    """Pretty-print ``doc`` as JSON with sorted keys and a 4-space indent.

    Args:
        doc: A JSON-serialisable structure; numpy scalars and arrays inside
            it are converted by NumpySafeEncoder.

    Raises:
        TypeError: If ``doc`` contains a value the encoder cannot serialise.
    """
    print(json.dumps(doc, indent=4, sort_keys=True, cls=NumpySafeEncoder))

def find_datasets(variable):
    """Find the NetCDF files that belong to one BARRA variable.

    Recursively globs ``variable['path']`` for ``*.nc`` files and keeps those
    whose path ends in
    ``<var_name>-<type>-<stream>-PT1H-<domain>-v<version>-<YYYYmmdd>T<HHMM>Z.sub.nc``.

    Args:
        variable: Dict with at least the keys ``path``, ``var_name``,
            ``type``, ``stream`` and ``domain``. The four name components
            are matched literally.

    Returns:
        A ``defaultdict(dict)`` mapping
        ``var_name + date + time + domain + version`` to ``{var_name: Path}``.
        The 12 characters following the variable name in each key are the
        ``YYYYmmddHHMM`` timestamp taken from the file name.
    """
    root = Path(variable['path'])
    # Every fragment is a raw string (previously only the first one was, so
    # '\-' and '\d' were invalid escape sequences), configuration values are
    # escaped so they cannot act as regex syntax, and the '.sub.nc' suffix is
    # matched literally at the end of the path.
    pattern = re.compile(''.join([
        r'(?P<barra_var>', re.escape(variable['var_name']), r')',
        r'-(?P<barra_var_type>', re.escape(variable['type']), r')',
        r'-(?P<barra_var_stream>', re.escape(variable['stream']), r')',
        r'-(?P<barra_sampling_frequency>PT1H)',
        r'-(?P<barra_domain>', re.escape(variable['domain']), r')',
        r'-v(?P<barra_version>\d{1}\.?\d?)',
        r'-(?P<date>\d{8})T(?P<time>\d{4})Z',
        r'\.sub\.nc$',
    ]))

    datasets = defaultdict(dict)
    # Sorted so that results are produced in a deterministic order.
    for ncfile in sorted(root.glob('**/*.nc')):
        match = pattern.search(str(ncfile))
        if match is None:
            continue
        barra_var = match.group('barra_var')
        key = (
            barra_var
            + match.group('date')
            + match.group('time')
            + match.group('barra_domain')
            + match.group('barra_version')
        )
        datasets[key][barra_var] = ncfile
    return datasets

def generate_product_defn(variable):
    """Assemble the product definition document for one BARRA variable.

    Args:
        variable: Dict providing ``product_name``, ``resolution``, ``origin``,
            ``description``, ``var_name`` and ``unit``.

    Returns:
        A dict with the keys ``name``, ``metadata_type``, ``metadata``,
        ``storage``, ``description`` and ``measurements`` (a single float32
        measurement named after the variable).
    """
    product_name = variable['product_name']

    metadata = {
        'product_type': product_name,
        'format': {'name': 'NetCDF'},
    }
    storage = {
        'crs': 'GEOGCS["unknown",DATUM["unknown",SPHEROID["Sphere",6371229,0]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]]',
        'resolution': variable['resolution'],
        'origin': variable['origin'],
    }
    measurement = {
        'name': variable['var_name'],
        'dtype': 'float32',
        'nodata': -1073741824,
        'units': variable['unit'],
    }

    # Keys are inserted in the same order as the document is meant to read.
    definition = {'name': product_name, 'metadata_type': 'eo'}
    definition['metadata'] = metadata
    definition['storage'] = storage
    definition['description'] = variable['description']
    definition['measurements'] = [measurement]
    return definition

def generate_dataset_docs(dataset_name, dataset, variable, n_parts=6):
    """Build one dataset document per hourly part of a BARRA NetCDF file.

    Args:
        dataset_name: Key of the form ``<var_name><YYYYmmddHHMM>...`` (the
            keys returned by find_datasets); the 12 characters after the
            variable name give the start time of the first part.
        dataset: Mapping of variable name to the ``pathlib.Path`` of its
            NetCDF file.
        variable: Dict providing ``var_name`` and ``product_name``.
        n_parts: Number of consecutive one-hour parts to describe.
            Defaults to 6, the value that was previously hard-coded.

    Returns:
        A list of ``(uri, doc)`` tuples, one per part, where ``uri`` is
        ``'file:<path>#part=<i>'`` and ``doc`` is the dataset document whose
        time extent is shifted by ``i`` hours.

    Raises:
        ValueError: If the timestamp sliced out of ``dataset_name`` does not
            parse as ``%Y%m%d%H%M``.
    """
    barra_var_name = variable['var_name']
    sample_ncfile = dataset[barra_var_name]
    sample_ncfile_gdal = f'NETCDF:{sample_ncfile}:{barra_var_name}'

    creation_time = datetime.fromtimestamp(sample_ncfile.stat().st_mtime)
    geo_ref_points, spatial_ref = get_grid_spatial_projection(
        sample_ncfile_gdal)

    # Fix: this used to slice a module-level ``name`` that only existed while
    # the indexing loop was running; the key is passed in as ``dataset_name``.
    name_offset = len(barra_var_name)
    date_length = 12  # len('YYYYmmddHHMM')
    date = dataset_name[name_offset:name_offset + date_length]

    start_time = datetime.strptime(date, '%Y%m%d%H%M')
    # Each part covers one hour, ending one microsecond before the next starts.
    end_time = start_time + timedelta(hours=1) - timedelta(microseconds=1)
    center_time = start_time + timedelta(minutes=30)

    docs = []
    for part in range(n_parts):
        offset = timedelta(hours=part)
        part_start = start_time + offset
        # File location, modification time and part start together make the
        # id deterministic for a given file state.
        unique_ds_uri = f'{sample_ncfile.as_uri()}#{creation_time}#{part_start}'
        doc = {
            'id': str(uuid.uuid5(uuid.NAMESPACE_URL, unique_ds_uri)),
            'product_type': variable['product_name'],
            'creation_dt': str(creation_time),
            'extent': {
                'from_dt': str(part_start),
                'to_dt': str(end_time + offset),
                'center_dt': str(center_time + offset),
                'coord': to_lat_long_extent(geo_ref_points),
            },
            'format': {'name': 'NetCDF'},
            'grid_spatial': {
                'projection': {
                    'geo_ref_points': geo_ref_points,
                    'spatial_reference': spatial_ref,
                }
            },
            'image': {
                'bands': {
                    barra_var_name: {
                        'path': '',
                        'layer': barra_var_name,
                    }
                }
            },
            'lineage': {'source_datasets': {}}
        }
        docs.append((f'file:{sample_ncfile}#part={part}', doc))
    return docs


def to_lat_long_extent(geo_ref_points):
    """Re-key corner points from ``x``/``y`` to ``lon``/``lat``.

    Args:
        geo_ref_points: Mapping of corner name to a dict with ``x`` and ``y``.

    Returns:
        A new dict with the same corner names, each mapped to
        ``{'lat': y, 'lon': x}``.
    """
    extent = {}
    for corner, point in geo_ref_points.items():
        extent[corner] = {'lat': point['y'], 'lon': point['x']}
    return extent


def get_grid_spatial_projection(fname):
    """Read the corner coordinates and CRS WKT of a raster.

    Args:
        fname: Anything ``rasterio.open`` accepts as a dataset name.

    Returns:
        A ``(geo_ref_points, spatial_reference)`` tuple: a dict mapping the
        corners ``ul``/``ur``/``ll``/``lr`` to ``{'x': ..., 'y': ...}`` built
        from the dataset bounds, and the CRS as a WKT string.
    """
    with rasterio.open(fname, 'r') as img:
        west, south, east, north = img.bounds
        # Prefer a 'crs_wkt' attribute when the dataset has a non-empty one;
        # otherwise fall back to the WKT of its CRS object.
        wkt = getattr(img, 'crs_wkt', None)
        if not wkt:
            wkt = img.crs.wkt

    corners = {
        'ul': {'x': west, 'y': north},
        'ur': {'x': east, 'y': north},
        'll': {'x': west, 'y': south},
        'lr': {'x': east, 'y': south},
    }
    return corners, str(wkt)

In [4]:
# Open the datacube using the local 'datacube.conf' file and its 'barra-dev'
# environment, and keep a handle on the index used by the cells below.
dc = Datacube(
    config='datacube.conf',
    env='barra-dev',
)
index = dc.index


In [6]:
# Register one product per BARRA variable and print what the index returns.
for variable in variables:
    indexed_product = index.products.add(
        index.products.from_doc(generate_product_defn(variable))
    )
    print(indexed_product)

DatasetType(name='barra_sy_fc_spec_av_accum_ls_prcp', id_=21)
DatasetType(name='barra_sy_fc_spec_av_sfc_temp', id_=22)
DatasetType(name='barra_sy_fc_spec_sfc_temp', id_=23)
DatasetType(name='barra_sy_fc_spec_max_wndgust10m', id_=24)
DatasetType(name='barra_sy_fc_spec_min_wndgust10m', id_=25)
DatasetType(name='barra_sy_fc_spec_uwnd10m', id_=26)
DatasetType(name='barra_sy_fc_spec_vwnd10m', id_=27)


In [8]:
# Index every hourly part of every NetCDF file found for each variable.
for variable in variables:
    datasets = find_datasets(variable)
    resolver = Doc2Dataset(index)

    for name, dataset_files in datasets.items():
        for uri, doc in generate_dataset_docs(name, dataset_files, variable):
            # The resolver returns (dataset, None) on success and
            # (None, reason) on failure; previously the reason was dropped
            # and None was handed straight to index.datasets.add.
            dataset, err = resolver(doc, uri)
            if dataset is None:
                LOG.error("Couldn't resolve %s/%s: %s", uri, name, err)
                continue
            try:
                index.datasets.add(dataset)
            except Exception:
                # Best effort: log the failure with its traceback and carry
                # on. The old handler referenced an undefined `path` and
                # passed the exception as a stray format argument.
                LOG.exception("Couldn't index %s/%s", uri, name)