In [1]:
#!/usr/bin/env python
"""
"""
import json
import logging
import re
import os
import uuid
from osgeo import ogr
from osgeo import osr

from datetime import datetime
from dateutil.relativedelta import relativedelta

from collections import defaultdict
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import rasterio

from datacube import Datacube
from datacube.index.hl import Doc2Dataset
from datacube.utils import changes

LOG = logging.getLogger(__name__)

class _NumpySafeEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types."""

    def default(self, o):
        # NumPy scalars (np.int64, np.bool_, ...) are not JSON serialisable
        # by the stock encoder; unwrap them to the matching Python scalar.
        if isinstance(o, np.generic):
            return o.item()
        # Arrays become (possibly nested) lists of Python scalars.
        if isinstance(o, np.ndarray):
            return o.tolist()
        # Anything else: let the base class raise its usual TypeError.
        return super().default(o)


def print_dict(doc):
    """
    Pretty-print ``doc`` to stdout as JSON (4-space indent, sorted keys).

    NumPy scalars and arrays inside ``doc`` are converted to plain Python
    values before serialisation.

    :param doc: JSON-serialisable mapping (may contain NumPy values)
    :raises TypeError: if ``doc`` contains a value that cannot be serialised
    """
    # The encoder lives next to this function so the helper does not depend
    # on a name defined outside this cell.
    print(json.dumps(doc, indent=4, sort_keys=True, cls=_NumpySafeEncoder))

def find_datasets(path: Path):
    """
    Recursively collect the daily precipitation GeoTIFFs found under ``path``.

    A file is accepted when its *file name* ends with
    ``YYYYMMDD_total_accum_prcp.tiff``. Files are visited in sorted path
    order, so if the same date occurs more than once the last one wins.

    :param path: directory to search (searched recursively for ``*.tiff``)
    :return: mapping of ``'YYYYMMDD'`` strings to the matching file ``Path``
    """
    # The dot is escaped and the pattern anchored at the end of the name so
    # that only a literal ".tiff" suffix is accepted.
    pattern = re.compile(
        r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})_total_accum_prcp\.tiff$')

    # Kept as a defaultdict so the returned type is unchanged for callers.
    datasets = defaultdict(dict)
    for tifffile in sorted(path.glob('**/*.tiff')):
        # Match against the file name only; directory names that happen to
        # look like a dataset must not influence the result.
        match = pattern.search(tifffile.name)
        if match:
            year, month, day = match.group('year', 'month', 'day')
            datasets[year + month + day] = tifffile
    return datasets

def generate_product_defn():
    """
    Build the product definition document for ``accum_prcp_daily``.

    The document declares an ``eo`` metadata type, GeoTIFF format, the
    storage grid (CRS, resolution, origin) and a single ``accum_prcp``
    measurement.

    :return: product definition as a plain ``dict``
    """
    product_name = 'accum_prcp_daily'
    cell_size = 0.1100000051935545453

    metadata = {
        'product_type': product_name,
        'format': {'name': 'GeoTIFF'},
    }

    storage = {
        'crs': 'GEOGCS["unknown",DATUM["unknown",SPHEROID["Sphere",6371229,0]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]]',
        # Latitude step is negative, longitude step positive.
        'resolution': {'latitude': -cell_size, 'longitude': cell_size},
        'origin': {'longitude': 65, 'latitude': 19.48},
    }

    accum_prcp = {
        'name': 'accum_prcp',
        'dtype': 'float64',
        'nodata': -1073741824,
        'units': 'kg m-2',
    }

    definition = {'name': product_name, 'metadata_type': 'eo'}
    definition['metadata'] = metadata
    definition['storage'] = storage
    definition['description'] = 'BARRA Daily precipitation accumulation'
    definition['measurements'] = [accum_prcp]
    return definition

def generate_dataset_doc(dataset_name, dataset):
    """
    Build the dataset metadata document for one daily GeoTIFF.

    :param dataset_name: date covered by the file, as a ``'YYYYMMDD'`` string
    :param dataset: ``pathlib.Path`` of the GeoTIFF file
    :return: ``(uri, doc)`` tuple where ``uri`` is ``'file:' + str(dataset)``
        and ``doc`` is the metadata ``dict``
    :raises ValueError: if ``dataset_name`` is not a valid ``YYYYMMDD`` date
    """
    tiff_path = dataset
    # File modification time is used as the dataset creation time.
    creation_time = datetime.fromtimestamp(tiff_path.stat().st_mtime)
    geo_ref_points, spatial_ref = get_grid_spatial_projection(f'{tiff_path}')

    # Take the date from the argument; reading a notebook-level variable here
    # would make the function depend on how the calling cell names its loop
    # variable.
    start_time = datetime.strptime(dataset_name, '%Y%m%d')

    # The extent runs from midnight up to the last microsecond of the day.
    end_time = start_time + timedelta(days=1) - timedelta(microseconds=1)
    center_time = (end_time - start_time) / 2 + start_time

    # The id is a name-based UUID derived from the file URI, its mtime and
    # the start time, so the same inputs always give the same id.
    unique_ds_uri = f'{tiff_path.as_uri()}#{creation_time}#{start_time}'
    doc = {
        'id': str(uuid.uuid5(uuid.NAMESPACE_URL, unique_ds_uri)),
        'product_type': 'accum_prcp_daily',
        'creation_dt': str(creation_time),
        'extent': {
            'from_dt': str(start_time),
            'to_dt': str(end_time),
            'center_dt': str(center_time),
            'coord': to_lat_long_extent(geo_ref_points),
        },
        'format': {'name': 'GeoTIFF'},
        'grid_spatial': {
            'projection': {
                'geo_ref_points': geo_ref_points,
                'spatial_reference': spatial_ref,
            }
        },
        'image': {
            'bands': {
                'accum_prcp': {
                    # Empty band path; the file location is carried by the
                    # URI returned alongside the document.
                    'path': '',
                    'layer': '1',
                }
            }
        },
        'lineage': {'source_datasets': {}}
    }
    return ('file:' + str(dataset), doc)


def to_lat_long_extent(geo_ref_points):
    """
    Re-key every corner's ``x``/``y`` pair as ``lon``/``lat``.

    :param geo_ref_points: mapping of corner name to ``{'x': ..., 'y': ...}``
    :return: mapping of corner name to ``{'lat': y, 'lon': x}``
    """
    extent = {}
    for corner_name, xy in geo_ref_points.items():
        extent[corner_name] = {'lat': xy['y'], 'lon': xy['x']}
    return extent


def get_grid_spatial_projection(fname):
    """
    Read the corner coordinates and CRS text of a raster file.

    :param fname: path of the raster, passed straight to ``rasterio.open``
    :return: ``(geo_ref_points, spatial_reference)`` where ``geo_ref_points``
        maps ``'ul'``/``'ur'``/``'ll'``/``'lr'`` to ``{'x': ..., 'y': ...}``
        and ``spatial_reference`` is the CRS as a WKT string
    """
    with rasterio.open(fname, 'r') as src:
        left, bottom, right, top = src.bounds

        # Use the ``crs_wkt`` attribute when the dataset object has a truthy
        # one, otherwise fall back to ``crs.wkt``.
        wkt = getattr(src, 'crs_wkt', None)
        if not wkt:
            wkt = src.crs.wkt
        spatial_reference = str(wkt)

        corners = {}
        corners['ul'] = {'x': left, 'y': top}
        corners['ur'] = {'x': right, 'y': top}
        corners['ll'] = {'x': left, 'y': bottom}
        corners['lr'] = {'x': right, 'y': bottom}
        return corners, spatial_reference

In [2]:
# Open a Datacube using the 'barra-dev' environment of the local
# 'datacube.conf' file, and keep a handle on its index for the cells below.
dc = Datacube(config='datacube.conf',env='barra-dev')
index = dc.index

# Build the product from its definition document and add it to the index.
# NOTE(review): presumably re-running this cell against an index that already
# holds the product is harmless -- confirm against the datacube docs.
product_def = generate_product_defn()
product = index.products.from_doc(product_def)
indexed_product = index.products.add(product)
print(indexed_product)

DatasetType(name='accum_prcp_daily', id_=16)


In [3]:
# Index every daily GeoTIFF found under the data directory.
path = Path('/g/data/u46/users/dg6911/BARRA_Daily/')
datasets = find_datasets(path)
resolver = Doc2Dataset(index)

for name, dataset in datasets.items():
    # doc is a (uri, metadata document) tuple.
    doc = generate_dataset_doc(name, dataset)
    dataset, err = resolver(doc[1], doc[0])
    if dataset is None:
        # The resolver reports failures through ``err`` instead of raising;
        # skip the file rather than passing ``None`` on to the index.
        logging.error("Couldn't resolve %s/%s: %s", path, name, err)
        continue
    try:
        indexed_dataset = index.datasets.add(dataset)
    except Exception:
        # Best effort: record the failure (with traceback) and carry on with
        # the remaining files. ``logging.exception`` already attaches the
        # active exception, so it is not passed as a format argument.
        logging.exception("Couldn't index %s/%s", path, name)
