In [None]:
import base64
from datetime import datetime
import json
import logging
import os
import pprint

import ee
from google.cloud import storage
import IPython.display
import matplotlib.pyplot as plt
import pystac
from pystac_client import Client
import requests

%matplotlib inline


# Hydrosat fusion STAC endpoint URL.
# NOTE(review): this name is not referenced in the visible cells; the catalog
# is opened from `cat_url` instead - confirm whether it is still needed.
endpoint = 'https://fusion-stac.hydrosat.com/'


In [None]:
# Initialize the Earth Engine client library for the 'dri-hydrosat' project.
# NOTE(review): the project name is repeated here as a literal because
# `project_id` is only assigned in a later cell - keep the two in sync.
ee.Initialize(project='dri-hydrosat')

In [None]:
# Read the API username and password from the local credentials file
with open('creds.json') as creds_file:
    creds = json.load(creds_file)

# Build the HTTP Basic authorization header: base64 of "username:password"
userpass = '{}:{}'.format(creds['username'], creds['password'])
b64 = base64.b64encode(userpass.encode()).decode()
headers = {'Authorization': f'Basic {b64}'}

# Open the STAC catalog, sending the authorization header with the requests
cat_url = 'https://stac.hydrosat.com'
catalog = Client.open(cat_url, headers=headers)


In [None]:
# Root folder for the local GeoTIFF copies; it is scanned for existing .tif
# files and downloads are written to <workspace>/<collection>/<year>/
# NOTE(review): this is an absolute, user-specific path - adjust per machine
workspace = '/Users/Charles.Morton@dri.edu/Projects/hydrosat-assets/geotiffs/'
# workspace = os.getcwd()

# Google Cloud project (used for the Earth Engine asset IDs and the storage
# client) and the storage bucket that the images are staged in
project_id = 'dri-hydrosat'
bucket_name = 'hydrosat'

# Output band names
lst_band_name = 'lst'
qa_band_name = 'qa'

# When True, images are downloaded again and existing assets are removed
# before ingestion; when False, existing files and assets are skipped
overwrite_flag = False

# Date range passed to the catalog search
start_date = "2019-01-01T00:00:00Z"
end_date = "2020-01-01T00:00:00Z"

# Bounding box passed to the catalog search as `bbox`
# NOTE(review): presumably [west, south, east, north] in degrees - confirm
bbaoi = [-111.51, 36.85, -110.36, 37.86]  # Powell
# Alternative point geometries (for an `intersects` search instead of `bbox`)
#aoi = {"type": "Point", "coordinates": [-120.0, 38.8]}     # Tahoe
#aoi = {"type": "Point", "coordinates": [-111.3, 37.06]}    # Powell
#aoi = {"type": "Point", "coordinates": [-114.75, 36.08]}   # Mead
#aoi = {"type": "Point", "coordinates": [-114.65, 35.43]}   # Mojave

# STAC collection to search; the same name is used to build the
# Earth Engine image collection ID
collection = "starfm_predictions_modis_landsat"
# collection = "pydms_sharpened_landsat"
# collection = "pydms_sharpened_modis"
# collection = "pydms_sharpened_viirs"
print(f'Collection: {collection}\n')

# Search the catalog for the items of the selected collection that fall in
# the bounding box and date range (at most 1000 items are returned)
# An `intersects=aoi` point geometry could be used in place of `bbox`
search = catalog.search(
    collections=[collection],
    bbox=bbaoi,
    datetime=[start_date, end_date],
    max_items=1000,
)
items = search.item_collection()

# Dictionary form of the results; the items are read from its "features" list
itemjson = items.to_dict()
print(f'Number of catalog items: {len(items)}\n')


# Collect the names (not the paths) of all the GeoTIFF files found
# anywhere below the workspace folder
file_list = []
for _root, _dirs, folder_files in os.walk(workspace, topdown=False):
    file_list.extend(
        file_name for file_name in folder_files if file_name.endswith('.tif')
    )


# Earth Engine image collection that the images are ingested into
asset_coll = f'projects/{project_id}/assets/{collection}'
asset_coll_info = ee.data.getInfo(asset_coll)
if not asset_coll_info:
    # Nothing was returned for the collection ID, so wait for the user to
    # confirm and then create the collection
    print('\nCollection does not exist and will be built\n  {}'.format(asset_coll))
    input('Press ENTER to continue')
    ee.data.createAsset({'type': 'IMAGE_COLLECTION'}, asset_coll)

# TODO: Get the list of assets IDs instead of calling ee.data.getInfo in the loop
# asset_id_list = ee.ImageCollection(asset_coll).aggregate_array('system:index').getInfo()


# Client for the storage bucket that the images are uploaded to
storage_client = storage.Client(project=project_id)


# Properties whose values are converted to strings before ingestion
# This is needed especially for the nested dictionary properties
# TODO: Check if JSON dump would be better for this
STR_PROPERTIES = (
    'processing:time_of_day_range', 'processing:nrt', 'processing:overwrite_outputs',
    'processing:public', 'processing:sr_only', 'processing:test_mode',
    'processing:qa_screen_opts', 'processing:starfm_opts',
    'processing:pydms_common_opts', 'processing:pydms_dt_opts',
    'hydrosat:fusion_inputs',
)

# Properties that are not written to the asset
DROP_PROPERTIES = (
    'processing:software', 'processing:pydms_nn_opts', 'processing:pydms_sknn_opts',
    'processing:lineage',
)


def build_asset_properties(item_properties, file_name):
    """Return a cleaned copy of a STAC item's properties for the asset.

    The input dictionary is not modified, so the loop can be re-run on the
    same search results.

    Parameters
    ----------
    item_properties : dict
        The "properties" dictionary of a STAC item.
    file_name : str
        Stored in the "file_name" property.

    Returns
    -------
    dict
        The properties with "file_name" and "date_ingested" added, the
        STR_PROPERTIES values converted to strings, the DROP_PROPERTIES keys
        removed (if present), and ":" and "-" replaced with "_" in the
        remaining keys.
    """
    properties = dict(item_properties)
    properties['file_name'] = file_name
    properties['date_ingested'] = datetime.today().strftime("%Y-%m-%d")

    for key in STR_PROPERTIES:
        if key in properties:
            properties[key] = str(properties[key])

    # Keys without ":" or "-" are kept as they are; the renamed keys are
    # applied last so that they take precedence if two names collide
    kept = {}
    renamed = {}
    for key, value in properties.items():
        if key in DROP_PROPERTIES:
            continue
        if (':' in key) or ('-' in key):
            renamed[key.replace(':', '_').replace('-', '_')] = value
        else:
            kept[key] = value
    kept.update(renamed)
    return kept


def download_file(url, local_path, chunk_size=10000000):
    """Stream the content of a URL to a local file.

    The data is first written to "<local_path>.part" and only renamed to
    `local_path` once the download has finished, so that an interrupted
    download does not leave a truncated .tif file that a later run would
    treat as already downloaded.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    """
    part_path = local_path + '.part'
    try:
        with requests.get(url, stream=True) as response:
            response.raise_for_status()
            with open(part_path, 'wb') as part_file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    part_file.write(chunk)
        os.replace(part_path, local_path)
    finally:
        # Only true if the download failed before the rename
        if os.path.exists(part_path):
            os.remove(part_path)


# The bucket does not change between items, so only look it up once
bucket = storage_client.bucket(bucket_name)

for item in itemjson["features"]:

    lst_image_url = item["assets"]["lst"]["href"]
    qa_image_url = item["assets"]["combined_qa"]["href"]

    # Drop the query string and split the URL path into its components
    lst_url_parts = lst_image_url.split('?', 1)[0].split('/')
    qa_url_parts = qa_image_url.split('?', 1)[0].split('/')

    # The collection name and year are taken from the LST image URL
    # A separate name is used for the collection so that the "collection"
    # search setting is not overwritten by the loop
    item_collection = lst_url_parts[-4]
    year = lst_url_parts[-3]
    lst_file_name = lst_url_parts[-1]
    qa_file_name = qa_url_parts[-1]
    print(f'{lst_file_name}')
    print(f'{qa_file_name}')

    year_folder = os.path.join(workspace, item_collection, year)
    lst_local_path = os.path.join(year_folder, lst_file_name)
    qa_local_path = os.path.join(year_folder, qa_file_name)
    lst_blob_name = f'{item_collection}/{year}/{lst_file_name}'
    qa_blob_name = f'{item_collection}/{year}/{qa_file_name}'
    lst_bucket_path = f'gs://{bucket_name}/{lst_blob_name}'
    qa_bucket_path = f'gs://{bucket_name}/{qa_blob_name}'

    # datetime.fromisoformat() only accepts a trailing "Z" in Python 3.11+,
    # so write the UTC designator as an explicit offset
    image_dt = datetime.fromisoformat(
        item['properties']['datetime'].replace('Z', '+00:00'))
    image_id = f'{item["properties"]["mgrs_tile"]}_{image_dt.strftime("%Y%m%d")}'
    asset_id = f'projects/{project_id}/assets/{item_collection}/{image_id}'
    print(asset_id)

    # Download the images locally if they are not already in the workspace
    downloads = (
        ('LST', lst_image_url, lst_file_name, lst_local_path),
        ('QA', qa_image_url, qa_file_name, qa_local_path),
    )
    for label, image_url, image_file_name, local_path in downloads:
        if overwrite_flag or (image_file_name not in file_list):
            print(f'  Downloading {label} image from API')
            # Either image may be the first file written to the year folder
            os.makedirs(year_folder, exist_ok=True)
            download_file(image_url, local_path)

    if ee.data.getInfo(asset_id):
        if overwrite_flag:
            print('  Asset already exists, removing')
            try:
                ee.data.deleteAsset(asset_id)
            except Exception as e:
                logging.exception(f'unhandled exception: {e}')
                continue
        else:
            print('  Asset already exists and overwrite is False')
            continue

    # Upload the images to the bucket
    print('  Uploading to bucket')
    lst_blob = bucket.blob(lst_blob_name)
    lst_blob.upload_from_filename(lst_local_path, timeout=120)
    qa_blob = bucket.blob(qa_blob_name)
    qa_blob.upload_from_filename(qa_local_path, timeout=120)

    # Build the asset properties from a copy of the item properties
    # NOTE(review): the LST file name is stored as "file_name" - confirm
    #   this is the intended value
    properties = build_asset_properties(item['properties'], lst_file_name)

    # Ingest into Earth Engine as a two band image (one tileset per file)
    print('  Ingesting into Earth Engine')
    params = {
        'name': asset_id,
        'bands': [
            {'id': lst_band_name, 'tilesetId': 'lst_image', 'tilesetBandIndex': 0},
            {'id': qa_band_name, 'tilesetId': 'qa_image', 'tilesetBandIndex': 0},
        ],
        'tilesets': [
            {'id': 'lst_image', 'sources': [{'uris': [lst_bucket_path]}]},
            {'id': 'qa_image', 'sources': [{'uris': [qa_bucket_path]}]},
        ],
        'properties': properties,
        'startTime': image_dt.isoformat(),
        # 'startTime': image_dt.isoformat() + '.000000000Z',
        # 'pyramiding_policy': 'MEAN',
        # 'missingData': {'values': [nodata_value]},
    }
    task_id = ee.data.newTaskId()[0]
    ee.data.startIngestion(task_id, params, allow_overwrite=True)

print('Done')
