In [122]:
#papermill_description=imports

import json
import os
import geopandas as gpd
from io import StringIO

from geodata_fetch import harvest
from gis_utils.dataframe import get_bbox_from_geodf
from gis_utils.stac import save_metadata_sidecar
from gis_utils.visualisation import get_geotiff_statistics, colour_geotiff_and_save_cog
from gis_utils.colormap import get_colormap, display_colormap_as_html

import rasterio.plot
import logging
import sys

# remove these once git_utile.vis working:
import numpy as np
import rasterio
from rio_cogeo.cogeo import cog_translate
from rio_cogeo.profiles import cog_profiles
from rasterio.io import MemoryFile
from matplotlib import cm
from matplotlib.colors import Normalize
from rasterio.plot import reshape_as_raster
from rasterio.warp import calculate_default_transform


# Configure logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger(__name__)

# Set environment variable for AWS public datasets
os.environ['AWS_NO_SIGN_REQUEST'] = 'YES'

In [123]:
#papermill_description= slga_metadata

slga_json = {
  "slga_layers": {
    "Organic_Carbon": {
      "layer_name": "Organic_Carbon",
      "title": "Organic Carbon",
      "unit": "%"
      },
    "Bulk_Density": {
      "layer_name": "Bulk_Density",
      "title": "Bulk Density (whole earth)",
      "unit": "g/cm3"
      },
    "Clay": {
      "layer_name": "Clay",
      "title": "Clay",
      "unit": "%"
       },
    "Sand": {
      "layer_name": "Sand",
      "title": "Sand",
      "unit": "%"
      },
    "Silt": {
      "layer_name": "Silt",
      "title": "Silt",
      "unit": "%"
       },
    "pH_CaCl2": {
      "layer_name": "pH_CaCl2",
      "title": "pH (CaCl2)",
      "unit": "pH"
       },
    "Available_Water_Capacity": {
      "layer_name": "Available_Water_Capacity",
      "title": "Available water capacity",
      "unit": "%"
       },
    "Total_Nitrogen": {
      "layer_name": "Total_Nitrogen",
      "title": "Total nitrogen",
      "unit": "%"
      },
    "Total_Phosphorus": {
      "layer_name": "Total_Phosphorus",
      "title": "Total phosphorus",
      "unit": "%"
       },
    "Effective_Cation_Exchange_Capacity": {
      "layer_name": "Effective_Cation_Exchange_Capacity",
      "title": "Effective cation exchange capacity",
      "unit": "meg/100g"
       },
    "Depth_of_Regolith": {
      "layer_name": "Depth_of_Regolith",
      "title": "Depth of regolith",
      "unit": "m"
       }
  },
  "layer_depth":["0-5cm", "5-15cm", "15-30cm", "30-60cm", "60-100cm"]
}

In [124]:
#papermill_description=parameters

notebook_key = "localjupyter"
geojson = {
    'body': {
        "type": "FeatureCollection",
        "name": "dissolved-boundaries",
        "crs": {
            "type": "name",
            "properties": {
                "name": "urn:ogc:def:crs:OGC:1.3:CRS84" 
            }
        },
        "features": [
            {
                "type": "Feature",
                "properties": {
                    "fid": 1
                },
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [
                        [
                            [116.26012130269045, -29.225295369642396],
                            [116.261724812149055, -29.241374854584375],
                            [116.283751968396274, -29.256813692452539],
                            [116.284342735038919, -29.268250184258388],
                            [116.292247755352392, -29.265992437426529],
                            [116.292360282331941, -29.293057573630019],
                            [116.314865678242256, -29.293523728033122],
                            [116.326259034921833, -29.293033039128805],
                            [116.326315298411629, -29.305397680579894],
                            [116.355065941687045, -29.307016748931797],
                            [116.355065941687045, -29.306575187382712],
                            [116.383366477044206, -29.307384715430175],
                            [116.384322956370426, -29.290407813444993],
                            [116.387586238777402, -29.282629879611861],
                            [116.386517232471661, -29.259807919053017],
                            [116.359201308185533, -29.259488866292969],
                            [116.359229439930417, -29.259243440415627],
                            [116.35242155766754, -29.259292525638209],
                            [116.352140240218716, -29.220237788279107],
                            [116.302234524787593, -29.223503148505326],
                            [116.281388901825679, -29.2239696200396],
                            [116.26012130269045, -29.225295369642396]
                        ]
                    ]
                }
            }
        ]
    }
}
propertyName = "test"
output_type = "overlay"
colormap = "viridis"

# new parameters to enable multiple SLGA attributes to be fetched
slga_layer = "Organic_Carbon"
slga_layer_depth = "0-5cm"



In [125]:
#papermill_description=process_variables

# Construct the filenames using propertyName
# name_property-name_attribute.extension

output_tiff_directory = f"/tmp/{notebook_key}"

#initialise empty dictionary for metadata
asset_metadata={}

In [126]:
#papermill_description=processing_file_io

req = geojson
geojson_data = req['body']  # Directly accessing the 'body' since it's already a dictionary in this mock setup

# Convert the GeoJSON string to a GeoDataFrame
gdf = gpd.read_file(StringIO(json.dumps(geojson_data)))

In [127]:
#papermill_description=processing_bounding_box

geom = gdf.geometry #for data-harvester clip function

# Get bounding box from GeoJSON
bbox = get_bbox_from_geodf(geojson_data)

gdf_lon = gdf.centroid.x[0] #approximate centre latitude based on the input geojson - point roughly in the middle
gdf_lat = gdf.centroid.y[0]


  gdf_lon = gdf.centroid.x[0] #approximate centre latitude based on the input geojson - point roughly in the middle

  gdf_lat = gdf.centroid.y[0]


In [128]:
#papermill_description=processing_input_harvest_params

data_mask=True
add_buffer = False
resample=False

# Resolution of data download in arcseconds (1 arcsec ~ 30m)
target_res = 1
target_proj = "EPSG:3857" #find way to set this once at top of notebook and  have it feed through to everything else

date_start = "2022-10-01" #these aren't needed for SLGA but are expected in the package and throw error if excluded - need to fix
date_end = "2022-11-30"
time_intervals = 0 #same issue as above, need to fix in geodata package to be optional.

# layer depth should be in list format to avoid errors and allow for multiple depths later on.
target_sources = {"SLGA":{
                      slga_layer: [slga_layer_depth]
                  }}

json_data = {
    "property_name":propertyName,
    "outpath": output_tiff_directory,
    "data_mask": data_mask,
    "target_res": str(target_res),
    "target_crs": target_proj,
    "date_start": date_start if date_start is not None else "2022-10-01", #a date of some kind must be provided or the harvester complains
    "date_end": date_end if date_end is not None else "2022-11-30",
    "target_centroid_lat": gdf_lat,
    "target_centroid_lng": gdf_lon,
    "time_intervals": time_intervals,
    "target_sources": target_sources,
    "target_bbox": bbox,
    "add_buffer": add_buffer,
    "resample": resample
}

# Converting dictionary to JSON formatted string
data = json.dumps(json_data)
#print(data)

# Create a file-like object from JSON string
json_file_like = StringIO(data)

In [129]:
#papermill_description=download_slga_data

df = harvest.run(json_file_like, geom)

INFO:geodata_fetch.harvest:Starting the data harvester
INFO:geodata_fetch.harvest:Requested the following 1 sources: ['SLGA']
INFO:geodata_fetch.harvest:Begin fetching SLGA data.
INFO:geodata_fetch.getdata_slga:Downloaded SLGA_Organic_Carbon_0-5cm_test.tif
INFO:geodata_fetch.harvest:SLGA data downloaded successfully: ['/tmp/localjupyter/SLGA_Organic_Carbon_0-5cm_test.tif']
INFO:geodata_fetch.harvest:Mask is true, applying to geotifs.
INFO:geodata_fetch.harvest:files to mask: ['SLGA_Organic_Carbon_0-5cm_test.tif']


In [130]:
#papermill_description=slga_legend_metadata


if slga_layer in slga_json['slga_layers']:
    #print(slga_layer)
    slga_attribute_name = slga_json['slga_layers'][slga_layer]['title']
    unit = slga_json['slga_layers'][slga_layer]['unit']
    #print(f"Name of {slga_layer} = {slga_attribute_name}")
    #print(f"Unit of {slga_layer} = {unit}")

    # add stats to metadata variable
    asset_metadata['slga_meta'] = {
    'attribute_name': slga_attribute_name,
    'unit': unit}
    print(asset_metadata)

print(asset_metadata)

{'slga_meta': {'attribute_name': 'Organic Carbon', 'unit': '%'}}
{'slga_meta': {'attribute_name': 'Organic Carbon', 'unit': '%'}}


In [133]:
#papermill_description=processing_metadata_and_statistics

# asset_type signifies the type of asset, e.g. overlay that is stored in the application DB

"""
This has been put within a loop to ensure the metadata and raster stats are calculated on the correct version of the geotif. Because there is a '.tif' and '_masked.tif', we want to ensure the '_masked.tif' is the one being used.
"""

tiff_filepath = 'SLGA_' + slga_layer + '_' + slga_layer_depth + '_' + propertyName + '_masked.tiff'

asset_metadata = {
'properties': {
    'output_type': output_type,
}}
output_tiff_filename = os.path.join(output_tiff_directory, tiff_filepath)

print(output_tiff_filename)

# get statistics from input geotiff
raster_stats = get_geotiff_statistics(output_tiff_filename)

# add stats to metadata variable
asset_metadata['data'] = {
'raster_stats': raster_stats}

# generate the coloured geotiff and save as a COG
cog_filename = colour_geotiff_and_save_cog(output_tiff_filename, colormap)

# generate the colormap for the legend
colormap_legend = get_colormap(colormap, [raster_stats['min'], raster_stats['mean'],  raster_stats['max']], 21)

# add colormap to metadata
asset_metadata['legend'] = {
    'colormap': {
        'type': 'discrete',
        'colors': colormap_legend
    }
}

asset_metadata['properties']['overlayType'] = 'SLGA'

save_metadata_sidecar(cog_filename, asset_metadata)



/tmp/localjupyter/SLGA_Organic_Carbon_0-5cm_test_masked.tiff
INFO:rasterio._env:GDAL signalled an error: err_no=4, msg='/tmp/localjupyter/SLGA_Organic_Carbon_0-5cm_test_masked.tiff: No such file or directory'


RasterioIOError: /tmp/localjupyter/SLGA_Organic_Carbon_0-5cm_test_masked.tiff: No such file or directory

In [None]:
print(asset_metadata)