In [1]:
#papermill_description=imports

"""
TODO: review imported packages and remove those not being used.
"""

import json
import os
import geopandas as gpd
from io import StringIO

from geodata_fetch import harvest
from gis_utils.dataframe import get_bbox_from_geodf
from gis_utils.stac import save_metadata_sidecar

import rasterio.plot
import logging
import sys

# TODO: remove these once the gis_utils visualisation module is working:
import numpy as np
import rasterio
from rio_cogeo.cogeo import cog_translate
from rio_cogeo.profiles import cog_profiles
from rasterio.io import MemoryFile
from matplotlib import cm
from matplotlib.colors import Normalize
from rasterio.plot import reshape_as_raster
from rasterio.warp import calculate_default_transform


# Route log records to stdout so they show up in the notebook output.
# Swap `level` for DEBUG, WARNING, ERROR or CRITICAL to change verbosity.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logger = logging.getLogger(__name__)

# Environment flag used when reading AWS public datasets (requests are not signed).
os.environ.update({'AWS_NO_SIGN_REQUEST': 'YES'})

In [2]:
#papermill_description=functions_for_cog_and_colour

def colour_geotiff_and_save_cog(input_geotiff, colour_map):
    """Colour band 1 of a GeoTIFF with a matplotlib colormap and save it as a COG.

    Two files are written next to ``input_geotiff``:

    * ``<stem>_colored.tiff`` - intermediate 3-band RGB GeoTIFF.
    * ``<stem>_cog.public.tiff`` - Cloud Optimized GeoTIFF built from it.

    Args:
        input_geotiff: Path to the source GeoTIFF; only band 1 is read.
        colour_map: Colormap accepted by matplotlib (e.g. ``"viridis"``).

    Returns:
        Path of the COG that was written.

    Raises:
        ValueError: If band 1 contains no valid (unmasked, non-NaN) pixels.
        Exception: ``'Unable to convert to cog'`` when the COG translation
            fails; the underlying error is chained as ``__cause__``.
    """
    import matplotlib  # local import: only needed to reach the colormap registry

    # splitext swaps only the real extension. str.replace('.tif', ...) would
    # also rewrite '.tif' inside directory names, turn 'x.tiff' into
    # 'x_colored.tifff', and leave the name unchanged (output == input) when
    # the path has no '.tif' at all.
    stem, _ = os.path.splitext(input_geotiff)
    output_colored_tiff_filename = f"{stem}_colored.tiff"
    output_cog_filename = f"{stem}_cog.public.tiff"

    # cm.get_cmap has been removed from recent matplotlib releases; prefer the
    # colormap registry and fall back to the legacy helper on older versions.
    registry = getattr(matplotlib, "colormaps", None)
    if registry is not None and hasattr(registry, "get_cmap"):
        cmap = registry.get_cmap(colour_map)
    else:
        cmap = cm.get_cmap(colour_map)

    with rasterio.open(input_geotiff) as src:
        meta = src.meta.copy()
        dst_crs = rasterio.crs.CRS.from_epsg(4326)  # TODO: make the target CRS configurable
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds
        )

        # NOTE(review): only the *metadata* is switched to the EPSG:4326 grid
        # here; the pixel array below is written as read, with no call to
        # rasterio.warp.reproject. Confirm this relabelling is intended.
        meta.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height,
            'count': 3,  # RGB output
        })

        # masked=True applies the dataset's nodata/mask information; masked
        # cells become NaN so they are excluded from the colour scale.
        band = src.read(1, masked=True).astype('float32')
        values = band.filled(np.nan)

        valid = values[~np.isnan(values)]
        if valid.size == 0:
            raise ValueError(
                f"No valid pixels in band 1 of {input_geotiff}; cannot build a colour scale"
            )

        # Vectorised min/max instead of the Python builtins iterating the array.
        norm = Normalize(vmin=valid.min(), vmax=valid.max())

        # cmap() returns RGBA floats in [0, 1]; keep RGB and scale to bytes.
        coloured_data = (cmap(norm(values))[:, :, :3] * 255).astype(np.uint8)

        with rasterio.open(output_colored_tiff_filename, 'w', **meta) as dst:
            # (rows, cols, bands) -> (bands, rows, cols) as rasterio expects.
            dst.write(reshape_as_raster(coloured_data))

    try:
        dst_profile = cog_profiles.get('deflate')
        # The profile belongs in dst_kwargs only; `config` is for GDAL
        # environment options, so it is no longer passed the profile.
        cog_translate(
            output_colored_tiff_filename,
            output_cog_filename,
            dst_kwargs=dst_profile,
            in_memory=True,
            dtype="uint8",
            add_mask=False,
            nodata=0,
        )
    except Exception as exc:
        # Keep the original message/type for callers, but chain the cause so
        # the real failure is visible in the traceback.
        raise Exception('Unable to convert to cog') from exc

    return output_cog_filename

In [3]:
#papermill_description=parameters

# Default parameter values for a local run of this notebook.
notebook_key = "localjupyter"

# Mock request: the GeoJSON FeatureCollection sits under the "body" key.
# It holds a single polygon feature whose ring is closed (the first vertex is
# repeated as the last one).
geojson = {
    "body": {
        "type": "FeatureCollection",
        "name": "dissolved-boundaries",
        "crs": {
            "type": "name",
            "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"},
        },
        "features": [
            {
                "type": "Feature",
                "properties": {"fid": 1},
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [[
                        [116.26012130269045, -29.225295369642396],
                        [116.261724812149055, -29.241374854584375],
                        [116.283751968396274, -29.256813692452539],
                        [116.284342735038919, -29.268250184258388],
                        [116.292247755352392, -29.265992437426529],
                        [116.292360282331941, -29.293057573630019],
                        [116.314865678242256, -29.293523728033122],
                        [116.326259034921833, -29.293033039128805],
                        [116.326315298411629, -29.305397680579894],
                        [116.355065941687045, -29.307016748931797],
                        [116.355065941687045, -29.306575187382712],
                        [116.383366477044206, -29.307384715430175],
                        [116.384322956370426, -29.290407813444993],
                        [116.387586238777402, -29.282629879611861],
                        [116.386517232471661, -29.259807919053017],
                        [116.359201308185533, -29.259488866292969],
                        [116.359229439930417, -29.259243440415627],
                        [116.35242155766754, -29.259292525638209],
                        [116.352140240218716, -29.220237788279107],
                        [116.302234524787593, -29.223503148505326],
                        [116.281388901825679, -29.2239696200396],
                        [116.26012130269045, -29.225295369642396],
                    ]],
                },
            },
        ],
    },
}

propertyName = "test"
output_type = "overlay"
colormap = "viridis"


In [4]:
#papermill_description=process_variables

# Per-run working directory under /tmp, keyed by notebook_key.
# File naming convention noted by the original author:
#   name_property-name_attribute.extension
output_tiff_directory = "/tmp/{}".format(notebook_key)

In [5]:
#papermill_description=processing_file_io

# `geojson` stands in for the incoming request in this mock setup; the
# FeatureCollection is already a dictionary stored under 'body'.
req = geojson
geojson_data = req['body']

# Serialise the dictionary to GeoJSON text and have geopandas parse it from an
# in-memory text buffer.
with StringIO(json.dumps(geojson_data)) as geojson_buffer:
    gdf = gpd.read_file(geojson_buffer)

In [6]:
#papermill_description=processing_bounding_box

geom = gdf.geometry  # passed to the data harvester for its clip function

# Get bounding box from GeoJSON
# NOTE(review): the raw GeoJSON dict (not `gdf`) is handed to a helper named
# get_bbox_from_geodf - confirm that this is the input it expects.
bbox = get_bbox_from_geodf(geojson_data)

# Approximate centre of the input geometry (a point roughly in the middle).
# The centroid is computed once and reused for both coordinates, rather than
# being recomputed for each one. .iloc[0] selects the first feature by
# position instead of relying on an index label of 0.
# NOTE(review): the centroid is taken in the layer's own CRS (lon/lat for this
# input), so it is approximate - confirm that is acceptable.
centroid_points = gdf.centroid
gdf_lon = centroid_points.x.iloc[0]  # centre longitude
gdf_lat = centroid_points.y.iloc[0]  # centre latitude


  gdf_lon = gdf.centroid.x[0] #approximate centre latitude based on the input geojson - point roughly in the middle

  gdf_lat = gdf.centroid.y[0]


In [7]:
#papermill_description=processing_input_harvest_params

"""
TODO: These are also parameters, so I think this cell needs papermill parameter tags. (It also needs to be rewritten to be more concise.)
"""

# Processing switches handed to the harvester.
data_mask = True
add_buffer = False
resample = False

# Download resolution in arcseconds (1 arcsec ~ 30m).
target_res = 1
# TODO: find a way to set this once at the top of the notebook and have it
# feed through to everything else.
target_proj = "EPSG:3857"

# Dates and time_intervals are not needed for SLGA, but the package expects
# them and raises an error when they are missing (to be fixed in the package).
date_start = "2022-10-01"
date_end = "2022-11-30"
time_intervals = 0

# Single SLGA layer for now: Soil Organic Carbon for surface soil (0-5cm).
target_sources = {"SLGA": {"Organic_Carbon": ["0-5cm"]}}

# Settings document consumed by the harvester. A date of some kind must always
# be supplied, hence the fallbacks when a date is None.
json_data = dict(
    property_name=propertyName,
    outpath=output_tiff_directory,
    data_mask=data_mask,
    target_res=str(target_res),
    target_crs=target_proj,
    date_start="2022-10-01" if date_start is None else date_start,
    date_end="2022-11-30" if date_end is None else date_end,
    target_centroid_lat=gdf_lat,
    target_centroid_lng=gdf_lon,
    time_intervals=time_intervals,
    target_sources=target_sources,
    target_bbox=bbox,
    add_buffer=add_buffer,
    resample=resample,
)

# Serialise the settings and wrap the JSON text in a file-like object.
data = json.dumps(json_data)
json_file_like = StringIO(data)

In [8]:
#papermill_description=download_slga_data

# Run the data harvester with the JSON settings built in the previous cell;
# `geom` (the input geometries) is supplied for the harvester's clip step.
# NOTE(review): the log captured under this cell reports the SLGA config file
# failing to load and an empty list of files to mask - confirm what `df`
# holds in that case before relying on it downstream.
df = harvest.run(json_file_like, geom)

INFO:geodata_fetch.harvest:Starting the data harvester
INFO:geodata_fetch.harvest:Requested the following 1 sources: ['SLGA']
INFO:geodata_fetch.harvest:Begin fetching SLGA data.
ERROR:geodata_fetch.getdata_slga:Error loading slga_soil.json: [Errno 2] No such file or directory: '/var/lang/lib/python3.10/site-packages/config/slga_soil.json'
ERROR:geodata_fetch.getdata_slga:Failed to get SLGA layers: 'NoneType' object is not subscriptable
INFO:geodata_fetch.harvest:SLGA data downloaded successfully: None
INFO:geodata_fetch.harvest:Mask is true, applying to geotifs.
INFO:geodata_fetch.harvest:files to mask: []


In [9]:
#papermill_description=processing_metadata

#df.plot() #check data download by visualising one of the downloaded files

# TODO: write a function that gets zonal statistics for the raster layers,
#       e.g. max and min soil organic carbon.
# TODO: write out the metadata needed for staging/production to a sidecar file.

# output_type records the kind of asset (e.g. an overlay) that is stored in
# the application DB. Some asset_metadata has to be supplied - leaving it out
# raised an error - so only this minimal entry is provided for now.
asset_metadata = dict(properties=dict(output_type=output_type))

In [10]:
#papermill_description=processing_cog

"""
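TODO: the function that handles colouring and COGs is intended to move to the gis_utils visualisation module; until then this cell uses the notebook-local colour_geotiff_and_save_cog.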
"""

# Colour every masked GeoTIFF in the output directory and convert it to a COG.
# cog_filename ends up holding the path of the last COG written; it is read by
# the metadata sidecar step. Binding it up front means a run that produced no
# rasters fails here with a clear message instead of a NameError later on.
cog_filename = None

# sorted() makes the processing order - and therefore which COG is "last" -
# deterministic; os.listdir returns entries in arbitrary order.
for tiff in sorted(os.listdir(output_tiff_directory)):
    if not tiff.endswith('_masked.tif'):
        continue

    print(f"reading in geotiff: {tiff}")

    output_tiff_filename = os.path.join(output_tiff_directory, tiff)
    cog_filename = colour_geotiff_and_save_cog(output_tiff_filename, colormap)
    print(cog_filename)

if cog_filename is None:
    raise FileNotFoundError(
        f"No '_masked.tif' files found in {output_tiff_directory}; "
        "check the output of the harvest step before building COGs"
    )

In [None]:
# Add notebook-specific characteristics to the asset metadata, then write the
# metadata sidecar for the generated COG.
asset_metadata['properties'].update(overlayType='SLGA')

save_metadata_sidecar(cog_filename, asset_metadata)