In [1]:
import numpy as np
import xarray as xr
import geopandas as gpd
import pandas as pd
from matplotlib import pyplot as plt
from osgeo import gdal, ogr, gdal_array
import dask
import datacube 
from datacube.helpers import ga_pq_fuser
from datacube.storage import masking
from datacube.utils import geometry
import os
#import custom functions
import sys
sys.path.append('src')
import DEAPlotting, SpatialTools, BandIndices, DEADataHandling
from load_data import load_data
from transform_tuple import transform_tuple
from query_from_shp import query_from_shp
from rsgislib.segmentation import segutils
from rasterstats import zonal_stats
from imageSeg import imageSeg
import fiona
import rasterio.features

In [2]:
# ---- Run configuration -------------------------------------------------
# Everything a user is expected to tune lives in this cell.

# Input data folder and root folder for outputs (keep the trailing slash).
data = 'data/'
results = 'results/'

# True  -> query the datacube for fresh data
# False -> re-use data that was saved previously
load_fresh_data = True

# Landsat sensors to load.
sensors = ['ls5', 'ls7', 'ls8']

# Area of interest: either a polygon shapefile...
polygon_mask = True
shp_fpath = data + 'spatial/wagga_paddockDrill_AOI.shp'

# ...or, when polygon_mask is False, a box of +/- latLon_adjust degrees
# centred on this latitude/longitude.
lat = -34.578728
lon = 146.264338
latLon_adjust = 0.2

# Label for the area of interest, the year of interest (both are used to
# name outputs), and the time window passed to the data queries.
AOI = 'WaggaWagga'
year = '2018'
time_period = ('2018-03-01', '2018-10-30')

#-----------------------------------------

In [3]:
# Create a per-run output folder (<results>/<AOI>_<year>/) to keep things neat,
# and point `results` at it so later cells write inside that folder.
run_folder = AOI + "_" + year

# Only append the run folder once: re-running this cell must not produce
# results/<AOI>_<year>/<AOI>_<year>/...
if os.path.basename(os.path.normpath(results)) != run_folder:
    results = results + run_folder + "/"

# Same value the original cell produced: the run folder without trailing slash.
directory = results.rstrip("/")

# makedirs also creates a missing parent folder and is a no-op when the
# folder already exists.
os.makedirs(directory, exist_ok=True)

In [4]:
# Load the Landsat stack (`landsat`) and the WOfS summary (`wofs_alltime`),
# either freshly from the datacube or from a previously saved netcdf file.
if load_fresh_data:
    # Build the query for the chosen AOI definition.
    if polygon_mask:
        query = query_from_shp(shp_fpath, time_period[0], time_period[1], dask_chunks = 0)
    else:
        query = {'lon': (lon - latLon_adjust, lon + latLon_adjust),
                 'lat': (lat - latLon_adjust, lat + latLon_adjust),
                 'time': time_period}
        query['dask_chunks'] = {'x': 500, 'y': 500}

    # landsat
    landsat = load_data(dc_name = 'irrigated_areas', sensors=sensors,
                        export_name = data + AOI + "_" + year + '.nc', query=query)

    # wofs: same query, but without the time restriction
    dc = datacube.Datacube(app='wofs')
    del query['time']
    wofs_alltime = dc.load(product = 'wofs_summary', **query)

    if polygon_mask:
        # Read the first feature of the shapefile and build a boolean mask
        # on the landsat grid that is True inside the polygon (invert=True).
        with fiona.open(shp_fpath) as shapes:
            crs = geometry.CRS(shapes.crs_wkt)
            first_geometry = next(iter(shapes))['geometry']
            geom = geometry.Geometry(first_geometry, crs=crs)

        mask = rasterio.features.geometry_mask([geom.to_crs(landsat.geobox.crs)],
                                               out_shape=landsat.geobox.shape,
                                               transform=landsat.geobox.affine,
                                               all_touched=False,
                                               invert=True)
        # Mask the landsat data only; wofs_alltime is left unmasked.
        landsat = landsat.where(mask)
else:
    # Load in data from a saved netcdf file.
    # NOTE(review): this file name is hard-coded and does not follow the
    # data + AOI + "_" + year + '.nc' pattern used as export_name above --
    # confirm it matches the configured AOI and year.
    landsat = xr.open_dataset("data/wagga_Summer2017-18.nc")

    # Load wofs for masking, using the lat/lon box from the config cell.
    query_wofs = {'lon': (lon - latLon_adjust, lon + latLon_adjust),
                  'lat': (lat - latLon_adjust, lat + latLon_adjust)}
    dc = datacube.Datacube(app='wofs')
    wofs_alltime = dc.load(product = 'wofs_summary', **query_wofs)

ls5_loading...
ls5_loaded


  if not landsat_ds:


ls7_loading...
ls7_loaded


  if not landsat_ds:


ls8_loading...
ls8_loaded


  if not landsat_ds:


In [6]:
#band indices calculation
def ndvi_func(nir, red):
    """Return the normalised difference (nir - red) / (nir + red)."""
    difference = nir - red
    total = nir + red
    return difference / total

def ndvi_ufunc(ds):
    """Apply ndvi_func to the `nir` and `red` variables of `ds`.

    The call goes through xr.apply_ufunc with dask='parallelized' and a
    float output dtype.
    """
    ufunc_options = {'dask': 'parallelized', 'output_dtypes': [float]}
    return xr.apply_ufunc(ndvi_func, ds.nir, ds.red, **ufunc_options)

# Calculate NDVI for the loaded landsat data and compute the result up front.
NDVI_landsat = ndvi_ufunc(landsat).compute()

In [16]:
# Per-pixel summary statistic: the 0.95 quantile of NDVI along 'time'.
# The result is renamed, squeezed, and the 'quantile' coordinate is dropped.
# (The per-pixel maximum is currently disabled.)
# NDVI_max = NDVI_landsat.max('time').rename('NDVI_max').compute()
NDVI_95 = (
    NDVI_landsat
    .quantile(dim='time', q=[0.95], keep_attrs=True)
    .rename('95%_ndvi')
    .squeeze()
    .drop('quantile')
)

  overwrite_input, interpolation)


In [18]:
#export Gtiff for use in Image segmentation
# The geo-referencing is taken from NDVI_95. This cell previously used
# NDVI_max, whose definition is commented out in the summary-stats cell, so
# it raised a NameError on a fresh kernel. Only the 95th-percentile raster is
# exported; it is the one the segmentation cell reads.
# NOTE(review): assumes transform_tuple accepts NDVI_95 exactly as it did
# NDVI_max (a 2-D array with x and y coordinates) -- confirm.
transform, projection = transform_tuple(NDVI_95, (NDVI_95.x, NDVI_95.y), epsg=3577)

SpatialTools.array_to_geotiff(results + AOI + "_" + year + "ndvi_95.tif",
              NDVI_95.values, geo_transform = transform,
              projection = projection, nodata_val=np.nan)

In [20]:
# File names for the segmentation step; all share the <results><AOI>_<year> prefix.
run_prefix = results + AOI + '_' + year

InputNDVIStats = run_prefix + "ndvi_95.tif"              # input NDVI raster
KEAFile = run_prefix + '.kea'
SegmentedKEAFile = run_prefix + '_sheperdSEG.kea'
SegmentedTiffFile = run_prefix + '_sheperdSEG.tif'
SegmentedPolygons = run_prefix + '_SEGpolygons.shp'

# Run the segmentation with a minimum segment size of 200 pixels.
imageSeg(InputNDVIStats, KEAFile, SegmentedKEAFile,
         SegmentedTiffFile, SegmentedPolygons, minPxls = 200)

Stretch Input Image
Add 1 to stretched file to ensure there are no all zeros (i.e., no data) regions created.
Create Input Image Mask.
Mask stretched Image.
Deleting file: ./WaggaWagga_2018_stchdonly.kea
Deleting file: ./WaggaWagga_2018_stchdonlyOff.kea
Deleting file: ./WaggaWagga_2018_stchdmaskonly.kea
Performing KMeans.
Apply KMeans to image.
Eliminate Single Pixels.
Perform clump.
Eliminate small pixels.
Relabel clumps.
Calculate image statistics and build pyramids.
Deleting file: ./WaggaWagga_2018_kmeansclusters.gmtxt
Deleting file: ./WaggaWagga_2018_kmeans.kea
Deleting file: ./WaggaWagga_2018_kmeans.kea.aux.xml
Deleting file: ./WaggaWagga_2018_kmeans_nosgl.kea
Deleting file: ./WaggaWagga_2018_kmeans_nosglTMP.kea
Deleting file: ./WaggaWagga_2018_clumps.kea
Deleting file: ./WaggaWagga_2018_clumps_elim.kea
Deleting file: ./WaggaWagga_2018_stchd.kea


In [None]:
# gdf = gpd.read_file(results + AOI + '_' + year + '_SEGpolygons.shp')
# #calculate zonal mean of NDVI
# gdf['mean'] = pd.DataFrame(zonal_stats(vectors=gdf['geometry'], raster=InputNDVIStats, stats='mean'))['mean']
# #calculate area of polygons
# gdf['area'] = gdf['geometry'].area
# #filter by area and mean NDVI
# highNDVI = gdf['mean'] >= 0.1
# smallArea = gdf['area'] <= 5500000
# gdf = gdf[highNDVI & smallArea]
# #export shapefile
# gdf.to_file(results + AOI + "_" + year + "_Irrigated.shp")

In [None]:
def zonal_timeseries(dataArray, shp_loc, results_loc, feature_name, stat='mean', csv=False, netcdf=False, plot=False):

    """
    Given an xarray dataArray and a shapefile, generates a timeseries of zonal statistics across n number of
    uniquely labelled polygons. Optionally exports a .csv of the stats, a netcdf containing the stats, and .pdf plots.
    Requires the installation of the rasterstats module: https://pythonhosted.org/rasterstats/installation.html

    Inputs:
    dataArray = xarray dataarray (note dataarray, not dataset - it is a requirement the data only have a single variable).
                It must have 'time', 'x' and 'y' coordinates and a `crs` attribute.
    shp_loc = string. Location of the shapefile used to extract the zonal timeseries.
    results_loc = string. Location of the directory where results should export.
    feature_name = string. Name of attribute column in the shapefile that is of interest - used to label dataframe, plots etc.
    stat = string.  The statistic you want to extract. Options include 'count', 'max', 'median', 'min', 'std', 'mean'.
    plot = Boolean. If True, function will produce pdfs of timeseries for each polygon in the shapefile
           (each figure is closed once its pdf has been saved).
    csv = Boolean. If True, function will export results as a .csv.
    netcdf = Boolean. If True, function will export results as a netcdf.

    Returns:
    pandas DataFrame with one row per polygon (indexed by feature_name) and one column per time step.

    Last modified: May 2018
    Author: Chad Burton
    """

    # Convert to a dask-backed array, keeping every time step spatially whole.
    # The zonal statistics below apply ONE geotransform, built from the full
    # array's coordinates, to every block; splitting x/y into tiles would
    # mis-locate all but the first tile and change the shape of the output.
    dataArray = dataArray.chunk(chunks={'x': dataArray.sizes['x'], 'y': dataArray.sizes['y']})

    # Create the GDAL-style 'transform' tuple that geo-references each 2D slice:
    # (x origin, pixel width, row rotation, y origin, column rotation, pixel height)
    # NOTE(review): the origin is taken from the first x/y coordinate values. If
    # those are pixel centres rather than pixel corners, the raster is offset by
    # half a pixel - confirm.
    x_origin = float(dataArray.x[0])
    y_origin = float(dataArray.y[0])
    pixel_width = float(dataArray.x[1] - dataArray.x[0])
    pixel_height = float(dataArray.y[1] - dataArray.y[0])

    transform_zonal = (x_origin, pixel_width, 0.0, y_origin, 0.0, pixel_height)

    # Import shapefile, make sure it's in the right projection to match the dataArray
    # and set index to the feature_name
    project_area = gpd.read_file(shp_loc)               #get the shapefile
    # Little hack to get the EPSG code: drops the first five characters of
    # str(dataArray.crs), i.e. it presumably expects the form 'EPSG:<code>'.
    reproj = int(str(dataArray.crs)[5:])
    project_area = project_area.to_crs(epsg=reproj)     #reproject shapefile to match dataArray
    project_area = project_area.set_index(feature_name) #set the index

    #define the general function
    def zonalStats(block, stat=stat):
        """Extract the zonal statistic of all pixel values within each
        polygon, for every 2D slice along the first axis of `block`."""
        per_step = [
            zonal_stats(project_area, time_slice, transform=transform_zonal, stats=stat)
            for time_slice in block
        ]
        #extract just the values from the results, and convert 'None' values to nan
        values = [[poly[stat] if poly[stat] is not None else np.nan for poly in step]
                  for step in per_step]
        return np.array(values)

    #use the zonal_stats function to extract the stats:
    n = len(project_area) #number of polygons in the shapefile (defines the dimensions of the output)
    statistics = dataArray.data.map_blocks(zonalStats, chunks=(-1,n), drop_axis=1, dtype=np.float64).compute()

    #get unique identifier and timeseries data from the inputs
    colnames = pd.Series(project_area.index.values)
    time = pd.Series(dataArray['time'].values)

    #define function for cleaning up the results of the rasterstats operation
    def tidyresults(results):
        x = pd.DataFrame(results).T              #transpose: rows = polygons, columns = time steps
        x = x.rename(colnames, axis='index')     #label the rows with the polygon identifiers
        x = x.rename(columns = time)             #label the columns with the timestamps
        return x

    #place results into indexed dataframes using tidyresults function
    statistics_df = tidyresults(statistics)

    #convert into xarray for exporting and plotting
    stat_xr = xr.DataArray(statistics_df, dims=[feature_name, 'time'], coords={feature_name: statistics_df.index, 'time': time}, name= stat)

    #options for exporting results as csv, netcdf, pdf plots
    #export results as a .csv
    if csv:
        statistics_df.to_csv('{0}{1}.csv'.format(results_loc, stat))

    if netcdf:
        #export out results as netcdf
        stat_xr.to_netcdf('{0}zonalstats_{1}.nc'.format(results_loc, stat), mode='w',format='NETCDF4')

    if plot:
        #one pdf per polygon, titled '<feature>_<stat>'
        for position, name in enumerate(stat_xr[feature_name].values):
            # drop=True is passed by keyword: the original passed a list
            # positionally, which newer xarray reads as `indexers` and rejects.
            series = stat_xr.isel(drop=True, **{feature_name: position})
            # format() rather than '+' so non-string feature ids also work
            title = '{0}_{1}'.format(name, stat)

            fig = plt.figure(figsize=(15,5))
            plt.plot(series.time.values, series.data, color='#228b22', linewidth = 1)
            plt.grid(True, linestyle ='--')
            plt.title(title)
            plt.savefig('{0}{1}.pdf'.format(results_loc, title), bbox_inches='tight')
            # close the figure so a shapefile with many polygons does not
            # accumulate open figures in memory
            plt.close(fig)

    #return the results as a dataframe
    return statistics_df