In [None]:
import multiprocessing
import os
import subprocess
import sys
import warnings

import numpy as np
import xarray as xr
import geopandas as gpd
import pandas as pd
from matplotlib import pyplot as plt
from osgeo import gdal, ogr, gdal_array
import dask
import datacube 
from datacube.helpers import ga_pq_fuser
from datacube.storage import masking
from datacube.utils import geometry

#import custom functions
sys.path.append('src')
from reproject_image_to_master import reproject_image_to_master
import DEAPlotting, SpatialTools, BandIndices, DEADataHandling
from load_data import load_data
from transform_tuple import transform_tuple
from query_from_shp import query_from_shp
from load_masked_FC import load_masked_FC

from rsgislib.segmentation import segutils
from rasterstats import zonal_stats
from imageSeg import imageSeg
import fiona
import rasterio.features
from osgeo import gdal

warnings.filterwarnings('ignore')

### user inputs

In [None]:
# --- Locations ---------------------------------------------------------
# Folder holding the input data and folder that receives the outputs.
data = 'data/'
results = 'results/'

# True  -> load new data from the datacube.
# False -> reuse data that was saved previously.
load_fresh_data = True

sensors = ['ls8']

# --- Area of interest --------------------------------------------------
# Set polygon_mask = True to mask the AOI with the polygon in shp_fpath;
# otherwise the AOI is the box built from lat/lon below.
polygon_mask = False
shp_fpath = data + 'spatial/wagga_paddockDrill_AOI_epsg3577.shp'

# Centre of the AOI box; latLon_adjust is subtracted from and added to
# both lat and lon to form the box edges.
lat, lon = -35.1, 147.1
latLon_adjust = 0.15

# --- Naming and time ---------------------------------------------------
# AOI and year are used to name the results folder and every output file.
AOI = 'WaggaWagga'
year = '2018'
# Derived from `year` so the query window cannot drift out of sync with
# the year used in the output names.
time_period = (year + '-01-01', year + '-12-31')

#-----------------------------------------

In [None]:
# Create (if needed) the per-run output folder <results>/<AOI>_<year>/ and
# point `results` at it so the following cells write their outputs there.
run_name = AOI + "_" + year

# Guard keeps the cell idempotent: running it twice must not append the run
# name to `results` a second time.
if os.path.basename(results.rstrip("/")) != run_name:
    results = results + run_name + "/"

directory = results.rstrip("/")
# makedirs creates a missing parent folder too and tolerates an existing one.
os.makedirs(directory, exist_ok=True)

### get data from one year for image seg

In [None]:
# Load the imagery used for the image segmentation. Three paths:
#   1. fresh load masked to the AOI polygon -> defines `landsat`, `wofs_alltime`
#   2. fresh load over the lat/lon box      -> rebinds `data` to the loaded dataset
#   3. previously saved NetCDF              -> defines `landsat`, `wofs_alltime`
# NOTE(review): path 2 overwrites the `data` folder string from the user-input
# cell and never defines `landsat`; paths 1 and 3 never produce the dataset that
# the FC/quickshift cells read from `data`. Later cells therefore only work for
# some combinations of the switches -- confirm which path each section expects.
if load_fresh_data:
    if polygon_mask:
        #set up query
        query = query_from_shp(shp_fpath, time_period[0], time_period[1], dask_chunks = 0)

        #landsat
        landsat = load_data(dc_name = 'irrigated_areas', sensors=sensors,
                  export_name = data + AOI + "_" + year + '.nc', query=query)

        #wofs: the summary product is loaded without the time constraint
        dc = datacube.Datacube(app='wofs')
        del query['time']
        wofs_alltime = dc.load(product = 'wofs_summary', **query)

        #masking the returned array to the polygon area
        #(only the first feature of the shapefile is used)
        with fiona.open(shp_fpath) as shapes:
            crs = geometry.CRS(shapes.crs_wkt)
            first_geometry = next(iter(shapes))['geometry']
            geom = geometry.Geometry(first_geometry, crs=crs)

        # invert=True -> True inside the polygon, so .where() keeps the AOI
        mask = rasterio.features.geometry_mask([geom.to_crs(landsat.geobox.crs)],
                                               out_shape=landsat.geobox.shape,
                                               transform=landsat.geobox.affine,
                                               all_touched=False,
                                               invert=True)
        # Mask the xarrays
        landsat = landsat.where(mask)
        #wofs_alltime = wofs_alltime.where(mask)
        #datacube.storage.storage.write_dataset_to_netcdf(landsat, results + AOI + "_" + year + '.nc')
    else:
        # Set up query
        query = {'lon': (lon - latLon_adjust, lon + latLon_adjust),
                 'lat': (lat - latLon_adjust, lat + latLon_adjust),
                 'time': time_period}
#         query['dask_chunks']= {'x': 500, 'y': 500}

        #landsat
        dc = datacube.Datacube(app='fc')
#         data = DEADataHandling.load_clearlandsat(dc, query,product='fc', ls7_slc_off =True, masked_prop=0.90)
        data = DEADataHandling.load_clearlandsat(dc, query, ls7_slc_off =True, masked_prop=0.70)

#         landsat = load_data(dc_name = 'irrigated_areas', sensors=sensors,
#                   export_name = data + AOI + "_" + year + '.nc', query=query)
        #wofs
#         dc = datacube.Datacube(app='wofs')
#         del query['time'] 
#         wofs_alltime = dc.load(product = 'wofs_summary', **query)

else:
    #load in data from saved netcdf file
    landsat = xr.open_dataset("data/wagga_Summer2017-18.nc")

    #landsat = xr.open_dataset('data/' + AOI +  "_" + year + '.nc')
    #load wofs for masking
    query_wofs = {'lon': (lon - latLon_adjust, lon + latLon_adjust),
                 'lat': (lat - latLon_adjust, lat + latLon_adjust)} 
    dc = datacube.Datacube(app='wofs')
    wofs_alltime = dc.load(product = 'wofs_summary', **query_wofs)

### If using FC and quickshift for image seg

In [None]:
# Remove the variables that are not fractional-cover bands. Only names that
# are still present are dropped, so re-running this cell does not raise.
unused_vars = [name for name in ('UE', 'data_perc') if name in data.variables]
if unused_vars:
    data = data.drop(unused_vars)

# Take the first entry along the leading dimension of each band as a plain
# numpy array.
# NOTE(review): index 0 is presumably the first time step -- confirm that a
# single date (not a temporal summary) is what the segmentation should use.
BS = data.BS[0].values
NPV = data.NPV[0].values
PV = data.PV[0].values

In [None]:
# Stack the three fractions along a new trailing axis, in the order
# PV, NPV, BS, to form the multi-band image passed to quickshift.
fc_layers = (PV, NPV, BS)
img = np.stack(fc_layers, axis=-1)

In [None]:
from skimage.segmentation import quickshift

# Segment the 3-band FC image with quickshift and report how many distinct
# segment labels came back.
quickshift_params = dict(kernel_size=11, convert2lab=True, max_dist=500, ratio=0.5)
segments_quickshift = quickshift(img, **quickshift_params)

n_segments = np.unique(segments_quickshift).size
print("Quickshift number of segments: %d" % n_segments)

In [None]:
# plt.subplots(1,1, figsize=(20,20))
# plt.imshow(segments_quickshift)

In [None]:
#if importing from earlier work
# segments_quickshift = xr.open_rasterio(results + "/quickshift_test/" + AOI + "_" + year + "quickshift_segs.tif")
# segments_quickshift = segments_quickshift.drop('band').squeeze()
# segments_quickshift = segments_quickshift.astype(np.uint64)


In [None]:
#export Gtiff for use in Image segmentation
# The segments were computed from `data`, so its x/y coordinates define the
# georeferencing of the output (the name previously used here, `fc_perc`, is
# not defined anywhere in this notebook).
transform, projection = transform_tuple(data, (data.x, data.y), epsg=3577)

# Make sure the output sub-folder exists before writing into it.
quickshift_dir = results + "/quickshift_test/"
os.makedirs(quickshift_dir, exist_ok=True)

SpatialTools.array_to_geotiff(quickshift_dir + AOI + "_" + year + "quickshift_segs_FCMedian.tif",
              segments_quickshift, geo_transform = transform, 
              projection = projection, nodata_val=np.nan)

In [None]:
# Convert the segment raster into an ESRI Shapefile of polygons.
SegmentedTiffFile = results + "/quickshift_test/" + AOI + "_" + year + "quickshift_segs_FCMedian.tif"
SegmentedPolygons = results + "/quickshift_test/" + AOI + "_" + year + "quickshift_segs_FCMedian.shp"

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters are handled safely; check=True raises if
# gdal_polygonize.py fails instead of silently continuing with a missing or
# stale shapefile.
subprocess.run(['gdal_polygonize.py', SegmentedTiffFile,
                '-f', 'ESRI Shapefile', SegmentedPolygons],
               check=True)

In [None]:
# Read the polygonized segments and attach each polygon's area as a column.
gdf = gpd.read_file(SegmentedPolygons)
gdf = gdf.assign(area=gdf['geometry'].area)

# Keep only polygons with an area of at least 50000 (5 hectares when the
# layer's units are metres). Despite its name, `smallArea` is True for the
# polygons that are kept.
smallArea = gdf['area'].ge(50000)
gdf = gdf.loc[smallArea]
# export shapefile
# gdf.to_file(results + "/quickshift_test/" + AOI + "_" + year + "quickshift_segs_FCMedian_filtered5Ha.shp")

gdf.plot(edgecolor='black', alpha=0.7, linewidth=0.5, figsize=(20,20))

### calculate band indices and stats

In [None]:
#band indices calculation
def ndvi_func(nir, red):
    """Return the normalised difference (nir - red) / (nir + red).

    Works element-wise on anything supporting arithmetic (scalars, arrays).
    """
    difference = nir - red
    total = nir + red
    return difference / total

def ndvi_ufunc(ds):
    """Apply ndvi_func to the `nir` and `red` variables of `ds`.

    The call goes through xr.apply_ufunc with dask='parallelized' and a
    float output dtype declared.
    """
    ufunc_options = dict(dask='parallelized', output_dtypes=[float])
    return xr.apply_ufunc(ndvi_func, ds.nir, ds.red, **ufunc_options)

# NDVI for every time step of `landsat`, computed eagerly.
# NOTE(review): `landsat` is only assigned when the polygon-mask or saved-file
# branch of the loading cell ran; with load_fresh_data=True and
# polygon_mask=False this line raises NameError -- confirm the intended input.
NDVI_landsat = ndvi_ufunc(landsat).compute()

In [None]:
#calculate per pixel summary stats
# Maximum NDVI over time.
NDVI_max = NDVI_landsat.max('time').rename('NDVI_max').compute()

# 95th percentile of NDVI over time; the length-1 'quantile' dimension is
# squeezed away and its leftover coordinate dropped.
NDVI_95 = (
    NDVI_landsat
    .quantile(dim='time', q=[0.95], keep_attrs=True)
    .rename('95%_ndvi')
    .squeeze()
    .drop('quantile')
)

### image segmentation if using RSGISlib on MaxNDVI

In [None]:
from skimage.segmentation import quickshift

# NDVI_max is a single-band image. quickshift's RGB->Lab conversion only
# accepts 3-channel input, so convert2lab=True raises here; give the array an
# explicit channel axis and segment the NDVI values directly instead.
# NOTE(review): this overwrites the FC-based `segments_quickshift` from the
# earlier section, and the remaining parameters were tuned for that run --
# confirm they suit NDVI values.
ndvi_max_img = NDVI_max.values[..., np.newaxis]
segments_quickshift = quickshift(ndvi_max_img, kernel_size=11, convert2lab=False, max_dist=500, ratio=0.5)

In [None]:
#export Gtiff for use in Image segmentation
# Both layers share NDVI_max's grid, so one transform/projection serves both.
transform, projection = transform_tuple(NDVI_max, (NDVI_max.x, NDVI_max.y), epsg=3577)

ndvi_exports = (("ndvi_max.tif", NDVI_max), ("ndvi_95.tif", NDVI_95))
for tif_suffix, ndvi_layer in ndvi_exports:
    SpatialTools.array_to_geotiff(results + AOI + "_" + year + tif_suffix,
                  ndvi_layer.values, geo_transform = transform,
                  projection = projection, nodata_val=np.nan)

In [None]:
# setup input filenames
# Every file lives in the run folder and starts with "<AOI>_<year>".
run_prefix = results + AOI + '_' + year

InputNDVIStats = run_prefix + "ndvi_95.tif"
KEAFile = run_prefix + '.kea'
SegmentedKEAFile = run_prefix + '_sheperdSEG.kea'
SegmentedTiffFile = run_prefix + '_sheperdSEG.tif'
SegmentedPolygons = run_prefix + '_SEGpolygons.shp'

# Run the segmentation on the 95th-percentile NDVI raster.
imageSeg(
    InputNDVIStats,
    KEAFile,
    SegmentedKEAFile,
    SegmentedTiffFile,
    SegmentedPolygons,
    minPxls = 200,
)

### Use RF classified map to mask the image segmentation file

In [None]:
#grab the map
# NOTE(review): absolute path to a different AOI/season (Murrum, Winter 2013)
# than the one configured above -- confirm this is the intended classified map.
classified_map = (
    xr.open_rasterio("/g/data1a/r78/cb3058/dea-notebooks/ICE_project/results/Murrum_randomForest_Winter2013/Murrum_randomForest_Winter2013classpredict_handtrain.tif")
    .drop('band')
    .squeeze()
)

#get the areas that are just cultivated
# A pixel counts as cultivated when its class value is 330 or 430.
cultivated_classes = (330, 430)
is_cultivated = np.isin(classified_map.values, cultivated_classes)
cultivated = xr.DataArray(
    is_cultivated,
    coords = [classified_map.y, classified_map.x],
    dims = ['y', 'x'],
    name='cultivated areas',
)

In [None]:
#clip the cultivated areas map to the aoi
transform, projection = transform_tuple(cultivated, (cultivated.x, cultivated.y), epsg=3577)
# `cultivated` has dims (y, x), so .shape is (rows, cols).
n_rows, n_cols = cultivated.shape

new_shp = 'data/spatial/wagga_paddockDrill_AOI_epsg3577.shp'
# Burn the AOI polygon onto the same grid as `cultivated`.
# NOTE(review): rasterize_vector is presumably (vector, cols, rows, ...) --
# the argument order below matches what the cell has always passed.
aoi_mask = SpatialTools.rasterize_vector(
    new_shp, n_cols, n_rows, transform, projection, raster_path=None
).astype(bool)
aoi_raster = xr.DataArray(
    aoi_mask,
    coords = [cultivated.y, cultivated.x],
    dims = ['y', 'x'],
    name='aoi_raster',
)

# Keep pixels inside the AOI (trimming empty edges); the NaNs introduced by
# where() are turned back into False.
cultivated = cultivated.where(aoi_raster, drop=True).fillna(0).astype(bool)
cultivated.attrs = classified_map.attrs

In [None]:
# Read the quickshift segment raster back in as a 2-D unsigned-integer array.
# NOTE(review): this opens "...quickshift_segs.tif", while the export cell in
# this notebook writes "...quickshift_segs_FCMedian.tif" -- confirm which file
# is meant.
quickshift_tif = results + "/quickshift_test/" + AOI + "_" + year + "quickshift_segs.tif"
imageSeg_raster = (
    xr.open_rasterio(quickshift_tif)
    .drop('band')
    .squeeze()
    .astype(np.uint64)
)

#### fixing extent of cultivated area...weird

In [None]:
# writing out cultivated field
# Paths are built from results/AOI/year (rather than hard-coded for
# WaggaWagga 2018) so the cell follows the configuration cell.
run_prefix = results + AOI + "_" + year
cultivated_tif = run_prefix + "cultivated.tif"
master_tif = results + "ndvi95_imageSeg/" + AOI + "_" + year + "_sheperdSEG.tif"

transform, projection = transform_tuple(cultivated, (cultivated.x, cultivated.y), epsg=3577)
SpatialTools.array_to_geotiff(cultivated_tif,
              cultivated.values, geo_transform = transform, 
              projection = projection, nodata_val=np.nan)

# Match the cultivated raster to the segmentation raster.
# NOTE(review): the "_crop.tif" file read below is presumably written by
# reproject_image_to_master next to its second argument -- confirm.
reproject_image_to_master(master_tif, cultivated_tif, res=None)
cultivated = xr.open_rasterio(run_prefix + "cultivated_crop.tif")
cultivated = cultivated.drop('band').squeeze()

In [None]:
# Keep segment labels only where `cultivated` is truthy; everywhere else the
# result is NaN.
# NOTE(review): assumes both rasters share the same x/y grid -- confirm.
imageSeg_cultivated = imageSeg_raster.where(cultivated)


### Polygonize imageSeg cultivated tif

Then we'll add a unique ID

In [None]:
# Output raster and the shapefile polygonized from it.
imageSeg_cultivated_tif = results + AOI + "_" + year + "imageSeg_cultivated.tif"
imagSeg_cultivated_polygons = results + AOI + '_' + year + '_imageSeg_cultivated_polygons.shp'

# Write the masked segment raster to GeoTIFF.
transform, projection = transform_tuple(imageSeg_cultivated, (imageSeg_cultivated.x, imageSeg_cultivated.y), epsg=3577)
SpatialTools.array_to_geotiff(imageSeg_cultivated_tif,
              imageSeg_cultivated.values, geo_transform = transform, 
              projection = projection, nodata_val=np.nan)

# Arguments are passed as a list (no shell), and check=True raises if
# gdal_polygonize.py fails rather than leaving a missing or stale shapefile
# for the next cells to read.
subprocess.run(['gdal_polygonize.py', imageSeg_cultivated_tif,
                '-f', 'ESRI Shapefile', imagSeg_cultivated_polygons],
               check=True)

In [None]:
# Plot the polygons produced from the cultivated segment raster. They are
# read from disk here so the plot does not depend on a `gdf` left over from
# the quickshift section.
cultivated_polygons_gdf = gpd.read_file(imagSeg_cultivated_polygons)

# Optional area filter from earlier work, left disabled:
# cultivated_polygons_gdf['area'] = cultivated_polygons_gdf['geometry'].area
# smallArea = cultivated_polygons_gdf['area'] >= 50000 # area greater than 5 hectares
# cultivated_polygons_gdf = cultivated_polygons_gdf[smallArea]
# export shapefile
# cultivated_polygons_gdf.to_file(results + AOI + "_" + year + "quickshift_segs_filtered5Ha.shp")

cultivated_polygons_gdf.plot(linewidth=0.5, alpha=0.7, edgecolor='black',  figsize=(20,20));

In [None]:
# Give each polygon a unique 1-based `id` and save a copy of the shapefile.
# File names are built from results/AOI/year so they follow the
# configuration instead of being fixed to WaggaWagga 2018.
polygons_stem = results + AOI + '_' + year + '_imageSeg_cultivated_polygons'

seg_poly = gpd.read_file(polygons_stem + '.shp')
seg_poly['id'] = np.arange(1, len(seg_poly) + 1) #give each segment a unique ID
seg_poly.to_file(polygons_stem + '_withID.shp')

### Paddock Drill

In [None]:
#some user inputs
# `results` is rebuilt from its base folder so this cell gives the same
# answer however many times it is run.
results = "results/"
AOI = 'WaggaWagga'
year= '2018'

results = results + AOI + "_" + year + "/"

# Period over which each paddock's time series is extracted.
time_period = ('1990-01-01', '2019-01-01')

# Polygons with unique ids, as written by the "add a unique ID" cell.
shp_path = results + AOI + '_' + year + '_imageSeg_cultivated_polygons_withID.shp'

dc = datacube.Datacube(app='fc_fun')

# Only the CRS is needed here; the features are read by the drill loop.
# (The handle is not called `input`, which would shadow the builtin.)
with fiona.open(shp_path) as shp:
    crs = geometry.CRS(shp.crs_wkt)
    
def paddockDrill(feat, crs, total_polygons=8412):
    """Extract a fractional-cover time series for one polygon and save it as CSV.

    Loads the 'fc' product for the polygon over the module-level
    `time_period`, masks out pixels outside the polygon, averages PV, NPV
    and BS (plus the standard deviation of PV) over x/y for every time step
    and writes the table to <results>/paddock_zonal/<id>.csv.

    Parameters
    ----------
    feat : mapping
        A feature with a 'geometry' entry and a 'properties' entry holding
        the polygon's 'id'.
    crs : datacube.utils.geometry.CRS
        CRS of the feature's geometry.
    total_polygons : int, optional
        Number of polygons being processed; only used for the progress
        message. Defaults to 8412, the value that was hard-coded before.

    Reads the module-level names `dc`, `time_period` and `results`.
    """
    first_geom = feat['geometry']
    poly_id = feat['properties']['id']
    progress = round((poly_id / total_polygons) * 100, 4)
    print("\r", "working on polygon: " + str(poly_id) + ", " + str(progress) + "%" + " complete. ", end = '')
    geom = geometry.Geometry(first_geom, crs=crs)

    query = {'geopolygon': geom,
             'time': time_period}

    fc_data = DEADataHandling.load_clearlandsat(dc, query,product='fc', ls7_slc_off =True, masked_prop=0.90)

    # invert=True -> True inside the polygon, so .where() keeps the paddock
    # pixels and sets everything else to NaN.
    inside = rasterio.features.geometry_mask([geom.to_crs(fc_data.geobox.crs)],
                                             out_shape=fc_data.geobox.shape,
                                             transform=fc_data.geobox.affine,
                                             all_touched=False,
                                             invert=True)

    inside_xr = xr.DataArray(inside, dims = ('y','x'))
    fc = fc_data.where(inside_xr)

    PV_mean = fc.PV.mean(dim=['x', 'y']).values
    PV_std = fc.PV.std(dim=['x', 'y']).values
    NPV_mean = fc.NPV.mean(dim=['x', 'y']).values
    BS_mean = fc.BS.mean(dim=['x', 'y']).values
    time = fc.time.values

    #export csv
    statistics_df = pd.DataFrame({'dateTime': time, 
                                  'Mean PV (%)': PV_mean, 
                                  'Std. Dev. PV (%)':PV_std,
                                  'Mean NPV (%)': NPV_mean,
                                  'Mean BS (%)': BS_mean
                                 })
    # to_csv does not create folders; exist_ok makes this safe when several
    # worker processes reach this line at once.
    out_dir = results + "paddock_zonal/"
    os.makedirs(out_dir, exist_ok=True)
    statistics_df.to_csv(out_dir + str(poly_id) + ".csv")

# Run paddockDrill for every polygon in a pool of worker processes.
# NOTE(review): the workers use the module-level `dc`; confirm that sharing
# one Datacube connection across processes is safe in this environment.
pool = multiprocessing.Pool()
try:
    with fiona.open(shp_path) as features:
        jobs = [(feat['properties']['id'],
                 pool.apply_async(paddockDrill, [feat, crs]))
                for feat in features]
    # No more work will be submitted; wait for every polygon to finish.
    pool.close()
    pool.join()
finally:
    # Harmless after a clean join; stops the workers if something failed above.
    pool.terminate()

# apply_async stores exceptions on the result object; fetch each result so
# failures are reported instead of disappearing.
failed = []
for poly_id, job in jobs:
    try:
        job.get()
    except Exception as err:
        failed.append((poly_id, err))

print("")
for poly_id, err in failed:
    print("polygon " + str(poly_id) + " failed: " + repr(err))

print("finished")