## Testing a time-weighted dynamic time warping algorithm
This scripts generates and exports data for the testing, along with a training dataset

### get some data and export netcdfs

In [1]:
import numpy as np
import xarray as xr
import geopandas as gpd
import pandas as pd
import dask
import datacube 
from datacube.helpers import ga_pq_fuser
from datacube.storage import masking
from datacube.utils import geometry

import fiona
import rasterio.features
from osgeo import gdal, ogr
import os
from rsgislib.segmentation import segutils
from rasterstats import zonal_stats

#import custom functions
import sys
sys.path.append('src')
import DEAPlotting, SpatialTools, BandIndices
from load_data import load_data
from transform_tuple import transform_tuple
from imageSeg import imageSeg
from query_from_shp import query_from_shp

In [5]:
# where is your data and results folder?
data = 'data/'
results = 'results/'

sensors = ['ls7','ls8']

#are we using a polygon to mask the AOI?
polygon_mask = True
shp_fpath = 'data/spatial/PeelR_AOI_test.shp'

#If not using a polygon then enter your AOI coords
#below:
lat, lon = -35.125, 147.55
latLon_adjust = 0.2

#Input your area of interest's name, coords, and 
#the year you're interested in?
AOI = 'Peel_TWDTW_test'
year = 'Summer2018-19'

time_period = ('2013-01-01', '2013-12-31')

#What thresholds should I use?
threshold = 0.8
wofs_theshold = 0.15
#-----------------------------------------

In [6]:
#Creating a folder to keep things neat
directory = results + AOI + "_" + year
if not os.path.exists(directory):
    os.mkdir(directory)

results = results + AOI + "_" + year + "/"

In [7]:
#load landsat data    

if polygon_mask == True:
    #set up query
    query = query_from_shp(shp_fpath, time_period[0], time_period[1], dask_chunks = 0)
    #landsat
    landsat = load_data(dc_name = 'irrigated_areas', sensors=sensors,
              export_name = data + AOI + "_" + year + '.nc', query=query)
    #wofs
    dc = datacube.Datacube(app='wofs')
    del query['time'] 
    wofs_alltime = dc.load(product = 'wofs_summary', **query)

    #masking the returned array to the polygon area
    with fiona.open(shp_fpath) as shapes:
            crs = geometry.CRS(shapes.crs_wkt)
            first_geometry = next(iter(shapes))['geometry']
            geom = geometry.Geometry(first_geometry, crs=crs)

    mask = rasterio.features.geometry_mask([geom.to_crs(landsat.geobox.crs) for geoms in [geom]],
                                               out_shape=landsat.geobox.shape,
                                               transform=landsat.geobox.affine,
                                               all_touched=False,
                                               invert=True)
    # Mask the xarrays
    landsat = landsat.where(mask)
    #wofs_alltime = wofs_alltime.where(mask)
    #datacube.storage.storage.write_dataset_to_netcdf(landsat, results + AOI "_" + year + '.nc')
else:
    # Set up query
    query = {'lon': (lon - latLon_adjust, lon + latLon_adjust),
             'lat': (lat - latLon_adjust, lat + latLon_adjust),
             'time': time_period}
    query['dask_chunks']= {'x': 200, 'y': 200}

    #landsat
    landsat = load_data(dc_name = 'irrigated_areas', sensors=sensors,
              export_name = data + AOI + "_" + year + '.nc', query=query)
    #wofs
    dc = datacube.Datacube(app='wofs')
    del query['time'] 
    wofs_alltime = dc.load(product = 'wofs_summary', **query)

ls7_loading...
ls7_loaded


  if not landsat_ds:


ls8_loading...
ls8_loaded


  if not landsat_ds:


In [9]:
#band indices calculation
def ndvi_func(nir, red):
    return ((nir - red)/(nir + red))

def ndvi_ufunc(ds):
    return xr.apply_ufunc(
        ndvi_func, ds.nir, ds.red,
        dask='parallelized',
        output_dtypes=[float])

NDVI_landsat = ndvi_ufunc(landsat)
NDMI_landsat = BandIndices.calculate_indices(landsat, index='NDMI-nir')

The formula we are using is (nir - swir1)/(nir + swir1)


In [10]:
# expprt band indices for exploration

x = NDVI_landsat.to_dataset(name='NDVI')
y = NDMI_landsat.to_dataset(name='NDMI')
a = xr.merge([x,y])
a.attrs=dict(crs=landsat.crs)

#datacube.storage.storage.write_dataset_to_netcdf(a, results + AOI + "_" + year + 'bandIndices.nc')

<xarray.Dataset>
Dimensions:  (time: 35, x: 849, y: 837)
Coordinates:
  * y        (y) float64 -3.506e+06 -3.506e+06 ... -3.527e+06 -3.527e+06
  * x        (x) float64 1.759e+06 1.759e+06 1.759e+06 ... 1.78e+06 1.78e+06
  * time     (time) datetime64[ns] 2013-01-04T23:45:15.500000 ... 2013-12-30T23:50:30
Data variables:
    NDVI     (time, y, x) float64 nan nan nan nan nan ... nan nan nan nan nan
    NDMI     (time, y, x) float64 nan nan nan nan nan ... nan nan nan nan nan
Attributes:
    crs:      EPSG:3577

In [14]:
dpm = {'noleap': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
       '365_day': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
       'standard': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
       'gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
       'proleptic_gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
       'all_leap': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
       '366_day': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
       '360_day': [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]}

def leap_year(year, calendar='standard'):
    """Determine if year is a leap year"""
    leap = False
    if ((calendar in ['standard', 'gregorian',
        'proleptic_gregorian', 'julian']) and
        (year % 4 == 0)):
        leap = True
        if ((calendar == 'proleptic_gregorian') and
            (year % 100 == 0) and
            (year % 400 != 0)):
            leap = False
        elif ((calendar in ['standard', 'gregorian']) and
                 (year % 100 == 0) and (year % 400 != 0) and
                 (year < 1583)):
            leap = False
    return leap

def get_dpm(time, calendar='standard'):
    """
    return a array of days per month corresponding to the months provided in `months`
    """
    month_length = np.zeros(len(time), dtype=np.int)

    cal_days = dpm[calendar]

    for i, (month, year) in enumerate(zip(time.month, time.year)):
        month_length[i] = cal_days[month]
        if leap_year(year, calendar=calendar):
            month_length[i] += 1
    return month_length

def season_mean(ds, calendar='standard'):
    # Make a DataArray of season/year groups
    year_season = xr.DataArray(ds.time.to_index().to_period(freq='Q-NOV').to_timestamp(how='E'),
                               coords=[ds.time], name='year_season')

    # Make a DataArray with the number of days in each month, size = len(time)
    month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar=calendar),
                                coords=[ds.time], name='month_length')
    # Calculate the weights by grouping by 'time.season'
    weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()

    # Test that the sum of the weights for each season is 1.0
    np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4))

    # Calculate the weighted average
    return (ds * weights).groupby('time.season').sum(dim='time'), year_season

b, c = season_mean(a)
time = (np.unique(c.values))[0:4]
b = b.assign_coords(season = time)
b = b.rename({'season':'time'})

#b.attrs=dict(crs=landsat.crs)
b

<xarray.Dataset>
Dimensions:  (time: 4, x: 849, y: 837)
Coordinates:
  * y        (y) float64 -3.506e+06 -3.506e+06 ... -3.527e+06 -3.527e+06
  * x        (x) float64 1.759e+06 1.759e+06 1.759e+06 ... 1.78e+06 1.78e+06
  * time     (time) datetime64[ns] 2013-02-28 2013-05-31 2013-08-31 2013-11-30
Data variables:
    NDVI     (time, y, x) float64 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0
    NDMI     (time, y, x) float64 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0

In [15]:
b.to_netcdf(results + AOI + "_" + year + 'SeasonalbandIndices.nc')
#datacube.storage.storage.write_dataset_to_netcdf(b, results + AOI + "_" + year + 'SeasonalbandIndices.nc')

In [17]:
b.time.values

array(['2013-02-28T00:00:00.000000000', '2013-05-31T00:00:00.000000000',
       '2013-08-31T00:00:00.000000000', '2013-11-30T00:00:00.000000000'],
      dtype='datetime64[ns]')

## Make rudimentry training set

In [None]:
import numpy as np
import xarray as xr
import geopandas as gpd
import pandas as pd

peel_landuse = gpd.read_file('data/spatial/Peel_landuse_small.shp')
peel_landuse = peel_landuse.to_crs(epsg=3577)

peel_trainset = peel_landuse[(peel_landuse.d_Tertiary == '4.3.0 Irrigated cropping') | 
                        (peel_landuse.d_Tertiary == '3.3.0 Cropping') |
                        (peel_landuse.d_Tertiary == '5.4.1 Urban residential')]

peel_trainset.d_Tertiary.unique()

def getXY(pt):
    return (pt.x, pt.y)
centroidseries = peel_trainset['geometry'].centroid
lon,lat = [list(t) for t in zip(*map(getXY, centroidseries))]

from datetime import datetime
start = datetime(2013,1,1)
finish =datetime(2013,12,31)

dict_ = {'longitude': lon, 'latitude': lat, 'from': start,'to':finish, 'label':peel_trainset.d_Tertiary}
peel_samples = pd.DataFrame(dict_)

peel_samples.to_csv('results/peel_fieldsamples.csv', index=False)

peel_samples.head()