## Notebook for testing code snippets

In [None]:
import numpy as np
import xarray as xr
import datacube
from datacube.utils import geometry
from datacube.utils.geometry import CRS
from dask.distributed import Client
from datacube.helpers import write_geotiff
import fiona
import rasterio.mask
import rasterio.features
from datacube.drivers.netcdf import write_dataset_to_netcdf

In [None]:
# Close the dask client left over from a previous run of this cell, if any.
if locals().get('client') is not None:
    client.close()
    del client

# Start a fresh local cluster; the bare name on the last line makes the
# notebook display the client.
client = Client(
    n_workers=2,
    threads_per_worker=1,
    memory_limit='12GB',
)
client

In [None]:
import sys
import os
import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr
import rasterio.features

# Shared path settings for the script below.
results = 'results/'            # prefix of the exported NetCDF path
directory = 'results/nmdb/'     # parent folder that is listed for per-season folders
suffix = '_LSandOEH_masked'     # tail of the irrigated-area shapefile names

#FUNCTIONS for SCRIPT
def convertIrrShpToTiff(shp, year):
    """Burn a season's irrigated-area polygons onto that season's raster grid.

    The target grid (shape, affine transform and x/y coordinates) is taken
    from the season's ``multithreshold_65Thres`` GeoTIFF located under the
    module-level ``directory``. Nothing is returned: the rasterized
    ``xarray.DataArray`` is appended to the module-level ``da_list``, which
    must already exist when this function is called.

    Parameters
    ----------
    shp : str
        Path to the shapefile. Its ``DN`` column supplies the value burnt
        into the pixels selected for each geometry.
    year : str
        Season label used in the folder and file names, e.g. ``'1987_88'``.
    """
    # Template raster that defines the output grid.
    template_path = (directory + '/nmdb_' + year
                     + "/nmdb_Summer" + year + "_multithreshold_65Thres.tif")
    template = xr.open_rasterio(template_path).squeeze()
    n_rows, n_cols = template.values.shape

    # (geometry, value) pairs, the input form rasterize() takes.
    polygons = gpd.read_file(shp)
    geom_value_pairs = zip(polygons['geometry'], polygons['DN'])

    # Pixels not covered by any polygon are filled with NaN.
    burned = rasterio.features.rasterize(
        shapes=geom_value_pairs,
        out_shape=(n_rows, n_cols),
        all_touched=False,
        fill=np.nan,
        transform=template.transform,
    )

    # Re-attach the template's coordinates and collect the result.
    da_list.append(
        xr.DataArray(burned, coords=[template.y, template.x], dims=['y', 'x'])
    )


#-----------SCRIPT-----------------
# Season labels such as '1987_88', one per starting year from 1987 to 2018,
# leaving out the two seasons that didn't work.
years = sorted(
    label
    for label in (str(start) + "_" + str(start + 1)[2:]
                  for start in range(1987, 2019, 1))
    if label not in ('2011_12', '2012_13')
)

# Alphabetical listing of the per-season folders.
folders = sorted(os.listdir(directory))

# Rasterize each season's shapefile; convertIrrShpToTiff appends to da_list.
# NOTE: zip pairs the i-th label with the i-th folder, so this relies on the
# listing holding exactly one folder per label, in the same order.
da_list = []
for year, folder in zip(years, folders):
    print("\r", "working on year: " + year, end = '')
    convertIrrShpToTiff(
        directory + folder + "/" + "nmdb_Summer" + year + "_Irrigated" + suffix + ".shp",
        year,
    )

# Year-end timestamps used as the time coordinate, with the two skipped
# seasons removed so the count matches da_list.
dates = pd.date_range(start='1/1/1987', end='1/01/2019', freq='Y').drop(
    [pd.Timestamp('2011-12-31'), pd.Timestamp('2012-12-31')]
)

# Stack the per-season arrays along the dates index, then name the new
# dimension 'time' and the variable 'Irrigated_Area'.
da = (
    xr.concat(da_list, dim=dates)
    .rename({'concat_dim': 'time'})
    .rename('Irrigated_Area')
)

# Wrap in a dataset and export as NetCDF.
ds = da.to_dataset()
ds.to_netcdf(results + "NMDB_irrigation.nc")

In [None]:
# Example template raster path for the 1987_88 season. It is stored as a
# string because the bare, unquoted path parses as a chain of divisions and
# attribute lookups and raises NameError when the cell runs.
example_tif = "results/nmdb/nmdb_1987_88/nmdb_Summer1987_88_multithreshold_65Thres.tif"
example_tif

### Code for copying files whose names match some string criteria

In [None]:
# Settings for the file-copying cells below: the folder whose sub-folders
# are listed, and a filename suffix.
directory = '/g/data/r78/cb3058/dea-notebooks/ICE_project/results/nmdb/'
suffix = '_80polys_10ha'

In [None]:
# Season labels such as '1987_88', one per starting year from 1987 to 2018,
# leaving out the two seasons that didn't work.
years = sorted(
    label
    for label in (str(start) + "_" + str(start + 1)[2:]
                  for start in range(1987, 2019, 1))
    if label not in ('2011_12', '2012_13')
)

# Alphabetical listing of the entries under ``directory``.
folders = sorted(os.listdir(directory))

In [None]:
import shutil
import glob
# source_dir = "this/is/source/folder"
dest_dir = "/g/data/r78/cb3058/dea-notebooks/sica_paper/data/"

# Copy every file matching the multithreshold pattern out of each season
# folder into a per-season folder ("nmdb_<year>") under dest_dir.
for year, folder in zip(years, folders):
    src_folder = os.path.join(directory, folder)
    target_folder = os.path.join(dest_dir, "nmdb_" + year)

    # Create the destination up front; shutil.copy does not create missing
    # parent folders, and exist_ok makes reruns harmless.
    os.makedirs(target_folder, exist_ok=True)

    # Glob on the full path rather than os.chdir()-ing into the folder, so
    # the notebook's working directory (and with it every relative path used
    # in other cells) is left untouched.
    pattern = os.path.join(glob.escape(src_folder), "*multithreshold_65Thres.tif*")
    for src_path in glob.glob(pattern):
        shutil.copy(src_path, os.path.join(target_folder, os.path.basename(src_path)))


# for top, dirs, files in  os.walk(directory):
#     for filename in files:
#         file_path = os.path.join(top, filename)
#         with open(file_path, 'r') as f:
#             if suffix in f.read():
#                 shutil.copy(file_path, os.path.join(dest_dir, filename))

### TESTING GEOMETRIES OF POLYGONS TO REMOVE V. SKINNY POLYGONS. TRYING TO REMOVE TRACTOR TREAD

In [None]:
# Bounding-box extents (minx, miny, maxx, maxy) of each feature's envelope.
# NOTE(review): `x` is expected to be a GeoDataFrame created in an earlier
# cell that is not part of this file - confirm.
z = x.envelope.bounds

width = z.maxx - z.minx
height = z.maxy - z.miny
# Aspect ratio of the bounding box: above 1 is taller than wide, below 1 is
# wider than tall.
skinny = height/width

x['skinniness'] = skinny
x['height'] = height
x['width'] = width

# Drop the 'envelope' column before writing.
# NOTE(review): presumably a second geometry column that a shapefile cannot
# store - confirm.
x = x.drop('envelope', axis=1)
x.to_file('../data/test_envelopes.shp')

# Select polygons that are very skinny in either direction. Previously the
# "> 2" selection was immediately overwritten by the "< 0.3" one, so the tall
# skinny polygons were never kept.
a = x[(x.skinniness > 2) | (x.skinniness < 0.3)]

## Function to identify the amount of winter irrigation misidentified as summer irrigation

In [None]:
import numpy as np
import xarray as xr
import sys
sys.path.append('src')
import SpatialTools

In [None]:
# Inputs for maxNDVIisinNovember: the 2013_14 irrigated-area shapefile and
# the ndviArgMaxMin mosaic for the same season.
irrigated = (
    'results/nmdb/nmdb_Summer2013_14/'
    'nmdb_Summer2013_14_Irrigated_OEHandLS_masked.shp'
)
argmax = (
    "/g/data/r78/cb3058/dea-notebooks/dcStats/results/mdb_NSW/summer/"
    "ndviArgMaxMin/mosaics/ndviArgMaxMin_20131101_mosaic.tif"
)

In [None]:
def maxNDVIisinNovember(irrigated, argmax):
    """Print the irrigated area whose ``argmax`` raster value equals 11.

    Opens the raster at ``argmax``, keeps band 0, retains only the pixels
    equal to 11, masks them with the rasterized ``irrigated`` polygons and
    prints the per-value pixel counts followed by the area in hectares.
    Progress information is printed along the way; nothing is returned.

    Parameters
    ----------
    irrigated : str
        Path to the vector file passed to ``SpatialTools.rasterize_vector``.
    argmax : str
        Path to a raster readable by ``xr.open_rasterio``.
        NOTE(review): band 0 presumably holds the month of maximum NDVI -
        confirm against the dcStats output.
    """
    timeofmax = xr.open_rasterio(argmax)#.isel(x=range(50000, 60000)).isel(y=range(50000,60000)).squeeze()
    print(timeofmax)
    timeofmax = timeofmax.isel(band = 0)
    print("--------------------------------")
    print(timeofmax)

    # Keep only the pixels whose value is 11; everything else becomes NaN.
    nov = timeofmax.where(timeofmax == 11)
    transform, projection = SpatialTools.geotransform(timeofmax, (timeofmax.x, timeofmax.y), epsg=3577)

    # The 2-D array is (rows, cols), i.e. (y, x). The original unpacked this
    # as "width, height" and passed "height, width", so the values handed to
    # rasterize_vector are unchanged here: (cols, rows).
    # NOTE(review): presumably the order rasterize_vector expects - confirm
    # against SpatialTools.
    n_rows, n_cols = timeofmax.shape
    print("--------------------------------")
    print("rasterizing vector")
    mask = SpatialTools.rasterize_vector(irrigated, n_cols, n_rows, transform, projection, raster_path=None)
    result = nov.where(mask)

    print("--------------------------------")
    print('calculating counts')
    unique, counts = np.unique(result.values, return_counts=True)
    x = dict(zip(unique, counts))
    print(x)
    print("--------------------------------")

    # The dict is keyed by numeric pixel values, so the former x['11'] string
    # lookup always raised KeyError. Fall back to 0 when no pixel equals 11.
    november_pixels = x.get(11, 0)
    # 25*25 / 10000 converts a pixel count to hectares.
    # NOTE(review): assumes 25 m x 25 m pixels - confirm.
    print("area where max NDVI occurs in Novemeber is "  + str(november_pixels * (25*25) / 10000) + ' ha')

In [None]:
# Run the November-peak check on the inputs defined above.
maxNDVIisinNovember(irrigated=irrigated, argmax=argmax)

## Simplifying polygons

In [None]:
import sys
import os
import time
import copy
import geopandas as gpd

from shapely.geometry import asShape
from shapely.geometry import MultiLineString
from shapely.geometry import asLineString
from shapely.wkt import dumps
#from pprint import pprint

import shapefile
import numpy as np

sys.path.append('src/')
import bezier
import bendsimplify

In [None]:
# Shapefile that the simplification experiments below read.
shape = 'results/SICA/nmdb_Summer1998_99_Irrigated_OEHandLS_masked.shp'

In [None]:
# Nothing seems to happen when this runs...
bendsimplify.bend_simplify(
    "results/nmdb_Summer1998_99_Irrigated_OEHandLS_masked_objectID.shp"
)

In [None]:
# This works but is not topologically aware so overlaps/slivers occur (if not
# for important analysis then could use overlay to remove overlaps)
gdf = gpd.read_file(shape)

# Simplify on a copy so the original geometries stay available in `gdf`.
gdf_simplified = gdf.copy()
gdf_simplified["geometry"] = gdf.geometry.simplify(tolerance=50,preserve_topology=True)

# Write the simplified copy. The cell previously wrote `gdf`, so the output
# file contained the unsimplified input geometries.
gdf_simplified.to_file("results/test_simplify_1998_99_tolerance50_shapely.shp")