## Notebook for testing code snippets

In [None]:
# date of first emergence of irrigated cropping area

In [1]:
import sys
import os
import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr
from rasterstats import zonal_stats
from scipy.ndimage.morphology import binary_erosion
from scipy.ndimage.morphology import binary_dilation
from scipy import ndimage
from scipy.stats import mode

import sys
sys.path.append('src')
import DEAPlotting, SpatialTools

In [2]:
# Irrigated-area polygons for the first season (Summer 1987-88); read in
# Part 1 and tagged with firstObser = 1987.
firstyearshp = 'results/nmdb/nmdb_Summer1987_88/nmdb_Summer1987_88_Irrigated_OEHandLS_masked.shp'
# Folder that Part 1 scans for the NMDB difference shapefiles (*.shp).
differenceFolder = 'results/nmdb_plots/shapes/difference_NMDB/'
# Output prefix: the cells below build output paths by concatenating a file
# name directly onto this string, so it must keep its trailing slash.
results = 'results/nmdb_plots/yearFirstObserved/'
# Cumulative-union shapefile that Part 2 reads and explodes into
# individual polygons.
cumulative_all = "results/nmdb_plots/shapes/interim_NMDB/NMDB_1987_2018.shp"

### Part 1

In [None]:
# Load the first-season irrigated polygons and stamp every feature with
# 1987 as its year of first observation.
firstyear = gpd.read_file(firstyearshp).assign(firstObser=1987)

In [None]:
#get the list of NMDB difference shapefiles, sorted by path
diff = sorted(
    os.path.join(differenceFolder, name)
    for name in os.listdir(differenceFolder)
    if name.endswith(".shp")
)
#start the results list with the first-year polygons
diffwithDate = [firstyear]
#loop through the difference files, add a firstObser (year first observed)
#column to each and append it to the list
for shp_path in diff:
    gdf = gpd.read_file(shp_path)
    # The year is characters 5-8 of the file NAME. Slicing the basename
    # (instead of a fixed offset into the full path) gives the same value
    # as path[47:51] for the current 42-character differenceFolder, but
    # keeps working if that folder is ever renamed or moved.
    gdf['firstObser'] = int(os.path.basename(shp_path)[5:9])
    diffwithDate.append(gdf)
#concatenate all polygons together (sort=True orders the columns by name)
x = pd.concat(diffwithDate, sort=True)
#clean up dataframe: flatten the index and drop the attributes that are
#not wanted in the exported layer
x = x.reset_index()
x = x.drop(['DN',  'index','area', 'year_range', 'catchment', 'area_diff'], axis=1)
#export
x.to_file(results + "yearFirstObserved_prelim.shp")

### Part 2

In [None]:
#grab any tiff to grab the dimensions: only the grid of this raster (its
#shape and x/y coordinates) is used below, not its pixel values
tif = 'results/nmdb/nmdb_Summer1992_93/nmdb_Summer1992_93_multithreshold_65Thres.tif'
# squeeze() drops length-1 dimensions (presumably the single band axis)
a = xr.open_rasterio(tif).squeeze()
# geotransform and projection for the template grid, requested for EPSG:3577
transform, projection = SpatialTools.geotransform(a, (a.x, a.y), epsg=3577)
# NOTE(review): if `a` is laid out (y, x), a.shape is (rows, cols), so
# `width` would hold the row count and `height` the column count. They are
# passed on as (height, width) below - confirm that matches the argument
# order rasterize_vector expects.
width,height = a.shape

#rasterize our shapefile, burning in the 'firstObser' field, and export as
#geotiff (the .tif named in raster_path is read back in a later cell)
yfo_array = SpatialTools.rasterize_vector(results + "yearFirstObserved_prelim.shp",
                                   height, width, transform, projection, field='firstObser',
                                   raster_path=results +"yearFirstObserved_prelim.tif")

In [None]:
# Load the final year of the cumulative-union analysis, explode multi-part
# features into individual polygons, renumber the rows, and drop the DN,
# area, year_range and catchment attributes before saving.
gdf = (
    gpd.read_file(cumulative_all)
    .explode()
    .reset_index(drop=True)
    .drop(columns=['DN', 'area', 'year_range', 'catchment'])
)
gdf.to_file(results + 'exploded_cumulativeAll.shp')

In [3]:
#rasterize the exploded polygons, using the preliminary year-first-observed
#GeoTIFF as the grid template (only its shape and x/y coordinates are used)
tif = results +"yearFirstObserved_prelim.tif"
# squeeze() drops length-1 dimensions (presumably the single band axis)
a = xr.open_rasterio(tif).squeeze()
# transform and projection are also reused when the eroded/dilated array
# is exported in the next cell
transform, projection = SpatialTools.geotransform(a, (a.x, a.y), epsg=3577)
# NOTE(review): if `a` is laid out (y, x), `width` would hold the row count
# and `height` the column count; they are passed on as (height, width) -
# confirm against rasterize_vector's signature.
width,height = a.shape

#rasterize our shapefile and keep it as numpy array (no field and no
#raster_path are given here)
explodedGDF_raster = SpatialTools.rasterize_vector(results+'exploded_cumulativeAll.shp',
                                   height, width, transform, projection)

In [4]:
#erode, then dilate the numpy array to help separate adjacent polygons:
#two erosion passes break thin connections between neighbouring polygons,
#one dilation pass grows the surviving regions part of the way back
eroded = binary_erosion(explodedGDF_raster, iterations=2)
dilated = binary_dilation(eroded, iterations=1)
#export the dilated result (not the intermediate `eroded` array) so that
#the file contents match the erode-then-dilate step its name describes
SpatialTools.array_to_geotiff(results+'cumulativeAll_erodeDilate.tif',
              dilated, geo_transform = transform, 
              projection = projection, 
              nodata_val=0)

TypeError: binary_dilation() got an unexpected keyword argument 'iteration'

In [None]:
#polygonize the raster by shelling out to gdal_polygonize.py, writing an
#ESRI Shapefile next to the input GeoTIFF
polygonize_cmd = ' '.join([
    'gdal_polygonize.py',
    results + "cumulativeAll_erodeDilate.tif",
    '-f',
    '"ESRI Shapefile"',
    results + "cumulativeAll_erodeDilate.shp",
])
os.system(polygonize_cmd)

In [None]:
# Load the polygons produced from the eroded/dilated raster. Each polygon is
# later assigned the most common year-first-observed value found inside it.
polygon_shp = results + "cumulativeAll_erodeDilate.shp"
gdf_final = gpd.read_file(polygon_shp)

def mymajority(x):
    """
    Majority statistic for 'zonal_stats' that disregards zeros.

    Returns the most frequent non-zero value of `x` as a float; when
    several values are equally frequent the smallest of them is
    returned. If the search raises a ValueError (for example because
    there are no non-zero values at all), -999 is returned instead.
    """
    try:
        nonzero_vals = x[np.nonzero(x)]
        uniques, tallies = np.unique(nonzero_vals, return_counts=True)
        return float(uniques[np.nanargmax(tallies)])
    except ValueError:
        return -999

#zonal stats: evaluate the custom majority statistic for every polygon
#against the preliminary year-first-observed raster
zonal_results = zonal_stats(vectors=gdf_final['geometry'],
                            raster=results+"yearFirstObserved_prelim.tif",
                            add_stats={'mymajority':mymajority})
gdf_final['firstObser'] = pd.DataFrame(zonal_results)['mymajority']

#clean (drop the DN attribute) and export
gdf_final = gdf_final.drop(columns='DN')
gdf_final.to_file(results + "yearFirstObserved_filtered_cleaned.shp")

# #area filter?
# gdf_final['area'] = gdf_final['geometry'].area
# gdf_final = gdf_final[gdf_final.area<500000000]

In [None]:
# Could use simplify for display purposes
# z = gdf_final.copy()
# z_simplified = z.copy()
# z_simplified["geometry"] = z.geometry.simplify(tolerance=50,preserve_topology=True)
# z_simplified.to_file(results+'simplified_exploded_cumulativeAll.shp')

In [None]:
# simplifying polygons

In [None]:
import sys
import os
import time
import copy
import geopandas as gpd

from shapely.geometry import asShape
from shapely.geometry import MultiLineString
from shapely.geometry import asLineString
from shapely.wkt import dumps
#from pprint import pprint

import shapefile
import numpy as np

sys.path.append('src/')
import bezier
import bendsimplify

In [None]:
# Input shapefile (Summer 1998-99 irrigated layer) read by the shapely
# simplify test in the last cell.
shape = "results/SICA/nmdb_Summer1998_99_Irrigated_OEHandLS_masked.shp"

In [None]:
# Nothing seems to happen when this runs...
# NOTE(review): the path passed here is not the `shape` variable defined in
# the previous cell (this file sits directly under results/ and carries an
# _objectID suffix) - confirm that this is the intended input.
bendsimplify.bend_simplify("results/nmdb_Summer1998_99_Irrigated_OEHandLS_masked_objectID.shp")

In [None]:
# This works, but each polygon is simplified on its own, so boundaries
# shared between neighbours are not kept in sync and overlaps/slivers occur
# (if the output is not for important analysis, an overlay could be used to
# remove the overlaps).
gdf = gpd.read_file(shape)
gdf_simplified = gdf.copy()
gdf_simplified["geometry"] = gdf.geometry.simplify(tolerance=50,preserve_topology=True)
# Export the simplified copy; writing `gdf` here would save the untouched
# input geometries under the "simplify" file name.
gdf_simplified.to_file("results/test_simplify_1998_99_tolerance50_shapely.shp")