## Calculating the frequency of irrigated cropping as a proxy for intensity

Script is divided into two sections:

1. The first part builds a large raster stack from all the irrigated-area files and exports it as NetCDF.
2. The raster stack is then loaded as an xarray object and the per-pixel frequency is calculated.

This script is for testing. To run this at scale, use croppingFrequency.py on hugemem.

In [121]:
import sys
import os
import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr

import sys
sys.path.append('src')
import DEAPlotting, SpatialTools

### User Inputs

In [122]:
# Folder the NetCDF raster stack is written to in Part 1 and read back
# from in Part 2.
results = "results/nmdb_plots/frequency/"

### Part 1: Create rasterstack of irrigation 

Too large to run on the VDI

In [None]:
# Season labels such as '1987_88' (start year + last two digits of the
# following year) that the for-loop uses to iterate through the folders.
season_labels = [
    str(start) + "_" + str(start + 1)[2:] for start in range(1987, 2019)
]
# Leave out the seasons that didn't work.
failed_seasons = ('2011_12', '2012_13')
years = sorted(
    label for label in season_labels if label not in failed_seasons
)

# Alphabetically sorted folder names found in `directory`; the loop below
# pairs them one-to-one with `years`.
# NOTE(review): `directory` is not defined in the visible cells - confirm it
# is set before this cell is run.
folders = sorted(os.listdir(directory))

# Convert our shapefiles into tiffs, add them to a giant xarray and
# then export as netcdf
def convertIrrShpToTiff(shp, year):
    """
    Rasterise one irrigated-area shapefile onto the grid of its matching tif
    and append the result to the module-level list ``da_list``.

    Parameters
    ----------
    shp : str
        Path to the shapefile. The tif path is derived from the first 95
        characters of this string, so the path must follow the layout that
        the hard-coded slice expects.
    year : str
        Season label of the shapefile. Not used inside the function; kept so
        existing calls keep working.

    Notes
    -----
    Relies on two module-level names: ``suffix`` (its characters from index
    10 onwards go into the tif file name) and ``da_list`` (receives the
    rasterised array).
    NOTE(review): ``suffix`` is not defined in the visible cells - confirm it
    is set before this function is called.
    """
    #open a tif and get transform info
    # shp[:95] is the same string the original shp[:77]+shp[77:95] produced.
    tif = shp[:95] + "_multithreshold_65Thres" + suffix[10:] + ".tif"
    ds = xr.open_rasterio(tif).squeeze()
    # The file only does `import SpatialTools`, so the helpers must be reached
    # through the module; the bare names would raise NameError.
    transform, proj = SpatialTools.geotransform(ds, (ds.x, ds.y), epsg=3577)
    rows, cols = ds.shape
    #turn vector into numpy array
    shp_arr = SpatialTools.rasterize_vector(
        shp, cols=cols, rows=rows, geo_transform=transform, projection=proj
    )
    #convert numpy array into xarray, reusing the tif's y/x coordinates
    shp_xr = xr.DataArray(shp_arr, coords=[ds.y, ds.x], dims=['y', 'x'])
    #append xarray to list
    da_list.append(shp_xr)

# Rasterise each season's shapefile in turn; convertIrrShpToTiff appends
# every result to da_list.
da_list = []
for year, folder in zip(years, folders):
    print("\r", "working on year: " + year, end = '')
    shapefile = directory + folder + "/" + "nmdb_Summer" + year + suffix + ".shp"
    convertIrrShpToTiff(shapefile, year)

#generate year-end dates to use as coordinates in the xarray dataset,
#leaving out the two years that are missing from the stack
all_year_ends = pd.date_range(start='1/1/1987', end='1/01/2019', freq='Y')
missing_year_ends = [pd.Timestamp('2011-12-31'), pd.Timestamp('2012-12-31')]
dates = all_year_ends.drop(missing_year_ends)
#concatenate all xarrays into a single multi-dim xarray with time ('dates') as coords.
stacked = xr.concat(da_list, dim=dates)
da = stacked.rename({'concat_dim':'time'}).rename('Irrigated_Area')
#convert to dataset
ds = da.to_dataset()
#export as netcdf
ds.to_netcdf(results + "NMDB_irrigation.nc")

### Part 2: Calculate per pixel frequency et al.

Test the workflow on a small area here, then run it at scale on hugemem using the .py file of the same name.

In [123]:
# Open the exported stack, cut out a small test window by integer position
# along x and y, and cast the values to boolean.
z = (
    xr.open_dataset(results + 'NMDB_irrigation.nc')
    .isel(x=range(26000, 28000))
    .isel(y=range(18500, 20000))
    .astype(bool)
)

In [124]:
#create parallelized function for calculating sum on the array
def count_irrigation(x, dim):
    """
    Sum `x` along `dim` with np.sum, wrapped in xr.apply_ufunc.

    `dim` is declared as the core dimension, which apply_ufunc places on the
    last axis; that is why np.sum is called with axis=-1.
    """
    sum_over_last_axis = {'axis': -1}
    return xr.apply_ufunc(
        np.sum,
        x,
        input_core_dims=[[dim]],
        kwargs=sum_over_last_axis,
        dask='parallelized',
    )

# Per-pixel number of True values of Irrigated_Area along time.
count = count_irrigation(z.Irrigated_Area, dim='time')
# Share of all time steps in the stack for which the pixel is True.
rawfrequency = count / len(z.time)

In [128]:
def IrrigationFirstOccurs(x, dim):
    """
    Return, per pixel, the index along `dim` at which irrigation first
    occurs.

    This works because of how np.nanargmax breaks ties:
    "In cases of multiple occurrences of the maximum values,
    the indices corresponding to the first occurrence are returned."

    NOTE(review): by the same rule, a pixel whose values are all equal along
    `dim` (e.g. never irrigated) gets index 0 - confirm that is intended.
    """
    argmax_over_last_axis = {'axis': -1}
    return xr.apply_ufunc(
        np.nanargmax,
        x,
        input_core_dims=[[dim]],
        kwargs=argmax_over_last_axis,
        dask='parallelized',
    )

# Index of the first irrigated time step per pixel. The whole dataset z is
# passed in, so the variable is reached via .Irrigated_Area later on.
firstOccured = IrrigationFirstOccurs(z, dim='time')
# Number of time steps from the first irrigated one to the end of the stack.
yearsIrrigated = len(z.time) - firstOccured 
# Irrigated count relative to the time steps since irrigation first appeared,
# rather than relative to the full length of the stack.
normalisedFrequency = count / yearsIrrigated

In [126]:
#convert first observed to an array with the date (year)
# Calendar years 1987-2018, leaving out 2011 and 2012.
excluded_years = (2011, 2012)
dates = np.asarray(
    [year for year in range(1987, 2019) if year not in excluded_years]
)

def timey(ind, time):
    """
    Look up `time` at the position(s) given by `ind`.

    Parameters
    ----------
    ind : index or array of indices
        Position(s) to pick out of `time`.
    time : indexable
        Values to select from (here: the array of years).

    Returns
    -------
    Whatever ``time[ind]`` evaluates to.
    """
    return time[ind]

# Use each pixel's first-irrigated index to look up the matching year in `dates`.
firstOccuredDates = timey(firstOccured.Irrigated_Area, dates)

In [None]:
# rawfrequency.where(rawfrequency >= 0.01).plot(figsize=(5,5), cmap='plasma')

In [132]:
# yearsIrrigated.Irrigated_Area.plot(figsize=(5,5), cmap='plasma')

In [133]:
# normalisedFrequency.Irrigated_Area.plot(figsize=(8,8), cmap='plasma')