## Results for the Annual Summer Irrigated Cropping Area (SICA) project

In [None]:
import numpy as np
import xarray as xr
import geopandas as gpd
import pandas as pd
from osgeo import gdal, ogr
import os
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from pathlib import Path
from shapely.geometry import Polygon, MultiPolygon
import warnings

### Entire catchment plot

This code will produce a timeseries of irrigated area aggregated across the entire Northern Murray Darling Basin

In [None]:
# Root folder of the results; the loop below expects one sub-folder per summer season in it.
directory = "/g/data/r78/cb3058/dea-notebooks/ICE_project/results/nmdb/"
# Tail of every irrigated-area shapefile name (goes between the season label and ".shp").
suffix = "_multith_65Thres_IrrigatedMasked65"

In [None]:
# Season labels ("1987_88" = start year + last two digits of the following
# year) that the for-loop uses to build each shapefile name.
years = [str(start) + "_" + str(start + 1)[2:] for start in range(1987, 2019)]
# leave out the seasons that didn't work, and keep the labels ordered
years = sorted(label for label in years if label not in ('2011_12', '2012_13'))

# alphabetically ordered entries of the results directory
folders = sorted(os.listdir(directory))

# one irrigated-area total per season, filled in by getIrrigatedArea()
area_a = []

def getIrrigatedArea(shp):
    """
    Read the shapefile `shp` and record its irrigated area.

    Only features whose DN attribute is 70, 75 or 80 are counted. Their
    summed area divided by 10000 is appended to the module-level list
    `area_a` (plotted as hectares further down, which assumes the layer's
    CRS is in metres -- TODO confirm). Nothing is returned.
    """
    polygons = gpd.read_file(shp)
    irrigated = polygons[polygons.DN.isin([80, 75, 70])]
    total_area = irrigated.area.sum()
    area_a.append(total_area / 10000)

        
#grab the sum of irrigated area for each year and add it to area_a
# NOTE(review): zip pairs the n-th season label with the n-th sorted folder,
# so the folder listing must line up one-to-one with `years` -- confirm.
for year, folder in zip(years, folders):
    #progress indicator (carriage return keeps it on one line)
    print("\r", "working on year: " + year, end = '')
    shapefile = directory + folder + "/" + "nmdb_Summer" + year + suffix + ".shp"
    getIrrigatedArea(shapefile)

#season labels back to their integer start year ("1987_88" -> 1987) for the plot axis
years_dt = [int(label[:-3]) for label in years]
#pandas dataframe of irrigated area, indexed by start year
df = pd.DataFrame({'year': years_dt, 'irrigated area': area_a}).set_index('year')

In [None]:
#plot
plot_rainfall = True

# the rainfall table is only read when the twin-axis version is requested
if plot_rainfall:
    rainfall = pd.read_csv("data/mdb_rainfall.csv", index_col=0)

# irrigated-area line; the figure is narrower when the rainfall axis is added
ax = df.plot(colormap='jet', linestyle='--', marker='o',
             figsize=(16,8) if plot_rainfall else (20,8))
ax.xaxis.set_major_locator(ticker.MultipleLocator(2))
ax.set_xlabel('Year (Summer Nov-Mar)')
ax.set_ylabel('Area under Irrigation (Ha)')
ax.grid(True, linestyle='--', alpha=0.75)
# grey band over 2010-2013 (presumably marking the seasons removed from `years`)
ax.axvspan(2010, 2013, alpha=0.4, color='grey')

if plot_rainfall:
    # rainfall anomalies on a second y-axis sharing the same x-axis
    ax2 = ax.twinx()
    color='red'
    ax2.set_ylabel('summer rainfall anomalies(mm)', color=color)
    ax2.plot(rainfall, color=color, marker='o', linestyle='--', label='rainfall anomaly')
    ax2.legend(loc=2)
    ax2.tick_params(axis='y', labelcolor=color)
    ax2.set_ylim(bottom=-150, top=150)
    # zero-anomaly reference line
    ax2.axhline(y=0, color='red')

    plt.tight_layout()
else:
    # only the area-only version gets a title and is written to disk
    ax.set_title("Northern MDB Annual Summer Irrigated Area")
    plt.tight_layout()
    plt.savefig("/g/data1a/r78/cb3058/dea-notebooks/ICE_project/results/nmdb_plots/nmdb_irrigated_ts.pdf",
               orientation='landscape')

In [None]:
# rainfall = pd.read_csv("data/mdb_rainfall.csv", index_col=0)
# ax = rainfall.plot(figsize=(21,8))
# ax.set_ylim(bottom=-150, top=150)
# ax.axhline(0, color='black', linestyle='--', alpha=0.5)
# plt.tight_layout

### Sub-catchment plots

This script will produce a timeseries of irrigated area for each subcatchment in the directory.  This script is slow to run because the geopandas overlay functions are cumbersome (this is true of other libraries as well). R-tree spatial indexing is NOT used as it is inaccurate with the clip boundaries.

In [None]:
# Root folder of the results; the loop below expects one sub-folder per summer season in it.
directory = "/g/data/r78/cb3058/dea-notebooks/ICE_project/results/nmdb/"
# Folder that is scanned for the individual sub-catchment ".shp" files.
individual_catchments_dir ="/g/data/r78/cb3058/dea-notebooks/ICE_project/data/spatial/nmdb_individual_catchments/"
# Tail of every irrigated-area shapefile name (goes between the season label and ".shp").
suffix = "_multith_65Thres_IrrigatedMasked65"

In [None]:
#generate all the filenames we need for the loops:
#full path of every ".shp" file in the sub-catchment folder
catchments = [
    os.path.join(individual_catchments_dir, name)
    for name in os.listdir(individual_catchments_dir)
    if name.endswith(".shp")
]

# Season labels ("1987_88" = start year + last two digits of the following
# year), without the seasons that didn't work, in sorted order.
years = sorted(
    label
    for label in (str(start) + "_" + str(start + 1)[2:] for start in range(1987, 2019))
    if label not in ('2011_12', '2012_13')
)

# integer start year of each season ("1987_88" -> 1987)
years_dt = [int(label[:-3]) for label in years]

# alphabetically ordered entries of the results directory
folders = sorted(os.listdir(directory))

In [None]:
# For every sub-catchment: total the irrigated area per season, then plot the
# timeseries and save it as a PDF named after the catchment.
for catchment in catchments:
    # File name without folder and ".shp"; used for the progress text, the
    # plot title and the output name. Path.stem replaces the fixed [85:-4]
    # slice, which only worked for one exact directory-string length.
    catchment_name = Path(catchment).stem
    # The sub-catchment boundary is the same for every year, so read it once
    # per catchment rather than once per year.
    catch = gpd.read_file(catchment)
    geometry = catch['geometry'].iloc[0]
    if isinstance(geometry, Polygon):
        geometry = MultiPolygon([geometry])

    area_a = []
    # NOTE(review): zip pairs the n-th season label with the n-th sorted
    # folder, so the folder listing must line up with `years` -- confirm.
    for year, folder in zip(years, folders):
        #progress indicator
        print("\r", "working on "+ catchment_name + ": " + year, end = '')
        #get the irrigated area shapefile
        irr = gpd.read_file(directory+folder+"/"+"nmdb_Summer"+ year + suffix+".shp")
        #pre-select candidates by bounding box using the R-tree spatial index
        sindex = irr.sindex
        possible_matches_index = list(sindex.intersection(geometry.bounds))
        possible_matches = irr.iloc[possible_matches_index]
        #keep the features that intersect the catchment; they are kept whole,
        #not cut at the boundary. gpd.overlay(irr, catch, how='intersection')
        #would cut them but is very slow.
        irr_clip = possible_matches[possible_matches.intersects(geometry)]
        area_a.append(irr_clip.area.sum() / 10000)

    #create pandas dataframe
    df = pd.DataFrame.from_dict({'year':years_dt, 'irr_area':area_a})
    df = df.set_index('year')
    #create plot and save
    ax = df.plot(colormap='jet', linestyle='--', marker='o', figsize=(20,8))
    ax.xaxis.set_major_locator(ticker.MultipleLocator(2))
    ax.set_xlabel('Year (Summer Nov-Mar)')
    ax.set_ylabel('Area under Irrigation (Ha)')
    ax.grid(True, linestyle='--', alpha=0.75)
    ax.axvspan(2010, 2013, alpha=0.4, color='grey')
    ax.set_title(catchment_name)
    plt.tight_layout()
    plt.savefig("/g/data1a/r78/cb3058/dea-notebooks/ICE_project/results/nmdb_plots/"+ catchment_name+".pdf",
               orientation='landscape')

### Cumulative Irrigatable area

#### Epochal change in spatial extent of irrigation

This section divides the analysis into distinct epochs: 1987-1995, 1996-2012, and 2013-2019
and finds the region under irrigation during each epoch.  This allows for spatially showing the growth in irrigation in a simple overlay plot.

In [None]:
# Root folder holding one "<AOI><year>_<yy>" sub-folder per summer season.
directory = "/g/data/r78/cb3058/dea-notebooks/ICE_project/results/nmdb_misaligned/"
# Folder that is scanned for the individual sub-catchment ".shp" files.
individual_catchments_dir ="/g/data/r78/cb3058/dea-notebooks/ICE_project/data/spatial/nmdb_individual_catchments/"
# Prefix of every season folder and shapefile name.
AOI = "nmdb_Summer"
# Tail of every irrigated-area shapefile name (goes between the season label and ".shp").
suffix = "_multith_65Thres_IrrigatedMasked65"

In [None]:
#create list of catchment shapefiles (full paths of the ".shp" files)
catchments = [
    os.path.join(individual_catchments_dir, name)
    for name in os.listdir(individual_catchments_dir)
    if name.endswith(".shp")
]

#time ranges: start years (as strings) of the seasons in each epoch.
#The 2011 and 2012 start years are left out. They are excluded with an
#explicit comparison; the previous `e not in ('2011')` was a substring test
#against a plain string, not a tuple membership test.
t1 = [str(t) for t in range(1987, 1996)]
t2 = [str(t) for t in range(1996, 2012) if t != 2011]
t3 = [str(t) for t in range(2012, 2019) if t != 2012]

In [None]:
def cliptocatchment(t):
    """
    Clip one season's irrigated-area shapefile to the current catchment.

    `t` is the season's start year as a string (e.g. "1987"). The season's
    shapefile is read, intersected with the catchment boundary and the result
    is appended to the module-level list `shapes`. Nothing is returned.

    Relies on the globals `catchment` (path of the catchment shapefile),
    `directory`, `AOI`, `suffix` and `shapes`.
    """
    #progress indicator; Path.stem gives the catchment file name without
    #folder and ".shp", independent of how long the folder path is
    print("\r", "working on "+Path(catchment).stem+": "+t+".", end = '')
    nextyear= str(int(t)+1)[2:]
    #season folder and file share the same "<AOI><year>_<yy>" stem
    season = AOI+t+"_"+nextyear
    irr = gpd.read_file(directory+season+"/"+season+suffix+".shp")
    catch = gpd.read_file(catchment)
    #overlay rather than an R-tree/intersects selection because it is more accurate
    irr = gpd.overlay(irr, catch, how='intersection')
    shapes.append(irr)

def unionyears(timelist):
    """
    Union the clipped irrigated areas of several seasons and export them.

    Every start year in `timelist` (list of year strings) is clipped to the
    current catchment via cliptocatchment(), which appends to the global
    `shapes`. Everything in `shapes` is then concatenated, dissolved by the
    DN attribute, tagged with `year_range` and `catchment` columns and
    written to "<results>_plots/shapes/<first>_<last>/<catchment>_<first>_<last>.shp".

    Export problems do not stop the run: a message with the reason is
    printed and the function returns normally.
    """
    # silence library warnings for the rest of the session (set once, not per year)
    warnings.filterwarnings("ignore")
    for t in timelist:
        cliptocatchment(t)

    # catchment file name without folder and ".shp"
    name = Path(catchment).stem
    try:
        year_range = timelist[0] + "_" + timelist[-1]
        x = pd.concat(shapes)
        x = x.dissolve(by='DN', aggfunc='sum')
        x['year_range'] = year_range
        x['catchment'] = name
        # directory[:-12] strips the trailing 12 characters of `directory`
        # so that "_plots/shapes/" can be appended to the remaining stem
        x.to_file(directory[:-12] + "_plots/shapes/" + year_range + "/" + name + "_" + year_range + ".shp")
    except Exception as err:
        # Best effort: keep going with the next catchment, but say why this
        # one failed. Unlike a bare except, KeyboardInterrupt still
        # interrupts the run.
        print(" Failed to export " + name + ": " + str(err))

In [None]:
# Export the t1 union for every catchment. `catchment` and `shapes` are
# globals read by unionyears()/cliptocatchment(); `shapes` is emptied for
# each catchment.
for catchment in catchments:
    shapes=[]
    unionyears(t1)

In [None]:
# Export the t2 union for every catchment. `catchment` and `shapes` are
# globals read by unionyears()/cliptocatchment(); `shapes` is emptied for
# each catchment.
for catchment in catchments:
    shapes=[]
    unionyears(t2)

In [None]:
# Export the t3 union for every catchment. `catchment` and `shapes` are
# globals read by unionyears()/cliptocatchment(); `shapes` is emptied for
# each catchment.
for catchment in catchments:
    shapes=[]
    unionyears(t3)

#### Cumulative sum timeseries

Need to generate smaller ranges than above, say every 3 years and union shapefiles
then subtract (gpd.difference?) the unioned files from the previous epoch to find the difference
then calculate area on the difference (except for first range which is starting point)
then add that to a dataframe with the date range as the index
then plot



In [None]:
# Root folder holding one "<AOI><year>_<yy>" sub-folder per summer season.
directory = "/g/data/r78/cb3058/dea-notebooks/ICE_project/results/nmdb_misaligned/"
# Folder that is scanned for the individual sub-catchment ".shp" files.
individual_catchments_dir ="/g/data/r78/cb3058/dea-notebooks/ICE_project/data/spatial/nmdb_individual_catchments/"
# Prefix of every season folder and shapefile name.
AOI = "nmdb_Summer"
# Output root: one sub-folder per time range is created here and filled with the unioned shapefiles.
interim_results = "/g/data/r78/cb3058/dea-notebooks/ICE_project/results/nmdb_plots/shapes/interim/"
# Tail of every irrigated-area shapefile name (goes between the season label and ".shp").
suffix = "_multith_65Thres_IrrigatedMasked65"

In [None]:
#create list of catchment shapefiles (full paths of the ".shp" files)
catchments = [
    os.path.join(individual_catchments_dir, name)
    for name in os.listdir(individual_catchments_dir)
    if name.endswith(".shp")
]

#time ranges: ten three-year windows of start years (as strings);
#no window starts between 2011 and 2012
timerangelist = [
    [str(year) for year in range(first, first + 3)]
    for first in (1987, 1990, 1993, 1996, 1999, 2002, 2005, 2008, 2013, 2016)
]

# individual names for the windows (same list objects as in timerangelist)
t1, t2, t3, t4, t5, t6, t7, t8, t9, t10 = timerangelist

In [None]:
# One output folder per time range, named "<first>_<last>" (e.g. "1987_1989").
# makedirs(..., exist_ok=True) lets this cell be re-run without failing on
# folders that already exist, and also creates missing parent folders.
for i in timerangelist:
    os.makedirs(interim_results+i[0]+"_"+i[-1], exist_ok=True)

In [None]:
def cliptocatchment(t):
    """
    Clip one season's irrigated-area shapefile to the current catchment.

    `t` is the season's start year as a string (e.g. "1987"). The season's
    shapefile is read, intersected with the catchment boundary and the result
    is appended to the module-level list `shapes`. Nothing is returned.

    Relies on the globals `catchment` (path of the catchment shapefile),
    `directory`, `AOI`, `suffix` and `shapes`.
    """
    #progress indicator; Path.stem gives the catchment file name without
    #folder and ".shp", independent of how long the folder path is
    print("\r", "working on "+Path(catchment).stem+": "+t+".", end = '')
    nextyear= str(int(t)+1)[2:]
    #season folder and file share the same "<AOI><year>_<yy>" stem
    season = AOI+t+"_"+nextyear
    irr = gpd.read_file(directory+season+"/"+season+suffix+".shp")
    catch = gpd.read_file(catchment)
    irr = gpd.overlay(irr, catch, how='intersection') #doing it this way because its more accurate.
    shapes.append(irr)

def unionyears(timerangelist):
    """
    Export one unioned shapefile per time range for the current catchment.

    `timerangelist` is a list of time ranges, each a list of start-year
    strings. For every range the years are clipped via cliptocatchment()
    (which appends to the global `shapes`), then everything in `shapes` is
    concatenated, dissolved by the DN attribute, tagged with `year_range`
    and `catchment` columns and written to
    "<interim_results><first>_<last>/<catchment>_<first>_<last>.shp".

    Export problems do not stop the run: a message with the reason is
    printed and processing continues with the next range.

    NOTE(review): `shapes` is not reset inside this function, so each
    exported file also contains the shapes of the ranges processed before
    it. Confirm that this cumulative behaviour is intended.
    """
    # silence library warnings for the rest of the session (set once)
    warnings.filterwarnings("ignore")
    # catchment file name without folder and ".shp"
    name = Path(catchment).stem
    for timerange in timerangelist:
        for t in timerange:
            cliptocatchment(t)
        try:
            year_range = timerange[0] + "_" + timerange[-1]
            x = pd.concat(shapes)
            x = x.dissolve(by='DN', aggfunc='sum')
            x['year_range'] = year_range
            x['catchment'] = name
            x.to_file(interim_results + year_range + "/" + name + "_" + year_range + ".shp")
        except Exception as err:
            # Best effort: carry on, but say why this export failed. Unlike
            # a bare except, KeyboardInterrupt still interrupts the run.
            print(" Failed to export " + name + ": " + str(err))

In [None]:
# This will generate all of our time-range unioned shapefiles
# `catchment` and `shapes` are globals read by unionyears()/cliptocatchment();
# `shapes` is emptied once per catchment, before all of its time ranges.
for catchment in catchments:
    shapes=[]
    unionyears(timerangelist)

In [None]:
# Now we need to find the difference between time-adjacent files

In [None]:
# Obtain the geometries that are part of df1 but are not contained in df2.
# NOTE(review): df1 and df2 are placeholders here; they must be GeoDataFrames
# defined before this line can run.
gpd.overlay(df1, df2, how='difference')

In [None]:
# Interim union shapefiles of the "CONDAMINE-CULGOA RIVERS" catchment for the
# first four time ranges, read in chronological order.
conda_87_89, conda_90_92, conda_93_95, conda_96_98 = (
    gpd.read_file("results/nmdb_plots/shapes/interim/" + span + "/CONDAMINE-CULGOA RIVERS_" + span + ".shp")
    for span in ("1987_1989", "1990_1992", "1993_1995", "1996_1998")
)

In [None]:
# show the 1996-1998 frame as the cell output
conda_96_98

In [None]:
# NOTE(review): `areas` is not referenced again in the visible code; the
# helper functions further down append to a list called `area` instead.
areas = []

In [None]:
# total area of the 1987-1989 frame divided by 10000 (hectares if the CRS is in metres -- TODO confirm)
conda_87_89.area.sum() / 10000

In [None]:
# parts of the 1990-1992 geometries that are not covered by the 1987-1989 geometries
diff = gpd.overlay(conda_90_92, conda_87_89, how='difference')


In [None]:
# total area of the difference divided by 10000 (hectares if the CRS is in metres -- TODO confirm)
diff.area.sum() / 10000

In [None]:

def areaofFirstYear(catchment):
    """
    Record the area of the first time range (1987_1989) for `catchment`.

    `catchment` is the catchment name used in the interim file names. The
    summed area divided by 10000 is appended to the module-level list `area`.
    """
    first_range_file = "results/nmdb_plots/shapes/interim/1987_1989/" + catchment + "_1987_1989.shp"
    baseline = gpd.read_file(first_range_file)
    baseline_area = baseline.area.sum() / 10000
    area.append(baseline_area)
    
def getdifferenceAreas(yr1, yr2, catchment):
    """
    Record the area that is new in the later of two time ranges.

    `yr1` and `yr2` are time-range labels such as "1987_1989" and may be
    given in either order. `catchment` is the catchment name used in the
    interim file names. The part of the later range's geometries that is not
    covered by the earlier range is computed; its area divided by 10000 is
    appended to the module-level list `area` and also returned.
    """
    # Labels start with a four-digit year, so string order is chronological.
    # Sorting makes the result independent of the argument order.
    earlier, later = sorted((yr1, yr2))
    interim = "results/nmdb_plots/shapes/interim/"
    # File names are "<catchment>_<range>.shp", the same pattern
    # areaofFirstYear reads; the "_" separator was missing here.
    df_earlier = gpd.read_file(interim + earlier + "/" + catchment + "_" + earlier + ".shp")
    df_later = gpd.read_file(interim + later + "/" + catchment + "_" + later + ".shp")
    # overlay needs the loaded frames, not the label strings
    diff = gpd.overlay(df_later, df_earlier, how='difference')
    new_area = diff.area.sum() / 10000
    # append to the module-level list; a local `area = []` used to shadow it
    # and the result was thrown away
    area.append(new_area)
    return new_area

In [None]:
def getcumulativeNewAreas(catchment):
    """
    Run the area helpers for `catchment` over all of `yearstrings`.

    First areaofFirstYear() is called, then getdifferenceAreas() once for
    each pair of neighbouring labels, passed as (current, previous).
    """
    areaofFirstYear(catchment)
    for current, previous in zip(yearstrings[1:], yearstrings):
        getdifferenceAreas(current, previous, catchment)
    

In [None]:
# list that areaofFirstYear() appends its result to
area = []
# "<first>_<last>" labels of the time ranges, in chronological order
yearstrings = ['1987_1989','1990_1992', '1993_1995', '1996_1998', '1999_2001', '2002_2004', '2005_2007', '2008_2010', '2013_2015', '2016_2018']

In [None]:
# NOTE(review): bare string expression, not assigned to anything -- presumably a catchment name kept as a reminder
"BORDER RIVERS"

In [None]:
# scratch check: print every label ("v1") next to the label before it ("v2")
for v2, v in zip(yearstrings, yearstrings[1:]):
    print("v1",v,"v2", v2)