## Validation of Annual Summer Irrigated Cropping Area (SICA)
The validation uses several measures. First, compare the overall irrigated area between the validation dataset and the prediction dataset; second, calculate a Jaccard similarity score to test how similar the two datasets are; then generate a confusion matrix with a number of measures describing the precision, omission, commission, accuracy, etc.

Relying on Claire Kraus's notebook for guidance:

https://github.com/GeoscienceAustralia/dea-notebooks/blob/ClaireK/Crop_mapping/NamoiPilotProjectWorkflow/ValidateAutomaticIrrigatedCropAreaGeotiffs.ipynb

In [None]:
# Scratch check of candidate y-axis tick values: the smallest irrigated area
# rounded to one decimal place and the largest rounded to the nearest thousand.
t = [
    round(df['irr_area'].min(), 1),
    round(df['irr_area'].max(), -3),
]
t

In [None]:
# Scratch cell: pass the largest irrigated area (truncated to an int) to
# `roundup` and display the result.
# NOTE(review): `roundup` is not defined in the visible part of this notebook —
# confirm where it comes from before running this cell.
roundup(int(df['irr_area'].max()))

In [None]:
# Smallest and largest irrigated area, truncated to whole numbers and rendered
# as strings with thousands separators.
t = [
    "{:,}".format(int(df['irr_area'].min())),
    "{:,}".format(int(df['irr_area'].max())),
]
t

In [None]:
import os
import pandas as pd
import matplotlib.ticker as ticker

In [None]:
# Years covered by the annual-area series: 1987-2010 and 2013-2018
# (2011 and 2012 are not part of the list).
year = list(range(1987, 2011)) + list(range(2013, 2019))

In [None]:
# Collect the 'irr_area' column of every CSV in `folder` into one table
# indexed by `year`, with one column per file.
DF = pd.DataFrame(index=year)
titles = []
# os.listdir returns entries in arbitrary order; sorting keeps the column
# order (and the matching `titles` list) identical on every run.
for file in sorted(os.listdir(folder)):
    # os.path.join works whether or not `folder` ends with a path separator.
    df = pd.read_csv(os.path.join(folder, file), index_col=0)
    # The column/title name is the file name with its last 16 characters removed.
    name = file[:-16]
    titles.append(name)
    # Values are assigned by position, so every CSV must have exactly one row
    # per entry in `year`.
    DF[name] = list(df.irr_area)

In [None]:
# Draft of the multi-panel figure: build a 5x2 grid, draw the DF series on the
# first panel and style that panel.
# `plt` and `fontsize` are expected to have been defined by other cells.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(25, 30))
# Work on one Axes: the grid returned by plt.subplots is an array and has no
# xaxis/grid/axvspan methods of its own.
ax = axes[0, 0]
# NOTE(review): plotting all of DF on the first panel is the presumed intent
# of this draft — confirm.
DF.plot(ax=ax, linestyle='--', marker='o', legend=False)
ax.set_ylim(bottom=0)

# ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
ax.xaxis.set_label_text("")
# Two y ticks: zero and the largest area in `df`, rounded to the nearest thousand.
t = [0, round(int(df['irr_area'].max()), -3)]
# Set ticks on this Axes explicitly; pyplot-level calls act on the current
# Axes, which need not be this panel.
ax.set_yticks(t)
ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
ax.grid(True, linestyle='--', alpha=0.75, axis='both')
# Shade the x-range 2010 to 2013.
ax.axvspan(2010, 2013, alpha=0.4, color='k')
ax.tick_params(labelsize=fontsize)
ax.set_facecolor('cornsilk')

In [None]:
# ax = plt.plot(DF, linestyle='--', marker='o', legend = False, figsize=(25,30), 
#         subplots=True, layout=(5,2), title=titles, grid=True, ylim=(0))
# # ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
# ax.xaxis.set_label_text("")
# t = [0, round(int(df['irr_area'].max()), -3)]
# plt.yticks(t,t)
# ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
# ax.grid(True, linestyle='--', alpha=0.75, axis='both')
# ax.axvspan(2010, 2013, alpha=0.4, color='k')
# plt.tick_params(labelsize=fontsize)
# ax.set_facecolor('cornsilk')

In [None]:
# Create the 5x2 figure of annual irrigated area, one panel per CSV found in
# `folder`, and save it as a PDF.
folder = "/g/data/r78/cb3058/dea-notebooks/ICE_project/results/nmdb_plots/csvs/annual_area/"
fontsize = 20

fig, axs = plt.subplots(5,2, figsize=(25, 30))

# os.listdir order is arbitrary; sorting puts each file on the same panel on
# every run. zip stops at the shorter input, so at most ten files are drawn.
for ax, file in zip(axs.ravel(), sorted(os.listdir(folder))):
    df = pd.read_csv(folder + file, index_col=0)
    # Panel name is the file name with its last 16 characters removed.
    catchment = file[:-16]
    peak_area = df['irr_area'].max()

    df.plot(ax=ax, colormap='jet', linestyle='--', marker='o', legend=False)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
    ax.xaxis.set_label_text("")

    # A single y tick at the rounded peak: nearest hundred for the two listed
    # rivers, nearest thousand for every other panel.
    digits = -2 if catchment in ('PAROO RIVER', 'MOONIE RIVER') else -3
    t = [round(int(peak_area), digits)]
    for tick in ax.yaxis.get_major_ticks():
        tick.set_pad(50)
    ax.set_yticks(t)
    # Leave 5% of the peak below zero and 10% above the peak as head room.
    ax.set_ylim(bottom=-int(peak_area * 0.05), top=peak_area + peak_area * 0.1)
    ax.set_yticklabels([format(i, ",") + " ha" for i in t])
    ax.grid(True, linestyle='--', alpha=0.5, axis='y', color="gray")
    # Shade the x-range 2010 to 2013.
    ax.axvspan(2010, 2013, alpha=0.4, color='k')
    ax.tick_params(labelsize=fontsize)
    # Inward ticks with a negative pad: presumably places the y label inside
    # the plotting area — confirm visually.
    ax.tick_params(axis='y', direction='in', pad=-125)
    ax.set_title(catchment, fontsize=fontsize)
    ax.set_facecolor('cornsilk')

plt.tight_layout()
# 'portrait' is one of the two values savefig documents for `orientation`.
plt.savefig("results/nmdb_plots/plots/NMDB_annual_area_subcatchment.pdf",
            orientation='portrait', dpi=300)

In [None]:
import numpy as np
import xarray as xr
import geopandas as gpd
import matplotlib.pyplot as plt

#import custom functions
import sys
sys.path.append('src')
import DEAPlotting, SpatialTools
from transform_tuple import transform_tuple

In [None]:
import os

# Input locations and file-name suffixes for the per-summer rasters.
mask_dir = "/g/data/r78/cb3058/dea-notebooks/ICE_project/data/spatial/NSWmask_and_LSmask.shp"
directory = "/g/data/r78/cb3058/dea-notebooks/ICE_project/results/nmdb/"
input_suffix = "_multithreshold_70Thres"
output_suffix = "_70Thres_OEHandLS_masked"

# Season labels of the form "<start year>_<last two digits of the next year>",
# leaving out the two excluded seasons, in sorted order.
x = range(1999,2000,1)
years = sorted(
    season
    for season in ("{}_{}".format(start, str(start + 1)[2:]) for start in x)
    if season not in ('2011_12', '2012_13')
)

# Sorted names of everything inside the results directory.
folders = sorted(os.listdir(directory))

In [None]:
# Keep only the entry at index 12 of the sorted directory listing; after this
# line `folders` is a single name rather than a list.
# NOTE(review): anything that later iterates `folders` will walk the
# characters of this one name — confirm that is intended.
folders = folders[12]

In [None]:
# Scratch cell: display the string formed by concatenating `directory` and
# `folder`.
directory+folder

In [None]:
# Scratch cell: display a raster path of the form
# <directory>nmdb_Summer<year>/nmdb_Summer<year><input_suffix>.tif
# NOTE(review): the concatenation only works while `year` is a string —
# confirm which cell last assigned it.
directory+"nmdb_Summer"+year+"/nmdb_Summer" + year + input_suffix + ".tif"

In [None]:
# Build one input raster path per season label in `years`.
# A comprehension keeps the loop variables local, so building this list does
# not overwrite the notebook-level `year` and `folder` names that other cells
# rely on. zip is kept so the list length is still limited by `folders`.
# NOTE(review): the path has no per-summer subfolder component and the paired
# `folders` entry is unused — confirm the expected directory layout.
inputs = [
    directory + "nmdb_Summer" + season + input_suffix + ".tif"
    for season, _ in zip(years, folders)
]
inputs

### User Inputs

In [None]:
# Season being validated.
year = '1999-00'

# GeoTIFF holding the irrigated cropping extent to be assessed.
irrigated = "/g/data/r78/cb3058/dea-notebooks/ICE_project/data/NSW_mdb_mask.tif"

# Validation dataset to compare against (the shapefile alternative and the
# optional clipping shapefile are kept below, commented out).
# validation = "/g/data/r78/cb3058/dea-notebooks/ICE_project/data/spatial/nmdb_OEH2017_irrigated.shp"
validation = "/g/data1a/r78/cb3058/dea-notebooks/ICE_project/data/nmdb_OEH2017_irrigated.tif"
# clip_shp = "/g/data/r78/cb3058/dea-notebooks/ICE_project/data/spatial/nmdb_individual_catchments/NAMOI RIVER.shp"

### Prepare the datasets

In [None]:
# Open the irrigated-extent GeoTIFF and reduce it to a 2-D array by dropping
# the 'band' coordinate and squeezing out length-1 dimensions.
irr  = xr.open_rasterio(irrigated).drop('band').squeeze()
#grab some transform info from it
# transform, projection = transform_tuple(irr, (irr.x, irr.y), epsg=3577)
# NOTE(review): xarray's rasterio reader presumably orders dimensions (y, x),
# in which case `width` holds the row count and `height` the column count.
# The names are left as they are because other cells use them — confirm
# before relying on either one individually.
width,height = irr.shape
#rasterize the catchment boundaries that encompass our validation area
# boundary = SpatialTools.rasterize_vector(clip_shp, height, width,
#                                          transform, projection, raster_path=None)
#clip extent to the catchment boundaries
# irr = irr.where(boundary)
#count number of nans
# num_of_nans_pred = np.isnan(irr.values).sum()
# Convert to a boolean irrigated / not-irrigated array. The confusion-matrix
# cell negates this array with `~`, which is a logical NOT only on booleans
# (on integers it is a bitwise NOT, on floats it raises), and the cast also
# matches how ValidationMaskBoolean is prepared.
AutomaticCropBoolean  = irr.values.astype(bool)

In [None]:
# Read the validation GeoTIFF, drop the 'band' coordinate, squeeze out
# length-1 dimensions and convert the values to a boolean numpy array.
ValidationMaskBoolean = (
    xr.open_rasterio(validation)
    .drop('band')
    .squeeze()
    .values
    .astype(bool)
)

In [None]:
# Turn the validation vector layer into an array, using the transform,
# projection and dimensions taken from the irrigated raster `irr`.
transform, projection = transform_tuple(irr, (irr.x, irr.y), epsg=3577)
width,height = irr.shape

# ValidationMaskBoolean = np.where(boundary, ValidationMaskBoolean, 0
# Rasterise and cast to boolean in one step.
ValidationMaskBoolean = SpatialTools.rasterize_vector(
    validation, height, width, transform, projection, raster_path=None,
).astype(bool)

In [None]:
# Write the boolean validation mask to a GeoTIFF with the transform and
# projection computed earlier, using 0 as the nodata value.
SpatialTools.array_to_geotiff(
    "/g/data1a/r78/cb3058/dea-notebooks/ICE_project/data/nmdb_OEH2017_irrigated.tif",
    ValidationMaskBoolean,
    geo_transform=transform,
    projection=projection,
    nodata_val=0,
)

In [None]:
# plt.figure(figsize=(10,10))
# plt.imshow(ValidationMaskBoolean)

### Non-site specific accuracy (compare areas)

In [None]:
# Non-site-specific comparison: total area of non-zero pixels in each dataset.
# Each pixel counts as 25 * 25 area units and the total is divided by 10000
# (reported as hectares).
validation_area, irrigated_area = (
    np.count_nonzero(mask) * (25 * 25) / 10000
    for mask in (ValidationMaskBoolean, AutomaticCropBoolean)
)
print("The area under irrigation in the validation dataset is: ", validation_area, " ha", sep="")
print("The area under irrigation in the irrigated area dataset is: ", irrigated_area, " ha", sep="")
# Predicted area expressed as a percentage of the validation area, to 1 d.p.
print("irrigated vs validation % is : ", round((irrigated_area / validation_area * 100), 1), sep="")

### Jaccard Similarity index

In [None]:
from sklearn.metrics import accuracy_score, jaccard_similarity_score

In [None]:
# Compare the two masks pixel by pixel. Both are flattened to 1-D boolean
# vectors first: scikit-learn treats 2-D indicator arrays as multilabel data
# in which every row is one sample, so the scores would otherwise be computed
# per raster row instead of per pixel.
truth = np.asarray(ValidationMaskBoolean).astype(bool).ravel()
predicted = np.asarray(AutomaticCropBoolean).astype(bool).ravel()

# Jaccard index = |intersection| / |union| of the irrigated pixels, computed
# directly so the result does not depend on the deprecated
# jaccard_similarity_score helper.
union = np.logical_or(truth, predicted).sum()
# Two empty masks are identical, so report perfect similarity instead of 0/0.
jss = np.logical_and(truth, predicted).sum() / union if union else 1.0
# Fraction of pixels on which the two masks agree.
ac = accuracy_score(truth, predicted, normalize=True)

In [None]:
# Report both similarity measures rounded to two decimal places.
print("The Normalised Jaccard Similarity Score is: ", round(jss, 2), sep="")
print("The Normalised Accuracy Score is: ", round(ac, 2), sep="")

### Confusion matrix

In [None]:
# Per-pixel agreement masks between the prediction ("Auto") and the validation
# layer ("Real"). Each name states what the mask contains, e.g. NoRealYesAuto
# is True where the prediction says irrigated but the validation does not.
# Cast to bool first so that `~` is a logical NOT regardless of the dtype the
# rasters were read with.
auto = np.asarray(AutomaticCropBoolean).astype(bool)
real = np.asarray(ValidationMaskBoolean).astype(bool)

YesRealYesAuto = np.logical_and(auto, real)    # true positives
NoRealNoAuto = np.logical_and(~auto, ~real)    # true negatives

YesRealNoAuto = np.logical_and(~auto, real)    # false negatives (omission)
NoRealYesAuto = np.logical_and(auto, ~real)    # false positives (commission)

In [None]:
# Pixel counts for the four confusion-matrix cells, taken from the masks of
# the same names.
Correct_positives = YesRealYesAuto.sum()
Incorrect_positives = NoRealYesAuto.sum()
Correct_negatives = NoRealNoAuto.sum()
Incorrect_negatives = YesRealNoAuto.sum()

Totalpixels = (width * height)

# Totals of the validation layer, used as denominators below.
validation_positives = ValidationMaskBoolean.sum()
validation_negatives = (~ValidationMaskBoolean).sum()

# Share of all pixels classified correctly / incorrectly.
Accuracy = (Correct_positives + Correct_negatives) / Totalpixels
Misclassification_rate = (Incorrect_positives + Incorrect_negatives) / Totalpixels
# Share of validation positives that were predicted positive.
True_Positive_Rate = Correct_positives / validation_positives
# Share of validation negatives that were predicted positive: the numerator
# is the incorrect positives, not the correct ones.
False_Positive_Rate = Incorrect_positives / validation_negatives
# Share of validation negatives that were predicted negative.
Specificity = Correct_negatives / validation_negatives
# Share of predicted positives that are positive in the validation layer.
Precision = Correct_positives / AutomaticCropBoolean.sum()
# Share of all pixels that are positive in the validation layer.
Prevalence = validation_positives / Totalpixels

In [None]:
# Print the validation report: a bold heading for the season, the
# confusion-matrix rates to five decimal places, then the area covered by each
# class of pixel (pixel count * 25 * 25 / 10000, reported as hectares).
print('\033[1m' + '{0} Automatic Irrigated Crop Extent'.format(year) + '\033[0m')
print('Accuracy = %.5f' % Accuracy)
print('Misclassification_rate = %.5f' % Misclassification_rate)
print('True_Positive_Rate = %.5f' % True_Positive_Rate)
print('False_Positive_Rate = %.5f' % False_Positive_Rate)
print('Specificity = %.5f' % Specificity)
print('Precision = %.5f' % Precision)
print('Prevalence = %.5f' % Prevalence)
print("#################")
# A separator and a unit are included so the number does not run straight
# into the label text.
print("The area of incorrect positives is: " + str(Incorrect_positives * (25*25)/10000) + " ha")
print("The area of correct positives is: " + str(Correct_positives * (25*25)/10000) + " ha")
print("The area of incorrect negatives is: " + str(Incorrect_negatives * (25*25)/10000) + " ha")