# Radar training data analyses.ipynb

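This notebook loads radar backscatter data (the `s1_gamma0_scene` product) from a datacube for a bounding box in the TC Debbie region, masks it with the polygons of a training shapefile for one land cover type, saves the time-averaged masked data to disk, and plots a histogram of the masked VH values.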

By Sharon Jones, September 2017, Python v3, DEA v1.5.2, Radar data from `simoncube`

Modified after `Check_for_statistical_difference_in_slope.ipynb` in Geoscience Australia's `GWBAGDC` Git repo. 

**Code dependencies**
- training data shape files for different land cover types (derived from ArcGIS over the TC Debbie landfall region). The code will look for the training datasets in `/g/data1/w85/training_shapefiles/`. Ensure that all of the training shapefiles are executable.

## Imports and functions

In [None]:
# Import the libraries we need in the code and tell matplotlib to display the plots here
%matplotlib inline
import fiona
import shapely.geometry
import rasterio
import rasterio.features
import geopandas as gp
import datacube
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import scipy.stats
import pandas
import pickle
import csv
import os

In [None]:
# Set up some functions to use later in the code
def warp_geometry(geom, src_crs, dst_crs):
    """
    Reproject a shapely geometry from src_crs to dst_crs.

    The geometry is converted to a GeoJSON-like mapping, transformed with
    rasterio.warp.transform_geom and rebuilt as a shapely geometry.

    Parameters
    ----------
    geom : shapely geometry to reproject
    src_crs : coordinate reference system of geom, in any form accepted by
        rasterio.warp.transform_geom
    dst_crs : target coordinate reference system, same accepted forms

    Returns
    -------
    shapely geometry expressed in dst_crs
    """
    # rasterio.warp is a submodule that the notebook's import cell never loads
    # explicitly; import it here so this function does not depend on some
    # other import pulling it in as a side effect.
    import rasterio.warp

    geojson_geom = shapely.geometry.mapping(geom)
    warped_geom = rasterio.warp.transform_geom(src_crs, dst_crs, geojson_geom)
    return shapely.geometry.shape(warped_geom)

def geometry_mask(geom, geobox, all_touched=False, invert=False):
    """
    Rasterize a single geometry into a boolean mask on the grid of geobox.

    The geometry is wrapped in a one-element list and passed to
    rasterio.features.geometry_mask, using geobox.shape as the output shape
    and geobox.affine as the transform. all_touched and invert are forwarded
    unchanged. With the default invert=False, pixels that overlap the geometry
    are False.
    """
    rasterize_options = dict(
        out_shape=geobox.shape,
        transform=geobox.affine,
        all_touched=all_touched,
        invert=invert,
    )
    return rasterio.features.geometry_mask([geom], **rasterize_options)

## Define the extent of the radar files

In [None]:
# Pickle file used to cache the result of the datacube query between runs
cache = '/g/data/u46/users/sj9724/xarray.pickle.debbie'

# Alternative extent: call for whole Debbie area
# lon = 149.71, 147.83
# lat = -21.155, -19.72

# Extent in use: call for partial Debbie area
lon = (148.68, 148.21)
lat = (-20.30, -20.12)

# time = '2016-11-01', '2016-12-30' # *see alt time slice cell below

## Read in the training data shapefile and create a mask from it. 
This code reads in the shapefile and identifies and lists all of the polygons within it.

In [None]:
# Read the training polygons for one land cover type into a GeoDataFrame
shp = gp.GeoDataFrame.from_file('/g/data/w85/radar_grad/training_shapefiles_sj/crop_areas.shp')
# Label for the land cover type held in this shapefile
training_type = 'crops'
# head must be called: printing the bare attribute shows the bound method
# (and the repr of the whole frame) instead of the first few rows
print(shp.head())

This section filters our polygons to keep only those that intersect the bounding box we specified above. It then combines all the relevant polygons into a single polygon that we can use to create our mask.

In [None]:
# shapely.ops is a submodule that the import cell does not load explicitly;
# import it here so unary_union does not rely on another package importing it.
import shapely.ops

# Create a bounding box from the locations specified above. min/max puts the
# corners in (minx, miny, maxx, maxy) order regardless of the order in which
# the lon/lat pairs were written.
box = shapely.geometry.box(min(lon), min(lat), max(lon), max(lat), ccw=True)
# Only get the polygons that intersect the bounding box (i.e. remove all the
# irrelevant ones). Boolean indexing keeps every intersecting row, whereas
# where(...).dropna() would also discard intersecting polygons that happen to
# have a missing value in any attribute column.
filtered = shp[shp.intersects(box)]
# Combine all of the relevant polygons into a single polygon
shp_union = shapely.ops.unary_union(filtered.geometry)

## Read in the data we want to apply the mask to

In [None]:
# Open a datacube connection using the settings in the simoncube config file
dc = datacube.Datacube(
    config='/g/data/u46/users/brl654/datacube/simoncube.conf',
)

In [None]:
# Load the radar data from the pickle cache if possible, otherwise query the
# datacube and write the result to the cache for next time.
# NOTE(review): pickle.load executes whatever the file contains - only point
# `cache` at files you created yourself.
try:
    with open(cache, 'rb') as cache_file:
        z = pickle.load(cache_file)  # this only takes tens of seconds. (6GB)
except Exception as err:
    # A missing, unreadable or stale cache falls through to a fresh query.
    # Catching Exception (rather than a bare except) lets KeyboardInterrupt
    # and SystemExit propagate, so interrupting the cell no longer kicks off
    # the slow query and overwrites the cache.
    print('Cache not used ({!r}); loading from the datacube instead.'.format(err))
    # this may be 5 to 10 minutes (seeking through half a terabyte)
    z = dc.load(product='s1_gamma0_scene', lat=lat, lon=lon, output_crs='epsg:3577', resolution=(-25, 25))  #, time = time)
    with open(cache, 'wb') as cache_file:
        pickle.dump(z, cache_file, protocol=-1)  # save result to disk

In [None]:
# Display a summary of the loaded dataset as the cell output
z

In [None]:
# Display the time coordinate of the loaded dataset as the cell output
z.time

In [None]:
# Average the vh band over time, then plot every 10th sample along each of
# the two remaining axes
vh_time_mean = z.vh.mean(dim='time')
vh_time_mean[::10, ::10].plot(cmap='inferno', vmax=0.1)

In [None]:
from osgeo.osr import SpatialReference
# Get the WKT for EPSG:3577 (AGDC projection)
spatial_ref_object = SpatialReference()
# ImportFromEPSG reports failure through a non-zero return code unless GDAL
# exceptions are enabled; check it so that a failed lookup is not silently
# exported as an unusable WKT string.
epsg_status = spatial_ref_object.ImportFromEPSG(3577)
if epsg_status:
    raise RuntimeError('Could not import EPSG:3577 (OGR error code {})'.format(epsg_status))
spatial_ref = spatial_ref_object.ExportToWkt()

In [None]:
# Reproject the combined training polygon from the shapefile's CRS to the
# CRS described by spatial_ref
warped_union = warp_geometry(shp_union, shp.crs, spatial_ref)
# Rasterize it onto the grid of the radar data; invert=True is passed so that
# the mask is True where the polygon is
mask = geometry_mask(warped_union, z.geobox, invert=True)
# Keep the data only where the mask is True
data_masked = z.where(mask)
print(data_masked)

In [None]:
# Plot the vh band of the masked dataset, averaged over time
data_masked.mean(dim='time').vh.plot()

In [None]:
# Time average of the masked dataset; the bare name on the last line shows it
# as the cell output
data_masked_mean = data_masked.mean(dim='time')
data_masked_mean

In [None]:
# Build the output file name from the bounding box corners and the training
# type, e.g. ..._<lat0>_<lat1>_<lon0>_<lon1>.<training_type>
bounds_tag = '_'.join(str(corner) for corner in (lat[0], lat[1], lon[0], lon[1]))
outfile = '/g/data/u46/users/sj9724/xarray.pickle.debbie_' + bounds_tag + '.' + training_type
with open(outfile, 'wb') as file:
    pickle.dump(data_masked_mean, file, protocol=-1)  # save result to disk

## Let's plot a histogram of the data that has come back from the mask

In [None]:
# Histogram bin edges run from minval up to (but not including) the largest
# masked vh value, in steps of 0.001
minval = 0.001
maxval = data_masked.vh.max()
bin_values = np.arange(minval, maxval, 0.001)
# Histogram of all masked vh values using those bins
data_masked.vh.plot.hist(bins=bin_values)