In [11]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
#load modules
import datacube
dc = datacube.Datacube()
from datacube.storage import masking
from datacube import Datacube
from datetime import datetime
from skimage import exposure

from datacube import helpers
import rasterio
from datacube_stats.statistics import GeoMedian
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

import geopandas as gpd
from sklearn.ensemble import ExtraTreesClassifier
import datacube_stats
import os
# Replace '547' and 'ck9738' with the path to your own home directory on the VDI
s2aws = Datacube(config='/home/547/ck9738/datacube-s2.conf')

In [13]:
# Define the necessary GeoTIFF handling functions

import xarray, rasterio, numpy as np
def numpy_to_xarray(array, geobox, name=None):
    """Wrap a plain ndarray as a DataArray laid out on a datacube GeoBox.

    One coordinate is built for every entry of ``geobox.dims``; its values and
    its ``units`` attribute are read from ``geobox.coords``. The GeoBox CRS is
    stored on the result under the ``crs`` attribute.

    array  -- ndarray whose axes follow the order of ``geobox.dims``
    geobox -- object exposing ``dims``, ``coords`` and ``crs``
    name   -- optional name given to the returned DataArray
    """
    # NOTE(review): xarray.Coordinate is a legacy name in newer xarray
    # releases -- confirm it still exists in the installed version.
    axes = []
    for dim in geobox.dims:
        axis = geobox.coords[dim]
        axes.append(xarray.Coordinate(dim, axis.values, attrs=dict(units=axis.units)))
    return xarray.DataArray(array, coords=axes, attrs=dict(crs=geobox.crs), name=name)

def geopandas_to_xarray(table, geobox, name=None):
    """Rasterise the geometries of a GeoDataFrame onto a GeoBox grid.

    The table is first reprojected to the GeoBox CRS (expressed as a Proj4
    string), then its geometries are burned into an array of shape
    ``(geobox.height, geobox.width)`` using the GeoBox affine transform.

    table  -- geopandas table providing ``to_crs`` and a ``geometry`` column
    geobox -- target grid (``crs``, ``height``, ``width``, ``affine``)
    name   -- optional name given to the returned DataArray

    Returns the rasterised array wrapped by ``numpy_to_xarray``.
    """
    # ``import rasterio`` alone does not guarantee that the ``features``
    # submodule has been loaded, so import it explicitly here.
    import rasterio.features

    target_crs = geobox.crs._crs.ExportToProj4()
    shapes = table.to_crs(target_crs).geometry
    burned = rasterio.features.rasterize(shapes=shapes,
                                         out_shape=(geobox.height, geobox.width),
                                         transform=geobox.affine)
    return numpy_to_xarray(burned, geobox, name)

def rasterfile_to_xarray(file, geobox, name=None, nodata=True, num_bands=(1), X_band=1):
    """Reproject one band of a raster file onto a GeoBox grid.

    file      -- path of the raster opened with ``rasterio.open``
    geobox    -- target grid (``crs``, ``height``, ``width``, ``affine``)
    name      -- optional name given to the returned DataArray
    nodata    -- value forwarded to ``reproject`` as ``dst_nodata``
                 NOTE(review): the default ``True`` is unusual for a nodata
                 value -- confirm what callers expect here.
    num_bands -- currently unused; kept so existing calls keep working
    X_band    -- index of the band to read from ``file``

    Returns the reprojected band wrapped by ``numpy_to_xarray``.
    """
    # ``import rasterio`` alone does not guarantee that the ``warp``
    # submodule has been loaded, so import it explicitly here.
    import rasterio.warp

    with rasterio.open(file) as src:
        # A band handle is passed to reproject instead of a fully read array,
        # so the whole source extent is not pulled into memory.
        band = rasterio.band(src, X_band)
        array = np.empty((geobox.height, geobox.width), dtype=band.dtype)
        rasterio.warp.reproject(source=band,
                                destination=array,
                                dst_crs=geobox.crs.crs_str,
                                dst_transform=geobox.affine,
                                dst_nodata=nodata)
    return numpy_to_xarray(array, geobox, name)

In [14]:
# Define the area of interest:
# open a shapefile and load data for its extent
def make_coords(filename,
                shapefile_dir='/g/data/u46/users/ck9738/Datasets/ML_for_smad_green/shapefiles_for_train/'):
    """Return the GDA94 lat/long bounding box of a project-area shapefile.

    filename      -- shapefile name, looked up inside ``shapefile_dir``
    shapefile_dir -- directory holding the training shapefiles

    Returns ``(xmin, ymax, xmax, ymin)``: the western edge, northern edge,
    eastern edge and southern edge of the box containing every geometry.
    """
    shapefile_loc = os.path.join(shapefile_dir, filename)

    # Import the project-area shapefile.
    project_area = gpd.read_file(shapefile_loc)

    # Reproject to GDA94 lat/long (EPSG:4283) so the bounds can be used to
    # query the datacube by longitude/latitude.
    geometry_gda94 = project_area['geometry'].to_crs(epsg=4283)

    # total_bounds is ordered (minx, miny, maxx, maxy); unpack in that order so
    # that ymin/ymax really hold the smaller/larger y value.
    xmin, ymin, xmax, ymax = geometry_gda94.total_bounds
    return (xmin, ymax, xmax, ymin)

def load_data(xmin, ymax, xmax, ymin,
              product='s2a_ard_granule',
              time=('2016-01-01', '2016-03-30')):
    """Load the first Sentinel-2 observation covering a lat/long bounding box.

    xmin, ymax, xmax, ymin -- bounding box in longitude/latitude
    product -- datacube product to load; the default is Sentinel-2A, use
               's2b_ard_granule' for Sentinel-2B
    time    -- (start, end) date range to search

    Data is loaded from the module-level ``s2aws`` datacube, reprojected to
    EPSG:3577 at 10 m resolution and grouped by solar day.

    Returns the dataset for the first time step only.
    Raises ValueError when the query finds no data.
    """
    # Bands to load; only 'red' is requested at the moment. Other candidates:
    # 'blue', 'green', 'rededge1', 'rededge2', 'rededge3', 'nir1', 'nir2',
    # 'swir1', 'swir2', 'pixel_quality'
    bands_of_int = ['red']

    query = {
        'lat': (ymin, ymax),
        'lon': (xmin, xmax),
        'output_crs': 'EPSG:3577',
        'resolution': (-10, 10),
        'time': time,
    }

    data_sent = s2aws.load(product=product, measurements=bands_of_int,
                           group_by='solar_day', **query)

    # Without a time dimension there is nothing to select from; fail with a
    # message that names the query instead of an opaque indexing error.
    if 'time' not in data_sent.dims:
        raise ValueError('no %s data found for query %r' % (product, query))

    return data_sent.isel(time=0)
#data_sent = data_sent.where(clear_pixels)


In [15]:
# Turn a 2-D array into a pandas Series
def array_topanda_s(array):
    """Flatten a 2-D labelled array into a pandas Series.

    The two spatial dimensions are stacked into a single ``z`` dimension:
    ``('y', 'x')`` when the array has those dimensions, otherwise
    ``('latitude', 'longitude')``.

    array -- 2-D array exposing ``shape``, ``dims`` and ``stack``

    Returns a pandas Series built from the stacked values.
    Raises ValueError when ``array`` is not two-dimensional.
    """
    if len(array.shape) != 2:
        raise ValueError('expected a 2-D array, got shape %r' % (tuple(array.shape),))

    # Choose the dimension names explicitly instead of swallowing every
    # exception from a failed stack, so unrelated errors are not hidden.
    if 'y' in array.dims and 'x' in array.dims:
        spatial_dims = ('y', 'x')
    else:
        spatial_dims = ('latitude', 'longitude')

    flat = array.stack(z=spatial_dims)
    return pd.Series(flat)

In [21]:
# Training-area shapefiles, processed one at a time further down.
filelist = [
    'ncas_base_1.shp',
    'ncas_base_2.shp',
    'ncas_base_3.shp',
    'ncas_base_4.shp',
]

# Starts empty; holds the combined training table once the areas are processed.
master_panda = pd.DataFrame()

In [17]:
def create_slope(dem_data, pixel_size=1.0):
    """Add a rough 'maximum slope' layer to a DEM dataset.

    Slope is defined as arctan(rise / run), evaluated separately along the two
    axes of the elevation grid; the result keeps, for every cell, the larger
    of the two absolute slopes in degrees.

    dem_data   -- DEM dataset (e.g. from dc.load(product='dsm1sv10')) with an
                  ``elevation`` variable indexed as (time, row, column)
    pixel_size -- distance between neighbouring cells, in the same units as
                  the elevation values. The default of 1.0 measures the run in
                  pixels, which overstates slope whenever a cell is wider than
                  one elevation unit; pass the real cell size for true angles.

    Returns the same dataset object, modified in place, with a ``slope``
    variable added.
    """
    # First time step only: a 2-D DataArray.
    elevation = dem_data.elevation[0, :, :]

    # Gradient (rise over run) along the row axis and along the column axis.
    grad_rows, grad_cols = np.gradient(elevation.values, pixel_size, axis=(0, 1))

    # Convert each gradient to an angle in degrees and drop the sign, because
    # only the steepness matters and not the direction of the fall.
    slope_rows = np.absolute(np.rad2deg(np.arctan(grad_rows)))
    slope_cols = np.absolute(np.rad2deg(np.arctan(grad_cols)))

    # Combine both directions into one map holding the larger value per cell.
    slope_max = np.maximum(slope_rows, slope_cols)

    # Reuse the elevation layer's own dimension names so that grids named
    # ('y', 'x') work as well as ('latitude', 'longitude').
    dem_data['slope'] = xr.DataArray(slope_max, dims=elevation.dims)
    return dem_data

In [22]:


# Raster sources that are resampled onto each training area's DSM grid.
NCAS_MOSAIC = "/g/data/u46/users/fxy120/australian_woody/mosaics/lztmre_aus_y20002011_dm7a2_d20050630.tif"
SMAD_GREEN_TILE = '/g/data/u46/users/dxr251/woody-v6/woody_60_-149.tif'
SMAD_GREEN_BANDS = [1, 2, 3]

# One table per training area; they are concatenated once after the loop
# because growing a DataFrame row-block by row-block copies it every time.
area_frames = []

for files in filelist:
    # Bounding box of the training-area shapefile, used as the DSM query extent.
    xmin, ymax, xmax, ymin = make_coords(files)
    query = {'x': (xmin, xmax), 'y': (ymin, ymax)}

    dsm = dc.load(product='dsm1sv10', **query)
    slope = create_slope(dsm)  # returns dsm itself with a 'slope' variable added
    geobox = dsm.geobox        # every other raster is resampled onto this grid

    # NCAS layer: values strictly between 110 and 200 form the positive class.
    # NOTE(review): confirm these thresholds against the NCAS product legend.
    ncas = rasterfile_to_xarray(NCAS_MOSAIC, geobox, X_band=1)
    ncas_class = np.logical_and(ncas > 110, ncas < 200)

    # Flatten to 1-D and store the class as 0/1 instead of False/True.
    little_panda = pd.DataFrame({'class': array_topanda_s(ncas_class)})
    little_panda['class'] = little_panda['class'].astype(int)

    # Slope is added as a predictor variable.
    little_panda['slope'] = array_topanda_s(slope.slope)

    # One column per band of the smad_green product, named by band number.
    for band in SMAD_GREEN_BANDS:
        smad_green = rasterfile_to_xarray(SMAD_GREEN_TILE, geobox,
                                          num_bands=(1, 2, 3), X_band=band)
        little_panda[band] = array_topanda_s(smad_green)

    area_frames.append(little_panda)

# pd.concat rejects an empty list, so fall back to an empty table in that case.
master_panda = pd.concat(area_frames, ignore_index=True) if area_frames else pd.DataFrame()
master_panda.head(10)

I am densified (external_values, 1 elements)


  import sys


I am densified (external_values, 1 elements)
I am densified (external_values, 1 elements)
I am densified (external_values, 1 elements)


Unnamed: 0,class,slope,1,2,3
0,0,66.852669,0.011901,0.478185,0.448217
1,0,64.765541,0.007710,0.443336,0.430334
2,0,58.649479,0.006552,0.452418,0.443837
3,0,48.925175,0.006581,0.444237,0.442053
4,0,44.492859,0.006155,0.424542,0.444962
5,0,52.390156,0.006485,0.437431,0.445485
6,0,56.894775,0.004184,0.419859,0.421206
7,0,47.494415,0.001740,0.341268,0.346248
8,0,27.411964,0.002779,0.343427,0.338281
9,0,22.397017,0.002023,0.325734,0.328925


In [23]:
master_panda.to_csv('/g/data/u46/users/ck9738/Datasets/ML_for_smad_green/training_datasets/ncas_truthed_landsat_wslope.csv')

In [None]:
# Show the slope layer of the most recently processed training area.
# `slope` is the dataset returned by create_slope() in the loop above; the
# array it computes internally is not visible at notebook scope.
fig, ax = plt.subplots(figsize = (12,12))
cax = ax.imshow(slope.slope.values)
ax.set_title('Maximum slope (degrees)')
fig.colorbar(cax, ax=ax)

plt.show()

In [None]:
# Inspect the slope layer of the most recently processed training area
# (the array built inside create_slope() is not visible at notebook scope).
slope.slope