In [74]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [75]:
from datacube import helpers

In [76]:
# Print the built-in documentation of datacube's GeoTIFF writer for reference.
help(helpers.write_geotiff)

Help on function write_geotiff in module datacube.helpers:

write_geotiff(filename, dataset, profile_override=None, time_index=None)
    Write an ODC style xarray.Dataset to a GeoTIFF file.
    
    :param filename: Output filename
    :param dataset: xarray dataset containing one or more bands to write to a file.
    :param profile_override: option dict, overrides rasterio file creation options.
    :param time_index: DEPRECATED



In [77]:
#load modules
import datacube
from datacube.storage import masking
from datacube import Datacube
from datetime import datetime
from skimage import exposure

import rasterio
from datacube_stats.statistics import GeoMedian
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

import geopandas as gpd
from sklearn.ensemble import ExtraTreesClassifier
import datacube_stats
import os
# Replace '547' and 'ck9738' with the path to your own home directory on the VDI.
# This Datacube handle is the one the data-loading code further down queries.
s2aws = Datacube(config='/home/547/ck9738/datacube-s2.conf')

In [78]:
# Define the necessary GeoTIFF handling functions

import xarray, rasterio, numpy as np
def numpy_to_xarray(array, geobox, name=None):
    """Wrap an ndarray in an ``xarray.DataArray`` georeferenced by ``geobox``.

    :param array: ndarray whose axes follow ``geobox.dims``.
    :param geobox: object exposing ``dims``, ``coords`` and ``crs``
        (intended to be a ``datacube.model.GeoBox``).
    :param name: optional name for the returned DataArray.
    :return: DataArray with one coordinate per geobox dimension, each carrying
        a ``units`` attribute, and the geobox CRS stored in ``attrs['crs']``.
    """
    dims = tuple(geobox.dims)
    # Describe every coordinate as a ``(dims, values, attrs)`` tuple rather than
    # through ``xarray.Coordinate``: that name is a deprecated alias which newer
    # xarray releases no longer provide.
    coords = {
        dim: (dim, geobox.coords[dim].values, dict(units=geobox.coords[dim].units))
        for dim in dims
    }
    return xarray.DataArray(array, coords=coords, dims=dims,
                            attrs=dict(crs=geobox.crs), name=name)

def geopandas_to_xarray(table, geobox, name=None):
    """Rasterise the geometries of a geopandas table onto the grid of ``geobox``.

    The table is reprojected to the geobox CRS before rasterising, so it may be
    stored in any CRS.

    :param table: geopandas table providing ``to_crs`` and a ``geometry`` column.
    :param geobox: target grid; its ``crs``, ``height``, ``width`` and ``affine``
        define the output raster.
    :param name: optional name for the returned DataArray.
    :return: the rasterised array wrapped by ``numpy_to_xarray``.
    """
    # ``import rasterio`` on its own does not guarantee that the ``features``
    # submodule has been loaded; import it explicitly so the attribute lookup
    # below cannot fail with AttributeError.
    import rasterio.features

    # NOTE(review): this reaches into the private ``_crs`` attribute of the
    # geobox CRS and assumes it offers ``ExportToProj4`` -- confirm against the
    # installed datacube version.
    target_proj4 = geobox.crs._crs.ExportToProj4()
    shapes = table.to_crs(target_proj4).geometry
    array = rasterio.features.rasterize(shapes=shapes,
                                        out_shape=(geobox.height, geobox.width),
                                        transform=geobox.affine)
    return numpy_to_xarray(array, geobox, name)

def rasterfile_to_xarray(file, geobox, name=None, nodata=True, num_bands=(1), X_band= 1):
    """Reproject one band of a raster file onto the grid of ``geobox``.

    :param file: source raster, opened with ``rasterio.open``.
    :param geobox: target grid; its ``height``, ``width``, ``crs`` and ``affine``
        define the shape and georeferencing of the result.
    :param name: optional name for the returned DataArray.
    :param nodata: forwarded unchanged to ``rasterio.warp.reproject`` as
        ``dst_nodata``. NOTE(review): the default ``True`` is a boolean rather
        than a pixel value -- confirm that this is intended.
    :param num_bands: unused; kept only so existing callers keep working.
    :param X_band: index of the band to read, passed to ``rasterio.band``.
    :return: the reprojected band wrapped by ``numpy_to_xarray``.
    """
    # ``import rasterio`` on its own does not guarantee that the ``warp``
    # submodule has been loaded; import it explicitly before using it.
    import rasterio.warp

    with rasterio.open(file) as src:
        # Hand reproject a band handle instead of a loaded array so the whole
        # source extent is never read into memory.
        band = rasterio.band(src, X_band)
        array = np.empty((geobox.height, geobox.width), dtype=band.dtype)
        rasterio.warp.reproject(source=band,
                                destination=array,
                                dst_crs=geobox.crs.crs_str,
                                dst_transform=geobox.affine,
                                dst_nodata=nodata)
    return numpy_to_xarray(array, geobox, name)

In [79]:
# Define the area of interest
# Open the shapefile and get the lat/lon bounds from it
def make_coords(filename,
                shapefile_dir='/g/data/u46/users/ck9738/Datasets/ML_for_smad_green/shapefiles_for_train/'):
    """Return the GDA94 (EPSG:4283) bounding box of a project-area shapefile.

    :param filename: name of the shapefile, looked up inside ``shapefile_dir``.
    :param shapefile_dir: directory holding the shapefiles; defaults to the
        location that used to be hard-coded here.
    :return: 4-tuple in the order of ``total_bounds``, i.e.
        ``(minx, miny, maxx, maxy)``. Earlier revisions labelled the second and
        fourth items ``ymax`` and ``ymin``; the returned values and their
        positions are unchanged, only the names were misleading.
    """
    shapefile_loc = os.path.join(shapefile_dir, filename)

    # Import the project-area shapefile.
    project_area = gpd.read_file(shapefile_loc)

    # Reproject to GDA94 lat/long so the bounds can drive a lat/lon datacube
    # query; the bounding box contains every geometry in the file.
    bounds = project_area['geometry'].to_crs(epsg=4283).total_bounds
    min_x, min_y, max_x, max_y = bounds
    return (min_x, min_y, max_x, max_y)

def load_data(xmin, ymax, xmax, ymin):
    """Load the first time slice of Sentinel-2A 'red' data for a lat/lon box.

    The query runs against the module-level ``s2aws`` Datacube, is grouped by
    solar day and is resampled to EPSG:3577 at 10 m resolution for the period
    2016-01-01 to 2016-03-30.

    :param xmin: one longitude bound of the box.
    :param ymax: one latitude bound of the box.
    :param xmax: the other longitude bound of the box.
    :param ymin: the other latitude bound of the box.
        Each pair of bounds may be supplied in either order; they are sorted
        before the query is built.
    :return: the loaded dataset restricted to its first time step.
    :raises ValueError: if the query matches no data.
    """
    product = 's2a_ard_granule'  # use 's2b_ard_granule' for S2B

    # Bands to load; only 'red' is requested here. Other band names:
    # 'blue', 'green', 'rededge1', 'rededge2', 'rededge3', 'nir1', 'nir2',
    # 'swir1', 'swir2', 'pixel_quality'
    bands_of_int = ['red']

    # Sort each pair so the ranges are (low, high) whichever way round the
    # caller supplied them.
    lat_range = (min(ymin, ymax), max(ymin, ymax))
    lon_range = (min(xmin, xmax), max(xmin, xmax))

    query = {
        'lat': lat_range,
        'lon': lon_range,
        'output_crs': 'EPSG:3577',
        'resolution': (-10, 10),
        'time': ('2016-01-01', '2016-03-30'),
    }

    data_sent = s2aws.load(product=product, measurements=bands_of_int,
                           group_by='solar_day', **query)

    # Without this check an empty result fails inside ``isel`` with a message
    # that says nothing about the query that caused it.
    if 'time' not in data_sent.dims or data_sent['time'].size == 0:
        raise ValueError('no %s data found for lat %r, lon %r, time %r'
                         % (product, lat_range, lon_range, query['time']))

    return data_sent.isel(time=0)
#data_sent = data_sent.where(clear_pixels)


In [80]:
# Turn an array into a pandas Series
def array_topanda_s(array):
    """Flatten a two-dimensional (y, x) array into a pandas Series.

    :param array: 2-D labelled array with ``y`` and ``x`` dimensions that
        supports ``stack`` (an xarray DataArray in this notebook).
    :return: Series holding the stacked values under a default integer index.
    :raises ValueError: if ``array`` is not two-dimensional.
    """
    # The old code unpacked ``array.shape`` into two names purely to compute an
    # unused value; keep the 2-D requirement it implied, but state it openly.
    if len(array.shape) != 2:
        raise ValueError('expected a 2-D array, got shape %r' % (tuple(array.shape),))

    flat = array.stack(z=('y', 'x'))
    # Give pandas the plain values so it does not have to interpret the
    # stacked, multi-indexed object itself.
    return pd.Series(flat.values)

In [81]:
# Training-area shapefiles ncas_base_1.shp to ncas_base_4.shp, in processing order.
filelist = ['ncas_base_{}.shp'.format(number) for number in range(1, 5)]

# Start from an empty training table.
master_panda = pd.DataFrame()

In [82]:


# Build one table per shapefile and concatenate them once at the end.
# ``DataFrame.append`` is deprecated (and gone in pandas 2.0), and calling it in
# a loop re-copies all accumulated rows on every iteration.
training_frames = []

# Bands of the smad_green raster to sample; the same for every shapefile.
bands = [1, 2, 3]

for files in filelist:
    # Bounding box of this training area, then Sentinel data covering it.
    xmin, ymax, xmax, ymin = make_coords(files)
    sent_data = load_data(xmin, ymax, xmax, ymin)

    # Use the Sentinel geobox to resample the NCAS product onto the same grid.
    ncas = rasterfile_to_xarray("/g/data/u46/users/fxy120/australian_woody/mosaics/lztmre_aus_y20002011_dm7a2_d20050630.tif",
                                sent_data.geobox, num_bands=(1), X_band=1)

    # Class label: True where 124 < ncas < 200.
    ncas_class = np.logical_and(ncas > 124, ncas < 200)

    # Flatten to 1-D and store the label as 0/1 in the 'class' column.
    ncass_series = array_topanda_s(ncas_class)
    little_panda = pd.DataFrame({'class': ncass_series.astype(int)})

    # Add one column per smad_green band, resampled onto the same geobox.
    for band in bands:
        smad_green = rasterfile_to_xarray('/g/data/u46/users/dxr251/woody-v6/woody_60_-149.tif',
                                          sent_data.geobox, num_bands=(1, 2, 3), X_band=band)
        smad_green_series = array_topanda_s(smad_green)

        little_panda[band] = smad_green_series

    training_frames.append(little_panda)

# ``pd.concat`` rejects an empty list, so fall back to an empty frame.
master_panda = pd.concat(training_frames, ignore_index=True) if training_frames else pd.DataFrame()
master_panda

I am densified (external_values, 1 elements)


  import sys


I am densified (external_values, 1 elements)
I am densified (external_values, 1 elements)
I am densified (external_values, 1 elements)


Unnamed: 0,class,1,2,3
0,0,0.003713,0.508305,0.498965
1,0,0.005193,0.577651,0.553511
2,0,0.005193,0.577651,0.553511
3,0,0.005193,0.577651,0.553511
4,0,0.009352,0.661980,0.591735
5,0,0.009352,0.661980,0.591735
6,1,0.004694,0.668501,0.624123
7,1,0.004694,0.668501,0.624123
8,1,0.004694,0.668501,0.624123
9,1,0.002938,0.536924,0.518079


In [83]:
# Save the assembled training table as CSV. No options are passed, so pandas'
# defaults apply and the row index is written as the first column.
master_panda.to_csv('/g/data/u46/users/ck9738/Datasets/ML_for_smad_green/shapefiles_for_train/ncas_truthed_landsat_124.csv')