# Get dam time history - parallel workflow

In [2]:
from datacube import Datacube
from datacube.utils import geometry
from datacube.storage import masking
import fiona
import rasterio.features
import numpy as np
import csv
import multiprocessing

Populating the interactive namespace from numpy and matplotlib


In [13]:
# Location of the shapefile that holds the dam polygons to be processed
shape_file = '/g/data/r78/cek156/dea-notebooks/Crop_mapping/Dams/OFS201213ID.shp'
print('Loading the shapefile')

def _pixel_percentages(wet, dry, total):
    """Return (wet %, dry %, unknown %) for `total` pixels inside a polygon.

    When `total` is zero the result is 0 % wet, 0 % dry and 100 % unknown, so
    the timestep is rejected by the valid-observation filter.
    """
    if total == 0:
        return 0.0, 0.0, 100.0
    return (wet / total * 100,
            dry / total * 100,
            (total - (wet + dry)) / total * 100)


def FindOutHowFullTheDamIs(shapes, crs):
    """Write the wet-pixel time history of one dam polygon to a CSV file.

    Loads the `wofs_albers` product over the polygon, counts the wet and dry
    pixels inside the polygon at every timestep, keeps only the timesteps
    where fewer than 10 % of the polygon's pixels are neither wet nor dry,
    and writes those timesteps to `TestOutputs/<polygon ID>.txt`.

    No file is written when nothing was loaded for the polygon or when no
    timestep passes the valid-observation filter.

    Parameters
    ----------
    shapes : mapping
        A single shapefile feature. Must provide `shapes['geometry']` and
        `shapes['properties']['ID']`.
    crs
        Coordinate reference system of the feature geometry, passed straight
        to `geometry.Geometry`.

    Returns
    -------
    None
    """
    dc = Datacube(app = 'Polygon drill')
    polyName = shapes['properties']['ID']
    print(polyName)
    geom = geometry.Geometry(shapes['geometry'], crs=crs)

    ## Load in all of the WOFS layers that cover the polygon
    WOFL = dc.load(product='wofs_albers', geopolygon=geom)
    if 'water' not in WOFL.data_vars:
        # Nothing was loaded for this polygon, so there is no grid to
        # rasterise the polygon onto and no time history to write.
        print('No WOFS data found for {}'.format(polyName))
        return

    # Rasterise the polygon onto the data grid. With invert=True the mask is
    # True inside the polygon, which is what `.where(mask)` below needs.
    mask = rasterio.features.geometry_mask([geom.to_crs(WOFL.geobox.crs)],
                                           out_shape=WOFL.geobox.shape,
                                           transform=WOFL.geobox.affine,
                                           all_touched=False,
                                           invert=True)

    ## Work out how full the dam is at every time step
    DamCapacityPc = []
    DamCapacityCt = []
    InvalidObservations = []
    # Pixel count of the most recent timestep; reported in the CSV header
    MaskedAll = 0
    for ix in range(WOFL.time.size):
        # Grab the data for our timestep
        AllTheBitFlags = WOFL.water.isel(time = ix)
        # Find all the wet/dry pixels for that timestep
        WetPixels = masking.make_mask(AllTheBitFlags, wet=True)
        DryPixels = masking.make_mask(AllTheBitFlags, dry=True)
        # Apply the polygon mask and count the number of observations
        MaskedAll = AllTheBitFlags.where(mask).count().item()
        MaskedWet = WetPixels.where(mask).sum().item()
        MaskedDry = DryPixels.where(mask).sum().item()
        # Turn our counts into percents
        WaterPercent, _, UnknownPercent = _pixel_percentages(
            MaskedWet, MaskedDry, MaskedAll)
        DamCapacityPc.append(WaterPercent)
        InvalidObservations.append(UnknownPercent)
        DamCapacityCt.append(MaskedWet)

    ## Filter out timesteps with less than 90% valid observations
    ValidMask = [i for i, x in enumerate(InvalidObservations) if x < 10]
    if not ValidMask:
        # Bail out before indexing the time axis with an empty list
        print('No valid observations for {}'.format(polyName))
        return
    ValidObs = WOFL.time[ValidMask].dropna(dim = 'time')
    ValidCapacityPc = [DamCapacityPc[i] for i in ValidMask]
    ValidCapacityCt = [DamCapacityCt[i] for i in ValidMask]

    # splitlines() (unlike split('\n')) yields no trailing empty entry, so an
    # empty DateList really does mean "no dates".
    DateList = ValidObs.to_dataframe().to_csv(None, header=False, index=False).splitlines()
    if not DateList:
        return

    # newline='' lets the csv module control line endings itself
    with open('/g/data/r78/cek156/dea-notebooks/Crop_mapping/Dams/TestOutputs/{}.txt'.format(polyName),
              'w', newline='') as f:
        writer = csv.writer(f)
        Headings = ['Observation Date', 'Wet pixel count (n = {0})'.format(MaskedAll), 'Wet pixel percentage']
        writer.writerow(Headings)
        writer.writerows(zip(DateList, ValidCapacityCt, ValidCapacityPc))

# Fan the polygons out over a pool of worker processes (by default one worker
# per available CPU core). Using the pool as a context manager guarantees the
# workers are cleaned up, even if the run is interrupted.
with multiprocessing.Pool() as p:
    pending = []
    with fiona.open(shape_file) as collection:
        # The CRS is read once from the collection and reused for every feature
        crs = geometry.CRS(collection.crs_wkt)
        for feature in collection:
            # Queue one task per polygon; the pool schedules them across workers.
            # For serial debugging call FindOutHowFullTheDamIs(feature, crs) here.
            pending.append((feature['properties']['ID'],
                            p.apply_async(FindOutHowFullTheDamIs, [feature, crs])))

    # No more tasks will be submitted; wait for every polygon to finish and
    # report worker failures instead of silently discarding them.
    p.close()
    for polygon_id, result in pending:
        try:
            result.get()
        except Exception as err:
            print('Processing polygon {} failed: {!r}'.format(polygon_id, err))
    p.join()

KeyboardInterrupt: 