# Match up polygons and create attributes

In [1]:
%pylab notebook

from datacube import Datacube
from datacube.utils import geometry
from datacube.storage import masking
import fiona
import rasterio.features
import numpy as np
import geopandas as gp
import pandas as pd

Populating the interactive namespace from numpy and matplotlib


## Set up the file paths

In [2]:
# Directory holding every shapefile this notebook reads or writes. Change this
# single value to point the notebook at a different location.
DamsDirectory = '/g/data/r78/cek156/dea-notebooks/Dams'

# All years processed so far
CumulativeDamShapeFile = DamsDirectory + '/UpTo2013Test.shp'

# New year to add to the cumulative shapefile
NewYearToAdd = DamsDirectory + '/Test2014.shp'

# Output shapefiles: the updated master polygon set, and the 'naughty corner'
# polygons that could not be matched automatically
CumulativeDamShapeFileOutput = DamsDirectory + '/TestMergeYears.shp'
NaughtyShapefile = DamsDirectory + '/TestNaughty.shp'

## Read in the shapefiles

In [3]:
# Load both polygon sets into GeoDataFrames: the cumulative (master) set first,
# then the polygons for the year being added
CumulativeDams, NewDams = (
    gp.read_file(ShapefilePath)
    for ShapefilePath in (CumulativeDamShapeFile, NewYearToAdd)
)

## Find where the new polygons are contained within the master polygons

That is, where a new polygon sits completely inside a master polygon. In this case the new polygon has only captured part of the water body.

In [4]:
# Spatially join the master polygons (left) to the new polygons (right), keeping
# only the pairs where a master polygon contains a new polygon
Contains = gp.sjoin(left_df=CumulativeDams, right_df=NewDams, how='inner', op='contains')

# 'index_right' holds the index of the new polygon in each matched pair;
# collect those indices in ascending order
ContainsIndex = list(Contains['index_right'])
ContainsIndex.sort()

### Remove the polygons that are contained within the master polygon set

In [5]:
# Keep only the new polygons whose index was not flagged as contained within a
# master polygon
NewDamsNotContained = NewDams[np.logical_not(NewDams.index.isin(ContainsIndex))]

## Find where our new polygons intersect with the master polygons

In [6]:
# Perform a spatial join for polygons that intersect
# Pair each remaining new polygon (left) with every master polygon (right) that
# it intersects; the master polygon's index lands in the 'index_right' column
IntersectingPolys = gp.sjoin(
    left_df=NewDamsNotContained,
    right_df=CumulativeDams,
    how='inner',
    op='intersects',
)

## Group polygons based on the new polygons' index

This will allow for multiple old polygons to match up with a single new polygon

In [26]:
# One group per new polygon: rows sharing an index value are the master
# polygons matched to that same new polygon
GroupedPolygonsNew = IntersectingPolys.groupby(level=0)

## Group polygons based on the master polygons' index

This will allow for multiple new polygons to match up with a single master polygon

In [24]:
# One group per master polygon: rows sharing an 'index_right' value are the new
# polygons matched to that same master polygon
GroupedPolygonsMaster = IntersectingPolys.groupby(by=IntersectingPolys.loc[:, 'index_right'])

# Now deal with all our overlapping polygons

### Sort the polygons based on where:

- The new and master polygon areas differ by less than 20 pixels, or by no more than 20%
    - Append the index of the new larger polygon
    - We will keep the larger polygon and replace it in the master polygon set
- New polygon is much larger than the original
    - Append the indexes of both polygons
    - We will move both polygons (new one and master) into the 'naughty corner'

In [8]:
# Tolerances used to decide whether a new polygon and a master polygon describe
# the same water body.
# MaxAreaDifference is described in the notebook text as "20 pixels"
# (presumably 25 m x 25 m pixels, i.e. 625 square metres each -- TODO confirm).
MaxAreaDifference = 12500
MaxPercentDifference = 20

# Index bookkeeping, one entry per (new polygon, master polygon) pair examined
PolygonsToKeep = []          # new polygon indices that matched a master polygon
MasterPolygonsToMerge = []   # master polygon indices to be replaced by a merged polygon
NaughtyCornerMaster = []     # master polygon indices that were too different
NaughtyCornerNew = []        # new polygon indices that were too different

# Merged geometries are collected in a list and turned into a GeoDataFrame once,
# after the loop, rather than concatenating a frame on every iteration
MergedGeometries = []

for NewIndex, Group in GroupedPolygonsNew:
    # Every row in the group refers to the same new polygon, so its area can be
    # read from the first row
    NewArea = Group['area_left'].values[0]

    # Master polygons in this group that passed the similarity test
    MastersAccepted = []

    for MasterIndex, MasterArea in zip(Group['index_right'].values, Group['area_right'].values):
        MasterArea = int(MasterArea)
        AreaDifference = NewArea - MasterArea
        # NOTE(review): the absolute-area test is signed, so a new polygon that is
        # smaller than the master polygon always passes. This matches the "new
        # polygon is much larger" wording of the naughty corner rule, but not the
        # "within 20 pixels" wording -- confirm which is intended.
        if (AreaDifference < MaxAreaDifference) or (abs(AreaDifference / MasterArea * 100) <= MaxPercentDifference):
            PolygonsToKeep.append(NewIndex)
            # Record the master polygon index so we know to replace it
            MasterPolygonsToMerge.append(MasterIndex)
            MastersAccepted.append(MasterIndex)
        else:
            # The polygons are too different, so we will deal with them later
            NaughtyCornerNew.append(NewIndex)
            NaughtyCornerMaster.append(MasterIndex)

    # Build ONE merged polygon per new polygon, from the new polygon and only
    # the master polygons that passed the test. Previously the union pulled in
    # every master polygon in the group (including rejected ones) and was
    # appended once per accepted master polygon, producing duplicates.
    if MastersAccepted:
        GeometriesToUnion = list(CumulativeDams.geometry.loc[MastersAccepted])
        GeometriesToUnion.append(NewDamsNotContained.geometry.loc[NewIndex])
        MergedGeometries.append(gp.GeoSeries(GeometriesToUnion).unary_union)

# Carry the master polygon CRS over so the merged polygons are written with a
# projection
MergedPolygons = gp.GeoDataFrame(geometry=MergedGeometries, crs=CumulativeDams.crs)
MergedPolygons['area'] = MergedPolygons.geometry.area

## Create our new master polygon list and update the naughty corner

The new master polygon set will comprise:
- Master polygons NOT in the `MasterPolygonsToMerge` or `NaughtyCornerMaster` index lists
- Merged polygons from the `MergedPolygons` GeoDataFrame
- New polygons identified, but not in master (not in `NaughtyCornerNew` or `PolygonsToKeep`)

The new 'naughty corner' polygon set will comprise:
- Master polygons in the `NaughtyCornerMaster` index list
- New polygons in the `NaughtyCornerNew` index list

In [9]:
# Master polygons that were neither merged with a new polygon nor sent to the
# naughty corner
MastersToKeep = CumulativeDams.loc[~CumulativeDams.index.isin(MasterPolygonsToMerge + NaughtyCornerMaster)]

# New polygons that did not match any master polygon in the sorting loop
NewlyFoundDams = NewDamsNotContained.loc[~NewDamsNotContained.index.isin(NaughtyCornerNew + PolygonsToKeep)]

# The three frames do not share identical columns; sort=False keeps the column
# order of the inputs instead of relying on pandas' deprecated implicit sorting
# (which raised a FutureWarning here)
NewMasterPolygons = pd.concat([MastersToKeep, MergedPolygons, NewlyFoundDams], ignore_index=True, sort=False)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  after removing the cwd from sys.path.


In [10]:
# A polygon can appear in several (new, master) pairs, so the index lists may
# hold the same label more than once. De-duplicate them (keeping first-seen
# order) so each polygon is written to the naughty corner only once.
MastersToMove = CumulativeDams.loc[list(dict.fromkeys(NaughtyCornerMaster))]
NewToMove = NewDamsNotContained.loc[list(dict.fromkeys(NaughtyCornerNew))]

# Stack the master and new polygons into a single naughty corner set
NaughtyCorner = pd.concat([MastersToMove, NewToMove], ignore_index=True)

## Write out the results to shapefile

In [11]:
# Write the updated master polygon set, then the naughty corner set, as ESRI
# Shapefiles
NewMasterPolygons.to_file(filename=CumulativeDamShapeFileOutput, driver='ESRI Shapefile')
NaughtyCorner.to_file(filename=NaughtyShapefile, driver='ESRI Shapefile')

In [16]:
# Write the untouched master polygons to the current working directory for
# inspection
MastersToKeep.to_file(filename='MastersToKeep.shp')

In [17]:
# Tag the merged polygons with EPSG:3577 so the shapefile written from them
# carries a projection
MergedPolygons.crs = dict(init='epsg:3577')

In [18]:
# Write the merged polygons to the current working directory for inspection
MergedPolygons.to_file(filename='MergedPolygons.shp')

In [19]:
# Write the newly identified polygons to the current working directory for
# inspection
NewlyFoundDams.to_file(filename='NewlyFoundDams.shp')