In [15]:
import geopandas as gpd
from shapely.geometry.polygon import Polygon
from shapely.geometry.multipolygon import MultiPolygon
from geopandas.tools import sjoin
import glob
import os

In [16]:
def explodeMultiPolygons(file_in):
    """Read a vector file and split every MultiPolygon into single Polygons.

    Parameters
    ----------
    file_in : str
        Path of the file to read with ``GeoDataFrame.from_file``.

    Returns
    -------
    GeoDataFrame
        One row per Polygon, with a fresh 0..n-1 index. Rows whose geometry
        is a Polygon are copied unchanged; rows whose geometry is a
        MultiPolygon are repeated once per member polygon, with the attribute
        values duplicated. Rows with any other geometry type are dropped.
        The CRS of the input is carried over to the result.
    """
    df_in = gpd.GeoDataFrame.from_file(file_in)

    # Collect the output rows in a plain list and build the frame once:
    # DataFrame.append no longer exists in pandas 2.x and growing a frame
    # row by row is quadratic anyway.
    records = []
    for _, row in df_in.iterrows():
        geometry = row.geometry
        if type(geometry) == Polygon:
            records.append(row)
        elif type(geometry) == MultiPolygon:
            # `.geoms` works on both Shapely 1.x and 2.x; len()/indexing on
            # the MultiPolygon itself was removed in Shapely 2.0.
            for part in geometry.geoms:
                piece = row.copy()
                piece['geometry'] = part
                records.append(piece)

    df_out = gpd.GeoDataFrame(records, columns=df_in.columns, crs=df_in.crs)
    # The collected rows still carry their source index labels (duplicated
    # for exploded parts); renumber to match the previous ignore_index=True.
    return df_out.reset_index(drop=True)

In [17]:
def removeMultipolygons(geodataframe):
    """Return only the rows of `geodataframe` whose geometry is a Polygon.

    Parameters
    ----------
    geodataframe : GeoDataFrame
        Frame with a 'geometry' column.

    Returns
    -------
    GeoDataFrame
        The rows whose geometry is exactly of type Polygon, in their original
        order and renumbered 0..n-1. Every other geometry type (not only
        MultiPolygon) is discarded. The input frame is not modified.
    """
    # Select by position in a single pass instead of appending row by row:
    # DataFrame.append no longer exists in pandas 2.x, and slicing the input
    # keeps its column dtypes and CRS. An empty position list still yields a
    # frame with all columns.
    keep = [
        position
        for position, geom in enumerate(geodataframe['geometry'])
        if type(geom) == Polygon
    ]
    return geodataframe.iloc[keep].reset_index(drop=True)

In [18]:
def loadFile(file_path):
    """Read the file at `file_path` with geopandas and return a GeoDataFrame."""
    raw_frame = gpd.read_file(file_path)
    return gpd.GeoDataFrame(raw_frame)

In [19]:
def buildingsInParcels(buildings, parcels, new_file):
    """Find the buildings that lie within a parcel and save them to a file.

    Parameters
    ----------
    buildings : str
        Path of the building-footprint file.
    parcels : str
        Path of the parcel file.
    new_file : str
        Path the selected buildings are written to.

    Returns
    -------
    GeoDataFrame
        The result of the spatial join (building attributes plus the
        attributes of the containing parcel), restricted to rows whose
        geometry is a Polygon. The same frame is written to `new_file`.
    """
    buildings = loadFile(buildings)
    parcels = loadFile(parcels)

    # A spatial join compares raw coordinates, so both layers must share a
    # CRS. Reproject the parcels when both CRSs are known and differ; the
    # output keeps the buildings' CRS.
    if (buildings.crs is not None and parcels.crs is not None
            and buildings.crs != parcels.crs):
        parcels = parcels.to_crs(buildings.crs)

    # geopandas renamed `op` to `predicate` (0.10) and later dropped `op`.
    # Prefer the new keyword and fall back only for releases that predate it.
    try:
        affected_buildings = gpd.sjoin(buildings, parcels, predicate='within')
    except TypeError:
        affected_buildings = gpd.sjoin(buildings, parcels, op='within')

    affected_buildings = removeMultipolygons(affected_buildings)
    affected_buildings.to_file(new_file)
    return affected_buildings

In [20]:
def getBaseName(string):
    """Extract the trailing name token from a file path.

    The directory part is discarded first, then the text after the last
    underscore of the file name and before the first following dot is
    returned, e.g. ``".../clipped_structures_aransas.shp"`` -> ``"aransas"``.

    Parameters
    ----------
    string : str
        File name or path.

    Returns
    -------
    str
        The extracted token.
    """
    # Work on the file name only: an underscore in a directory name must not
    # be mistaken for the separator when the file name itself has none.
    file_name = os.path.basename(string)
    return file_name.split('_')[-1].split('.')[0]

In [21]:
# Root of the GIS data and the two per-county shapefile directories below it.
path_base = "/media/seanandrewchen/seanchen_ssd/gis-data/"
path_parcels = "training-data/affected-parcels-by-county/"
path_structures = "structures/structures-by-county/"

counties_parcels = sorted(glob.glob(os.path.join(path_base, path_parcels, "*.shp")))
counties_structures = sorted(glob.glob(os.path.join(path_base, path_structures, "*.shp")))

# County names are taken from the structures file names.
counties = [getBaseName(structures_path) for structures_path in counties_structures]

# The two listings are paired purely by sort position, so a missing or extra
# file in either directory would silently truncate or shift the pairs.
# Fail loudly instead of joining the wrong counties.
if len(counties_structures) != len(counties_parcels):
    raise ValueError(
        "Found {} structures shapefiles but {} parcels shapefiles; "
        "cannot pair them by position.".format(
            len(counties_structures), len(counties_parcels)))

# Each entry is (structures shapefile, parcels shapefile, county name).
files = list(zip(counties_structures, counties_parcels, counties))

mismatched = [
    (structures_path, parcels_path)
    for structures_path, parcels_path, county in files
    if county.lower() not in os.path.basename(parcels_path).lower()
]
if mismatched:
    raise ValueError(
        "Structures/parcels files do not refer to the same county: "
        "{}".format(mismatched))

In [22]:
# Display the (structures shapefile, parcels shapefile, county name) triples.
files

[('/media/seanandrewchen/seanchen_ssd/gis-data/structures/structures-by-county/clipped_structures_aransas.shp',
  '/media/seanandrewchen/seanchen_ssd/gis-data/training-data/affected-parcels-by-county/affected-parcels-Aransas.shp',
  'aransas'),
 ('/media/seanandrewchen/seanchen_ssd/gis-data/structures/structures-by-county/clipped_structures_brazoria.shp',
  '/media/seanandrewchen/seanchen_ssd/gis-data/training-data/affected-parcels-by-county/affected-parcels-Brazoria.shp',
  'brazoria'),
 ('/media/seanandrewchen/seanchen_ssd/gis-data/structures/structures-by-county/clipped_structures_calhoun.shp',
  '/media/seanandrewchen/seanchen_ssd/gis-data/training-data/affected-parcels-by-county/affected-parcels-Calhoun.shp',
  'calhoun'),
 ('/media/seanandrewchen/seanchen_ssd/gis-data/structures/structures-by-county/clipped_structures_fortbend.shp',
  '/media/seanandrewchen/seanchen_ssd/gis-data/training-data/affected-parcels-by-county/affected-parcels-FortBend.shp',
  'fortbend'),
 ('/media/sean

In [23]:
# List each county name next to its position in `files`.
for index, file in enumerate(files):
    print("{}:{}".format(index, file[2]))

0:aransas
1:brazoria
2:calhoun
3:fortbend
4:galveston
5:harris
6:jefferson
7:matagorda
8:montgomery
9:nueces
10:orange
11:victoria
12:wharton


In [14]:
# Run the buildings-in-parcels selection for every county; each result is
# written to "affected_structures_<county>.shp".
for file in files:
    structures_path, parcels_path, county = file
    new_name = "affected_structures_{}.shp".format(county)
    buildingsInParcels(structures_path, parcels_path, new_name)

In [None]:
# Run a single county on its own. The entry is looked up by county name
# rather than by list position, so the inputs always match the hard-coded
# output file name even if the set of counties changes.
harris_entry = next(entry for entry in files if entry[2] == "harris")
buildingsInParcels(harris_entry[0], harris_entry[1], "affected_structures_harris.shp")