In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
from shapely import wkt
from pointpats import random
from sklearn.cluster import KMeans
from geovoronoi import voronoi_regions_from_coords, points_to_coords
from shapely.ops import linemerge, unary_union, polygonize
from shapely.geometry import Polygon, MultiPoint, LineString

In [2]:
def generate_points(geom, distance_between_pts = 1e-6):
    """Sample points at regular intervals along the boundary of ``geom``.

    Parameters
    ----------
    geom
        Any object exposing ``.boundary``, where the boundary in turn
        provides ``.length`` and ``.interpolate(distance, normalized)``.
    distance_between_pts : float, optional
        Target spacing between consecutive samples, expressed in the same
        units as the boundary length. Defaults to ``1e-6``.

    Returns
    -------
    list
        The values returned by ``boundary.interpolate`` for distances running
        from ``0`` to the full boundary length, both ends included.
    """
    outline = geom.boundary
    total_length = outline.length

    # Both ends are sampled, hence the "+ 1". Because the count is truncated
    # to an integer, the realised spacing is total_length / (n_samples - 1),
    # which can be slightly larger than distance_between_pts.
    n_samples = int(total_length / distance_between_pts) + 1

    sampled = []
    for offset in np.linspace(0, total_length, n_samples):
        # Second argument False -> offset is an absolute distance, not a fraction.
        sampled.append(outline.interpolate(offset, False))

    return sampled

def subdivide_polygon(geom):
    """Cut ``geom`` along a chord joining two of its boundary points.

    The chord runs from the first boundary sample returned by
    ``generate_points`` to the sample at the middle index of that list. The
    chord is noded against the boundary and the resulting linework is
    polygonized.

    ``polygonize`` emits *every* face enclosed by the linework. For a polygon
    with holes that includes the holes themselves, and for a concave polygon
    it includes pockets between the chord and the boundary that lie outside
    the shape. Such faces are not part of ``geom`` and are dropped here, so
    only pieces located inside the input are returned.

    Parameters
    ----------
    geom
        Polygonal shapely geometry to split.

    Returns
    -------
    list
        The polygons produced by the cut that lie inside ``geom``.
    """
    points = generate_points(geom)

    # Chord endpoints: the start of the boundary and the middle sample.
    chord = LineString([points[0], points[int(0.5*len(points))]])

    # unary_union nodes the chord against the boundary so that polygonize
    # sees properly connected edges; linemerge then joins contiguous pieces.
    linework = linemerge(unary_union([chord, geom.boundary]))

    # A representative point is guaranteed to fall inside its own face, so
    # testing it against geom tells interior pieces apart from holes/pockets.
    return [
        piece for piece in polygonize(linework)
        if geom.contains(piece.representative_point())
    ]

In [3]:
#--------------------
# block geodata
#--------------------
# Read the block table, parse its WKT 'geometry' column into shapely objects
# and wrap the result as a GeoDataFrame tagged with EPSG:4326.
# NOTE(review): 'D:' with no slash after it is a drive-relative path on
# Windows (resolved against the current directory of drive D) — confirm this
# is the intended location.
geodata = gpd.GeoDataFrame(
    pd.read_csv('D:disaggregation-data/chicago-taxi/geodata/block.csv')
      .assign(geometry=lambda table: table['geometry'].apply(wkt.loads)),
    crs='epsg:4326',
)

#--------------------
# parameters
#--------------------
seed, size, n_clusters = 100, 1000, 2
np.random.seed(seed)   # seed NumPy's global random state
extreme_polygons = []  # accumulator filled by the split step

In [5]:
#--------------------
# split
#--------------------
# Results are appended to the module-level list extreme_polygons; re-running
# this cell without re-running the cell that creates the list appends the
# same pieces a second time.
for geom in tqdm(geodata.geometry):
    # Zero-width buffer; presumably used to repair invalid geometries — confirm.
    geom = geom.buffer(0)
    try:
        sub_polys = subdivide_polygon(geom)
    except Exception:
        # Best-effort fallback: keep the unsplit geometry when the cut fails.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so this long loop can still be interrupted.
        extreme_polygons.append(geom)
        continue

    for sub_poly in sub_polys:
        if sub_poly.geom_type == 'MultiPolygon':
            # Flatten multi-part results into their individual polygons.
            extreme_polygons.extend(sub_poly.geoms)
        else:
            extreme_polygons.append(sub_poly)

100%|██████████████████████████████████████████████████████████████████████████████| 6810/6810 [03:23<00:00, 33.40it/s]


In [7]:
#--------------------
# save split polygons
#--------------------
# Wrap the geometries collected in extreme_polygons in a table with a single
# column named 'the_geom'.
extreme_data = pd.DataFrame(extreme_polygons, columns=['the_geom'])
# Write the table as CSV without the index column.
# NOTE(review): absolute, machine-specific output path — confirm before
# running on another machine.
extreme_data.to_csv('D://disaggregation-data/chicago-taxi/raw-data/chicago_extreme_2010.csv', index=False)