In [9]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json
import glob

from shapely.geometry import Point
from shapely.geometry import box
from shapely.geometry import mapping
from shapely.ops import unary_union, polygonize
from shapely.geometry import MultiPolygon

from joblib import Parallel, delayed
from tqdm import tqdm

# input my libraries
from tidyzoning import find_district_idx
from tidyzoning import find_bldg_type
from tidyzoning import check_land_use
from tidyzoning import get_zoning_req
from tidyzoning import check_fl_area
from tidyzoning import check_far
from tidyzoning import check_height
from tidyzoning import check_stories
from tidyzoning import check_bedrooms
from tidyzoning import check_lot_coverage
from tidyzoning import check_unit_density
from tidyzoning import check_unit_qty
from tidyzoning import add_setbacks
from tidyzoning import get_buildable_area
from tidyzoning import zoning_analysis_pipeline
from tidyzoning import unify_tidybuilding
from tidyzoning import filter_constraints
from tidyzoning import check_zoning_process
from tidyzoning import parcels_in_zoning
from tidyzoning import parcel_in_confidence
from tidyzoning import parcel_in_nonconfidence
from tidyzoning import generate_parcel_info
from tidyzoning import check_height_eave

from tidyzoning.check_footprint import check_footprint
from tidyzoning.check_unit_size import check_unit_size
from tidyzoning.process_all_districts import process_all_districts

from tidyzoning import compare_id_differences

In [None]:
tidyparcel_feeds = gpd.read_file(r"/Users/houpuli/Dropbox/ceshi/tidyparcel_feeds.geojson")
# 1. Filter out the centroid rows
lines = tidyparcel_feeds[tidyparcel_feeds['side'] != 'centroid']

# 2. Define a helper to turn a group of lines into a (multi)polygon
def lines_to_polygon(geom_series):
    merged = unary_union(geom_series)
    polys = list(polygonize(merged))
    if len(polys) == 1:
        return polys[0]
    else:
        return MultiPolygon(polys)

# 3. Apply per parcel_id
records = []
for pid, grp in lines.groupby('parcel_id'):
    poly = lines_to_polygon(grp.geometry)
    records.append({'parcel_id': pid, 'geometry': poly})

# 4. Build the new GeoDataFrame
tidyparcel_poly = gpd.GeoDataFrame(records,
                                   crs=tidyparcel_feeds.crs,
                                   geometry='geometry')
tidyparcel_poly

Unnamed: 0,parcel_id,geometry
0,Collin_County_combined_parcel_1,"POLYGON ((-96.53485 33.19556, -96.53499 33.195..."
1,Collin_County_combined_parcel_10,"POLYGON ((-96.79240 33.18075, -96.79260 33.180..."
2,Collin_County_combined_parcel_1000,"POLYGON ((-96.72820 33.07760, -96.72776 33.077..."
3,Collin_County_combined_parcel_10000,"POLYGON ((-96.73106 33.18067, -96.73106 33.180..."
4,Collin_County_combined_parcel_100000,"POLYGON ((-96.79686 33.00334, -96.79692 33.003..."
...,...,...
1953246,Wise_County_combined_parcel_995,"POLYGON ((-97.46807 33.05315, -97.46807 33.053..."
1953247,Wise_County_combined_parcel_996,"POLYGON ((-97.46807 33.05315, -97.46771 33.053..."
1953248,Wise_County_combined_parcel_997,"POLYGON ((-97.46815 33.05298, -97.46814 33.052..."
1953249,Wise_County_combined_parcel_998,"POLYGON ((-97.46796 33.05267, -97.46834 33.052..."


In [None]:
building_overture = gpd.read_file(r"/Users/houpuli/Dropbox/ceshi/building_overture/OneDrive_1_5-5-2025.zip")
building_overture['centrid'] = building_overture['geometry'].centroid
building_overture = gpd.GeoDataFrame(building_overture, geometry='centrid')
building_overture


  building_overture['centrid'] = building_overture['geometry'].centroid


Unnamed: 0,id,names,version,height,num_floors,is_undergr,num_floo_1,roof_shape,roof_direc,roof_orien,class,subtype,sources,bbox,geometry,centrid
0,08b26dda26052fff0200252f7d13a3a1,,0,,,0,,,,,,,"[{""property"":"""",""dataset"":""Microsoft ML Buildi...","{""xmin"":-98.57355499267578,""xmax"":-98.57342529...","POLYGON ((-98.57354 33.75800, -98.57354 33.758...",POINT (-98.57349 33.75806)
1,08b26dda26053fff0200824eea447a43,,0,,,0,,,,,,,"[{""property"":"""",""dataset"":""Microsoft ML Buildi...","{""xmin"":-98.57342529296875,""xmax"":-98.57331848...","POLYGON ((-98.57332 33.75787, -98.57342 33.757...",POINT (-98.57337 33.75792)
2,08b26dda26008fff0200b1781a575e62,,0,,,0,,,,,,,"[{""property"":"""",""dataset"":""Microsoft ML Buildi...","{""xmin"":-98.573974609375,""xmax"":-98.5738525390...","POLYGON ((-98.57396 33.75946, -98.57395 33.759...",POINT (-98.57391 33.75949)
3,08b26dda2600cfff02001989ef62597e,,0,3.242130,,0,,,,,,,"[{""property"":"""",""dataset"":""Microsoft ML Buildi...","{""xmin"":-98.573974609375,""xmax"":-98.5738525390...","POLYGON ((-98.57385 33.75957, -98.57396 33.759...",POINT (-98.57391 33.75964)
4,08b26dda2689afff02005f792d56e146,,0,,,0,,,,,,,"[{""property"":"""",""dataset"":""Microsoft ML Buildi...","{""xmin"":-98.57506561279297,""xmax"":-98.57484436...","POLYGON ((-98.57489 33.76548, -98.57499 33.765...",POINT (-98.57495 33.76555)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3507593,08b26cd4848c4fff02005e8a00423ef6,,0,2.991583,,0,,,,,,,"[{""property"":"""",""dataset"":""Microsoft ML Buildi...","{""xmin"":-96.18679809570313,""xmax"":-96.18667602...","POLYGON ((-96.18680 33.71890, -96.18668 33.718...",POINT (-96.18674 33.71887)
3507594,08b26cd4848f1fff0200a641cf1678e7,,0,,,0,,,,,,,"[{""property"":"""",""dataset"":""Microsoft ML Buildi...","{""xmin"":-96.1871109008789,""xmax"":-96.187042236...","POLYGON ((-96.18708 33.71916, -96.18710 33.719...",POINT (-96.18708 33.71919)
3507595,08b26cd4848f1fff02007ee37c618b84,,0,2.817342,,0,,,,,,,"[{""property"":"""",""dataset"":""Microsoft ML Buildi...","{""xmin"":-96.18690490722656,""xmax"":-96.18675231...","POLYGON ((-96.18676 33.71926, -96.18676 33.719...",POINT (-96.18682 33.71921)
3507596,08b26cd4848f0fff0200786ef731adeb,,0,4.370954,,0,,,,,,,"[{""property"":"""",""dataset"":""Microsoft ML Buildi...","{""xmin"":-96.1874008178711,""xmax"":-96.187278747...","POLYGON ((-96.18728 33.71959, -96.18728 33.719...",POINT (-96.18733 33.71954)


In [20]:
# 1) join without indicator
joined = gpd.sjoin(tidyparcel_poly, building_overture, how='left',predicate='intersects')
joined['_merge'] = np.where(joined['index_right'].isna(),'left_only','both')
tidyparcel_poly_no_point = joined[joined['_merge'] == 'left_only']
tidyparcel_poly_no_point

Unnamed: 0,parcel_id,geometry_left,index_right,id,names,version,height,num_floors,is_undergr,num_floo_1,roof_shape,roof_direc,roof_orien,class,subtype,sources,bbox,geometry_right,_merge
27,Collin_County_combined_parcel_100027,"POLYGON ((-96.81967 33.17864, -96.81968 33.178...",,,,,,,,,,,,,,,,,left_only
34,Collin_County_combined_parcel_100035,"POLYGON ((-96.61108 33.24262, -96.61109 33.242...",,,,,,,,,,,,,,,,,left_only
41,Collin_County_combined_parcel_100041,"POLYGON ((-96.66278 33.18031, -96.66278 33.180...",,,,,,,,,,,,,,,,,left_only
48,Collin_County_combined_parcel_100050,"POLYGON ((-96.55438 33.01101, -96.55446 33.011...",,,,,,,,,,,,,,,,,left_only
50,Collin_County_combined_parcel_100053,"POLYGON ((-96.67211 33.18288, -96.67201 33.183...",,,,,,,,,,,,,,,,,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1953154,Wise_County_combined_parcel_9382,"POLYGON ((-97.68634 33.14858, -97.68635 33.148...",,,,,,,,,,,,,,,,,left_only
1953156,Wise_County_combined_parcel_9384,"POLYGON ((-97.68592 33.14930, -97.68592 33.149...",,,,,,,,,,,,,,,,,left_only
1953180,Wise_County_combined_parcel_9512,"POLYGON ((-97.57984 33.23131, -97.58017 33.231...",,,,,,,,,,,,,,,,,left_only
1953185,Wise_County_combined_parcel_9556,"POLYGON ((-97.54602 33.07973, -97.54645 33.079...",,,,,,,,,,,,,,,,,left_only


In [None]:

vacant_id

array(['Collin_County_combined_parcel_100027',
       'Collin_County_combined_parcel_100035',
       'Collin_County_combined_parcel_100041', ...,
       'Wise_County_combined_parcel_9512',
       'Wise_County_combined_parcel_9556',
       'Wise_County_combined_parcel_9600'], dtype=object)

In [None]:

tidyparcel_van_occ

Unnamed: 0,parcel_id
4,Collin_County_combined_parcel_1
9,Collin_County_combined_parcel_10
14,Collin_County_combined_parcel_1000
19,Collin_County_combined_parcel_10000
24,Collin_County_combined_parcel_100000
...,...
10541993,Wise_County_combined_parcel_995
10541998,Wise_County_combined_parcel_996
10542004,Wise_County_combined_parcel_997
10542009,Wise_County_combined_parcel_998


In [None]:
vacant_id = tidyparcel_poly_no_point['parcel_id'].unique()
tidyparcel_van_occ = tidyparcel_feeds[tidyparcel_feeds['side'] == 'centroid'][['parcel_id']]

tidyparcel_van_occ['parcel status'] = np.where(
    tidyparcel_van_occ['parcel_id'].isin(vacant_id),
    'vacant_parcel',
    'occupied_parcel'
)

tidyparcel_feeds_land_use = pd.read_csv(r"/Users/houpuli/Dropbox/ceshi/tidyparcel_feeds_land_use.csv")

tidyparcel_attribute = pd.merge(
    tidyparcel_feeds_land_use,
    tidyparcel_van_occ,
    how='left',
    left_on='parcel_id',
    right_on='parcel_id'
)

tidyparcel_attribute.loc[tidyparcel_attribute['confidence'] == 'confidence_parcel', 'confidence'] = True
tidyparcel_attribute.loc[tidyparcel_attribute['confidence'] == 'non_confidence_parcel', 'confidence'] = False

tidyparcel_attribute

Unnamed: 0,parcel_id,landuse,landuse_spec,confidence,parcel status
0,Collin_County_combined_parcel_1,R,A1,True,occupied_parcel
1,Collin_County_combined_parcel_10,R,A1,True,occupied_parcel
2,Collin_County_combined_parcel_1000,R,A1,True,occupied_parcel
3,Collin_County_combined_parcel_10000,R,A1,True,occupied_parcel
4,Collin_County_combined_parcel_100000,R,A1,True,occupied_parcel
...,...,...,...,...,...
1953246,Wise_County_combined_parcel_995,,,False,occupied_parcel
1953247,Wise_County_combined_parcel_996,,,False,occupied_parcel
1953248,Wise_County_combined_parcel_997,,,False,occupied_parcel
1953249,Wise_County_combined_parcel_998,,,False,occupied_parcel
