In [1]:
import geopandas as gpd
from geofeather import from_geofeather
from geofeather import to_geofeather

In [1]:
# Expected layout of the data directory (paths are relative to the working directory):
# --data
#   --ParcelData
#     --statewide_parcel_data.feather
#     --statewide_parcel_data.feather.crs
#   --AddressData
#     --statewide_address_data.feather
#     --statewide_address_data.feather.crs

In [2]:
# Read the statewide parcel layer and the statewide address layer from their
# geofeather files (parcels first, then addresses).
parcel_whole, address_whole = (
    from_geofeather(feather_path)
    for feather_path in (
        "data/ParcelData/statewide_parcel_data.feather",
        "data/AddressData/statewide_address_data.feather",
    )
)

In [3]:
# Keep only the rows belonging to Quincy. The two layers store the city name
# in different columns: CITY for parcels, COMMUNITY_NAME for addresses.
is_quincy_parcel = parcel_whole["CITY"] == "QUINCY"
is_quincy_address = address_whole["COMMUNITY_NAME"] == "QUINCY"

parcel_quincy = parcel_whole[is_quincy_parcel]
address_quincy = address_whole[is_quincy_address]

In [4]:
# Report how many rows survived the Quincy filter in each layer.
parcel_count = len(parcel_quincy)
address_count = len(address_quincy)

print("Number of parcels in Quincy: %d." % parcel_count)
print("Number of addresses in Quincy: %d." % address_count)

Number of parcels in Quincy: 26537.
Number of addresses in Quincy: 56431.


In [None]:
# Preview the first rows of the Quincy parcels. The previous cell content was a
# truncated identifier (`parcel_qui`) that raises NameError on a fresh run.
# NOTE(review): assumes the intended name was `parcel_quincy` — confirm.
parcel_quincy.head()

In [6]:
# Reproject both Quincy layers to the same coordinate reference system.
target_crs = "EPSG:4326"

parcel_quincy_converted = parcel_quincy.to_crs(target_crs)
address_quincy_converted = address_quincy.to_crs(target_crs)

In [23]:
# Peek at the first ten street names of the reprojected Quincy addresses.
address_quincy_converted["STREET_NAME"].head(10)

2065620    WILLARD STREET
2065621    WILLARD STREET
2065622    WILLARD STREET
2065623    WILLARD STREET
2065646    WILLARD STREET
2065647    WILLARD STREET
2065648    WILLARD STREET
2065650    WILLARD STREET
2065651    WILLARD STREET
2065652    WILLARD STREET
Name: STREET_NAME, dtype: object

In [24]:
# Reduce the reprojected frames to one attribute column plus the geometry.
parcel_columns = ["OWN_ADDR", "geometry"]
address_columns = ["STREET_NAME", "geometry"]

parcel_quincy_trimmed = parcel_quincy_converted[parcel_columns]
address_quincy_trimmed = address_quincy_converted[address_columns]

In [55]:
# Same two-column subsets, but taken from the Quincy frames as filtered,
# i.e. before the to_crs conversion was applied.
parcel_keep_unconverted = ["OWN_ADDR", "geometry"]
address_keep_unconverted = ["STREET_NAME", "geometry"]

parcel_quincy_trimmed_2 = parcel_quincy[parcel_keep_unconverted]
address_quincy_trimmed_2 = address_quincy[address_keep_unconverted]

In [11]:
from datetime import datetime

In [26]:
# DataFrame.info() writes its summary straight to stdout and returns None, so
# wrapping it in print() only adds a stray "None" line after each summary.
# Call it directly; the bare print() keeps a blank line between the two reports.
parcel_quincy_trimmed.info()
print()
address_quincy_trimmed.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 26537 entries, 207 to 2406350
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   OWN_ADDR  26425 non-null  object  
 1   geometry  26537 non-null  geometry
dtypes: geometry(1), object(1)
memory usage: 622.0+ KB
None

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 56431 entries, 2065620 to 2329774
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   STREET_NAME  56431 non-null  object  
 1   geometry     56431 non-null  geometry
dtypes: geometry(1), object(1)
memory usage: 1.3+ MB
None


In [56]:
# Spatial join via geopandas.sjoin: pair every Quincy address with each parcel
# whose geometry it intersects (inner join, so unmatched addresses are dropped).
# The call is bracketed by wall-clock timestamps to report how long it takes.
# NOTE(review): newer geopandas releases spell the `op` keyword `predicate` —
# confirm against the installed version before upgrading.
start_time = datetime.now()

joined_test_quincy = gpd.sjoin(
    address_quincy_trimmed_2,
    parcel_quincy_trimmed_2,
    how="inner",
    op="intersects",
)

end_time = datetime.now()
elapsed_ms = (end_time - start_time).total_seconds() * 1000
print("Time cost: %.2fms" % elapsed_ms)

Time cost: 31356.43ms


In [41]:
# Print the column/dtype summary of both trimmed frames, parcels first.
for trimmed_frame in (parcel_quincy_trimmed, address_quincy_trimmed):
    trimmed_frame.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 26537 entries, 207 to 2406350
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   OWN_ADDR  26425 non-null  object  
 1   geometry  26537 non-null  geometry
dtypes: geometry(1), object(1)
memory usage: 622.0+ KB
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 56431 entries, 2065620 to 2329774
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   STREET_NAME  56431 non-null  object  
 1   geometry     56431 non-null  geometry
dtypes: geometry(1), object(1)
memory usage: 1.3+ MB


In [58]:
# Show the first ten rows of the address-to-parcel join result.
joined_test_quincy.head(10)

Unnamed: 0,STREET_NAME,geometry,index_right,OWN_ADDR
2065620,WILLARD STREET,POINT (238754.959 886609.775),1159314,PO BOX 87407
2065621,WILLARD STREET,POINT (238754.959 886609.775),1159314,PO BOX 87407
2065622,WILLARD STREET,POINT (238754.959 886609.775),1159314,PO BOX 87407
2065623,WILLARD STREET,POINT (238754.959 886609.775),1159314,PO BOX 87407
2065646,WILLARD STREET,POINT (238821.324 886625.242),1159314,PO BOX 87407
2065647,WILLARD STREET,POINT (238821.324 886625.242),1159314,PO BOX 87407
2065648,WILLARD STREET,POINT (238821.324 886625.242),1159314,PO BOX 87407
2065650,WILLARD STREET,POINT (238821.324 886625.242),1159314,PO BOX 87407
2065651,WILLARD STREET,POINT (238821.324 886625.242),1159314,PO BOX 87407
2065652,WILLARD STREET,POINT (238821.324 886625.242),1159314,PO BOX 87407


In [54]:
# Count how many joined address rows landed on each parcel; `index_right` is
# the parcel-side index that sjoin adds to the result.
matched_parcel_index = joined_test_quincy["index_right"]
matched_parcel_index.value_counts()

2004814    497
2321600    497
422215     497
2321793    497
1688726    497
          ... 
68881        1
1068345      1
1228135      1
1463802      1
2241559      1
Name: index_right, Length: 26081, dtype: int64