In [64]:
import csv
from datetime import datetime

import geopandas as gp
import pandas as pd

In [3]:
#reads the TriCOG Land Bank service area footprint
#later cells reproject this layer before using it as a join/clip mask

tricog = gp.read_file("../data/tricog_dissolved.zip")

In [None]:
#read the allegheny county parcel file
#NOTE(review): later cells pair this layer with a footprint reprojected to EPSG:4326, so they presumably
#expect this file to be in EPSG:4326 — confirm

allegheny_county_parcels = gp.read_file("../data/alleghenycounty_parcels202504.geojson")

In [4]:
#perform a spatial join between the TriCOG footprint and the county's parcels to find parcels within TriCOG
#the footprint is reprojected to EPSG:4326 before joining
#NOTE(review): sjoin is called without a predicate, so geopandas' default applies — confirm it matches "within"
#NOTE(review): parcels_within_tricog is not referenced by any later cell shown here (clipped_parcels is used
#instead) — confirm this join is still needed

parcels_within_tricog = gp.sjoin(allegheny_county_parcels, tricog.to_crs("EPSG:4326"))

In [None]:
#clip the county parcels to the TriCOG footprint (reprojected to EPSG:4326)
#clipped_parcels is the parcel layer that the assessment join below is built on
clipped_parcels = gp.clip(allegheny_county_parcels, tricog.to_crs("EPSG:4326"))

In [None]:
#read the assessment file as raw rows (every field stays a string) and rename the parcel ID column "PIN"
#`csv` is provided by the notebook's import cell
#NOTE(review): every other input is read from "../data/"; confirm "../data_assessments_apr_2025.csv" is the
#intended location and not a typo for a file inside "../data/"
with open("../data_assessments_apr_2025.csv", newline="") as f:
    allegheny_county_assessments_list = list(csv.reader(f, delimiter=','))
#row 0 is the header row; its first field is the parcel ID column, renamed so it can be joined on 'PIN'
allegheny_county_assessments_list[0][0] = 'PIN'

In [None]:
#build the assessments dataframe: the first row supplies the column names, the remaining rows are the records

allegheny_county_assessments = pd.DataFrame(
    data=allegheny_county_assessments_list[1:],
    columns=allegheny_county_assessments_list[0],
)

In [None]:
#Attach assessment data to the parcels inside the TriCOG footprint
#inner join on 'PIN': parcels without an assessment row (and assessments without a clipped parcel) are dropped
assessments_on_clipped = clipped_parcels.merge(
    right=allegheny_county_assessments,
    on='PIN',
    how='inner',
)

In [97]:
#parse both sale-date columns (MM-DD-YYYY text) into datetimes, then keep parcels last sold before 2022
#a parcel needs at least three years of tax delinquency, which a sale in 2022 or later cannot have accumulated
#NOTE(review): rows whose SALEDATE parses to NaT fail the "< cutoff" test and are dropped — confirm that
#parcels with no recorded sale should be excluded

assessments_on_clipped[['SALEDATE', 'PREVSALEDATE']] = (
    assessments_on_clipped[['SALEDATE', 'PREVSALEDATE']].apply(pd.to_datetime, format='%m-%d-%Y')
)
cutoff = datetime(year=2022, month=1, day=1)
older_sales = assessments_on_clipped.loc[assessments_on_clipped['SALEDATE'] < cutoff]

In [101]:
#lookup table: parcel id (PIN) -> most recent sale date, for the parcels kept in older_sales
#if a PIN appears more than once, the last row wins

pin_to_sale_date = dict(zip(older_sales['PIN'], older_sales['SALEDATE']))

In [112]:
#read the liens file as raw rows (every field stays a string); the first row supplies the column names
#`csv` is provided by the notebook's import cell
with open("../data/liens.csv", newline="") as f:
    liens_list = list(csv.reader(f, delimiter=','))
liens = pd.DataFrame(liens_list[1:], columns=liens_list[0])

In [151]:
#keep only the liens whose parcel id matches a parcel that is inside the TriCOG footprint
#and was last sold before 2022

liens_on_older_sales = liens.loc[liens['pin'].isin(older_sales['PIN'])]

In [131]:
#creates a pandas DataFrame of the liens that belong to the current owner: a lien is kept only when its
#tax year is later than the year of the parcel's most recent sale (liens from the sale year or earlier
#are treated as attached to a previous owner)
#filtering with a boolean mask keeps the original lien columns and index, and still yields a frame with
#those columns when no lien qualifies, so the cells below do not fail on an empty result
#tax_year is read from the CSV as text, hence the cast; a non-numeric value raises ValueError

liens_on_current_owners = liens_on_older_sales[
    liens_on_older_sales['tax_year'].astype(int)
    > pd.to_datetime(liens_on_older_sales['pin'].map(pin_to_sale_date)).dt.year
]

In [133]:
#assessments of relevant parcels whose current owner has never satisfied a lien:
#  1. collect the liens flagged as satisfied ('t')
#  2. drop every lien on a parcel that has at least one satisfied lien
#  3. keep the assessment rows of the parcels that still have liens left
#NOTE(review): no cell shown here counts delinquent years per parcel, so the "at least three years"
#requirement mentioned earlier is not enforced by this filter — confirm it is applied elsewhere

satisfied_liens = liens_on_current_owners.loc[liens_on_current_owners['satisfied'] == 't']
outstanding_liens = liens_on_current_owners.loc[
    ~liens_on_current_owners['pin'].isin(satisfied_liens['pin'])
]
assessments_with_outstanding_liens = older_sales.loc[
    older_sales['PIN'].isin(outstanding_liens['pin'])
]

In [145]:
#narrow to residential-class parcels, then split out the two eligible use types:
#vacant lots and single-family houses

residential_parcels = assessments_with_outstanding_liens.loc[
    assessments_with_outstanding_liens['CLASSDESC'] == 'RESIDENTIAL'
]
vacant_eligible_parcels = residential_parcels.loc[
    residential_parcels['USEDESC'].isin(['RESIDENTIAL VACANT LAND', 'VACANT LAND'])
]
single_family_parcels = residential_parcels.loc[
    residential_parcels['USEDESC'].isin(['SINGLE FAMILY'])
]

In [144]:
#reads the MVA file, isolates DEF MVAs within TriCOG footprint

mva = gp.read_file("../data/pitts_allegheny_mva2021.zip")
def_mvas = mva[mva.MVA21.isin(['D','E','F'])]
#clip needs both layers in the same CRS, so the footprint is reprojected to the CRS the MVA file declares
#rather than assuming the MVA layer is in EPSG:4326
#(raises if the MVA file declares no CRS, instead of clipping mismatched coordinates silently)
mvas_in_tcog = gp.clip(def_mvas, tricog.to_crs(def_mvas.crs))

In [148]:
#isolates relevant single-family housing parcels within DEF MVAs in TriCOG boundaries
#the parcels were already clipped to the TriCOG footprint upstream, so clipping against the DEF MVA areas
#is enough to restrict them to both
#clip needs both layers in the same CRS, so the MVA mask is reprojected to the parcels' CRS first
#(raises if the MVA layer declares no CRS, instead of clipping mismatched coordinates silently)

single_family_in_mva = gp.clip(
    single_family_parcels,
    def_mvas.to_crs(single_family_parcels.crs),
)