GET HOUSE DATA FOR LA COUNTY

In [16]:
import aiohttp
import asyncio
import nest_asyncio

nest_asyncio.apply()

buildings_url = "https://services.arcgis.com/RmCCgQtiZLDCtblq/ArcGIS/rest/services/Countywide_Building_Outlines/FeatureServer/1/query"

params = {
    "where": "HEIGHT BETWEEN 0 AND 33",        # Get all data
    "outFields": "*",       # Retrieve all available fields
    "resultRecordCount": 1000,  # Limit the number of records returned
    "f": "geojson"          # Get data in GeoJSON format
}

async def fetch(session, offset):
    params["resultOffset"] = offset
    async with session.get(buildings_url, params=params) as response:
        return await response.json()

async def main():
    all_data = []
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, offset) for offset in range(0, 700000, 1000)]
        results = await asyncio.gather(*tasks)
        for data in results:
            if "features" in data:
                all_data.extend(data["features"])
    return all_data

all_data = asyncio.run(main())


In [17]:
from shapely.geometry import Polygon
import geopandas as gpd
features = []
geometries=[]
for feature in all_data:
    attributes = feature["properties"]  # Extract attributes
    geometry = feature['geometry']
    # Convert ArcGIS "rings" format to Shapely Polygon
    if geometry["type"] == "Polygon":  # Ensure it's a polygon
        polygon = Polygon(geometry["coordinates"][0])  # Extract outer boundary
        geometries.append(polygon)
        features.append(attributes)

# Convert to GeoDataFrame


In [18]:

buildings_data = gpd.GeoDataFrame(features, geometry=geometries, crs="EPSG:4326")


In [10]:

buildings_data=buildings_data[["SitusAddress","SitusCity","SitusZIP","geometry"]]

In [19]:
buildings_data.head()

Unnamed: 0,OBJECTID,CODE,BLD_ID,HEIGHT,ELEV,LARIAC_BUILDINGS_2014_AREA,SOURCE,DATE_,AIN,STATUS,...,Shape_Area_1,RuleID_1,Override1,Rule_2,Override2,RuleID3,Override,Shape__Area,Shape__Length,geometry
0,1,Building,495131743836,25.63,32.49,4365,LARIAC2,2008,7429018060,Unchanged,...,8871.11305,1,,1,,1,,588.441406,97.610808,"POLYGON ((-118.21885 33.7839, -118.21909 33.78..."
1,2,Building,495312745735,20.43,31.66,7910,LARIAC2,2008,7429028035,Unchanged,...,19529.216525,1,,1,,1,,1066.550781,130.64534,"POLYGON ((-118.21854 33.78929, -118.21824 33.7..."
2,3,Building,495311745574,10.65,20.44,2893,LARIAC2,2008,7429028030,Unchanged,...,3259.30935,1,,1,,1,,390.125,102.446327,"POLYGON ((-118.21824 33.78879, -118.21821 33.7..."
3,4,Building,495268745113,15.08,24.68,5788,LARIAC2,2008,7429029032,Unchanged,...,11077.699128,1,,1,,1,,780.390625,111.964207,"POLYGON ((-118.21864 33.78758, -118.21837 33.7..."
4,6,Building,495000745168,13.73,24.18,88,LARIAC2,2008,7429024027,Unchanged,...,6251.207597,1,,1,,1,,11.875,15.107804,"POLYGON ((-118.21942 33.78765, -118.2194 33.78..."


In [20]:
buildings_data['fulladdress']= buildings_data['SitusAddress'] + ' ' + buildings_data['SitusCity'] + ' ' + buildings_data['SitusZIP'].astype(str)

In [20]:
buildings_data

Unnamed: 0,OBJECTID,Address,City,Zipcode,Height,ABS_HT,geometry,fulladdress
0,1,,,,,521.580660,"POLYGON ((-117.98902 33.95513, -117.98904 33.9...",
1,2,,,,,665.691357,"POLYGON ((-117.97309 33.95583, -117.973 33.955...",
2,3,,,,,613.141221,"POLYGON ((-117.9791 33.95639, -117.97904 33.95...",
3,4,,,,,758.267211,"POLYGON ((-117.97417 33.95714, -117.97428 33.9...",
4,5,,,,,347.422286,"POLYGON ((-117.98265 33.94599, -117.98265 33.9...",
...,...,...,...,...,...,...,...,...
699995,699997,2344 N RIVERSIDE DR,SANTA ANA,92706,13.634913,158.588637,"POLYGON ((-117.87059 33.77, -117.87071 33.7699...",2344 N RIVERSIDE DR SANTA ANA 92706
699996,699998,321 E VIRGINIA AVE,SANTA ANA,92706,39.502468,196.284439,"POLYGON ((-117.86469 33.77214, -117.8648 33.77...",321 E VIRGINIA AVE SANTA ANA 92706
699997,699999,1935 N HELIOTROPE DR,SANTA ANA,92706,13.100521,144.644758,"POLYGON ((-117.87456 33.7631, -117.87474 33.76...",1935 N HELIOTROPE DR SANTA ANA 92706
699998,700000,935 W 21ST ST,SANTA ANA,92706,11.892386,136.256160,"POLYGON ((-117.87801 33.76338, -117.8782 33.76...",935 W 21ST ST SANTA ANA 92706


In [34]:
test_df=buildings_data.head(3)[1:] 

test_df

Unnamed: 0,OBJECTID,CODE,BLD_ID,HEIGHT,ELEV,LARIAC_BUILDINGS_2014_AREA,SOURCE,DATE_,AIN,STATUS,...,RuleID_1,Override1,Rule_2,Override2,RuleID3,Override,Shape__Area,Shape__Length,geometry,fulladdress
1,2,Building,495312745735,20.43,31.66,7910,LARIAC2,2008,7429028035,Unchanged,...,1,,1,,1,,1066.550781,130.64534,"POLYGON ((-118.21854 33.78929, -118.21824 33.7...",1758 HAYES AVE LONG BEACH CA 90813-1145
2,3,Building,495311745574,10.65,20.44,2893,LARIAC2,2008,7429028030,Unchanged,...,1,,1,,1,,390.125,102.446327,"POLYGON ((-118.21824 33.78879, -118.21821 33.7...",1730 HAYES AVE LONG BEACH CA 90813-1145


## WEBSCRAPER FOR LA BUILDINGS

BELOW CODE IS THE SCRIPT TO SCRAPE THE ASSESSED VALUE OF THE LA COUNTY WEBSITE   https://portal.assessor.lacounty.gov/ 

How it works is scrapes the AIN number from each building and returns the 2025 assessment value for that specific property. It is then appended to the `price` column of the dataframe

In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def scrape_property_data(ain):
    # Start the WebDriver
    driver = webdriver.Chrome()
    try:
        # Open the target website
        driver.get("https://portal.assessor.lacounty.gov/")
        time.sleep(2)  # Allow time for the page to load

        # Locate the search input field and enter the address
        search_input = driver.find_element(By.NAME, "basicsearchterm")
        search_input.send_keys(ain)
        time.sleep(1)  # Simulate typing delay
        search_input.send_keys(Keys.RETURN)

        # Wait for search results to load
        time.sleep(5)

        # Click the first search result row
        # first_result = driver.find_element(By.XPATH, "(//tr[@ng-repeat='row in pager.page'])[1]")
        # first_result.click()

        # Wait for the property details to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "tfoot .text-right+ .text-left-important .ng-binding"))
        )

        # Extract and return the scraped text
        scraped_text = driver.find_element(By.CSS_SELECTOR, "tfoot .text-right+ .text-left-important .ng-binding").text
        print(scraped_text)
        # return scraped_text
    except Exception as e:
        print("Error:", e)
        return None
    finally:
        driver.quit()
    
    return scraped_text


test_df['price'] = test_df['AIN'].apply(scrape_property_data)
test_df['price'].replace(',','',regex=True,inplace=True)
test_df['price']=test_df['price'].astype(int)

773,063
158,335


In [51]:
test_df['price'].replace(',','',regex=True,inplace=True)

In [56]:
test_df.dtypes

OBJECTID            int64
CODE               object
BLD_ID             object
HEIGHT            float64
ELEV              float64
                   ...   
Shape__Area       float64
Shape__Length     float64
geometry         geometry
fulladdress        object
price               int64
Length: 63, dtype: object