GET HOUSE DATA FOR LA COUNTY

In [10]:
import aiohttp
import asyncio
import nest_asyncio

nest_asyncio.apply()  # Required if running in Jupyter Notebook

# ArcGIS REST "query" endpoint that every request in this cell is sent to
BASE_URL = (
    "https://services.arcgis.com/RmCCgQtiZLDCtblq/arcgis/rest/services/"
    "Countywide_Building_Outlines/FeatureServer/1/query"
)

# Query-string parameters shared by the count request and every page request
QUERY_PARAMS = dict(
    where="(HEIGHT < 33) AND UseType = 'RESIDENTIAL' AND SitusCity IN('LOS ANGELES CA','BEVERLY HILLS CA',  'PALMDALE')",
    outFields="*",
    outSR="4326",
    f="json",
    resultRecordCount=1000,  # records requested per page
)

async def fetch_total_count():
    """Fetch total number of matching records.

    Sends the shared QUERY_PARAMS to BASE_URL with ``returnCountOnly=true``.

    Returns:
        The value of the ``count`` key in the JSON response (0 if the key is
        missing).

    Raises:
        aiohttp.ClientResponseError: the server answered with an HTTP error status.
        RuntimeError: the JSON body carries an ``error`` entry instead of a count.
    """
    params = QUERY_PARAMS.copy()
    params["returnCountOnly"] = "true"

    async with aiohttp.ClientSession() as session:
        async with session.get(BASE_URL, params=params) as response:
            response.raise_for_status()
            # content_type=None: parse the body as JSON regardless of the
            # Content-Type header the service sends.
            data = await response.json(content_type=None)

    # A failed query must not be mistaken for "zero matching records".
    if isinstance(data, dict) and "error" in data:
        raise RuntimeError(f"Count query failed: {data['error']}")

    return data.get("count", 0)  # Extract total count

async def fetch(session, offset, retries=3, backoff=1.0):
    """Fetch a batch of records using pagination.

    Args:
        session: open aiohttp.ClientSession used for the request.
        offset: value sent as ``resultOffset`` (index of the first record of the page).
        retries: number of additional attempts after a failed one.
        backoff: base delay in seconds; the wait doubles after every failed attempt.

    Returns:
        The decoded JSON body of the page. If the last attempt still yields a
        body with an ``error`` entry, that body is returned unchanged so the
        caller can inspect it.

    Raises:
        aiohttp.ClientError, asyncio.TimeoutError, ValueError: the last attempt
        failed at the HTTP/transport level or the body was not valid JSON.
    """
    params = QUERY_PARAMS.copy()
    params["resultOffset"] = offset

    for attempt in range(retries + 1):
        is_last_attempt = attempt == retries
        try:
            async with session.get(BASE_URL, params=params) as response:
                response.raise_for_status()
                # content_type=None: parse the body as JSON regardless of the
                # Content-Type header the service sends.
                data = await response.json(content_type=None)
        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
            if is_last_attempt:
                raise
        else:
            has_error = isinstance(data, dict) and "error" in data
            if not has_error or is_last_attempt:
                return data

        # Exponential backoff before retrying this page.
        await asyncio.sleep(backoff * 2 ** attempt)

async def main():
    """Fetch all records asynchronously with pagination.

    Asks the service for the total record count, requests one page per
    ``resultRecordCount``-sized offset (at most 10 requests in flight), and
    concatenates the ``features`` lists of all pages.

    Returns:
        A list with the feature dicts of every page that contained ``features``.
        Pages without that key are reported on stdout instead of being dropped
        silently.
    """
    # Step between offsets must equal the page size sent with every request.
    page_size = QUERY_PARAMS["resultRecordCount"]

    total_count = await fetch_total_count()
    print(f"Total Records to Retrieve: {total_count}")

    semaphore = asyncio.Semaphore(10)  # Limit concurrency to prevent API overload

    async with aiohttp.ClientSession() as session:
        async def bound_fetch(offset):
            async with semaphore:
                return await fetch(session, offset)

        # One request per page; gather keeps results in offset order
        offsets = list(range(0, total_count, page_size))
        results = await asyncio.gather(*(bound_fetch(offset) for offset in offsets))

    all_data = []
    failed_offsets = []
    for offset, data in zip(offsets, results):
        if isinstance(data, dict) and "features" in data:
            all_data.extend(data["features"])
        else:
            failed_offsets.append(offset)

    if failed_offsets:
        print(
            f"WARNING: {len(failed_offsets)} page(s) returned no 'features' "
            f"(offsets: {failed_offsets[:20]}{'...' if len(failed_offsets) > 20 else ''})"
        )
    if len(all_data) != total_count:
        print(f"WARNING: expected {total_count} records but retrieved {len(all_data)}")

    print(f"Total Records Retrieved: {len(all_data)}")
    return all_data

# Run the async function to completion and keep the returned feature list in all_data.
# asyncio.run() is called from a notebook cell here; nest_asyncio.apply() above is what permits that.
all_data = asyncio.run(main())


Total Records to Retrieve: 1037863


KeyboardInterrupt: 

In [5]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon

# Extract attribute records and footprint polygons from the downloaded features
features = []
geometries = []

for feature in all_data:
    attributes = feature["attributes"]
    # "geometry" may be missing or None; treat both as "no geometry"
    geometry = feature.get("geometry") or {}
    rings = geometry.get("rings")

    # Skip features without a usable first ring instead of raising on rings[0]
    if not isinstance(rings, list) or not rings or len(rings[0]) < 3:
        continue

    # NOTE(review): only the first ring is converted, so any further rings
    # (holes or additional parts) are dropped — confirm this is acceptable.
    geometries.append(Polygon(rings[0]))
    features.append(attributes)

# Convert to GeoDataFrame.
# NOTE(review): crs only labels the coordinates, it does not reproject them —
# confirm the query that produced all_data requested outSR=4326.
gdf = gpd.GeoDataFrame(features, geometry=geometries, crs="EPSG:4326")

# Display first few rows
gdf.head()

In [7]:
# Show the column labels of gdf
gdf.columns

Index(['OBJECTID', 'CODE', 'BLD_ID', 'HEIGHT', 'ELEV',
       'LARIAC_BUILDINGS_2014_AREA', 'SOURCE', 'DATE_', 'AIN', 'STATUS',
       'OLD_BLD_ID', 'RuleID', 'RuleID_2', 'RuleID_3', 'APN', 'SitusHouseNo',
       'SitusFraction', 'SitusDirection', 'SitusUnit', 'SitusStreet',
       'SitusAddress', 'SitusCity', 'SitusZIP', 'TaxRateArea', 'AgencyClassNo',
       'AgencyName', 'AgencyType', 'UseCode', 'UseCode_2', 'UseType',
       'UseDescription', 'YearBuilt1', 'EffectiveYear1', 'RecDate', 'RecDocNo',
       'Roll_Year', 'Roll_LandValue', 'Roll_ImpValue', 'Roll_PersPropValue',
       'Roll_FixtureValue', 'Roll_HomeOwnersExemp', 'Roll_RealEstateExemp',
       'Roll_PersPropExemp', 'Roll_FixtureExemp', 'Roll_LandBaseYear',
       'Roll_ImpBaseYear', 'SpatialChangeDate', 'ParcelCreateDate', 'Assr_Map',
       'Assr_Index_Map', 'Shape_Length_1', 'Shape_Area_1', 'RuleID_1',
       'Override1', 'Rule_2', 'Override2', 'RuleID3', 'Override',
       'Shape__Area', 'Shape__Length', 'geometry'],
 

In [8]:
# List the distinct SitusCity values present in gdf
gdf['SitusCity'].unique()

array(['BEVERLY HILLS CA', 'PALMDALE'], dtype=object)

In [9]:
# Preview the address, geometry and shape-measure columns of gdf
gdf[['SitusAddress', 'SitusCity', 'SitusZIP', 'geometry','Shape__Area','Shape__Length']]

Unnamed: 0,SitusAddress,SitusCity,SitusZIP,geometry,Shape__Area,Shape__Length
0,713 N MAPLE DR,BEVERLY HILLS CA,90210-3480,"POLYGON ((-118.40209 34.08417, -118.40201 34.0...",175.636719,54.218807
1,721 N MAPLE DR,BEVERLY HILLS CA,90210-3480,"POLYGON ((-118.40283 34.08516, -118.40281 34.0...",152.628906,64.591542
2,723 N ELM DR,BEVERLY HILLS CA,90210-3422,"POLYGON ((-118.404 34.08432, -118.40394 34.084...",522.902344,113.965758
3,725 N ELM DR,BEVERLY HILLS CA,90210-3422,"POLYGON ((-118.40447 34.0841, -118.40443 34.08...",300.738281,82.351785
4,813 FOOTHILL RD,BEVERLY HILLS CA,90210-2903,"POLYGON ((-118.40699 34.08543, -118.40703 34.0...",311.074219,71.964052
...,...,...,...,...,...,...
10489,36825 37TH ST E,PALMDALE,93550,"POLYGON ((-118.06325 34.55067, -118.06311 34.5...",292.574219,69.872956
10490,36813 37TH ST E,PALMDALE,93550,"POLYGON ((-118.06325 34.55018, -118.06331 34.5...",390.609375,81.057453
10491,720 N ALTA DR,BEVERLY HILLS CA,90210-3506,"POLYGON ((-118.25207 34.64164, -118.25208 34.6...",419.371094,90.276051
10492,720 N ALTA DR,BEVERLY HILLS CA,90210-3506,"POLYGON ((-118.25166 34.64026, -118.25155 34.6...",468.039062,101.269613


In [None]:
# Build buildings_data from the features/geometries lists filled in an earlier cell,
# labelling the coordinates as EPSG:4326
buildings_data = gpd.GeoDataFrame(features, geometry=geometries, crs="EPSG:4326")


### Filtered Geoservice

In [1]:
import aiohttp
import asyncio
import nest_asyncio
nest_asyncio.apply()  # Required if running in Jupyter Notebook

# The URL from your filtered website query.
# outSR=4326 is appended so the service returns geometries as EPSG:4326 coordinates,
# which is the CRS the GeoDataFrame built from these features is labelled with.
QUERY_URL = "https://services.arcgis.com/RmCCgQtiZLDCtblq/arcgis/rest/services/Countywide_Building_Outlines/FeatureServer/1/query?f=json&where=(CODE%20IN%20('Building'))%20AND%20(UseType%20IN%20('Residential'))%20AND%20(HEIGHT%20%3E%3D%200%20AND%20HEIGHT%20%3C%3D%2034.63)%20AND%20(SitusCity%20IN%20('LOS%20ANGELES%20CA'))&outFields=*&outSR=4326"

async def fetch_total_count():
    """Fetch total number of matching records.

    Requests QUERY_URL with ``&returnCountOnly=true`` appended.

    Returns:
        The value of the ``count`` key in the JSON response (0 if the key is
        missing).

    Raises:
        aiohttp.ClientResponseError: the server answered with an HTTP error status.
        RuntimeError: the JSON body carries an ``error`` entry instead of a count.
    """
    count_url = QUERY_URL + "&returnCountOnly=true"

    async with aiohttp.ClientSession() as session:
        async with session.get(count_url) as response:
            response.raise_for_status()
            # content_type=None: parse the body as JSON regardless of the
            # Content-Type header the service sends.
            data = await response.json(content_type=None)

    # A failed query must not be mistaken for "zero matching records".
    if isinstance(data, dict) and "error" in data:
        raise RuntimeError(f"Count query failed: {data['error']}")

    return data.get("count", 0)  # Extract total count

async def fetch(session, offset, retries=3, backoff=1.0):
    """Fetch a batch of records using pagination.

    Args:
        session: open aiohttp.ClientSession used for the request.
        offset: value appended as ``resultOffset`` (index of the first record of the page).
        retries: number of additional attempts after a failed one.
        backoff: base delay in seconds; the wait doubles after every failed attempt.

    Returns:
        The decoded JSON body of the page. If the last attempt still yields a
        body with an ``error`` entry, that body is returned unchanged so the
        caller can inspect it.

    Raises:
        aiohttp.ClientError, asyncio.TimeoutError, ValueError: the last attempt
        failed at the HTTP/transport level or the body was not valid JSON.
    """
    paginated_url = QUERY_URL + f"&resultOffset={offset}&resultRecordCount=1000"

    for attempt in range(retries + 1):
        is_last_attempt = attempt == retries
        try:
            async with session.get(paginated_url) as response:
                response.raise_for_status()
                # content_type=None: parse the body as JSON regardless of the
                # Content-Type header the service sends.
                data = await response.json(content_type=None)
        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
            if is_last_attempt:
                raise
        else:
            has_error = isinstance(data, dict) and "error" in data
            if not has_error or is_last_attempt:
                return data

        # Exponential backoff before retrying this page.
        await asyncio.sleep(backoff * 2 ** attempt)

async def main():
    """Fetch all records asynchronously with pagination.

    Asks the service for the total record count, requests one page per
    1000-record offset (at most 10 requests in flight), and concatenates the
    ``features`` lists of all pages.

    Returns:
        A list with the feature dicts of every page that contained ``features``.
        Pages without that key are reported on stdout instead of being dropped
        silently.
    """
    # Must match the resultRecordCount that fetch() appends to the URL.
    page_size = 1000

    total_count = await fetch_total_count()
    print(f"Total Records to Retrieve: {total_count}")

    semaphore = asyncio.Semaphore(10)  # Limit concurrency to prevent API overload

    async with aiohttp.ClientSession() as session:
        async def bound_fetch(offset):
            async with semaphore:
                return await fetch(session, offset)

        # One request per page; gather keeps results in offset order
        offsets = list(range(0, total_count, page_size))
        results = await asyncio.gather(*(bound_fetch(offset) for offset in offsets))

    all_data = []
    failed_offsets = []
    for offset, data in zip(offsets, results):
        if isinstance(data, dict) and "features" in data:
            all_data.extend(data["features"])
        else:
            failed_offsets.append(offset)

    if failed_offsets:
        print(
            f"WARNING: {len(failed_offsets)} page(s) returned no 'features' "
            f"(offsets: {failed_offsets[:20]}{'...' if len(failed_offsets) > 20 else ''})"
        )
    if len(all_data) != total_count:
        print(f"WARNING: expected {total_count} records but retrieved {len(all_data)}")

    print(f"Total Records Retrieved: {len(all_data)}")
    return all_data

# Run the async function to completion and keep the returned feature list in all_data.
# asyncio.run() is called from a notebook cell here; nest_asyncio.apply() above is what permits that.
all_data = asyncio.run(main())


Total Records to Retrieve: 1033709
Total Records Retrieved: 1033709


In [8]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon

# Extract attribute records and footprint polygons from the downloaded features
features = []
geometries = []

for feature in all_data:
    attributes = feature["attributes"]
    # "geometry" may be missing or None; treat both as "no geometry"
    geometry = feature.get("geometry") or {}
    rings = geometry.get("rings")

    # Skip features without a usable first ring instead of raising on rings[0]
    if not isinstance(rings, list) or not rings or len(rings[0]) < 3:
        continue

    # NOTE(review): only the first ring is converted, so any further rings
    # (holes or additional parts) are dropped — confirm this is acceptable.
    geometries.append(Polygon(rings[0]))
    features.append(attributes)

# Convert to GeoDataFrame.
# NOTE(review): crs only labels the coordinates, it does not reproject them —
# confirm the query that produced all_data requested outSR=4326.
gdf = gpd.GeoDataFrame(features, geometry=geometries, crs="EPSG:4326")

# Display first few rows
gdf.head()

In [9]:
# Show the column labels of gdf
gdf.columns

Index(['OBJECTID', 'CODE', 'BLD_ID', 'HEIGHT', 'ELEV',
       'LARIAC_BUILDINGS_2014_AREA', 'SOURCE', 'DATE_', 'AIN', 'STATUS',
       'OLD_BLD_ID', 'RuleID', 'RuleID_2', 'RuleID_3', 'APN', 'SitusHouseNo',
       'SitusFraction', 'SitusDirection', 'SitusUnit', 'SitusStreet',
       'SitusAddress', 'SitusCity', 'SitusZIP', 'TaxRateArea', 'AgencyClassNo',
       'AgencyName', 'AgencyType', 'UseCode', 'UseCode_2', 'UseType',
       'UseDescription', 'YearBuilt1', 'EffectiveYear1', 'RecDate', 'RecDocNo',
       'Roll_Year', 'Roll_LandValue', 'Roll_ImpValue', 'Roll_PersPropValue',
       'Roll_FixtureValue', 'Roll_HomeOwnersExemp', 'Roll_RealEstateExemp',
       'Roll_PersPropExemp', 'Roll_FixtureExemp', 'Roll_LandBaseYear',
       'Roll_ImpBaseYear', 'SpatialChangeDate', 'ParcelCreateDate', 'Assr_Map',
       'Assr_Index_Map', 'Shape_Length_1', 'Shape_Area_1', 'RuleID_1',
       'Override1', 'Rule_2', 'Override2', 'RuleID3', 'Override',
       'Shape__Area', 'Shape__Length', 'geometry'],
 

In [16]:
# Work on a copy so later column selection and additions are made on gdf_copy, not on gdf
gdf_copy=gdf.copy()

In [17]:
# Keep only the height, elevation, address and Roll_* value columns
selected_columns = [
    'HEIGHT', 'ELEV',
    'SitusAddress', 'SitusCity', 'SitusZIP',
    'Roll_Year', 'Roll_LandValue', 'Roll_ImpValue', 'Roll_PersPropValue',
]
gdf_copy = gdf_copy[selected_columns]
gdf_copy

Unnamed: 0,HEIGHT,ELEV,SitusAddress,SitusCity,SitusZIP,Roll_Year,Roll_LandValue,Roll_ImpValue,Roll_PersPropValue
0,15.35,49.73,1747 W 27TH ST,LOS ANGELES CA,90732-4617,2012,505738.0,416137.0,0.0
1,9.18,25.92,1540 LAGOON AVE,LOS ANGELES CA,90744-2044,2012,168000.0,118000.0,0.0
2,12.68,22.43,1342 W 223RD ST,LOS ANGELES CA,90501-4122,2012,218159.0,54536.0,0.0
3,20.98,28.50,13611 S MENLO AVE,LOS ANGELES CA,90247-2135,2012,132605.0,84449.0,0.0
4,19.21,392.85,2506 S MORAY AVE,LOS ANGELES CA,90732-4632,2012,37259.0,102166.0,0.0
...,...,...,...,...,...,...,...,...,...
1033703,12.93,3563.64,1462 GULF AVE,LOS ANGELES CA,90744-2010,2012,76803.0,62675.0,0.0
1033704,15.27,3734.36,1529 W 216TH ST,LOS ANGELES CA,90501-3034,2012,158707.0,71547.0,0.0
1033705,8.43,3617.09,1230 N MARINE AVE,LOS ANGELES CA,90744-3129,2012,40109.0,187215.0,0.0
1033706,11.30,3644.10,928 PIONEER AVE,LOS ANGELES CA,90744-3751,2012,140000.0,78000.0,0.0


In [18]:
# TotalValue is Roll_LandValue plus Roll_ImpValue, computed row by row
land_value = gdf_copy['Roll_LandValue']
imp_value = gdf_copy['Roll_ImpValue']
gdf_copy['TotalValue'] = land_value.add(imp_value)

gdf_copy

Unnamed: 0,HEIGHT,ELEV,SitusAddress,SitusCity,SitusZIP,Roll_Year,Roll_LandValue,Roll_ImpValue,Roll_PersPropValue,TotalValue
0,15.35,49.73,1747 W 27TH ST,LOS ANGELES CA,90732-4617,2012,505738.0,416137.0,0.0,921875.0
1,9.18,25.92,1540 LAGOON AVE,LOS ANGELES CA,90744-2044,2012,168000.0,118000.0,0.0,286000.0
2,12.68,22.43,1342 W 223RD ST,LOS ANGELES CA,90501-4122,2012,218159.0,54536.0,0.0,272695.0
3,20.98,28.50,13611 S MENLO AVE,LOS ANGELES CA,90247-2135,2012,132605.0,84449.0,0.0,217054.0
4,19.21,392.85,2506 S MORAY AVE,LOS ANGELES CA,90732-4632,2012,37259.0,102166.0,0.0,139425.0
...,...,...,...,...,...,...,...,...,...,...
1033703,12.93,3563.64,1462 GULF AVE,LOS ANGELES CA,90744-2010,2012,76803.0,62675.0,0.0,139478.0
1033704,15.27,3734.36,1529 W 216TH ST,LOS ANGELES CA,90501-3034,2012,158707.0,71547.0,0.0,230254.0
1033705,8.43,3617.09,1230 N MARINE AVE,LOS ANGELES CA,90744-3129,2012,40109.0,187215.0,0.0,227324.0
1033706,11.30,3644.10,928 PIONEER AVE,LOS ANGELES CA,90744-3751,2012,140000.0,78000.0,0.0,218000.0


#### CPI for 2012 (source): https://www.bls.gov/news.release/archives/cpi_01162013.pdf
#### CPI for 2025 (source): https://www.usinflationcalculator.com/inflation/consumer-price-index-and-annual-percent-changes-from-1913-to-2008/ 

#### Real estate values in LA have appreciated 144% since 2012 (source: https://www.zillow.com/research/data/)

### Calculate house prices in today's value by using the CPI and appreciation factors

In [24]:
# Consumer Price Index values for the two reference years
CPI_2012 = 229.6
CPI_2025 = 317.671

# NOTE(review): INFLATION_FACT is computed but not applied to the price below —
# confirm whether a CPI adjustment is intended in addition to appreciation.
INFLATION_FACT = CPI_2025 / CPI_2012

# Appreciation since 2012 as a fraction (1.44 == 144%); the factor multiplies the 2012 value
APPRECIATION_RATE = 1.44
APPRECIATION_FACT = 1 + APPRECIATION_RATE

# Scale TotalValue by the appreciation factor only
gdf_copy['2025_price'] = gdf_copy['TotalValue'] * APPRECIATION_FACT

gdf_copy

Unnamed: 0,HEIGHT,ELEV,SitusAddress,SitusCity,SitusZIP,Roll_Year,Roll_LandValue,Roll_ImpValue,Roll_PersPropValue,TotalValue,2025_price
0,15.35,49.73,1747 W 27TH ST,LOS ANGELES CA,90732-4617,2012,505738.0,416137.0,0.0,921875.0,2249375.00
1,9.18,25.92,1540 LAGOON AVE,LOS ANGELES CA,90744-2044,2012,168000.0,118000.0,0.0,286000.0,697840.00
2,12.68,22.43,1342 W 223RD ST,LOS ANGELES CA,90501-4122,2012,218159.0,54536.0,0.0,272695.0,665375.80
3,20.98,28.50,13611 S MENLO AVE,LOS ANGELES CA,90247-2135,2012,132605.0,84449.0,0.0,217054.0,529611.76
4,19.21,392.85,2506 S MORAY AVE,LOS ANGELES CA,90732-4632,2012,37259.0,102166.0,0.0,139425.0,340197.00
...,...,...,...,...,...,...,...,...,...,...,...
1033703,12.93,3563.64,1462 GULF AVE,LOS ANGELES CA,90744-2010,2012,76803.0,62675.0,0.0,139478.0,340326.32
1033704,15.27,3734.36,1529 W 216TH ST,LOS ANGELES CA,90501-3034,2012,158707.0,71547.0,0.0,230254.0,561819.76
1033705,8.43,3617.09,1230 N MARINE AVE,LOS ANGELES CA,90744-3129,2012,40109.0,187215.0,0.0,227324.0,554670.56
1033706,11.30,3644.10,928 PIONEER AVE,LOS ANGELES CA,90744-3751,2012,140000.0,78000.0,0.0,218000.0,531920.00


## WEBSCRAPER FOR LA BUILDINGS

THE CODE BELOW IS THE SCRIPT THAT SCRAPES THE ASSESSED VALUE FROM THE LA COUNTY ASSESSOR WEBSITE   https://portal.assessor.lacounty.gov/ 

How it works: the script takes the AIN of each building, searches for it on the portal, and returns the 2025 assessment value for that specific property. The value is then stored in the `assessedValue` column of the dataframe.

In [17]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def scrape_property_data(ain, driver=None, timeout=15):
    """Search the LA County Assessor portal for one AIN and return the scraped value text.

    Args:
        ain: identifier typed into the portal's search box (converted with str()).
        driver: optional, already-running Selenium WebDriver to reuse. When
            omitted, a headless Chrome instance is started for this call and
            quit before returning; a driver passed in by the caller is left open.
        timeout: maximum number of seconds to wait for the search box and for
            the value element to appear.

    Returns:
        The text of the first element matching the value CSS selector, or None
        when the AIN is missing or the lookup failed (the error is printed).
    """
    value_selector = "tfoot .text-right+ .text-left-important .ng-binding"

    # Nothing to look up: avoid launching a browser for a missing identifier.
    if ain is None or (isinstance(ain, float) and pd.isna(ain)):
        return None

    owns_driver = driver is None
    if owns_driver:
        # Start the WebDriver
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        driver = webdriver.Chrome(options=chrome_options)

    try:
        wait = WebDriverWait(driver, timeout)

        # Open the target website
        driver.get("https://portal.assessor.lacounty.gov/")

        # Wait for the search box instead of sleeping a fixed time, then submit the AIN
        search_input = wait.until(
            EC.element_to_be_clickable((By.NAME, "basicsearchterm"))
        )
        search_input.send_keys(str(ain))
        search_input.send_keys(Keys.RETURN)

        # Wait for the property details to load and read the value
        value_element = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, value_selector))
        )
        return value_element.text
    except Exception as e:
        # Best effort: report the failure and let the caller continue with None
        print("Error:", e)
        return None
    finally:
        # Only quit browsers this call started
        if owns_driver:
            driver.quit()




In [18]:


# Scrape every distinct AIN once and map the result back onto all rows that share it
unique_ains = gdf['AIN'].dropna().drop_duplicates()
scraped_by_ain = dict(zip(unique_ains, unique_ains.map(scrape_property_data)))
assessed_text = gdf['AIN'].map(scraped_by_ain)

# Strip everything except digits and the decimal point (e.g. thousands separators);
# failed lookups (None) become <NA> instead of crashing the integer conversion.
gdf['assessedValue'] = (
    pd.to_numeric(
        assessed_text.astype(str).str.replace(r'[^0-9.]', '', regex=True),
        errors='coerce',
    )
    .round()
    .astype('Int64')
)

Exception ignored in: <function Service.__del__ at 0x000001E9CB14DBC0>
Traceback (most recent call last):
  File "d:\GithubRepos\forestfires\.venv\Lib\site-packages\selenium\webdriver\common\service.py", line 200, in __del__
    self.stop()
  File "d:\GithubRepos\forestfires\.venv\Lib\site-packages\selenium\webdriver\common\service.py", line 157, in stop
    self.send_remote_shutdown_command()
  File "d:\GithubRepos\forestfires\.venv\Lib\site-packages\selenium\webdriver\common\service.py", line 137, in send_remote_shutdown_command
    request.urlopen(f"{self.service_url}/shutdown")
  File "D:\anaconda3\Lib\urllib\request.py", line 215, in urlopen
    return opener.open(url, data, timeout)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\anaconda3\Lib\urllib\request.py", line 515, in open
    response = self._open(req, data)
               ^^^^^^^^^^^^^^^^^^^^^
  File "D:\anaconda3\Lib\urllib\request.py", line 532, in _open
    result = self._call_chain(self.handle_open, protocol, 

KeyboardInterrupt: 

In [20]:
# Display gdf
gdf 

Unnamed: 0,HEIGHT,ELEV,SitusAddress,SitusCity,SitusZIP,AIN,fulladdress
0,15.35,49.73,1747 W 27TH ST,LOS ANGELES CA,90732-4617,7563018006,1747 W 27TH ST LOS ANGELES CA 90732-4617
1,9.18,25.92,1540 LAGOON AVE,LOS ANGELES CA,90744-2044,7421018006,1540 LAGOON AVE LOS ANGELES CA 90744-2044
2,12.68,22.43,1342 W 223RD ST,LOS ANGELES CA,90501-4122,7347004004,1342 W 223RD ST LOS ANGELES CA 90501-4122
3,20.98,28.50,13611 S MENLO AVE,LOS ANGELES CA,90247-2135,6119004004,13611 S MENLO AVE LOS ANGELES CA 90247-2135
4,19.21,392.85,2506 S MORAY AVE,LOS ANGELES CA,90732-4632,7563016002,2506 S MORAY AVE LOS ANGELES CA 90732-4632
...,...,...,...,...,...,...,...
1033703,12.93,3563.64,1462 GULF AVE,LOS ANGELES CA,90744-2010,7414034002,1462 GULF AVE LOS ANGELES CA 90744-2010
1033704,15.27,3734.36,1529 W 216TH ST,LOS ANGELES CA,90501-3034,7349027024,1529 W 216TH ST LOS ANGELES CA 90501-3034
1033705,8.43,3617.09,1230 N MARINE AVE,LOS ANGELES CA,90744-3129,7420008010,1230 N MARINE AVE LOS ANGELES CA 90744-3129
1033706,11.30,3644.10,928 PIONEER AVE,LOS ANGELES CA,90744-3751,7425035005,928 PIONEER AVE LOS ANGELES CA 90744-3751
