In [1]:
import os
from parcllabs import ParclLabsClient
import plotly.express as px
import requests
import pandas as pd

In [2]:
# Read the credential from the environment so the key never lives in the notebook.
# The lookup yields None when PARCL_LABS_API_KEY is not set.
api_key = os.environ.get("PARCL_LABS_API_KEY")
client = ParclLabsClient(api_key, turbo_mode=True)

# Market identifier sent as `parcl_ids` in the search payload (named for Houston).
houston_parcl_id = 5_381_035

In [3]:
# Request body for the property search call: restricts results to listing/sale
# events inside the date window, for the selected market and property types,
# using the owner flags given below.
payload = dict(
    event_filters=dict(
        event_names=["LISTED_SALE", "SOLD"],
        max_event_date="2025-04-18",
        min_event_date="2024-01-01",
    ),
    owner_filters=dict(
        is_investor_owned=False,
        is_owner_occupied=True,
    ),
    parcl_ids=[houston_parcl_id],
    property_filters=dict(
        property_types=["SINGLE_FAMILY", "CONDO", "TOWNHOUSE"],
    ),
)


In [4]:
def fetch_listings(payload, limit=1000, timeout=60):
    """Fetch every page of results from the v2 property search endpoint.

    The same ``payload`` is POSTed repeatedly while the ``offset`` query
    parameter advances by ``limit``. Paging stops when the API returns an
    empty page, when ``pagination.has_more`` is missing or falsy, or when a
    response has a non-200 status.

    Args:
        payload: JSON-serialisable request body, sent unchanged on every page.
        limit: Page size, passed as the ``limit`` query parameter.
        timeout: Seconds to wait on each HTTP request before ``requests``
            raises a timeout error instead of hanging indefinitely.

    Returns:
        list: The ``data`` records from all pages fetched, concatenated. If a
        page comes back with a non-200 status the error is printed and the
        records gathered so far are returned, so the result may be partial.

    Raises:
        requests.RequestException: Propagated from ``requests.post`` on
            connection failures or timeouts.
    """
    headers = {"Authorization": api_key, "Content-Type": "application/json"}
    all_results = []
    offset = 0

    while True:
        # Pagination is carried in the query string; the body never changes.
        base_url = f"https://api.parcllabs.com/v2/property_search?limit={limit}&offset={offset}"
        print(base_url)

        response = requests.post(
            base_url, headers=headers, json=payload, timeout=timeout
        )

        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            print(response.text)
            break

        data = response.json()
        results = data.get("data", [])

        if not results:
            break

        all_results.extend(results)
        offset += limit

        print(f"Fetched {len(results)} records. Total so far: {len(all_results)}")

        # `or {}` also covers a response whose "pagination" field is null.
        pagination = data.get("pagination") or {}
        if not pagination.get("has_more", False):
            break

    return all_results


# Fetch every page of search results for the payload defined above.
# Each element of `listing_events` is one property record that carries its own
# "events" list (see the flattening loop further down), so the total printed
# here counts records returned, not individual events.
listing_events = fetch_listings(payload)
print(f"Total listings fetched: {len(listing_events)}")

https://api.parcllabs.com/v2/property_search?limit=1000&offset=0
Fetched 1000 records. Total so far: 1000
https://api.parcllabs.com/v2/property_search?limit=1000&offset=1000
Fetched 1000 records. Total so far: 2000
https://api.parcllabs.com/v2/property_search?limit=1000&offset=2000
Fetched 1000 records. Total so far: 3000
https://api.parcllabs.com/v2/property_search?limit=1000&offset=3000
Fetched 1000 records. Total so far: 4000
https://api.parcllabs.com/v2/property_search?limit=1000&offset=4000
Fetched 1000 records. Total so far: 5000
https://api.parcllabs.com/v2/property_search?limit=1000&offset=5000
Fetched 1000 records. Total so far: 6000
https://api.parcllabs.com/v2/property_search?limit=1000&offset=6000
Fetched 1000 records. Total so far: 7000
https://api.parcllabs.com/v2/property_search?limit=1000&offset=7000
Fetched 1000 records. Total so far: 8000
https://api.parcllabs.com/v2/property_search?limit=1000&offset=8000
Fetched 1000 records. Total so far: 9000
https://api.parcllabs.

In [24]:
# Turn the nested API records into a one-row-per-event DataFrame, then tag each
# event with its month, year and a price bucket so it can be aggregated later.
# TODO (carried over): also aggregate counts by property type and zip code.


def _iter_event_records(properties):
    """Yield one flat dict per (property, event) pair."""
    for prop in properties:
        meta = prop["property_metadata"]
        for evt in prop["events"]:
            yield {
                # Property-level fields, repeated on every event row.
                "parcl_property_id": prop["parcl_property_id"],
                "address": f"{meta['address1']} {meta['address2'] or ''}".strip(),
                "city": meta["city"],
                "state": meta["state"],
                "zip5": meta["zip5"],
                "latitude": meta["latitude"],
                "longitude": meta["longitude"],
                "property_type": meta["property_type"],
                "bedrooms": meta["bedrooms"],
                "bathrooms": meta["bathrooms"],
                "sq_ft": meta["sq_ft"],
                "year_built": meta["year_built"],
                # Event-level fields.
                "event_date": evt["event_date"],
                "event_name": evt["event_name"],
                "price": evt["price"],
                "investor_flag": evt["investor_flag"],
                "owner_occupied_flag": evt["owner_occupied_flag"],
            }


flattened_data = list(_iter_event_records(listing_events))

# Price bucket edges and their display labels (one label per interval).
# pd.cut's default intervals are closed on the right, so a price sitting exactly
# on an edge lands in the lower bucket; prices outside every interval are NaN.
price_bins = [0, 250_000, 500_000, 750_000, 1_000_000, float("inf")]
price_labels = ["<$250K", "$250K-$500K", "$500K-$750K", "$750K-$1M", ">$1M"]

# `month` ("YYYY-MM") and `year` ("YYYY") are strings derived from the parsed
# event date; `price_level` is the categorical bucket from pd.cut.
df = pd.DataFrame(flattened_data).assign(
    event_date=lambda frame: pd.to_datetime(frame["event_date"]),
    month=lambda frame: frame["event_date"].dt.strftime("%Y-%m"),
    year=lambda frame: frame["event_date"].dt.strftime("%Y"),
    price_level=lambda frame: pd.cut(
        frame["price"], bins=price_bins, labels=price_labels
    ),
)


In [25]:
# Summarise events per year / month / price bucket / event type.
#
# `price_level` is categorical, so observed=True is passed explicitly: without
# it pandas builds a row for every combination of group keys (including
# impossible ones such as year 2024 with month 2025-01) and, on recent
# versions, warns that the default is changing. Only combinations that occur
# in the data are kept.
agg_df_no_property_type = (
    df.groupby(["year", "month", "price_level", "event_name"], observed=True)
    .agg(
        {
            # "count" is the number of non-null prices in the group.
            "price": ["count", "mean", "min", "max"],
            "sq_ft": "mean",
            "bedrooms": "mean",
            "bathrooms": "mean",
        }
    )
    .reset_index()
)

# Flatten the two-level column names: ("price", "mean") -> "price_mean",
# ("year", "") -> "year".
agg_df_no_property_type.columns = [
    "_".join(col).strip("_") for col in agg_df_no_property_type.columns.values
]

print(agg_df_no_property_type.head(n=20))

# Rename columns for clarity
agg_df_no_property_type = agg_df_no_property_type.rename(
    columns={
        "price_count": "count",
        "price_mean": "avg_price",
        "price_min": "min_price",
        "price_max": "max_price",
        "sq_ft_mean": "avg_sq_ft",
        "bedrooms_mean": "avg_bedrooms",
        "bathrooms_mean": "avg_bathrooms",
    }
)

# Guard kept from the original flow: drop any group without a priced event.
# With observed=True this is normally a no-op.
agg_df_no_property_type = agg_df_no_property_type[
    agg_df_no_property_type["count"] > 0
]

    year    month  price_level   event_name  price_count    price_mean  \
0   2024  2024-01       <$250K  LISTED_SALE          218  1.842344e+05   
1   2024  2024-01       <$250K         SOLD          132  1.891845e+05   
2   2024  2024-01  $250K-$500K  LISTED_SALE          396  3.597185e+05   
3   2024  2024-01  $250K-$500K         SOLD          232  3.512255e+05   
4   2024  2024-01  $500K-$750K  LISTED_SALE          166  6.196684e+05   
5   2024  2024-01  $500K-$750K         SOLD           78  6.020864e+05   
6   2024  2024-01    $750K-$1M  LISTED_SALE           82  8.744244e+05   
7   2024  2024-01    $750K-$1M         SOLD           38  8.844036e+05   
8   2024  2024-01         >$1M  LISTED_SALE          115  2.152081e+06   
9   2024  2024-01         >$1M         SOLD           28  1.952635e+06   
10  2024  2024-02       <$250K  LISTED_SALE          252  1.875419e+05   
11  2024  2024-02       <$250K         SOLD          127  1.911096e+05   
12  2024  2024-02  $250K-$500K  LISTED





In [26]:
# Quick look at the aggregated rows that feed the chart.
print(agg_df_no_property_type.head(20))

# Monthly event counts as line charts:
#   x     -> month ("YYYY-MM")
#   y     -> number of events
#   color -> event type
#   facet -> one panel per price level
fig = px.line(
    data_frame=agg_df_no_property_type,
    x="month",
    y="count",
    facet_col="price_level",
    color="event_name",
)
fig.show()

    year    month  price_level   event_name  count     avg_price  min_price  \
0   2024  2024-01       <$250K  LISTED_SALE    218  1.842344e+05     3200.0   
1   2024  2024-01       <$250K         SOLD    132  1.891845e+05     2000.0   
2   2024  2024-01  $250K-$500K  LISTED_SALE    396  3.597185e+05   252000.0   
3   2024  2024-01  $250K-$500K         SOLD    232  3.512255e+05   251322.0   
4   2024  2024-01  $500K-$750K  LISTED_SALE    166  6.196684e+05   508000.0   
5   2024  2024-01  $500K-$750K         SOLD     78  6.020864e+05   504625.0   
6   2024  2024-01    $750K-$1M  LISTED_SALE     82  8.744244e+05   755000.0   
7   2024  2024-01    $750K-$1M         SOLD     38  8.844036e+05   757302.0   
8   2024  2024-01         >$1M  LISTED_SALE    115  2.152081e+06  1030000.0   
9   2024  2024-01         >$1M         SOLD     28  1.952635e+06  1054688.0   
10  2024  2024-02       <$250K  LISTED_SALE    252  1.875419e+05    73000.0   
11  2024  2024-02       <$250K         SOLD    127  

In [27]:
# Single-panel version of the chart, restricted to the $250K-$500K bucket:
# monthly event counts with one line per event type.
price_level_data = agg_df_no_property_type.loc[
    agg_df_no_property_type["price_level"].eq("$250K-$500K")
]

fig = px.line(data_frame=price_level_data, x="month", y="count", color="event_name")
fig.show()