# Advanced analysis on new construction supply and demand trends across the country, price performance, and specific hubs of distress for on market listings

This notebook is broken up into the following sections:
- Setup
- Search for Markets
- Retrieving Data
- Which locations have grown the most?
- The impact of new construction on the new listing supply?
- The impact of new construction on total SFH demand?
- Single Family Home New Construction Price Performance
- Unit level analysis of specific sub markets where new construction on market listings are starting to cut prices the most

We can expect to make the following types of charts:

<p align="center">
  <img src="../../../images/rate_of_sfh_growth.png" alt="Alt text">
</p>

<p align="center">
  <img src="../../../images/share_of_new_listings_sfh.png" alt="Alt text">
</p>

<p align="center">
  <img src="../../../images/share_of_demand_for_new_construction.png" alt="Alt text">
</p>

<p align="center">
  <img src="../../../images/share_of_new_listings_sfh.png" alt="Alt text">
</p>

<p align="center">
  <img src="../../../images/new_construction_sfh_prices.png" alt="Alt text">
</p>

<p align="center">
  <img src="../../../images/atx_listings.png" alt="Alt text">
</p>

### Setup environment and import the Parcl Labs Python Library

This notebook uses an older version of the SDK and its events endpoint to maintain compatibility with the deprecated charting functions. Note that this version of the SDK is missing some of the newer functionality.

In [None]:
# If needed, install the Parcl Labs Python library pinned to version 1.10.0 (deliberately not the
# latest release; see the note above about the deprecated charting functions), plus nbformat.
%pip install --upgrade parcllabs==1.10.0 nbformat

In [2]:
import os
from itertools import islice

import plotly
import numpy as np
import pandas as pd
import plotly.express as px
from datetime import datetime
import plotly.graph_objects as go
from parcllabs import ParclLabsClient
from plotly.subplots import make_subplots
from parcllabs.beta.charting.utils import create_labs_logo_dict
from parcllabs.beta.charting.styling import default_style_config as style_config


# Shared API client. The key is read from the PARCL_LABS_API_KEY environment variable;
# when that variable is unset the placeholder string below is passed instead.
client = ParclLabsClient(
    limit=12,  # set default limit
    api_key=os.getenv('PARCL_LABS_API_KEY', "<your Parcl Labs API key if not set as environment variable>"),
)

### Search for Markets

In [3]:
# National market: the single most populous match for the query "United States".
us_market = client.search.markets.retrieve(
    query='United States',
    limit=1,
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
)

# The 100 most populous CBSA markets.
metros = client.search.markets.retrieve(
    location_type='CBSA',
    limit=100,
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
)

# Stack the national row on top of the metros and collect the ids used by every data request.
markets = pd.concat([us_market, metros])
market_ids = markets['parcl_id'].unique().tolist()

# State label: the text after the last comma, upper-cased, truncated at the first hyphen.
markets['state'] = [
    raw_name.split(',')[-1].strip().upper().split('-')[0]
    for raw_name in markets['name']
]

# Display label: "<name up to the first hyphen/comma>, <state>".
markets['clean_name'] = [
    f"{raw_name.split('-')[0].split(',')[0].strip()}, {state}"
    for raw_name, state in zip(markets['name'], markets['state'])
]

# Collapse the label derived for the national row down to 'USA'.
markets['clean_name'] = markets['clean_name'].replace({'United States Of America, UNITED STATES OF AMERICA': 'USA'})

### Retrieve the Data

In [4]:
# Property type passed to the property-type-filtered requests in the next cell.
property_type = 'SINGLE_FAMILY'

# Housing stock series for every selected market.
stock = client.market_metrics.housing_stock.retrieve(
    limit=300,
    parcl_ids=market_ids,
)

In [5]:
# Property attributes of single family housing events, per market.
# NOTE(review): originally annotated as the input for showing that the average age of homes
# sold falls where housing stock grew the most — confirm it is still used downstream.
property_attributes = client.market_metrics.housing_event_property_attributes.retrieve(
    parcl_ids=market_ids,
    limit=300,
    property_type=property_type
)

# Event counts (sales, new listings for sale, new rental listings) for all single family homes
new_listings = client.market_metrics.housing_event_counts.retrieve(
    parcl_ids=market_ids,
    limit=300,
    property_type=property_type
)

# The same event counts restricted to new construction
nc_listings = client.new_construction_metrics.housing_event_counts.retrieve(
    parcl_ids=market_ids,
    limit=300,
    property_type=property_type
)

# New construction prices. This endpoint used to be called twice with identical arguments
# (once per variable name below); it is now requested once and copied, so both names stay
# available without paying for a second identical API call.
new_construction_housing_event_prices = client.new_construction_metrics.housing_event_prices.retrieve(
    parcl_ids=market_ids,
    limit=300, # lets get the full series
    property_type=property_type
)
nc_prices = new_construction_housing_event_prices.copy()

In [6]:
# Prefix the new construction count columns so they can sit next to the all-homes counts.
nc_listings = nc_listings.rename(columns={
    'sales': 'new_construction_sales',
    'new_rental_listings': 'new_construction_new_rental_listings',
    'new_listings_for_sale': 'new_construction_new_listings_for_sale',
})

# Attach the new construction counts to the all-homes counts, matching on market and date.
new_listings = new_listings.merge(
    nc_listings[[
        'parcl_id',
        'new_construction_new_listings_for_sale',
        'date',
        'new_construction_new_rental_listings',
        'new_construction_sales',
    ]],
    on=['parcl_id', 'date'],
)

# New construction as a share of each event type (for-sale listings, rental listings, sales).
new_listings['pct_new_construction'] = new_listings['new_construction_new_listings_for_sale'].div(new_listings['new_listings_for_sale'])
new_listings['pct_new_construction_rentals'] = new_listings['new_construction_new_rental_listings'].div(new_listings['new_rental_listings'])
new_listings['pct_sales_new_construction'] = new_listings['new_construction_sales'].div(new_listings['sales'])

# Add the display name of each market.
new_listings = new_listings.merge(markets[['parcl_id', 'clean_name']], on='parcl_id')

### Which locations have grown the most since 2019?

Here we are specifically looking for which markets have expanded single family home presence the most in the last 5 years. 

In [7]:
# Growth of the single family stock per market (parcl_id), in date order.
# NOTE(review): the baseline is the first value returned for each market; the charts label this
# as "since 2019" — confirm the housing stock series actually starts in January 2019.
stock = stock.sort_values(by=['parcl_id', 'date'])

# Period-over-period change within each market
stock['pct_change'] = stock.groupby('parcl_id')['single_family'].pct_change()

# Cumulative change relative to each market's first value
stock['first_value_single_family'] = stock.groupby('parcl_id')['single_family'].transform('first')
stock['pct_change_since_first_single_family'] = (
    stock['single_family']
    .sub(stock['first_value_single_family'])
    .div(stock['first_value_single_family'])
)

In [8]:
# Attach display names to the stock series (explicit key instead of "all shared columns").
highest_growth_markets = stock.merge(markets[['parcl_id', 'clean_name']], on='parcl_id')

# Snapshot used for the ranking: the 2024-06-01 rows, fastest cumulative growth first.
highest_growth_markets_tmp = highest_growth_markets.loc[highest_growth_markets['date']=='2024-06-01'].sort_values('pct_change_since_first_single_family', ascending=False)

# The national row is always appended as the benchmark, so keep it out of the top-20 ranking;
# otherwise it would appear twice if it ever ranked among the 20 fastest-growing rows.
us_parcl_id = us_market['parcl_id'].values[0]
is_us_row = highest_growth_markets_tmp['parcl_id'] == us_parcl_id

highest_growth_markets = pd.concat([
    highest_growth_markets_tmp.loc[~is_us_row].head(20),  # top 20 metros
    highest_growth_markets_tmp.loc[is_us_row],            # US benchmark
])

In [None]:
# Prepare the data for the bar chart: the latest date present is shown in the subtitle
chart_max_date = highest_growth_markets['date'].max()
chart_max_date = chart_max_date.strftime('%B, %Y')

# Create a color list where USA is red and others are blue.
# The list has one entry per row, in row order, and is handed to plotly as the discrete color sequence.
colors = ['red' if name == 'USA' else '#177BD9' for name in highest_growth_markets['clean_name']]

# Create the stacked bar chart (one bar per market; the title set here is replaced by update_layout below)
fig = px.bar(highest_growth_markets, 
             x='clean_name', 
             y='pct_change_since_first_single_family', 
             barmode='stack', 
             title=f'Rate of Single Family Housing Stock Growth Since 2019 (as of {chart_max_date})',
             color='clean_name',
             color_discrete_sequence=colors
)

# Apply the SDK's default style configuration (imported as `style_config`) to title, axes and legend.
# NOTE(review): title_font, xaxis_title_font, yaxis_title_font and legend_font near the end of this call
# address the same properties as the nested 'font' / title_font entries above — confirm which is meant to win.
fig.update_layout(
    margin=dict(l=40, r=40, t=100, b=40),  # Increased top margin to accommodate subtitle
    title={
        'text': f'Rate of Single Family Housing Stock Growth Since 2019<br><span style="font-size: 16px; font-style: italic;">Top 20 Metros: Data as of {chart_max_date}</span>',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': style_config['title_font']
    },
    xaxis=dict(
        title_text='',
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        title_font=style_config['title_font_axis'],
        tickfont=dict(size=style_config['axis_font']['size'], color=style_config['axis_font']['color']),
    ),
    yaxis=dict(
        title_text='% Housing Stock Growth',
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        tickfont=style_config['axis_font'],
        zeroline=False,
        tickformat='.0%',
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        title_font=style_config['title_font_axis']
    ),
    plot_bgcolor=style_config['background_color'],
    paper_bgcolor=style_config['background_color'],
    font=dict(color=style_config['font_color']),
    legend_title_text='',
    autosize=False,
    width=1600,
    height=800,
    title_font=dict(size=24),
    xaxis_title_font=dict(size=18),
    yaxis_title_font=dict(size=18),
    legend_title_font=dict(size=14),
    legend_font=dict(size=12),
    legend=dict(
        x=style_config['legend_x'],
        y=style_config['legend_y'],
        xanchor=style_config['legend_xanchor'],
        yanchor=style_config['legend_yanchor'],
        font=style_config['legend_font'],
        bgcolor='rgba(0, 0, 0, 0)'
    ),
)

# Call out the national figure and two metros of interest with labelled arrows.
# Each market is looked up by name; a market that is not among the charted rows is skipped
# (with a message) instead of aborting the whole cell, so the chart still renders when the
# ranking changes.
growth_by_market = (
    highest_growth_markets
    .drop_duplicates('clean_name')  # first occurrence wins, as list.index() did before
    .set_index('clean_name')['pct_change_since_first_single_family']
)

for market in ['USA', 'Charleston, SC', 'Boise City, ID']:
    if market not in growth_by_market.index:
        print(f"Skipping annotation for {market}: not among the charted markets")
        continue

    market_value = growth_by_market[market]
    fig.add_annotation(
        x=market,
        y=market_value,
        text=f"{market}: {market_value:.1%}",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor="white",
        font=dict(size=14, color="white"),
        bgcolor="rgba(0,0,0,0.8)",
        bordercolor="white",
        borderwidth=2,
        borderpad=4,
        ax=0,
        ay=-40  # arrow tail sits 40px above the bar top
    )

# Parcl Labs logo from the SDK's charting utilities
fig.add_layout_image(create_labs_logo_dict())

# Remove the legend as it's redundant with the bar labels
fig.update_layout(showlegend=False)

fig.show()

In [10]:
# Export the charted values with reader-friendly column names.
output = (
    highest_growth_markets[['clean_name', 'date', 'single_family', 'pct_change_since_first_single_family']]
    .rename(columns={
        'clean_name': 'name',
        'pct_change_since_first_single_family': 'single_family_pct_change_since_2019',
    })
)

output.to_csv('sfh_high_growth_markets.csv', index=False)

### High growth + high percentage of single family home new construction listings

Of new single family home listings coming to market, which markets have the highest percentage of new construction single family homes?

In [None]:
# Restrict the listing counts to the high growth markets selected above (this includes the
# national row, which is part of `highest_growth_markets`).
new_listings['date'] = pd.to_datetime(new_listings['date'])
new_listings = new_listings.sort_values('date')
new_listings = new_listings.loc[new_listings['parcl_id'].isin(highest_growth_markets['parcl_id'].unique().tolist())]

# Ten metros with the highest average new construction share of new for-sale listings.
# 'USA' is left out of the ranking because it is always appended as the benchmark
# (`top_markets + ['USA']`); if it ranked in the top ten it would be plotted twice.
top_markets = (
    new_listings.loc[new_listings['clean_name'] != 'USA']
    .groupby('clean_name')['pct_new_construction']
    .mean()
    .nlargest(10)
    .index.tolist()
)

# Only drop rows that lack the charted metric; a blanket dropna() would also discard rows
# whose only gap is in an unrelated column (e.g. the rental share).
df_top = new_listings[new_listings['clean_name'].isin(top_markets + ['USA'])].dropna(subset=['pct_new_construction'])

# Filter data from 2019 onwards
df_top = df_top[df_top['date'].dt.year >= 2019]

# Latest month in the charted data, shown in the subtitle
chart_max_date = df_top['date'].max().strftime('%B, %Y')

# Bright palette: ten market colors followed by the benchmark red
color_scale = [
    '#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8',
    '#F7DC6F', '#BB8FCE', '#82E0AA', '#F1948A', '#85C1E9', '#FF3333'
]

# Hand out palette colors in plotting order, wrapping around if the palette runs out
color_map = {}
for slot, market in enumerate(top_markets + ['USA']):
    color_map[market] = color_scale[slot % len(color_scale)]
color_map['USA'] = '#FF3333'  # Brighter red for USA

# Markets drawn with emphasis; every one except USA is recolored gold
highlight_markets = ['Austin, TX', 'Boise City, ID', 'USA']
color_map.update({name: '#FFD700' for name in highlight_markets if name != 'USA'})

fig = go.Figure()

# One line per market: highlighted markets are solid, thicker and fully opaque
for market in top_markets + ['USA']:
    emphasized = market in highlight_markets
    market_data = df_top[df_top['clean_name'] == market].sort_values('date')
    line_style = dict(
        color=color_map[market],
        width=3 if emphasized else 1.5,
        dash='solid' if emphasized else 'dot',
        shape='spline',
        smoothing=0.3
    )
    fig.add_trace(go.Scatter(
        x=market_data['date'],
        y=market_data['pct_new_construction'],
        mode='lines',
        name=market,
        line=line_style,
        opacity=1 if emphasized else 0.7,
        hovertemplate='%{x|%B %Y}<br>%{y:.1%}<extra></extra>'
    ))

def find_annotation_point(df, market):
    """Return ``(date, pct_new_construction)`` from the most recent row of `market` in `df`.

    Rows are selected by ``clean_name == market`` and ordered by ``date``; the last row
    supplies both values. Raises IndexError when `df` has no rows for `market`.
    """
    latest_row = df.loc[df['clean_name'] == market].sort_values('date').iloc[-1]
    return latest_row['date'], latest_row['pct_new_construction']

# Create annotations for the highlighted markets (the USA benchmark included), each pointing
# at that market's most recent value.
annotations = []
for market in ['Austin, TX', 'Boise City, ID', 'USA']:
    # These names are hard-coded; skip any that did not make it into the charted data
    # rather than failing on an empty selection.
    if not (df_top['clean_name'] == market).any():
        print(f"Skipping annotation for {market}: not among the charted markets")
        continue

    x, y = find_annotation_point(df_top, market)
    annotations.append(dict(
        x=x,
        y=y,
        xref='x',
        yref='y',
        text=f"{market}: {y:.1%}",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor=color_map[market],
        ax=40,
        ay=-40,
        font=dict(size=12, color=color_map[market]),
        bgcolor='rgba(0,0,0,0.7)',
        bordercolor=color_map[market],
        borderwidth=1,
        borderpad=4
    ))


# Dark-mode styling for this chart. It lives under its own name so the `style_config`
# imported from the SDK at the top of the notebook is not overwritten: the earlier bar chart
# cell reads keys (e.g. 'title_font', 'legend_x') that this dict does not define, so rebinding
# `style_config` here made that cell fail with KeyError when re-run.
dark_style_config = {
    'background_color': '#1e1e1e',
    'font_color': '#ffffff',
    'grid_color': '#333333',
    'line_color_axis': '#555555',
    'showgrid': True,
    'gridwidth': 0.5,
    'linewidth': 1,
    'title_font_axis': dict(size=16, color='#ffffff'),
    'axis_font': dict(size=14, color='#ffffff')
}

# Update layout for axes, title, and other styling
fig.update_layout(
    width=1600,
    height=800,
    xaxis=dict(
        title='',
        showgrid=dark_style_config['showgrid'],
        gridwidth=dark_style_config['gridwidth'],
        gridcolor=dark_style_config['grid_color'],
        linecolor=dark_style_config['line_color_axis'],
        linewidth=dark_style_config['linewidth'],
        title_font=dark_style_config['title_font_axis'],
        # extend the axis 30 days past the last point
        range=[df_top['date'].min(), df_top['date'].max() + pd.Timedelta(days=30)],
        tickformat='%b %Y',
        dtick="M6",
        tickangle=45,
        tickfont=dict(size=12)
    ),
    yaxis=dict(
        title='% of New Inventory from New Construction',
        showgrid=dark_style_config['showgrid'],
        gridwidth=dark_style_config['gridwidth'],
        gridcolor=dark_style_config['grid_color'],
        tickfont=dark_style_config['axis_font'],
        zeroline=False,
        tickformat='.0%',
        linecolor=dark_style_config['line_color_axis'],
        linewidth=dark_style_config['linewidth'],
        title_font=dark_style_config['title_font_axis'],
        # 10% headroom above the highest charted value
        range=[0, df_top['pct_new_construction'].max() * 1.1],
        title_standoff=20
    ),
    plot_bgcolor=dark_style_config['background_color'],
    paper_bgcolor=dark_style_config['background_color'],
    font=dict(color=dark_style_config['font_color'], size=14),
    legend=dict(
        title='Market',
        yanchor="top",
        y=0.99,
        xanchor="right",
        x=0.99,
        bgcolor='rgba(0,0,0,0.5)',
        bordercolor='rgba(255,255,255,0.2)',
        borderwidth=1,
        font=dict(size=12, color=dark_style_config['font_color'])
    ),
    margin=dict(l=80, r=20, t=120, b=80),
    title={
        'text': f"Percentage of New Single Family Home Inventory from New Construction<br><span style='font-size: 16px; font-style: italic;'>High Growth Single Family Home Markets: Data as of {chart_max_date}</span>",
        'y': 0.97,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(size=24, color=dark_style_config['font_color'])
    },
    annotations=annotations,
    hovermode="x unified"
)

# Add the Parcl Labs logo (white variant for the dark background)
fig.add_layout_image(
    create_labs_logo_dict(color='white')
)

# Show the plot
fig.show()

In [None]:
# Write the charted series to CSV, exposing the market label as `name`.
output = df_top[['date', 'clean_name', 'pct_new_construction']].rename(columns={'clean_name': 'name'})
output.to_csv('sfh_new_construction_listings.csv', index=False)

### How have these high growth markets trended in demand for new construction relative to all single family home sales?

We are going to explicitly analyze the share of new construction sales in these markets relative to all single family home sales.

In [None]:
# Work on a copy limited to the markets charted in the previous section plus the benchmark
df = new_listings.loc[new_listings['clean_name'].isin(top_markets + ['USA'])].copy()

# Convert date to datetime if it's not already
df['date'] = pd.to_datetime(df['date'])

# Latest month available, shown in the subtitle (taken before the 2019 filter below)
chart_max_date = df['date'].max().strftime('%B, %Y')

# Filter data from 2019 onwards
df = df[df['date'].dt.year >= 2019]

# Calculate average percentage for each market
avg_percentages = df.groupby('clean_name')['pct_sales_new_construction'].mean().sort_values(ascending=False)

# Get top 10 metros. 'USA' is removed before ranking: it is always appended as the benchmark
# below, so letting it occupy one of the ten slots would both push a metro off the chart and
# plot the USA line twice.
top_10_markets = avg_percentages.drop(labels='USA', errors='ignore').head(10).index.tolist()

# Filter dataframe for top 10 markets and USA
df_plot = df[df['clean_name'].isin(top_10_markets + ['USA'])]

# Palette paired position-by-position with the plotted markets (benchmark last)
color_scale = [
    '#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8',
    '#F7DC6F', '#BB8FCE', '#82E0AA', '#F1948A', '#85C1E9', '#FF3333',
]
color_map = {name: shade for name, shade in zip(top_10_markets + ['USA'], color_scale)}

# Highlighted markets: gold for the metros, red for the USA benchmark
highlight_markets = ['Austin, TX', 'Boise City, ID', 'USA']
for market in highlight_markets:
    color_map[market] = '#FF3333' if market == 'USA' else '#FFD700'

# Create the figure
fig = go.Figure()

# One line per market; highlighted ones are solid, thicker and fully opaque
for market in top_10_markets + ['USA']:
    emphasized = market in highlight_markets
    market_data = df_plot[df_plot['clean_name'] == market].sort_values('date')
    line_style = dict(
        color=color_map[market],
        width=3 if emphasized else 1.5,
        dash='solid' if emphasized else 'dot'
    )
    fig.add_trace(go.Scatter(
        x=market_data['date'],
        y=market_data['pct_sales_new_construction'],
        mode='lines',
        name=market,
        line=line_style,
        opacity=1 if emphasized else 0.7,
        hovertemplate='%{x|%B %Y}<br>%{y:.1%}<extra></extra>'
    ))

# Dark-mode canvas, centered two-line title, and a legend boxed into the top-right corner
fig.update_layout(
    width=1600,
    height=800,
    plot_bgcolor='#1e1e1e',
    paper_bgcolor='#1e1e1e',
    font={'color': 'white', 'size': 14},
    hovermode="x unified",
    margin={'l': 80, 'r': 20, 't': 120, 'b': 80},
    title=dict(
        text=f"Percentage of Sales from New Construction<br><span style='font-size: 16px; font-style: italic;'>High Growth Single Family Home Markets: Data as of {chart_max_date}</span>",
        y=0.97,
        x=0.5,
        xanchor='center',
        yanchor='top',
        font={'size': 24, 'color': 'white'},
    ),
    legend={
        'yanchor': "top",
        'y': 0.99,
        'xanchor': "right",
        'x': 0.99,
        'bgcolor': 'rgba(0,0,0,0.5)',
        'bordercolor': 'rgba(255,255,255,0.2)',
        'borderwidth': 1,
        'font': {'size': 12},
    },
)

# X axis: untitled, a tick every six months, labels rotated 45 degrees
fig.update_xaxes(
    title_text="",
    tickformat='%b %Y',
    dtick="M6",
    tickangle=45,
    tickfont={'size': 12},
    showgrid=True,
    gridwidth=0.5,
    gridcolor='#333333',
)

# Y axis: percent ticks, starting at zero with 10% headroom above the largest plotted value
fig.update_yaxes(
    title_text="Percentage of Sales",
    title_standoff=20,
    tickformat='.0%',
    tickfont={'size': 12},
    range=[0, df_plot['pct_sales_new_construction'].max() * 1.1],
    showgrid=True,
    gridwidth=0.5,
    gridcolor='#333333',
)

# Add annotations for the highlighted metros (excluding USA), pointing at their value on the
# most recent date in the plotted data.
latest_date = df_plot['date'].max()
annotation_markets = ['Austin, TX', 'Boise City, ID']

for market in annotation_markets:
    latest_data = df_plot[(df_plot['clean_name'] == market) & (df_plot['date'] == latest_date)]

    # These names are hard-coded: skip a market that is not plotted, has no row for the
    # latest date, or has no value there, instead of raising IndexError / printing "nan%".
    if latest_data.empty or pd.isna(latest_data['pct_sales_new_construction'].values[0]):
        print(f"Skipping annotation for {market}: no value on {latest_date}")
        continue

    latest_value = latest_data['pct_sales_new_construction'].values[0]

    fig.add_annotation(
        x=latest_date,
        y=latest_value,
        text=f"{market}: {latest_value:.1%}",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor=color_map[market],
        ax=40,
        ay=0,
        font=dict(size=12, color=color_map[market]),
        bgcolor='rgba(0,0,0,0.7)',
        bordercolor=color_map[market],
        borderwidth=1,
        borderpad=4
    )

# Parcl Labs logo (white variant for the dark background)
fig.add_layout_image(
    create_labs_logo_dict(color='white')
)

# Show the figure
fig.show()

In [None]:
# Write the charted sales-share series to CSV, exposing the market label as `name`.
output = (
    df_plot[['date', 'clean_name', 'pct_sales_new_construction']]
    .rename(columns={'clean_name': 'name'})
)
output.to_csv('sfh_new_construction_sales.csv', index=False)

### Single Family Home New Construction Prices
Let's evaluate the prices of new construction single family homes in these markets across sales, new listings, and rentals. 
We will compare a three-month moving average on a year-over-year basis to evaluate price performance in these markets.

In [None]:
# Start from a copy of the new construction price series and add market display names
df = (
    new_construction_housing_event_prices
    .copy()
    .merge(markets[['parcl_id', 'clean_name']], on='parcl_id')
)

# Keep the markets from the sales-share chart plus the USA benchmark
df = df.loc[df['clean_name'].isin(top_10_markets + ['USA'])]

# Make sure dates are datetimes; the latest one is shown in the subtitle
df['date'] = pd.to_datetime(df['date'])
chart_max_date = df['date'].max().strftime('%B, %Y')

# Order rows by market, then chronologically, for the rolling calculations that follow
df = df.sort_values(by=['clean_name', 'date'])

def calculate_yoy_change(group, column):
    """Add a 3-row moving average of `column` and its 12-row percent change.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single market, already sorted by date.
        NOTE(review): the "year over year" reading assumes one row per month with no gaps,
        because the comparison is made 12 rows back — confirm against the data.
    column : str
        Name of the numeric column to process.

    Returns
    -------
    pandas.DataFrame
        A new frame with two extra columns: ``{column}_3ma`` (mean of the current and two
        preceding rows; NaN until three rows are available) and ``{column}_yoy_change``
        (percent change of that average versus 12 rows earlier). The frame passed in is
        left unmodified.
    """
    moving_avg = group[column].rolling(window=3, min_periods=3).mean()
    return group.assign(**{
        f'{column}_3ma': moving_avg,
        f'{column}_yoy_change': moving_avg.pct_change(periods=12),
    })

# Price-per-square-foot medians to analyse: sales, new for-sale listings, new rental listings
metrics = [
    'price_per_square_foot_median_sales',
    'price_per_square_foot_median_new_listings_for_sale',
    'price_per_square_foot_median_new_rental_listings'
]

# Compute the moving average and YoY change market by market. The groups are iterated
# explicitly rather than via groupby.apply: apply's handling of the grouping column is
# deprecated, and once it is excluded from the frames handed to the function, `clean_name`
# would be lost from the result. Groups come back in sorted key order and the index is reset,
# matching the previous layout.
for metric in metrics:
    df = pd.concat(
        [calculate_yoy_change(market_rows, metric) for market_name, market_rows in df.groupby('clean_name')],
        ignore_index=True,
    )

# Get the most recent row for each market, ordered by the YoY change of the first metric (sales)
latest_data = df.loc[df.groupby('clean_name')['date'].idxmax()].sort_values(f'{metrics[0]}_yoy_change', ascending=True)

# Three side-by-side panels that share one y axis (the market names)
fig = make_subplots(
    rows=1,
    cols=3,
    shared_yaxes=True,
    horizontal_spacing=0.05,
    subplot_titles=("Sales", "New Listings", "Rentals"),
)

# Bar colors by sign of the change
colors = {'positive': '#2ECC40', 'negative': '#FF4136'}

# One horizontal bar trace per metric, placed in consecutive columns
for i, metric in enumerate(metrics):
    yoy_change = f'{metric}_yoy_change'
    changes = latest_data[yoy_change]
    # green for changes >= 0, red for everything else
    color = [colors['positive'] if x >= 0 else colors['negative'] for x in changes]
    labels = [f"{x:.1%}" for x in changes]

    bars = go.Bar(
        x=changes,
        y=latest_data['clean_name'],
        orientation='h',
        marker_color=color,
        text=labels,
        textposition='outside',
        hovertemplate='%{y}: %{x:.1%}<extra></extra>'
    )
    fig.add_trace(bars, row=1, col=i+1)

# Update layout: dark canvas, centered two-line title, no legend.
fig.update_layout(
    title={
        'text': f"Year-over-Year Change (3-month moving average) in Price per Square Foot<br><span style='font-size: 16px; font-style: italic;'>New Construction Single Family Homes: Data as of {chart_max_date}</span>",
        'y': 0.96,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(size=24, color='white')
    },
    font=dict(color='white'),
    plot_bgcolor='#1e1e1e',
    paper_bgcolor='#1e1e1e',
    height=800,
    width=1600,
    showlegend=False,
    # Bottom margin enlarged from 25 to 120: the shared x-axis title is an annotation placed at
    # paper y=-0.15 (roughly 80-95px below the plot area), which a 25px margin pushed outside
    # the figure so it was never visible.
    margin=dict(l=125, r=25, t=140, b=120)
)

# Shared x-axis styling for all three panels: no tick labels, faint grid, bold white zero line
fig.update_xaxes(
    showticklabels=False,  # Hide x-axis tick labels
    zeroline=True,
    zerolinewidth=2,
    zerolinecolor='white',
    showgrid=True,
    gridwidth=0.5,
    gridcolor='#333333',
)

# Fixed x range per panel, in column order:
# Sales -25% to +25%, New Listings -10% to +12%, Rentals -10% to +10%
for subplot_col, x_range in enumerate([[-0.25, 0.25], [-0.1, 0.12], [-0.1, 0.1]], start=1):
    fig.update_xaxes(range=x_range, row=1, col=subplot_col)

# Add vertical separating lines between the three panels.
# They are drawn in paper coordinates (fractions of the whole plotting area). The previous
# fig.add_vline(x=0.33 / 0.67) calls placed the lines at the *data* values 0.33 and 0.67 on
# every subplot's x axis, which lie outside the x ranges set for all three panels (at most
# 0.25), so no separator was actually visible.
for boundary in (0.33, 0.67):
    fig.add_shape(
        type='line',
        xref='paper',
        yref='paper',
        x0=boundary,
        x1=boundary,
        y0=0,
        y1=1,
        line=dict(color='white', width=1),
        opacity=0.5,
    )

# Enlarge the annotations that exist so far, i.e. the subplot titles
# (this runs before the axis-title annotation below is added)
fig.update_annotations(font_size=18)

# A single x-axis title centered under the whole figure, positioned in paper coordinates
fig.add_annotation(
    text="Year-over-Year Change (3-month moving average)",
    xref='paper',
    yref='paper',
    x=0.5,
    y=-0.15,
    xanchor='center',
    yanchor='top',
    showarrow=False,
    font={'size': 16, 'color': 'white'},
)

# Market names are shown on the first panel only
fig.update_yaxes(
    showgrid=False,
    tickfont={'size': 14},
    row=1, col=1
)

# Hide y-axis labels for the second and third subplots
for subplot_col in (2, 3):
    fig.update_yaxes(showticklabels=False, row=1, col=subplot_col)

# Parcl Labs logo (white variant for the dark background)
fig.add_layout_image(create_labs_logo_dict(color='white'))

# Show the figure
fig.show()

In [None]:
# Export the latest YoY change per market for all three price metrics.
export_columns = [
    'date',
    'clean_name',
    'price_per_square_foot_median_sales_yoy_change',
    'price_per_square_foot_median_new_listings_for_sale_yoy_change',
    'price_per_square_foot_median_new_rental_listings_yoy_change',
]
output = latest_data[export_columns].rename(columns={'clean_name': 'name'})
output.to_csv('sfh_new_construction_prices.csv', index=False)

### Let's get unit level analysis for Austin, Texas, for new construction listings over the past 3 months
Here we are going to evaluate on market listings for new construction, specifically looking for clusters within the Austin metro where there are high volumes of new construction listings with price cuts. 

In [None]:
# ZIP codes (as strings) that the next cell searches one by one for single family properties.
# NOTE(review): presumably the ZIP codes making up the Austin metro — the source of this list
# is not recorded here; confirm coverage before relying on it.
austin_zip_codes = [
    '73301', '73344', '76527', '76530', '76537', '76573', '76574', '76578', 
    '78602', '78610', '78612', '78613', '78615', '78616', '78617', '78619', 
    '78620', '78621', '78622', '78626', '78627', '78628', '78630', '78634', 
    '78640', '78641', '78642', '78644', '78645', '78646', '78648', '78650', 
    '78651', '78652', '78653', '78655', '78656', '78659', '78660', '78661', 
    '78662', '78664', '78665', '78666', '78667', '78669', '78673', '78674', 
    '78676', '78680', '78681', '78682', '78683', '78691', '78701', '78702', 
    '78703', '78704', '78705', '78708', '78709', '78710', '78711', '78712', 
    '78713', '78714', '78715', '78716', '78717', '78718', '78719', '78720', 
    '78721', '78722', '78723', '78724', '78725', '78726', '78727', '78728', 
    '78729', '78730', '78731', '78732', '78733', '78734', '78735', '78736', 
    '78737', '78738', '78739', '78741', '78742', '78744', '78745', '78746', 
    '78747', '78748', '78749', '78750', '78751', '78752', '78753', '78754', 
    '78755', '78756', '78757', '78758', '78759', '78760', '78761', '78762', 
    '78763', '78764', '78765', '78766', '78767', '78768', '78769', '78772', 
    '78773', '78774', '78778', '78779', '78780', '78781', '78783', '78785', 
    '78789', '78799', '78953', '78957'
]

In [None]:
# Search every ZIP code for single family homes built in 2019 or later.
# A failed request is reported and skipped so one bad ZIP code does not stop the loop.
atx_units = []

for zip_code in austin_zip_codes:
    try:
        units = client.property.search.retrieve(
            zip=zip_code,
            property_type='single_family',
            year_built_min=2019,
        )
    except Exception as e:
        print(f"Error fetching data for zip code {zip_code}: {e}")
    else:
        atx_units.append(units)

In [None]:
# Concatenate the per-ZIP-code search results for the Austin area and get the list of parcl_property_ids
parcl_property_ids = pd.concat(atx_units)['parcl_property_id'].tolist()

def chunked_iterable(iterable, size):
    """Lazily split `iterable` into consecutive lists of at most `size` items.

    Every yielded list holds `size` items except possibly the last, which holds
    whatever remains. An empty iterable yields nothing.
    """
    remaining = iter(iterable)
    for head in remaining:  # the loop ends once `remaining` is exhausted
        batch = [head]
        batch.extend(islice(remaining, size - 1))
        yield batch


# Fetch the event history of every property, 999 ids per request, from 2019-01-01 onwards.
# A failing chunk is reported and skipped so the remaining chunks are still processed.
all_histories = []

for chunk_number, chunk in enumerate(chunked_iterable(parcl_property_ids, 999), start=1):
    print(f"Processing chunk {chunk_number}...")
    try:
        hist = client.property.events.retrieve(
            start_date='2019-01-01',
            parcl_property_ids=chunk,
        )
    except Exception as e:
        print(f"Error processing chunk {chunk_number}: {e}")
    else:
        all_histories.append(hist)

print("Completed processing all property IDs.")

In [None]:
# Combine all chunks into one frame and keep only the events typed as LISTING
atx = pd.concat(all_histories)
data = atx.loc[atx['event_type'].eq('LISTING')]

In [None]:
# Order events per property, then parse the event dates
homes = data.sort_values(by=['parcl_property_id', 'event_date'])
homes['event_date'] = pd.to_datetime(homes['event_date'])

# filter for typical homes: priced strictly between $100k and $2M
df = homes[homes['price'].gt(100000) & homes['price'].lt(2000000)]

In [None]:
# To analyse price cuts we want each event and a later price change side by side in one row.
# First isolate the PRICE_CHANGE events, keeping the first occurrence of each distinct price
# per property; the join and the time filters are applied in the following cells.
df_merge = (
    df.loc[
        df['event_name'] == 'PRICE_CHANGE',
        ['parcl_property_id', 'event_date', 'event_name', 'price'],
    ]
    .drop_duplicates(subset=['parcl_property_id', 'price'], keep='first')
)


In [None]:
# Suffix the price change columns with "_next" so they do not collide with the event columns
# they are about to be joined to
df_merge = df_merge.rename(
    columns={col: f'{col}_next' for col in ('event_date', 'event_name', 'price')}
)

In [None]:
# Pair every listing event with every price change recorded for the same property
data = pd.merge(df, df_merge, on='parcl_property_id')

In [None]:
from datetime import timedelta
# time delta filter - within 90 days of each other
# gap between an event and the price change it was paired with
event_gap = data['event_date_next'] - data['event_date']
follows_event = event_gap > timedelta(days=0)          # price change comes strictly after
within_window = event_gap <= timedelta(days=90)        # ...and no more than 90 days later
price_moved = data['price'] != data['price_next']      # ...and the price actually differs
filtered_df = data[within_window & follows_event & price_moved]

In [None]:
# Reduce the joined frame to one row per (event, following price change) pair
pair_columns = [
    'parcl_property_id',
    'event_date', 'event_name', 'price',
    'event_date_next', 'event_name_next', 'price_next',
]
paired = filtered_df[pair_columns].drop_duplicates()

# Absolute and relative price movement between the event and the price change.
# Computed column-wise: a row-wise apply is slow and fails to assign when the
# frame is empty.
paired['price_diff'] = paired['price_next'] - paired['price']
paired['price_diff_pct'] = paired['price_diff'] / paired['price']

# Whole days between the event and the price change
paired['date_diff_days'] = (paired['event_date_next'] - paired['event_date']).dt.days

# Start timestamp of the weekly period containing the price change
paired['event_week'] = paired['event_date_next'].dt.to_period('W').dt.start_time

# NOTE: despite its name this holds the weekly median of price_diff_pct
median_days_diff = paired.groupby('event_week')['price_diff_pct'].median().reset_index()

In [None]:
# Attach property attributes to every paired event.
# The derived columns on `paired` (price_diff, price_diff_pct, date_diff_days,
# event_week) and `median_days_diff` are already built in the cell that
# creates `paired`, so they are not recomputed here; only the property lookup
# that actually takes effect is kept.
prop_data = df[['parcl_property_id', 'city', 'zip5', 'latitude', 'longitude', 'year_built', 'state_abbreviation']].drop_duplicates()
paired_w_prop = paired.merge(prop_data, on='parcl_property_id')

In [None]:
# need to filter out duplicate joins: an event may have been paired with
# several later price changes, so keep only the pair with the smallest gap
event_keys = ['parcl_property_id', 'event_date', 'event_name', 'price']
min_date_diff = (
    paired_w_prop
    .groupby(event_keys)['date_diff_days']
    .min()
    .reset_index(name='min_date_diff')
)
# join min_date_diff to changes
paired_w_prop = paired_w_prop.merge(min_date_diff, on=event_keys)
is_closest_change = paired_w_prop['min_date_diff'] == paired_w_prop['date_diff_days']
paired_w_prop = paired_w_prop.loc[is_closest_change]

# bring in the new-construction flag recorded on the originating event
construction_flags = df[['parcl_property_id', 'event_date', 'new_construction_flag']]
pwp = paired_w_prop.merge(
    construction_flags,
    on=['parcl_property_id', 'event_date'],
    how='left',
)
# filter to new construction event cycles
is_new_construction = pwp['new_construction_flag'] == 1
pwp = pwp.loc[is_new_construction]

In [None]:
# Start timestamp of the weekly period containing each price change.
# `assign` returns a new frame, so nothing is written into the filtered slice
# that `pwp` came from (which can trigger SettingWithCopyWarning).
pwp = pwp.assign(
    event_week=pwp['event_date_next'].dt.to_period('W').dt.start_time
)
# Median relative price change per state and week
median_price_diff = pwp.groupby(['state_abbreviation', 'event_week'])['price_diff_pct'].median().reset_index()
# Drop state-weeks whose median change is +10% or more
median_price_diff = median_price_diff.loc[median_price_diff['price_diff_pct'] < 0.1]

In [None]:
# calculate change since first listing
pwp = pwp.sort_values(['parcl_property_id', 'event_date'])
first_price = pwp.groupby('parcl_property_id')['price'].first().reset_index(name='first_price')
pwp = pwp.merge(first_price, on='parcl_property_id')
pwp['pct_change_since_list'] = pwp.apply(lambda x: (x['price_next'] - x['first_price']) / x['first_price'], axis=1)

In [None]:
# Keep event cycles dated on or after '5/1/2024' (pandas parses this
# month-first, i.e. 1 May 2024), then keep only the most recent remaining row
# for each property / location
recent_cycles = pwp.loc[pwp['event_date'] >= '5/1/2024']
markdowns = (
    recent_cycles
    .sort_values(['parcl_property_id', 'event_date'])
    .drop_duplicates(
        subset=['parcl_property_id', 'latitude', 'longitude'],
        keep='last',
    )
)

In [None]:
# Mapbox access token used to render the base map. The MAPBOX_ACCESS_TOKEN
# environment variable takes precedence, so the token below can be swapped
# out without editing the notebook.
mbox_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiZGF0YXdyZXN0bGVyLXBhcmNsbGFicyIsImEiOiJjbHZ2bTRidGUxdndtMndvNnI5eGY5dDVoIn0.wXcsWmRjcDAlutloLezm5Q'
)

# Keep only net markdowns (negative change since the first price).
# NOTE: this rebinds `df`, which earlier cells use for the filtered event
# history; re-run those cells before re-running anything above this one.
df = markdowns.copy()
df = df.loc[df['pct_change_since_list'] < 0]
# Filter data for Austin (rows are selected by state abbreviation here)
austin_data = df[df['state_abbreviation'] == 'TX']

# Calculate min and max values for color scale
min_val = austin_data['pct_change_since_list'].min()
max_val = austin_data['pct_change_since_list'].max()

# Create heatmap
heatmap = go.Densitymapbox(
    lat=austin_data['latitude'],
    lon=austin_data['longitude'],
    z=austin_data['pct_change_since_list'],
    radius=10,
    colorscale=[
        [0, 'rgb(165,0,38)'],    # Deep red for most negative
        [0.25, 'rgb(215,48,39)'],
        [0.5, 'rgb(244,109,67)'],
        [0.75, 'rgb(253,174,97)'],
        [1, 'rgb(255,255,191)']  # Light yellow for least negative
    ],
    zmin=min_val,
    zmax=max_val,
    # pct_change_since_list is a fraction (-0.05 means -5%), so use the percent
    # format type, which scales by 100 -- consistent with the colorbar's
    # tickformat below
    hovertext=austin_data['pct_change_since_list'].apply(lambda x: f'{x:.2%}'),
    hoverinfo='text',
    colorbar=dict(
        title=dict(
            text="% Markdown",
            font=dict(size=14)
        ),
        tickformat='.0%',
        thickness=20,
        len=0.6,
        x=0.88,
        y=0.5,
        yanchor="middle",
        bgcolor='rgba(255,255,255,0.8)',
        bordercolor='rgba(0,0,0,0.5)',
        borderwidth=1
    )
)

# Create figure
fig = go.Figure(heatmap)

# Update layout: dark base map centred on the median listing coordinates
fig.update_layout(
    mapbox_accesstoken=mbox_token,
    mapbox=dict(
        center=dict(lat=austin_data['latitude'].median(), lon=austin_data['longitude'].median()),
        style="dark",
        zoom=9
    ),
    height=800,
    width=1000,
    title=dict(
        text="New Construction Single Family Home Active Listing Markdowns: Austin, TX",
        font=dict(size=24, family="Arial",  weight='bold', color='white'),
        y=0.98,
        x=0.5,
        xanchor='center'
    ),
    plot_bgcolor='#1e1e1e',
    paper_bgcolor='#1e1e1e',
    font=dict(family="Arial", size=14, color="#333333"),
    margin=dict(l=0, r=0, t=40, b=0)
)

# Overlay the Parcl Labs logo
fig.add_layout_image(
    create_labs_logo_dict(color='white', x=0.2)
)

# Show the figure
fig.show()

In [None]:
# df.to_csv('ct.csv', index=False)