<center>
<h1>Welcome to the Lab 🥼🧪</h1>
</center>

## How to identify who is providing the actual supply to housing markets?

We will analyze whether supply is coming from investors, new construction, or existing homeowners. We will break out investors by portfolio size and analyze the impact of each group on the housing market.

#### Need help getting started?

As a reminder, you can get your Parcl Labs API key [here](https://dashboard.parcllabs.com/signup) to follow along.

To run this immediately, you can use Google Colab. Remember, you must set your `PARCL_LABS_API_KEY`.

Run in Colab --> [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ParclLabs/parcllabs-cookbook/blob/main/examples/experimental/supply_and_demand/who_is_providing_supply.ipynb)

In [None]:
# if needed, install and/or upgrade to the latest version of the Parcl Labs Python library
%pip install --upgrade parcllabs==1.10.0 nbformat

In [2]:
import os
import pandas as pd
import plotly.express as px
from datetime import datetime
import plotly.graph_objects as go
from parcllabs import ParclLabsClient
from parcllabs.beta.charting.styling import SIZE_CONFIG
from parcllabs.beta.ts_stats import TimeSeriesAnalysis
from parcllabs.beta.charting.utils import create_labs_logo_dict
from parcllabs.beta.charting.default_charts import create_dual_axis_chart
from parcllabs.beta.charting.styling import default_style_config as style_config


# Prefer the key from the environment; otherwise fall back to the placeholder
# string below, which has to be replaced with a real key before running.
parcl_labs_api_key = os.environ.get(
    'PARCL_LABS_API_KEY',
    "<your Parcl Labs API key if not set as environment variable>",
)

# limit=200 sets the global default limit, which will be handy when
# retrieving the market data itself.
client = ParclLabsClient(api_key=parcl_labs_api_key, limit=200)

In [3]:
# STEP 1. Retrieve Markets

# Let's analyze the top 100 markets comped against the US average.

metros = client.search.markets.retrieve(
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    location_type='CBSA',
    limit=100  # top 100 metros based on population
)

# add us national to comp markets against national numbers
us = client.search.markets.retrieve(
    query='United States',
    limit=1
)

# ignore_index=True gives every market its own row label. Without it the
# national row keeps whatever label it had in `us` (presumably 0, which would
# collide with the first metro -- confirm against the client's return value).
markets = pd.concat([metros, us], ignore_index=True)
market_parcl_ids = markets['parcl_id'].tolist()

market_name = markets['name']

# state: the text after the last comma, upper-cased, keeping only the part
# before the first '-' (the first state of a multi-state market name).
markets['state'] = (
    market_name.str.split(',').str[-1]
    .str.strip()
    .str.upper()
    .str.split('-').str[0]
)

# clean_name: "<text before the first '-' and first ','>, <state>"
principal_city = (
    market_name.str.split('-').str[0]
    .str.split(',').str[0]
    .str.strip()
)
markets['clean_name'] = principal_city + ', ' + markets['state']

# Give the national series a short label; the charts look for this exact name.
markets['clean_name'] = markets['clean_name'].replace({'United States Of America, UNITED STATES OF AMERICA': 'USA'})

In [8]:
PROPERTY_TYPE = 'ALL_PROPERTIES'

# Both for-sale endpoints are queried with exactly the same arguments.
inventory_query = dict(
    parcl_ids=market_parcl_ids,
    limit=300,
    property_type=PROPERTY_TYPE,
)

# supply side of the market
supply = client.for_sale_market_metrics.for_sale_inventory.retrieve(**inventory_query)

# price changing dynamics
price_changes = client.for_sale_market_metrics.for_sale_inventory_price_changes.retrieve(**inventory_query)

In [None]:
# Attach the price-drop counts to the inventory rows for the same market and date.
price_drop_counts = price_changes[['parcl_id', 'date', 'count_price_drop']]
supply = pd.merge(supply, price_drop_counts, on=['parcl_id', 'date'])

# Share of for-sale inventory that has had a price drop (a fraction, not x100).
supply['pct_price_drops'] = supply['count_price_drop'].div(supply['for_sale_inventory'])
supply.head()

In [None]:
# Order rows by market and then by date so that the row-offset based
# pct_change computed per parcl_id in the next cell compares chronologically
# ordered observations.
supply = supply.sort_values(['parcl_id', 'date'])

In [12]:
# Percent change (x100) of pct_price_drops versus the value 52 rows earlier
# within the same parcl_id.
# NOTE(review): treating 52 rows as one year assumes one row per week with no
# gaps for every market -- confirm against the retrieved data.
supply['yoy_change_in_price_drops'] = supply.groupby('parcl_id')['pct_price_drops'].pct_change(52) * 100

In [13]:
# Highest share of price-dropped inventory observed per market in the
# retrieved history ("all-time high" within this data pull).
ath = (
    supply
    .groupby('parcl_id')['pct_price_drops']
    .max()
    .reset_index(name='ath_pct_drops')
)
ath_supply = pd.merge(supply, ath, how='left', on=['parcl_id'])

# Markets whose 2024-07-01 reading equals their own high.
is_snapshot_date = ath_supply['date'] == '2024-07-01'
is_at_high = ath_supply['pct_price_drops'] == ath_supply['ath_pct_drops']
pids = ath_supply.loc[is_snapshot_date & is_at_high]['parcl_id'].tolist()


In [14]:
# Rank the 2024-07-01 snapshot by year-over-year change and keep the top 20.
snapshot = supply.loc[supply['date'] == '2024-07-01']
top_n = snapshot.sort_values('yoy_change_in_price_drops', ascending=False).head(20)
top_n_ids = top_n['parcl_id'].tolist()

# Always keep the national series in the chart as the benchmark line.
us_parcl_id = us['parcl_id'].values[0]
if us_parcl_id not in top_n_ids:
    top_n_ids.append(us_parcl_id)

is_charted_market = supply['parcl_id'].isin(top_n_ids)
chart = supply.loc[is_charted_market]

In [15]:

# Attach the display name of each market; it is used as the line label/color
# key when plotting.
# NOTE: re-running this cell without rebuilding `chart` merges clean_name a
# second time, which pandas resolves by suffixing the duplicated column names.
chart = chart.merge(markets[['parcl_id', 'clean_name']], on='parcl_id')

In [None]:
# Most recent observation in the chart data; used both for the title and as
# the x position of the right-hand line labels.
latest_date = chart['date'].max()
max_date_for_chart = latest_date.date()
max_date_for_chart = max_date_for_chart.strftime('%B %d, %Y')

# Create the line chart using Plotly Express
fig = px.line(
    chart,
    x='date',
    y='pct_price_drops',
    color='clean_name',
    line_group='clean_name',
    labels={'pct_price_drops': '% of Inventory with Price Cuts'},
    title=f'Percentage of Inventory with Price Reductions ({max_date_for_chart})'
)

# Highlight the national benchmark; draw every other market as a muted dashed line.
for trace in fig.data:
    if trace.name == 'USA':
        trace.update(
            line=dict(color='red', width=4),
            opacity=1
        )
    else:
        trace.update(
            line=dict(color='lightblue', dash='dash', width=2),
            opacity=0.8
        )
    # Lines only: no markers or text on the traces themselves
    trace.update(
        mode='lines'
    )

# Add a name label at the right-hand end of each line
annotations = []
y_positions = []

latest_rows = chart.loc[chart['date'] == latest_date]

for trace in fig.data:
    # Value of this market on the latest date
    end_values = latest_rows.loc[
        latest_rows['clean_name'] == trace.name, 'pct_price_drops'
    ].values

    # A market with no observation on the latest date has no right-edge point
    # to label; skip it instead of failing on an empty lookup.
    if len(end_values) == 0:
        continue
    last_y_value = end_values[0]

    # Only add the annotation if it doesn't overlap with existing annotations
    if any(abs(last_y_value - placed_y) < 0.01 for placed_y in y_positions):  # Adjust threshold as needed
        continue

    annotations.append(dict(
        x=latest_date,
        y=last_y_value,
        xref='x',
        yref='y',
        text=trace.name,
        showarrow=False,
        xanchor='left',
        font=dict(size=12)  # Adjust the font size if needed
    ))
    y_positions.append(last_y_value)

fig.add_layout_image(
    create_labs_logo_dict()
)

# Update layout for axes, title, and other styling.
# The axis title font is set through the nested `title.font` attribute rather
# than the deprecated `titlefont` shortcut, which recent Plotly releases are
# understood to reject.
fig.update_layout(
    width=1600,
    height=800,
    xaxis=dict(
        title=dict(text='', font=style_config['title_font_axis']),
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth']
    ),
    yaxis=dict(
        title=dict(text='% Price Reductions', font=style_config['title_font_axis']),
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        tickfont=style_config['axis_font'],
        zeroline=False,
        tickformat='.0%',
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth']
    ),
    plot_bgcolor=style_config['background_color'],
    paper_bgcolor=style_config['background_color'],
    font=dict(color=style_config['font_color']),
    showlegend=False,  # Lines are labelled directly, so no legend
    margin=dict(l=40, r=40, t=80, b=40),
    title={
        'y': 0.98,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(size=24)
    },
    annotations=annotations
)

fig.show()


In [None]:
start_date = '2022-09-01'

# Every endpoint below is queried for the same markets over the same window.
history_query = {
    'parcl_ids': market_parcl_ids,
    'start_date': start_date,
}

# for-sale inventory
supply = client.for_sale_market_metrics.for_sale_inventory.retrieve(**history_query)

# new-listing counts come from 3 separate endpoints:
# the overall market, new construction, and investors
new_listings = client.market_metrics.housing_event_counts.retrieve(**history_query)
nc_listings = client.new_construction_metrics.housing_event_counts.retrieve(**history_query)
investor_listings = client.investor_metrics.housing_event_counts.retrieve(**history_query)

In [None]:
# Preview the first rows of the new-construction housing event counts.
nc_listings.head()

In [None]:
# Investor ownership of the housing stock, for the same markets and start date
# as the listing series.
ownership_endpoint = client.investor_metrics.housing_stock_ownership
investor_ownership = ownership_endpoint.retrieve(
    parcl_ids=market_parcl_ids,
    start_date=start_date,
)

investor_ownership

In [20]:
# need to index supply monthly as its currently a weekly series
supply['date_month'] = supply['date'].dt.to_period('M').dt.to_timestamp()
max_weekly_date = supply.groupby(['parcl_id', 'date_month'])['date'].max().reset_index()
supply = supply.merge(max_weekly_date, on=['parcl_id', 'date_month', 'date'], how='inner') # inner join will get us the last week of each month
supply = supply.rename(columns={
    'date': 'date_arch',
    'date_month': 'date'
})
supply = supply[['date', 'parcl_id', 'for_sale_inventory']]

In [21]:
# Keep only the join keys and the new-listing count from each source:
# overall market (nl), new construction (ncl) and investors (nil).
cols = ['date', 'parcl_id', 'new_listings_for_sale']
nl, ncl, nil = (
    frame[cols]
    for frame in (new_listings, nc_listings, investor_listings)
)

In [22]:
# Give each source's listing count its own column name so the three frames can
# sit side by side after merging.
ncl = ncl.rename(columns={'new_listings_for_sale': 'new_construction_listings_for_sale'})
nil = nil.rename(columns={'new_listings_for_sale': 'new_investor_listings_for_sale'})

# Listing counts: overall market + new construction + investors.
data = nl.merge(ncl, on=['date', 'parcl_id']).merge(nil, on=['date', 'parcl_id'])

# Market names first, then the listing columns.
data = markets[['parcl_id', 'name']].merge(data, on='parcl_id')

# Month-indexed for-sale inventory and investor ownership share.
data = data.merge(supply, on=['parcl_id', 'date'])
ownership_share = investor_ownership[['date', 'parcl_id', 'pct_ownership']]
data = data.merge(ownership_share, on=['date', 'parcl_id'])

# Scale pct_ownership down by 100 so it is on the same footing as the
# ratio columns it is later subtracted from.
data['pct_ownership'] = data['pct_ownership'].div(100)

In [None]:
# Preview the first rows of the merged listings / inventory / ownership frame.
data.head()

In [24]:
# Short aliases for the inputs of the ratio columns.
inventory = data['for_sale_inventory']
all_new_listings = data['new_listings_for_sale']
construction_listings = data['new_construction_listings_for_sale']
investor_listings_count = data['new_investor_listings_for_sale']

# New listings relative to the total for-sale inventory.
data['pct_new_listings_of_all'] = all_new_listings.div(inventory)

# Share of new listings contributed by each supplier group.
data['pct_new_construction_listings_of_new'] = construction_listings.div(all_new_listings)
data['pct_new_investor_listings_of_new'] = investor_listings_count.div(all_new_listings)

# Each group's new listings relative to the total for-sale inventory.
data['pct_new_construction_listings_of_all'] = construction_listings.div(inventory)
data['pct_new_investor_listings_of_all'] = investor_listings_count.div(inventory)

# Skew: investor listing share minus investor ownership share. A positive value
# means investors list more than their ownership share.
data['ownership_to_list_skew_new_listings'] = data['pct_new_investor_listings_of_new'].sub(data['pct_ownership'])
data['ownership_to_list_skew_of_all'] = data['pct_new_investor_listings_of_all'].sub(data['pct_ownership'])

In [None]:
# Top 20 markets on 5/1/2024 ranked by how far the investor share of new
# listings exceeds the investor ownership share.
data.loc[data['date']=='5/1/2024'].sort_values('ownership_to_list_skew_new_listings', ascending=False).head(20)

In [None]:
# Top 20 markets on 5/1/2024 ranked by the share of new listings that come
# from new construction.
data.loc[data['date']=='5/1/2024'].sort_values('pct_new_construction_listings_of_new', ascending=False).head(20)

In [27]:
# state: text after the last comma, upper-cased, keeping only the part before
# the first '-'.
data['state'] = [
    market_name.split(',')[-1].strip().upper().split('-')[0]
    for market_name in data['name']
]

# clean_name: "<text before the first '-' and first ','>, <state>"
data['clean_name'] = [
    f"{market_name.split('-')[0].split(',')[0].strip()}, {state}"
    for market_name, state in zip(data['name'], data['state'])
]

# Short label for the national series; the chart function looks for this name.
data['clean_name'] = data['clean_name'].replace({'United States Of America, UNITED STATES OF AMERICA': 'USA'})

In [28]:
def multi_market_line_chart_as_pct(
    data: pd.DataFrame,
    y: str='pct_new_construction_listings_of_new',
    x: str='date',
    title: str='Percentage of New Listings coming from New Construction',
    label: str='% of New Listings from New Construction',
    color: str='clean_name'
) -> None:
    """Plot one line per market for a ratio column and show the figure.

    The series named 'USA' is drawn as a solid red line; every other series is
    a dashed light-blue line. Instead of a legend, each line is labelled at the
    latest date, skipping labels that would sit within 0.01 of one already
    placed.

    Args:
        data: Frame with one row per (market, date) holding the `x`, `y` and
            `color` columns.
        y: Column plotted on the y axis; ticks are formatted as percentages.
        x: Date-like column plotted on the x axis. Its maximum is appended to
            the chart title.
        title: Chart title, without the date suffix.
        label: Y-axis title.
        color: Column identifying each market (one line per distinct value).

    Raises:
        ValueError: If `data` has no rows.
    """
    if data.empty:
        raise ValueError('data must contain at least one row to chart')

    # Latest date on the x axis: shown in the title and used to place labels.
    latest_date = data[x].max()
    max_date_for_chart = latest_date.date().strftime('%B %d, %Y')

    # Create the line chart using Plotly Express
    fig = px.line(
        data,
        x=x,
        y=y,
        color=color,
        line_group=color,
        labels={y: label},
        title=f'{title} ({max_date_for_chart})'
    )

    # Highlight the national benchmark; mute every other market.
    for trace in fig.data:
        if trace.name == 'USA':
            trace.update(
                line=dict(color='red', width=4),
                opacity=1
            )
        else:
            trace.update(
                line=dict(color='lightblue', dash='dash', width=2),
                opacity=0.8
            )
        # Lines only: no markers or text on the traces themselves
        trace.update(
            mode='lines'
        )

    # Add a name label at the right-hand end of each line
    annotations = []
    placed_values = []
    latest_rows = data.loc[data[x] == latest_date]

    for trace in fig.data:
        end_values = latest_rows.loc[latest_rows[color] == trace.name, y].values

        # A market with no observation on the latest date has no right-edge
        # point to label; skip it instead of failing on an empty lookup.
        if len(end_values) == 0:
            continue
        last_y_value = end_values[0]

        # Only add the annotation if it doesn't overlap with existing ones
        if any(abs(last_y_value - placed) < 0.01 for placed in placed_values):  # Adjust threshold as needed
            continue

        annotations.append(dict(
            x=latest_date,
            y=last_y_value,
            xref='x',
            yref='y',
            text=trace.name,
            showarrow=False,
            xanchor='left',
            font=dict(size=12)  # Adjust the font size if needed
        ))
        placed_values.append(last_y_value)

    fig.add_layout_image(
        create_labs_logo_dict()
    )

    # Update layout for axes, title, and other styling.
    # Axis title fonts go through the nested `title.font` attribute rather
    # than the deprecated `titlefont` shortcut, which recent Plotly releases
    # are understood to reject.
    fig.update_layout(
        width=1600,
        height=800,
        xaxis=dict(
            title=dict(text='', font=style_config['title_font_axis']),
            showgrid=style_config['showgrid'],
            gridwidth=style_config['gridwidth'],
            gridcolor=style_config['grid_color'],
            linecolor=style_config['line_color_axis'],
            linewidth=style_config['linewidth']
        ),
        yaxis=dict(
            # Use the caller's label; a fixed title would mislabel every
            # metric other than the one it was written for.
            title=dict(text=label, font=style_config['title_font_axis']),
            showgrid=style_config['showgrid'],
            gridwidth=style_config['gridwidth'],
            gridcolor=style_config['grid_color'],
            tickfont=style_config['axis_font'],
            zeroline=False,
            tickformat='.0%',
            linecolor=style_config['line_color_axis'],
            linewidth=style_config['linewidth']
        ),
        plot_bgcolor=style_config['background_color'],
        paper_bgcolor=style_config['background_color'],
        font=dict(color=style_config['font_color']),
        showlegend=False,  # Lines are labelled directly, so no legend
        margin=dict(l=40, r=40, t=80, b=40),
        title={
            'y': 0.98,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24)
        },
        annotations=annotations
    )

    fig.show()


In [None]:
# Share of new listings that come from new construction, for every market in `data`.
new_construction_chart_args = dict(
    y='pct_new_construction_listings_of_new',
    x='date',
    title='Percentage of New Listings coming from New Construction',
    label='% of New Listings from New Construction',
    color='clean_name',
)
multi_market_line_chart_as_pct(data=data, **new_construction_chart_args)


In [None]:
multi_market_line_chart_as_pct(
    data=data.loc[data['parcl_id'].isin([2899822, 5826765, 2900417, 2900128,
 2900417,
 2900213,
 2899989,
 2900192,
 2899822,
 2900041,
 2899748,
 2900229])],
    y='pct_new_investor_listings_of_new',
    x='date',
    title='Percentage of New Listings coming from Investors',
    label='% of New Listings from Investors',
    color='clean_name'
)

In [None]:
# cape coral pid: 2899822, 5826765, 2900417
# List the distinct parcl_ids of every market whose clean name contains ', FL'.
is_florida_market = data['clean_name'].str.contains(', FL')
data.loc[is_florida_market, 'parcl_id'].unique().tolist()