# Welcome to the Lab 🥼🧪

## Housing Mix Analysis

Why does housing mix matter? The types of units trading vary by market and can provide insight into the market's health. This analysis looks at the housing mix of a market and compares it to the national average. The [Parcl Labs Price Feed](https://www.parcllabs.com/articles/parcl-labs-price-feed-whitepaper) measures the price per square foot of units trading in a market. Understanding the mix, and how that mix varies over time, can provide insight into where prices could go. 

**Note** This notebook will work with any of the 70k+ markets supported by the Parcl Labs API.

As a reminder, you can get your Parcl Labs API key [here](https://dashboard.parcllabs.com/signup) to follow along. 

To run this immediately, you can use Google Colab. Remember, you must set your `PARCL_LABS_API_KEY` as a secret. See this [guide](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) for more information.

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ParclLabs/parcllabs-examples/blob/main/python/traders/housing_mix_analysis.ipynb)

In [None]:
# Environment setup
import os
import sys
import subprocess
from datetime import datetime

# Colab setup: this branch runs when the notebook is opened through the one-click badge above
if "google.colab" in sys.modules:
    from google.colab import userdata
    # Install the SDK and plotting stack (kaleido backs plotly's static image export)
    %pip install parcllabs plotly kaleido numpy
    # Clone the examples repo and add its python/ folder to the import path
    # (presumably where the `charting` package imported later lives)
    !git clone https://github.com/ParclLabs/parcllabs-examples.git
    sys.path.append('/content/parcllabs-examples/python/')
    # The API key is read from Colab's user secrets
    api_key = userdata.get('PARCL_LABS_API_KEY')
else:
    # Local run: the key comes from the environment; os.getenv returns None when it is unset
    api_key = os.getenv('PARCL_LABS_API_KEY')
    # Add the parent of the working directory to the import path
    cur_dir = os.getcwd()
    chart_dir = os.path.join(cur_dir, '..')
    sys.path.append(chart_dir)

In [None]:
import parcllabs
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from parcllabs import ParclLabsClient
from charting.utils import create_labs_logo_dict, format_metro_names

# Print the installed parcllabs package version
print(f"Parcl Labs Version: {parcllabs.__version__}")

In [None]:
# Create the API client with the key loaded during environment setup
client = ParclLabsClient(api_key=api_key)

In [None]:
# Fetch the first 14 markets when sorted (descending) by the
# PARCL_EXCHANGE_MARKET field.
markets = client.search_markets.retrieve(
    as_dataframe=True,
    sort_by='PARCL_EXCHANGE_MARKET',
    sort_order='DESC',
    params={
        'limit': 14
    }
)

# Explicitly add one more market, looked up by its parcl_id.
miami = client.search_markets.retrieve(
    parcl_id=5352987,
    as_dataframe=True
)

# Combine both results. Duplicate parcl_ids are dropped so that later merges on
# parcl_id cannot double rows if the extra market was already in the first
# result, and the index is rebuilt so row labels are unique.
markets = (
    pd.concat([markets, miami], ignore_index=True)
    .drop_duplicates(subset='parcl_id')
    .reset_index(drop=True)
)
markets

In [None]:
def clean_names(nme):
    """Return the short display label for a market name.

    Names found in the alias table are swapped for their abbreviation;
    any other value is handed back unchanged.
    """
    aliases = {
        'Washington City': 'D.C.',
        'United States Of America': 'USA',
        'New York City': 'NYC',
        'Kings County': 'Brooklyn, NY',
    }
    return aliases.get(nme, nme)
    
# Replace the long market names with their short display labels
markets['name'] = markets['name'].apply(clean_names)
markets

In [None]:
# Pull the price feed series for every selected market, starting 2019-01-01.
# WARNING: ~6k credits can be used by a single price feed; choose a more recent
# start_date to reduce the number of credits used.
feeds = client.price_feed.retrieve_many(
        parcl_ids=markets['parcl_id'].tolist(),
        start_date='2019-01-01',
        as_dataframe=True,
        params={'limit': 1000},  # expand the limit to 1000, these are daily series
        auto_paginate=True, # auto paginate to get all the data
)

feeds.head()

In [None]:
# Get the housing stock counts for every selected market

stock = client.market_metrics_housing_stock.retrieve_many(
    parcl_ids=markets['parcl_id'].tolist(),
    as_dataframe=True,
    params={
        # NOTE(review): the original comment said "get current state", but later
        # cells filter this frame by date, so it presumably holds history too — confirm
        'limit': 300
    }
)

In [None]:
# Express each property type as a fraction of all properties in the market
stock['pct_single_family'] = stock['single_family'] / stock['all_properties'] 
stock['pct_condo'] = stock['condo'] / stock['all_properties'] 
stock['pct_townhouse'] = stock['townhouse'] / stock['all_properties'] 
stock['pct_other'] = stock['other'] / stock['all_properties'] 

In [None]:
# Attach the market name to every stock row (merge on parcl_id)
df = stock.merge(markets[['name', 'parcl_id']], on='parcl_id')
# Run the names through the charting helper, asking it to include the state
df['name'] = df['name'].apply(lambda x: format_metro_names(x, include_state=True))


In [None]:
def build_pie_chart(
        data,
        market_name: str,
        save_path: str = None
):
    """Render, and optionally save, a pie chart of one market's housing stock mix.

    Args:
        data: DataFrame whose first row supplies the `single_family`, `condo`,
            `townhouse`, `other` and `all_properties` counts. Only the first
            row is charted.
        market_name: Label used in the chart title.
        save_path: Optional path for a static image of the chart. Missing
            parent directories are created before the image is written.

    Raises:
        IndexError: If `data` has no rows.
    """
    # Slice label -> source column, in the order the slices are drawn
    segments = {
        'Single Family': 'single_family',
        'Condo': 'condo',
        'Townhouse': 'townhouse',
        'Other': 'other',
    }
    labels = list(segments)
    values = [data[column].values[0] for column in segments.values()]
    total_units = data['all_properties'].values[0]

    # Create pie chart
    fig = go.Figure(data=[go.Pie(
        labels=labels,
        values=values,
        textinfo='label+percent',
        textposition='outside',
        pull=[0.05, 0.05, 0.05, 0.05],
        marker=dict(
            colors=['#636EFA', '#EF553B', '#00CC96', '#AB63FA'],
            line=dict(color='#FFFFFF', width=2)
        ),
        insidetextorientation='radial'
    )])

    # Add Parcl Labs logo
    fig.add_layout_image(
        create_labs_logo_dict(
            src='labs',
            y=1.015,
            sizex=0.3,
            sizey=0.3
        )
    )

    # Title, total-units footnote, dark theme and fixed figure size
    fig.update_layout(
        title_text=f'Housing Stock Mix: {market_name}',
        title_font_size=24,
        title_x=0.5,
        annotations=[dict(
            text=f'Total Units: {total_units:,}',
            x=0.5,
            y=-0.1,
            showarrow=False,
            font=dict(size=14, color='white')
        )],
        showlegend=False,
        plot_bgcolor='#222222',
        paper_bgcolor='#222222',
        font=dict(color='white'),
        autosize=False,
        width=800,
        height=800
    )

    # Add hover info
    fig.update_traces(
        hoverinfo='label+percent+value',
        textfont_size=14
    )

    # Save the figure, creating the target directory first so a fresh
    # checkout or Colab session does not fail on a missing folder
    if save_path:
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        fig.write_image(save_path)

    # Show the figure
    fig.show()

In [None]:
# Property types to request. ALL_PROPERTIES must stay last: the next cell reads
# the per-type frames from alldf[0:3] and the totals from alldf[-1].
prop_types = [
    'SINGLE_FAMILY',
    'CONDO',
    'TOWNHOUSE',
    'ALL_PROPERTIES'
]

# One frame per property type, appended in prop_types order
alldf = []

for ptype in prop_types:

    # Housing event counts for every selected market, for this property type
    mix = client.market_metrics_housing_event_counts.retrieve_many(
        parcl_ids=markets['parcl_id'].tolist(),
        as_dataframe=True,
        property_type=ptype,
        params={
            'limit': 300
        }
    )

    # Tag the rows with the lower-cased type and keep only the columns used later
    mix['property_type'] = ptype.lower()
    alldf.append(mix[['parcl_id', 'date', 'sales', 'property_type']])


In [None]:
# Stack the first three frames (single family, condo, townhouse, per the order of prop_types)
chart = pd.concat(alldf[0:3])
# Attach the sales from the last frame (ALL_PROPERTIES) as `all_sales`, matched on market and date
chart = chart.merge(alldf[-1][['parcl_id', 'date', 'sales']].rename(columns={'sales': 'all_sales'}), on=['parcl_id', 'date'])
chart

In [None]:
# Benchmark single-family homes against all other units
# Keep only the single-family rows
sfh = chart.loc[chart['property_type']=='single_family']
# Derive the "all other units" rows: total sales minus single-family sales
all_other = sfh.copy(deep=True)
all_other['sales'] = all_other['all_sales']-all_other['sales']
all_other['property_type'] = 'All Other Units'
# chart now holds only these two series
chart = pd.concat([sfh, all_other])
# Switch the single-family rows to their display label
chart['property_type'] = chart['property_type'].replace({'single_family': 'Single Family'})

In [None]:
# Each series' sales as a fraction of all sales (multiplied by 100 when plotted)
chart['pct_sales'] = chart['sales'] / chart['all_sales']
# Attach the market name to every row
chart = chart.merge(markets[['name', 'parcl_id']], on='parcl_id')


In [None]:
# Logo image settings; build_line_chart and demand_vs_pf_chart read this
# module-level dict directly, so it must be defined before they are called
labs_logo_dict = create_labs_logo_dict(
    src='labs',
    y=1.0,
    sizex=0.15,
    sizey=0.15,
    x=0.47
)

In [None]:
import plotly.graph_objects as go
import pandas as pd

# Define the function to build the line chart
def build_line_chart(
    data: pd.DataFrame,
    title: str = None,
    x_axis_title: str = None,
    y_axis_title: str = None,
    save_path: str = None,
    value_name: str = None
):
    """Plot one percentage line per property type and optionally save the figure.

    Reads the module-level `labs_logo_dict` for the logo image.

    Args:
        data: Frame with `property_type`, `date` and the `value_name` column.
        title: Chart title.
        x_axis_title: Label for the x axis.
        y_axis_title: Label for the y axis.
        save_path: Optional path for a static image of the chart. Missing
            parent directories are created before the image is written.
        value_name: Column holding the fraction to plot; it is multiplied by
            100 and shown with a `%` suffix. It must be supplied even though
            the signature defaults it to None.
    """
    HEIGHT = 900
    WIDTH = 1600

    fig = go.Figure()

    # Define colors for each property type
    colors = {
        'Single Family': '#636EFA',  # Blue
        'All Other Units': '#EF553B',  # Orange
        'Townhouse': '#00CC96'       # Green
    }

    # Plot each property type
    for property_type in data['property_type'].unique():
        # Sort by date so the line is drawn left to right whatever order
        # the rows arrive in
        subset = data[data['property_type'] == property_type].sort_values('date')
        fig.add_trace(go.Scatter(
            x=subset['date'],
            y=subset[value_name]*100,
            mode='lines+markers',
            name=property_type,
            line=dict(width=2, color=colors.get(property_type, '#FFFFFF')),  # Default to white if no color found
            marker=dict(size=6)
        ))

    # Add vertical dotted line at 2020-03-01, spanning the full plot height
    fig.add_shape(
        dict(
            type="line",
            x0="2020-03-01",
            y0=0,
            x1="2020-03-01",
            y1=1,
            xref='x',
            yref='paper',
            line=dict(
                color="Red",
                width=2,
                dash="dot",
            )
        )
    )

    # Add annotation for the vertical line
    fig.add_annotation(
        dict(
            x="2020-03-01",
            y=1,
            xref='x',
            yref='paper',
            xanchor='left',  # Position the text to the right of the line
            xshift=10,  # Shift the text slightly to the right
            text="COVID Starts",
            showarrow=False,
            font=dict(
                size=12,
                color="Red"
            ),
            align="center"
        )
    )

    # Add logo image
    fig.add_layout_image(
        labs_logo_dict
    )

    fig.update_layout(
        margin=dict(l=20, r=20, t=110, b=20),
        height=HEIGHT,
        width=WIDTH,
        title={
            'text': title,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=28, color='#FFFFFF'),
        },
        plot_bgcolor='#222222',  # Dark background for better contrast
        paper_bgcolor='#222222',  # Dark background for the paper
        font=dict(color='#FFFFFF'),
        xaxis=dict(
            title_text=x_axis_title,
            showgrid=True,
            gridwidth=0.5,  # Vertical grid line width
            gridcolor='rgba(255, 255, 255, 0.1)',  # Vertical grid line color with opacity
            tickangle=-45,
            tickfont=dict(size=14),
            linecolor='rgba(255, 255, 255, 0.7)',  # Axis line color with opacity
            linewidth=1  # Axis line width
        ),
        yaxis=dict(
            title_text=y_axis_title,
            showgrid=True,
            gridwidth=0.5,  # Horizontal grid line width
            gridcolor='rgba(255, 255, 255, 0.2)',  # Horizontal grid line color with opacity
            tickfont=dict(size=14),
            tickprefix='',  # No prefix on the percentage axis
            ticksuffix='%',  # Add percentage suffix
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',  # Axis line color with opacity
            linewidth=1  # Axis line width
        ),
        hovermode='x unified',  # Unified hover mode for better interactivity
        hoverlabel=dict(
            bgcolor='#333333',
            font_size=14,
            font_family="Rockwell"
        ),
        legend=dict(
            x=0.95,  # Position legend in the top right corner
            y=0.95,
            xanchor='right',
            yanchor='top',
            font=dict(size=14, color='#FFFFFF'),
            bgcolor='rgba(0, 0, 0, 0.5)'
        )
    )

    # Create the target directory first so saving does not fail on a
    # missing folder
    if save_path:
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        fig.write_image(save_path, width=WIDTH, height=HEIGHT)

    # Show the plot
    fig.show()

# Define the function to build the dual-axis chart with a stacked bar
def demand_vs_pf_chart(
    market_name: str, 
    data: pd.DataFrame,
    sales_data: pd.DataFrame,
    price_series: str = 'price_feed',
    single_family_series: str = 'sales',
    all_other_units_series: str = 'all_other_sales',
    save_path: str = None,
    title: str = None,
    yaxis1_title: str = None,
    yaxis2_title: str = None
):
    """Plot a price line (left axis) over stacked sales bars (right axis).

    Reads the module-level `labs_logo_dict` for the logo image.

    Args:
        market_name: Accepted for call compatibility; not used in the figure
            (the market name reaches the chart through `title`).
        data: Frame with `date` and the `price_series` column.
        sales_data: Frame with `date` and the two sales columns named below.
        price_series: Column of `data` drawn as the line; ticks get a `$` prefix.
        single_family_series: Column of `sales_data` for the single-family bars.
        all_other_units_series: Column of `sales_data` for the other-unit bars.
        save_path: Optional path for a static image of the chart. Missing
            parent directories are created before the image is written.
        title: Chart title.
        yaxis1_title: Left-axis title; also used as the line's legend name.
        yaxis2_title: Right-axis title.
    """

    HEIGHT = 900
    WIDTH = 1600

    # Sort by date so the price line is drawn left to right whatever order
    # the rows arrive in
    price_data = data.sort_values('date')

    fig = go.Figure()

    # Bar trace for Single Family purchases (right axis)
    fig.add_trace(go.Bar(
        x=sales_data['date'],
        y=sales_data[single_family_series],
        marker=dict(color='#636EFA', opacity=0.7),
        name='Single Family Purchases',
        yaxis='y2'
    ))

    # Bar trace for All Other Unit purchases (right axis)
    fig.add_trace(go.Bar(
        x=sales_data['date'],
        y=sales_data[all_other_units_series],
        marker=dict(color='#EF553B', opacity=0.7),
        name='All Other Unit Purchases',
        yaxis='y2'
    ))

    # Line trace for the price series (left axis)
    fig.add_trace(go.Scatter(
        x=price_data['date'],
        y=price_data[price_series],
        mode='lines+markers',
        line=dict(width=4, color='#00CC96'),  # Solid green color for price series
        marker=dict(size=6),
        name=yaxis1_title
    ))

    # Reverse the trace order: price line first, then All Other Units, then
    # Single Family.
    # NOTE(review): this also reverses the order of the two bar traces, so the
    # stacking is presumably All Other Units below Single Family — confirm that
    # is intended. Because y2 overlays y, the bars may still render above the
    # line regardless of trace order — confirm.
    fig.data = fig.data[::-1]

    # Add logo image
    fig.add_layout_image(
        labs_logo_dict
    )

    fig.update_layout(
        margin=dict(l=20, r=20, t=110, b=20),
        height=HEIGHT,
        width=WIDTH,
        title={
            'text': title,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=28, color='#FFFFFF'),
        },
        plot_bgcolor='#222222',
        paper_bgcolor='#222222',
        font=dict(color='#FFFFFF'),
        xaxis=dict(
            title_text='',
            showgrid=True,
            gridwidth=0.5,
            gridcolor='rgba(255, 255, 255, 0.1)',
            tickangle=-45,
            tickfont=dict(size=14),
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        yaxis=dict(
            title_text=yaxis1_title,
            showgrid=True,
            gridwidth=0.5,
            gridcolor='rgba(255, 255, 255, 0.2)',
            tickfont=dict(size=14),
            tickprefix='$',
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        yaxis2=dict(
            title_text=yaxis2_title,
            showgrid=True,
            gridwidth=0.5,
            gridcolor='rgba(255, 255, 255, 0.2)',
            tickfont=dict(size=14),
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1,
            overlaying='y',
            side='right',
            ticksuffix=' units'
        ),
        hovermode='x unified',
        hoverlabel=dict(
            bgcolor='#333333',
            font_size=14,
            font_family="Rockwell"
        ),
        legend=dict(
            x=0.95,  # Position legend in the top right corner
            y=0.95,
            xanchor='right',
            yanchor='top',
            font=dict(size=14, color='#FFFFFF'),
            bgcolor='rgba(0, 0, 0, 0.5)'
        ),
        barmode='stack'  # Stack the bars
    )

    # Create the target directory first so saving does not fail on a
    # missing folder
    if save_path:
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        fig.write_image(save_path, width=WIDTH, height=HEIGHT)
    fig.show()

def create_housing_mix_tweet(
    name: str,
    pie_chart: pd.DataFrame
):
    """Format the housing-mix tweet text for one market.

    The four `pct_*` shares are taken from the first row of `pie_chart`
    and rendered as percentages with two decimals under a heading that
    starts with `name`.
    """
    pct_single_family, pct_condo, pct_other, pct_townhouse = (
        pie_chart[column].values[0]
        for column in (
            'pct_single_family',
            'pct_condo',
            'pct_other',
            'pct_townhouse',
        )
    )

    return f"""
    {name} Current Housing Mix

    Single Family: {pct_single_family:.2%}
    Condo: {pct_condo:.2%}
    Townhouse: {pct_townhouse:.2%}
    Other: {pct_other:.2%}
    """

def create_sfh_sales_mix_tweet(
    name: str,
    line_chart_data: pd.DataFrame
):
    """Build the tweet text summarising the single-family share of sales.

    Only rows whose `property_type` is 'Single Family' are used, ordered by
    `date`. Changes are relative changes of `pct_sales` between the latest
    row and the row N positions earlier (6 and 12 rows back, and the first
    row). Assumes one row per month — TODO confirm against the data source.

    Args:
        name: Market name used in the heading.
        line_chart_data: Frame with `property_type`, `date` and `pct_sales`.

    Returns:
        The formatted tweet text. A change is rendered as `nan%` when the
        series is too short to look that far back.

    Raises:
        IndexError: If there are no 'Single Family' rows.
    """
    sfh_rows = line_chart_data.loc[line_chart_data['property_type'] == 'Single Family'].sort_values('date')
    shares = sfh_rows['pct_sales'].values
    current_sfh_sale_ratio = shares[-1]

    def _pct_change(periods_back):
        # The latest row is index -1, so N periods back is index -1 - N
        # (index -N would only be N - 1 periods back).
        if len(shares) <= periods_back:
            return float('nan')
        base = shares[-1 - periods_back]
        return (shares[-1] - base) / base

    pct_change_sfh_sale_ratio_6_mo = _pct_change(6)
    pct_change_sfh_sale_ratio_yoy = _pct_change(12)
    # Since the first available row
    pct_change_sfh_sale_ratio_19 = _pct_change(len(shares) - 1)

    sfh_sales_tweet = f"""
    {name} SFH vs. All Other Unit Sales

    Current SFH Sale %: {current_sfh_sale_ratio:.2%}
    % Change (6 mo): {pct_change_sfh_sale_ratio_6_mo:.2%}
    % Change (YoY): {pct_change_sfh_sale_ratio_yoy:.2%}
    % Change (Since `19): {pct_change_sfh_sale_ratio_19:.2%}
    """

    return sfh_sales_tweet

def create_sales_vol_pf_tweet(
        name: str,
        pf: pd.DataFrame, 
        counts_data: pd.DataFrame
):
    """Build the tweet text comparing price-feed and single-family sales changes.

    Both frames are ordered by `date`; each change compares the latest row
    with the row N positions earlier. The price feed is treated as a daily
    series (30 / 180 / 365 rows back) and the counts as a monthly series
    (1 / 6 / 12 rows back). Assumes neither series has gaps — TODO confirm.

    Args:
        name: Market name used in the heading.
        pf: Frame with `date` and `price_feed`.
        counts_data: Frame with `date` and `sales`.

    Returns:
        The formatted tweet text. A change is rendered as `nan%` when the
        series is too short to look that far back.

    Raises:
        IndexError: If either frame has no rows.
    """

    def _pct_change(values, periods_back):
        # The latest row is index -1, so N periods back is index -1 - N
        # (index -N would only be N - 1 periods back).
        if len(values) <= periods_back:
            return float('nan')
        base = values[-1 - periods_back]
        return (values[-1] - base) / base

    # daily series
    prices = pf.sort_values('date')['price_feed'].values
    pf_pct_change_30_day = _pct_change(prices, 30)
    pf_pct_change_6_mo = _pct_change(prices, 180)
    pf_pct_change_yoy = _pct_change(prices, 365)
    pf_pct_change_since_2019 = _pct_change(prices, len(prices) - 1)

    # monthly series
    sales = counts_data.sort_values('date')['sales'].values
    demand_sfh_pct_change_30_day = _pct_change(sales, 1)
    demand_sfh_pct_change_6_mo = _pct_change(sales, 6)
    demand_sfh_pct_change_yoy = _pct_change(sales, 12)
    demand_sfh_pct_change_since_2019 = _pct_change(sales, len(sales) - 1)

    sales_volume_pf_tweet = f"""
    {name} Sales Volume vs. PPSQF

    % Change (30 Day)
    PPSQF: {pf_pct_change_30_day:.2%}
    SFH Sales: {demand_sfh_pct_change_30_day:.2%}

    % Change (6 Mo)
    PPSQF: {pf_pct_change_6_mo:.2%}
    SFH Sales: {demand_sfh_pct_change_6_mo:.2%}

    % Change (YoY)
    PPSQF: {pf_pct_change_yoy:.2%}
    SFH Sales: {demand_sfh_pct_change_yoy:.2%}

    % Change (Since `19)
    PPSQF: {pf_pct_change_since_2019:.2%}
    SFH Sales: {demand_sfh_pct_change_since_2019:.2%}
    """
    return sales_volume_pf_tweet


In [None]:
# Single-market walkthrough for one parcl_id
pid = 5374321

pf = feeds.loc[feeds['parcl_id'] == pid]
# Use this market's most recent housing-stock snapshot instead of a pinned
# date, so the "current" mix does not go stale or come back empty
pie_chart = df.loc[df['parcl_id'] == pid]
pie_chart = pie_chart.loc[pie_chart['date'] == pie_chart['date'].max()]
line_chart_data = chart.loc[chart['parcl_id'] == pid]
# Sales of everything except single family, kept on the single-family rows
# so each date appears once in the stacked bars
counts_data = line_chart_data.copy(deep=True)
counts_data['all_other_sales'] = counts_data['all_sales'] - counts_data['sales']
counts_data = counts_data.loc[counts_data['property_type'] == 'Single Family']
name = line_chart_data['name'].values[0]
housing_mix_tweet = create_housing_mix_tweet(name=name, pie_chart=pie_chart)
sfh_sales_tweet = create_sfh_sales_mix_tweet(name=name, line_chart_data=line_chart_data)
sales_volume_pf_tweet = create_sales_vol_pf_tweet(name=name, pf=pf, counts_data=counts_data)

# Example call to the function
demand_vs_pf_chart(
    name, 
    pf, 
    counts_data,
    price_series='price_feed',
    single_family_series='sales',
    all_other_units_series='all_other_sales',
    yaxis1_title='Price per Square Foot ($)',
    yaxis2_title='Total Number of Purchases',
    title=f'{name} Price per Square Foot and Total Number of Sales by Property Type',
    save_path=f'../graphics/housing_mix_analysis/{name}_purchases_by_pf.png'
)

In [None]:
# Build the three tweets and the three charts for every market in `chart`.
# `chart` carries the columns 'parcl_id', 'property_type', 'date', 'pct_sales' and 'name'.
for pid in chart['parcl_id'].unique():
    pf = feeds.loc[feeds['parcl_id'] == pid]
    # Use this market's most recent housing-stock snapshot instead of a pinned
    # date, so the "current" mix does not go stale or come back empty
    pie_chart = df.loc[df['parcl_id'] == pid]
    pie_chart = pie_chart.loc[pie_chart['date'] == pie_chart['date'].max()]
    line_chart_data = chart.loc[chart['parcl_id'] == pid]
    # Sales of everything except single family, kept on the single-family rows
    # so each date appears once in the stacked bars
    counts_data = line_chart_data.copy(deep=True)
    counts_data['all_other_sales'] = counts_data['all_sales'] - counts_data['sales']
    counts_data = counts_data.loc[counts_data['property_type'] == 'Single Family']
    name = line_chart_data['name'].values[0]
    housing_mix_tweet = create_housing_mix_tweet(name=name, pie_chart=pie_chart)
    sfh_sales_tweet = create_sfh_sales_mix_tweet(name=name, line_chart_data=line_chart_data)
    sales_volume_pf_tweet = create_sales_vol_pf_tweet(name=name, pf=pf, counts_data=counts_data)

    print(housing_mix_tweet)
    build_pie_chart(
        data=pie_chart,
        market_name=name,
        save_path=f'../graphics/housing_mix_analysis/{name}_housing_stock_mix.png'
    )
    print(sfh_sales_tweet)
    build_line_chart(
        data=line_chart_data,
        title=f'{name} Sales Mix: Single Family vs. All Other Units',
        x_axis_title='',
        y_axis_title='Percentage of Total Sales',
        value_name='pct_sales',
        save_path=f'../graphics/housing_mix_analysis/{name}_home_sales_mix.png'
    )
    print(sales_volume_pf_tweet)
    # Dual-axis chart: price feed line over stacked sales bars
    demand_vs_pf_chart(
        name, 
        pf, 
        counts_data,
        price_series='price_feed',
        single_family_series='sales',
        all_other_units_series='all_other_sales',
        yaxis1_title='Price per Square Foot ($)',
        yaxis2_title='Total Number of Purchases',
        title=f'{name} Price per Square Foot and Total Number of Sales by Property Type',
        save_path=f'../graphics/housing_mix_analysis/{name}_purchases_by_pf.png'
    )
    