# Welcome to the Lab 🥼🧪
## Which metros have the highest concentration of investor ownership? 

In this notebook, we build a table showing, for each of the last 12 months, the share of all homes owned by investors in the top 100 markets,
ranked by total housing stock.

Welcome to the Lab - LFG

**Note:** This notebook will work with any of the 70k+ markets supported by the Parcl Labs API.

As a reminder, you can get your Parcl Labs API key [here](https://dashboard.parcllabs.com/signup) to follow along. 

To run this immediately, you can use Google Colab. Remember, you must set your `PARCL_LABS_API_KEY` as a secret. See this [guide](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) for more information.

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ParclLabs/parcllabs-examples/blob/main/python/table_of_investor_concentration.ipynb)

We will be creating this table: 
![Chart](assets/all_homes_owned_by_investors.png)

In [1]:
# Environment setup
import os
import sys
import subprocess
from datetime import datetime

# Collab setup from one click above
if "google.colab" in sys.modules:
    from google.colab import userdata
    %pip install parcllabs plotly kaleido
    api_key = userdata.get('PARCL_LABS_API_KEY')
else:
    api_key = os.getenv('PARCL_LABS_API_KEY')

In [2]:
import parcllabs
import pandas as pd
import plotly.express as px
from parcllabs import ParclLabsClient

# Record the client library version so readers can reproduce this run
print("Parcl Labs Version:", parcllabs.__version__)

Parcl Labs Version: 0.1.16


In [3]:
# Build the API client from the key resolved in the environment-setup cell.
# Every data request in the cells below goes through this object.
client = ParclLabsClient(api_key=api_key)

In [4]:
# Let's get all metros (location type 'CBSA') in the country, as a DataFrame.
# The limit of 1000 is meant to be large enough to return every metro in one call.
metros = client.search_markets.retrieve(
    as_dataframe=True,
    location_type='CBSA',
    params=dict(limit=1000),
)

In [5]:
# Collect the Parcl IDs of every metro as a plain list for the bulk requests below
metro_ids = metros['parcl_id'].to_list()

In [6]:
# Fetch housing stock for all metros, returned as a DataFrame to make life easy.
# limit=1 asks for a single record per market: the most recent stock figure.
housing_stock = client.market_metrics_housing_stock.retrieve_many(
    as_dataframe=True,
    parcl_ids=metro_ids,
    params=dict(limit=1),
)
housing_stock.head(5)

|████████████████████████████████████████| 927/927 [100%] in 2:16.5 (6.79/s) 


Unnamed: 0,date,single_family,condo,townhouse,other,all_properties,parcl_id
0,2024-03-01,2802362,957017,76608,1583688,5419675,2900187
1,2024-03-01,1997656,858838,19719,555526,3431739,2900078
2,2024-03-01,2017899,768604,123514,588914,3498931,2899845
3,2024-03-01,1921281,457709,41641,373657,2794288,2899734
4,2024-03-01,1765489,383733,33520,355410,2538152,2899967


In [7]:
# Rank metros by total housing stock (largest first) and keep the 100 biggest
top_100 = housing_stock.sort_values(by='all_properties', ascending=False).iloc[:100]
top_100.head(5)

Unnamed: 0,date,single_family,condo,townhouse,other,all_properties,parcl_id
0,2024-03-01,2802362,957017,76608,1583688,5419675,2900187
2,2024-03-01,2017899,768604,123514,588914,3498931,2899845
1,2024-03-01,1997656,858838,19719,555526,3431739,2900078
3,2024-03-01,1921281,457709,41641,373657,2794288,2899734
7,2024-03-01,939341,1121073,226524,284459,2571397,2900128


In [8]:
# Investor ownership for the top 100 markets, returned as a DataFrame.
# limit=12 requests twelve records per market, i.e. the last 12 months.
investor_ownership = client.investor_metrics_housing_stock_ownership.retrieve_many(
    as_dataframe=True,
    parcl_ids=top_100['parcl_id'].to_list(),
    params=dict(limit=12),
)

|████████████████████████████████████████| 100/100 [100%] in 16.2s (6.16/s) 


In [9]:
# Attach metro names to the investor ownership records (joined on parcl_id),
# drop the id once it has served as the join key, and rescale pct_ownership
# from percentage points to a fraction (6.45 -> 0.0645) for percent formatting later.
out = (
    metros[['parcl_id', 'name']]
    .merge(investor_ownership[['parcl_id', 'date', 'pct_ownership']], on='parcl_id')
    .drop(columns='parcl_id')
    .assign(pct_ownership=lambda merged: merged['pct_ownership'] / 100)
)
out.head(5)

Unnamed: 0,name,date,pct_ownership
0,"New York-Newark-Jersey City, Ny-Nj-Pa",2024-03-01,0.0645
1,"New York-Newark-Jersey City, Ny-Nj-Pa",2024-02-01,0.0643
2,"New York-Newark-Jersey City, Ny-Nj-Pa",2024-01-01,0.0641
3,"New York-Newark-Jersey City, Ny-Nj-Pa",2023-12-01,0.0639
4,"New York-Newark-Jersey City, Ny-Nj-Pa",2023-11-01,0.0637


In [10]:
# we clean the names of the metros for better display. We define a function to do this
def format_names(nme):
    """Shorten a full metro name to its first city for display.

    Keeps only the text before the first comma (dropping the state part),
    then the text before the first hyphen (dropping secondary cities), then
    the text before the first slash, trimming surrounding whitespace.

    Portland, Maine is rewritten to 'Portland Maine' up front so it stays
    distinguishable from any other metro that shortens to 'Portland'.

    Args:
        nme: Full metro name, e.g. 'New York-Newark-Jersey City, Ny-Nj-Pa'.

    Returns:
        The shortened name, e.g. 'New York'.
    """
    # special case first: once rewritten, the name no longer contains ',' or '-'
    renamed = nme.replace('Portland-South Portland, Me', 'Portland Maine')
    first_city = renamed.partition(',')[0].partition('-')[0].strip()
    return first_city.partition('/')[0].strip()
    
# Replace each full metro name with its cleaned display form
out['name'] = out['name'].apply(format_names)
out.head()

Unnamed: 0,name,date,pct_ownership
0,New York,2024-03-01,0.0645
1,New York,2024-02-01,0.0643
2,New York,2024-01-01,0.0641
3,New York,2023-12-01,0.0639
4,New York,2023-11-01,0.0637


In [11]:

# Reshape to wide form for easy charting: one row per market, one column per date.
# Note: pivot_table averages values by default, so two metros that end up with
# the same display name would be silently blended into one row.
charting_data = out.pivot_table(index='name', columns='date', values='pct_ownership')

In [12]:
# Sort markets by the most recent date, highest investor share first.
# The latest column is looked up from the data instead of being hard-coded,
# so the cell keeps working when the API returns newer months. The column
# labels are 'YYYY-MM-DD' strings, so the maximum label is the latest date.
latest_date = charting_data.columns.max()
charting_data = charting_data.sort_values(latest_date, ascending=False)

In [13]:
# before we chart, we import the plotting libraries and define two helpers: a light-to-dark blue color scale for the cells and a date formatter for the headers
import plotly.graph_objects as go
import pandas as pd
import numpy as np

# Function to apply a sequential (light-to-dark blue) color scheme
def color_scale(value, max_value=0.3, missing_color='rgb(8,13,22)'):
    """Map an ownership share to a color on a light-blue to dark-blue ramp.

    The value is normalized against ``max_value`` and clamped to [0, 1], then
    used to interpolate linearly between light blue (#ADD8E6) at 0 and dark
    blue (#00008B) at ``max_value`` or above.

    Args:
        value: Share to color, as a fraction (e.g. 0.0645).
        max_value: Value that maps to the darkest color; must be positive.
            Defaults to 0.3, i.e. the ramp covers the range [0, 0.3].
        missing_color: Color returned when ``value`` is None or NaN, e.g. for
            a market with no data in a given month. Defaults to the dark
            table background used in this notebook (#080D16).

    Returns:
        A CSS-style color string of the form 'rgb(r,g,b)'.
    """
    # NaN is the only float that is not equal to itself; without this guard
    # int(nan) below would raise ValueError.
    if value is None or value != value:
        return missing_color

    # Normalize value within the range [0, max_value]
    normalized = min(max(value / max_value, 0), 1)

    light_blue = (173, 216, 230)  # #ADD8E6
    dark_blue = (0, 0, 139)       # #00008B
    r, g, b = (
        int(start + (end - start) * normalized)
        for start, end in zip(light_blue, dark_blue)
    )
    return f'rgb({r},{g},{b})'

# Date formatting function for headers
def format_date(date):
    """Format a date as an abbreviated month and two-digit year, like "Jan '23".

    Args:
        date: Either a 'YYYY-MM-DD' string (the form of the pivoted column
            labels) or an object with a ``strftime`` method, such as a
            ``datetime``.

    Returns:
        The header label, e.g. "Mar '24" for '2024-03-01'.

    Raises:
        ValueError: If ``date`` is a string that does not match 'YYYY-MM-DD'.
    """
    # Only strings need parsing; datetime-like values can be formatted directly
    parsed = datetime.strptime(date, '%Y-%m-%d') if isinstance(date, str) else date
    # Apostrophe (not a backtick) before the year, in a single strftime call
    return parsed.strftime("%b '%y")

# Column labels of the pivoted table (one per date), in display order
dates = charting_data.columns.to_list()

In [15]:
# set up the parameters for the visualization

# labs_logo_dict setup
# Available "powered by" logo variants, keyed by color
labs_logo_lookup = dict(
    blue='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
    white='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white.png',
)

# Layout-image spec for the logo. Coordinates are in "paper" units, so
# x=0.5 centers the logo horizontally and y=1 with a bottom anchor rests it
# on the top edge of the plotting area, i.e. just below the title.
labs_logo_dict = {
    'source': labs_logo_lookup['white'],  # white variant, for the dark figure background
    'xref': "paper",
    'yref': "paper",
    'x': 0.5,
    'y': 1,  # adjust this value to move the logo relative to the title
    'sizex': 0.15,
    'sizey': 0.1,
    'xanchor': "center",
    'yanchor': "bottom",
}

# Creating the table
# Header row: a "Market" label followed by one bold, formatted label per date
header_labels = ['<b>Market</b>'] + [f"<b>{format_date(date)}</b>" for date in dates]

# Cell text, column by column: market names first, then each date's share as a percentage
market_names = [str(name) for name in charting_data.index]
cell_text = [market_names] + [charting_data[date].apply("{:.2%}".format) for date in dates]

# Cell fills, column by column: the Market column keeps a single dark color,
# every date column is shaded by its value
cell_fills = [['#080D16'] * len(market_names)] + [
    [color_scale(share) for share in charting_data[date]] for date in dates
]

fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=header_labels,
                fill_color='#080D16',
                font=dict(color='#FFFFFF', size=12),
                align='center',
                height=30,
            ),
            cells=dict(
                values=cell_text,
                fill=dict(color=cell_fills),
                font=dict(
                    color='#FFFFFF',
                    # one size per column; every column, including 'Market', uses 11
                    size=[11] * (len(dates) + 1),
                ),
                align='center',
                height=30,
            ),
        )
    ]
)

# Place the logo image described by labs_logo_dict (just below the title)
fig.add_layout_image(labs_logo_dict)

# Title, overall figure size, and dark background
fig.update_layout(
    title=dict(
        text='Percent of All Homes Owned by Investors by Top 100 Metro Areas',
        y=0.98,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    title_font_color='#FFFFFF',
    paper_bgcolor='#080D16',
    width=1400,  # adjust width as needed
    height=2000,
    # larger top margin leaves room for the title and the logo
    margin={'l': 10, 'r': 10, 't': 120, 'b': 10},
)

# Render the figure
fig.show()
