# Welcome to the Lab 🥼🧪
## Which metros have the highest share of mom-and-pop ownership? Of institutional ownership?

**Note:** This notebook will work with any of the 70k+ markets supported by the Parcl Labs API.

As a reminder, you can get your Parcl Labs API key [here](https://dashboard.parcllabs.com/signup) to follow along. 

To run this immediately, you can use Google Colab. Remember, you must set your `PARCL_LABS_API_KEY` as a secret. See this [guide](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) for more information.

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ParclLabs/parcllabs-examples/blob/main/python/table_of_mom_and_pop_vs_institutions_ownership.ipynb)

We will be creating this table: 

![Chart](assets/top100_metros_mom_and_pops.png)

In [None]:
# Environment setup
import os
import sys
import subprocess
from datetime import datetime

# Colab setup (used when the notebook is opened via the "Open in Colab" badge above)
if "google.colab" in sys.modules:
    # Running inside Colab: install the dependencies and read the key from Colab secrets.
    from google.colab import userdata
    %pip install parcllabs plotly kaleido
    api_key = userdata.get('PARCL_LABS_API_KEY')
else:
    # Anywhere else: read the key from the environment (os.getenv yields None if it is unset).
    api_key = os.getenv('PARCL_LABS_API_KEY')

In [None]:
import parcllabs
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from parcllabs import ParclLabsClient

# Show which parcllabs release is installed in this kernel.
print(f"Parcl Labs Version: {parcllabs.__version__}")

In [None]:
# Instantiate the Parcl Labs API client with the key loaded during environment setup.
# NOTE(review): limit=1 presumably makes metric endpoints return a single (latest)
# record per market unless a call overrides it -- confirm against the parcllabs SDK docs.
client = ParclLabsClient(api_key=api_key, limit=1)

In [None]:
# Search for CBSA (metro) markets ordered by total population, largest first,
# requesting 100 results through `params`; the result comes back as a DataFrame.
metros = client.search_markets.retrieve(
    location_type='CBSA',
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    params={'limit': 100},
    as_dataframe=True,
)

In [None]:
# Pull the Parcl IDs of the retrieved metros into a plain list; the cell
# output is the number of IDs collected.
metro_ids = metros.loc[:, 'parcl_id'].tolist()
len(metro_ids)

In [None]:
# Retrieve the single family housing stock ownership metrics, broken out by
# portfolio size, for every metro ID collected above, as a DataFrame.
ports = client.portfolio_metrics_sf_housing_stock_ownership.retrieve_many(
    as_dataframe=True,
    parcl_ids=metro_ids,
)

# Preview the first rows of the response.
ports.head()

In [None]:
# Join the ownership metrics to the market metadata on `parcl_id` so each row
# carries the metro name, then keep only the name and the portfolio-size shares.
cols = [
    'name',
    'pct_sf_housing_stock_portfolio_2_to_9',
    'pct_sf_housing_stock_portfolio_10_to_99',
    'pct_sf_housing_stock_portfolio_100_to_999',
    'pct_sf_housing_stock_portfolio_1000_plus',
    'pct_sf_housing_stock_all_portfolios'
]

# `.copy()` makes `out` an independent frame, so assigning to its columns later
# does not trigger pandas' SettingWithCopyWarning.
# The pct_* values are used as-is (no rescaling); they are rendered with a
# literal '%' suffix when the table is built.
out = pd.merge(ports, metros, on='parcl_id')[cols].copy()

In [None]:
# Shorten an official market name to something easier to read in a table
def format_names(nme):
    """Return the leading place name of an official market name.

    Everything from the first comma onward is discarded, then everything from
    the first hyphen onward, then everything from the first slash onward;
    surrounding whitespace is stripped along the way.

    Args:
        nme: Official market name, e.g. ``"Dallas-Fort Worth-Arlington, TX"``.

    Returns:
        The shortened name, e.g. ``"Dallas"``.
    """
    before_comma = nme.partition(',')[0]
    before_hyphen = before_comma.partition('-')[0].strip()
    return before_hyphen.partition('/')[0].strip()

# Replace each official name with its shortened form, then promote the name
# column to the index (which removes it from the regular columns).
out = out.assign(name=out['name'].map(format_names)).set_index('name')

In [None]:
# Define a function to scale the color based on the value
def color_scale(value):
    """Map an ownership percentage to an ``rgb(r,g,b)`` shade of blue.

    The scale is piecewise linear in each color channel:

    * ``value < 2``: light blue (173, 216, 230) darkening to royal blue (65, 105, 225)
    * ``2 <= value < 4``: royal blue darkening to dark blue (0, 0, 139)
    * anything else: solid dark blue (0, 0, 139)

    Args:
        value: Numeric percentage, e.g. ``1.5`` for 1.5%.

    Returns:
        A color string such as ``'rgb(65,105,225)'``.
    """
    if value < 2:
        # Lower band: map [0, 2) onto [0, 1) so the branch ends exactly at royal blue
        normalized = value / 2
        r = int(173 + (65 - 173) * normalized)
        g = int(216 + (105 - 216) * normalized)
        b = int(230 + (225 - 230) * normalized)
    elif value < 4:
        # Upper band: map [2, 4) onto [0, 1), starting where the lower band ends
        normalized = (value - 2) / 2
        r = int(65 + (0 - 65) * normalized)
        g = int(105 + (0 - 105) * normalized)
        b = int(225 + (139 - 225) * normalized)
    else:
        # Saturate at the darkest shade
        r, g, b = 0, 0, 139

    # Ensure RGB values are within the valid range (negative inputs overshoot)
    r = max(0, min(255, r))
    g = max(0, min(255, g))
    b = max(0, min(255, b))

    return f'rgb({r},{g},{b})'

In [None]:
# Work on an independent copy of `out`, ordered by the share held by the
# largest (1000+ unit) portfolios, highest first.
charting_data = (
    out.copy(deep=True)
    .sort_values(by='pct_sf_housing_stock_portfolio_1000_plus', ascending=False)
)

In [None]:
# Color scale used for the table cell backgrounds
def color_scale(value):
    """Translate a percentage into an ``rgb(r,g,b)`` shade of blue.

    Values below 2 blend from light blue toward royal blue, values from 2 up
    to (but not including) 4 blend from royal blue toward dark blue, and
    everything else is rendered as solid dark blue. Each channel is truncated
    to an integer and clamped to the 0-255 range.
    """
    light_blue = (173, 216, 230)
    royal_blue = (65, 105, 225)
    dark_blue = (0, 0, 139)

    # Pick the gradient segment and the position within it
    if value < 2:
        start, end, fraction = light_blue, royal_blue, value / 2
    elif value < 4:
        start, end, fraction = royal_blue, dark_blue, (value - 2) / 2
    else:
        start, end, fraction = dark_blue, dark_blue, 0

    # Interpolate channel by channel, truncate, then clamp
    r, g, b = (
        max(0, min(255, int(lo + (hi - lo) * fraction)))
        for lo, hi in zip(start, end)
    )
    return f'rgb({r},{g},{b})'

# Build the table contents column by column.
# Every value is shown as a bold percentage with two decimals.
formatted_data = [
    [f"<b>{value:.2f}%</b>" for value in charting_data[col]]
    for col in charting_data.columns
]

# Cell backgrounds: the color scale for each portfolio-size column, and the
# plain background color for the 'All Portfolios' column so it blends in.
colors = [
    [
        "#080D16" if col == 'pct_sf_housing_stock_all_portfolios' else color_scale(value)
        for value in charting_data[col]
    ]
    for col in charting_data.columns
]

# Header labels for the five ownership columns, in the same order as the
# columns of `charting_data`
column_headers = ['2 to 9 Units', '10 to 99 Units', '100 to 999 Units', '1000+ Units', 'All Portfolios']

# One table trace: a leading 'Market' column followed by the ownership columns
fig = go.Figure(data=[go.Table(
    header={
        'values': ['<b>Market</b>', *(f"<b>{header}</b>" for header in column_headers)],
        'fill_color': '#080D16',
        'font': {'color': '#FFFFFF', 'size': 12},
        'align': 'center',
        'height': 30,
    },
    cells={
        'values': [charting_data.index.tolist(), *formatted_data],
        # The market-name column keeps the plain background; the rest use `colors`
        'fill': {'color': [['#080D16'] * len(charting_data), *colors]},
        'font': {'color': '#FFFFFF', 'size': 11},
        'align': 'center',
        'height': 30,
    },
)])

# Available "powered by Parcl Labs" logo variants, keyed by color
labs_logo_lookup = dict(
    blue='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
    white='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg',
)

# Place the white logo in paper coordinates: centered horizontally, with its
# bottom edge just above the top of the plotting area
labs_logo_dict = {
    'source': labs_logo_lookup['white'],
    'xref': "paper",
    'yref': "paper",
    'x': 0.5,
    'y': 1.01,
    'sizex': 0.3,
    'sizey': 0.2,
    'xanchor': "center",
    'yanchor': "bottom",
}
fig.add_layout_image(labs_logo_dict)

# Apply the title, figure size, background color and margins, then render
fig.update_layout(
    title=dict(
        text='Percent of Single Family Homes owned by National Portfolio Size',
        y=0.98,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    title_font_color='#FFFFFF',
    width=850,
    height=1600,
    paper_bgcolor='#080D16',
    # Extra top margin leaves room for the title and the logo
    margin={'l': 10, 'r': 10, 't': 120, 'b': 10},
)
fig.show()


In [None]:
# Print the 20 metros with the largest share of single family homes held by
# 2-to-9-unit portfolios, highest first. Only that one column is needed, so
# iterate over it directly instead of building a full row per metro.
for name, mom in (
    out.sort_values('pct_sf_housing_stock_portfolio_2_to_9', ascending=False)
    .head(20)['pct_sf_housing_stock_portfolio_2_to_9']
    .items()
):
    print(f"{name}: {mom}% Owned")