In [1]:
# Environment setup
import os
import sys
import json
import subprocess
from datetime import datetime
from urllib.request import urlopen

# Collab setup from one click above
if "google.colab" in sys.modules:
    from google.colab import userdata
    %pip install parcllabs plotly kaleido
    api_key = userdata.get('PARCL_LABS_API_KEY')
else:
    api_key = os.getenv('PARCL_LABS_API_KEY')

In [2]:
import parcllabs
import pandas as pd
import plotly.express as px
from parcllabs import ParclLabsClient

# Record the installed parcllabs SDK version alongside the results.
print(f"Parcl Labs Version: {parcllabs.__version__}")

Parcl Labs Version: 0.2.1


In [3]:
# Build the API client with the key resolved in the environment-setup cell.
client = ParclLabsClient(api_key=api_key)

In [4]:
# Fetch every county-level market (location_type='COUNTY') as a DataFrame.
# Pages are requested 1000 rows at a time; with auto_paginate=True the recorded
# run returned 3087 counties, i.e. more than a single page.
counties_df = client.search_markets.retrieve(
    location_type='COUNTY',
    params={'limit': 1000},
    auto_paginate=True,
    as_dataframe=True,
)

In [5]:
# Collect the Parcl identifier of every county; the cell displays how many
# counties were found.
county_ids = counties_df['parcl_id'].tolist()
len(county_ids)

3087

In [6]:
# How big are the investors? Pull single-family housing stock ownership
# metrics, broken down by portfolio size, for every county.
# limit=1 presumably caps each county at a single record - TODO confirm.
# NOTE: this is slow (the recorded run took about 8 minutes) and its progress
# bar stopped at 2790/3087, so some counties may have returned no data.
df = client.portfolio_metrics_sf_housing_stock_ownership.retrieve_many(
    parcl_ids=county_ids,
    params={'limit': 1},
    as_dataframe=True,
)

|████████████████████████████████████▏⚠︎  | (!) 2790/3087 [90%] in 8:01.1 (5.80/s) 


  return pd.concat(out).reset_index(drop=True)


In [7]:
# Preview the first rows of the ownership metrics.
df.head()

Unnamed: 0,date,count_portfolio_2_to_9,count_portfolio_10_to_99,count_portfolio_100_to_999,count_portfolio_1000_plus,count_all_portfolios,pct_sf_housing_stock_portfolio_2_to_9,pct_sf_housing_stock_portfolio_10_to_99,pct_sf_housing_stock_portfolio_100_to_999,pct_sf_housing_stock_portfolio_1000_plus,pct_sf_housing_stock_all_portfolios,parcl_id
0,2024-04-01,136492,7955,828,4073,149348,9.51,0.55,0.06,0.28,10.41,5821714
1,2024-04-01,33892,5499,1411,5100,45902,4.16,0.67,0.17,0.63,5.63,5822729
2,2024-04-01,91817,12556,1490,15509,121372,8.6,1.18,0.14,1.45,11.37,5821868
3,2024-04-01,117191,12116,1937,28714,159958,10.23,1.06,0.17,2.51,13.97,5823191
4,2024-04-01,56000,2210,39,352,58601,9.76,0.39,0.01,0.06,10.21,5822624


In [11]:
# Keep only the 2024-04-01 snapshot of the ownership metrics.
tmp = df.loc[df['date'] == '2024-04-01']

# Attach county attributes. The right join keeps every county in counties_df,
# so counties without a metrics row appear with NaN metrics.
out = pd.merge(
    tmp[['parcl_id', 'count_portfolio_1000_plus', 'pct_sf_housing_stock_portfolio_1000_plus']],
    counties_df[['parcl_id', 'geoid', 'name', 'state_abbreviation', 'total_population', 'median_income']],
    how='right',
    on='parcl_id',
)

# Rescale the share from percentage points (e.g. 0.28) to a fraction (0.0028).
out['pct_sf_housing_stock_portfolio_1000_plus'] = out['pct_sf_housing_stock_portfolio_1000_plus'].div(100)

In [12]:
# Rename the county GEOID column to `id`, zero-fill the gaps and save the table.
out = out.rename(columns={'geoid': 'id'})

# Counties with no metrics row came out of the right join as NaN; treat them as
# zero. NOTE: this zero-fills every column, so a missing county attribute
# (e.g. median_income) also becomes 0.
out = out.fillna(0)

# Write to a configurable directory (OUTPUT_DIR environment variable, defaulting
# to the current working directory) rather than a machine-specific absolute
# path, so the cell also runs on Colab and on other machines.
output_dir = os.getenv('OUTPUT_DIR', '.')
os.makedirs(output_dir, exist_ok=True)
out.to_csv(os.path.join(output_dir, 'large_ownership.csv'), index=False)

  out = out.fillna(0)


In [10]:
# Spot-check the first row of the merged county table.
out.head(1)

Unnamed: 0,parcl_id,count_portfolio_1000_plus,pct_sf_housing_stock_portfolio_1000_plus,id,name,state_abbreviation,total_population,median_income
0,5821714,0,0.0,6037,Los Angeles County,CA,9936690,83411.0


In [None]:
# Map preparation: download the US county GeoJSON published in Plotly's
# datasets repository and parse it into a dict.
geojson_url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(geojson_url) as response:
    counties = json.loads(response.read())

In [None]:
# Rename the `id` column (the county GEOID, renamed from `geoid` in an earlier
# cell) to `fips`, the name used as the map's location key.
out = out.rename(columns={'id': 'fips'})
out.head()

In [None]:
# Every county id present in the GeoJSON; the cell displays how many there are.
all_fips_ids = [feature['id'] for feature in counties['features']]
len(all_fips_ids)

In [None]:
# Fill holes: add a zero-valued placeholder row for every county that exists in
# the GeoJSON but has no row in `out`, so the map has no gaps.

# The GeoJSON ids are expected to be 5-character, zero-padded FIPS strings
# (e.g. "06037"), whereas the GEOIDs in `out` print without the leading zero
# (6037). Normalise them first; otherwise those counties never match the
# GeoJSON and would all be reported as missing.
out['fips'] = out['fips'].astype(str).str.zfill(5)

map_ids = out['fips'].unique().tolist()
known_ids = set(map_ids)  # set membership instead of repeated list scans
missing = [fips for fips in all_fips_ids if fips not in known_ids]

missing_df = pd.DataFrame({'fips': missing})
# Zero-fill the metrics this notebook actually computes for `out`.
missing_df['count_portfolio_1000_plus'] = 0
missing_df['pct_sf_housing_stock_portfolio_1000_plus'] = 0
# Legacy placeholder columns, kept only so that any cell still reading these
# names keeps running; they carry no data for real counties.
missing_df['counts'] = 0
missing_df['pct_sf_for_sale_market'] = 0

# ignore_index avoids duplicate index labels in the combined frame.
out = pd.concat([out, missing_df], ignore_index=True)
out.tail()

In [None]:
# Parcl Labs "powered by" logo assets, keyed by colour variant.
labs_logo_lookup = dict(
    blue='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
    white='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg',
)

# Charting constants: layout-image settings that place the white logo.
labs_logo_dict = {
    'source': labs_logo_lookup['white'],
    'xref': "paper",
    'yref': "paper",
    'x': 0.5,    # horizontally centred, below the title
    'y': 1.05,   # tweak to move the logo relative to the title
    'sizex': 0.15,
    'sizey': 0.15,
    'xanchor': "center",
    'yanchor': "bottom",
}

# Export dimensions (width/height) per target platform.
media_img_size_lookup = {'X': dict(width=1600, height=900)}

PLATFORM = 'X'

# Image size for the selected platform.
IMG_WIDTH, IMG_HEIGHT = (
    media_img_size_lookup[PLATFORM]['width'],
    media_img_size_lookup[PLATFORM]['height'],
)

# Title placement settings for plots.
PLOT_TITLE_SETTINGS = dict(y=0, x=0, xanchor='left', yanchor='bottom')

In [None]:
# df['pct_sf_housing_stock_portfolio_1000_plus'] = df['pct_sf_housing_stock_portfolio_1000_plus']+100

In [None]:
# df.head()
# out['pct_sf_for_sale_market'] = out['pct_sf_for_sale_market']*100

In [None]:
import plotly.express as px
import pandas as pd
import os

# County choropleth of the share of single-family housing stock held by
# portfolios of 1,000+ homes.
# Requires `out`, `counties`, `labs_logo_lookup`, `IMG_WIDTH` and `IMG_HEIGHT`
# from the cells above.

# Column to map. `out` was built with this column (stored as a fraction after
# the divide-by-100 step); the previously referenced `pct_sf_for_sale_market`
# only exists on zero-filled placeholder rows, so it coloured no real county.
metric_col = 'pct_sf_housing_stock_portfolio_1000_plus'
metric_label = "% of SF Housing Stock Owned by 1,000+ Home Portfolios"

# Define the color scale
color_continuous_scale=[
    (0, "#FFFFFF"),
    (0.5, "#B0CFFF"),
    (1, "#04428C")
]

# Set your Mapbox access token (read from the MBOX_ACCESS_TOKEN env variable)
mapbox_access_token = os.getenv('MBOX_ACCESS_TOKEN')

px.set_mapbox_access_token(mapbox_access_token)

# Create the choropleth map. The colour range is anchored at 0 and follows the
# data: the old fixed range (5, 20) cannot fit values stored as fractions.
fig = px.choropleth_mapbox(out,
                           geojson=counties,
                           locations='fips',
                           color=metric_col,
                           color_continuous_scale=color_continuous_scale,
                           range_color=(0, out[metric_col].max()),
                           mapbox_style="mapbox://styles/mapbox/dark-v9",
                           zoom=6,
                           center={"lat": 32.3846, "lon": -83.2554},
                           opacity=0.9,  # Set opacity for the map
                           labels={metric_col: metric_label}
                          )

# Adjust color bar and title text color
fig.update_coloraxes(colorbar=dict(
    title=metric_label,
    tickformat='.1%',  # values are fractions; render ticks as percentages
    tickfont=dict(color='white', size=12),
    title_font=dict(color='white', size=14),  # Title font size set to 14
    y=-0.01, # Color bar y position (below the map)
    len=0.35, # Length of the color bar (35% of the horizontal dimension)
    orientation='h',  # Horizontal color bar
    thickness=15,  # Thickness of the color bar
    xanchor='center',  # Center the color bar horizontally
))

# Place the white Parcl Labs logo above the map
fig.add_layout_image(dict(
     source=labs_logo_lookup['white'],
     xref="paper",
     yref="paper",
     x=0.5,
     y=1.05,
     sizex=0.15,
     sizey=0.15,
     xanchor="center",
     yanchor="top"
))

# Update the layout for background color, margins, and figure dimensions
fig.update_layout(
    title="🏠 " + metric_label,
    title_x=0.5,  # Center the title
    title_font=dict(color='white', size=20),  # Title font size set to 20
    paper_bgcolor='#080D16',  # Background color of the paper
    geo_bgcolor='#080D16',  # Background color of the map
    margin={"r":0, "t":100, "l":0, "b":0},  # Adjust top margin to give space for the title
    width=IMG_WIDTH,    # shared export size from the charting-constants cell
    height=IMG_HEIGHT,
    mapbox=dict(
        layers=[
            {
                'source': counties,
                'type': 'fill',
                'below': 'traces',
                'opacity': 1,
                'color': 'rgba(255,255,255,0.5)',  # Adjust the fill color and opacity
            }
        ],
        style='mapbox://styles/mapbox/dark-v9',
        center={"lat": 32.3846, "lon": -83.2554},
        zoom=6,
    ),
    showlegend=False,  # Hide the legend
    annotations=[],  # Remove any annotations
)

# Display the map
fig.show()


In [None]:
# Average county-level share of SF housing stock held by 1,000+ home portfolios.
# (`pct_sf_for_sale_market` only exists on zero-filled placeholder rows, so its
# mean was always 0.)
out['pct_sf_housing_stock_portfolio_1000_plus'].mean()

In [None]:
# Largest counties first, ties broken by the 1,000+ portfolio share. The sort
# uses the metric column that `out` actually carries for real counties.
out.sort_values(['total_population', 'pct_sf_housing_stock_portfolio_1000_plus'], ascending=False)

In [None]:
# Rank the 50 counties with the highest share of SF housing stock held by
# 1,000+ home portfolios, limited to counties with more than 10,000 residents
# and more than 10 such homes. The filter and sort use the columns `out`
# actually carries; the stale `counts` / `pct_sf_for_sale_market` names are NaN
# for every real county, so they selected nothing.
top = (
    out.loc[(out['total_population'] > 10000) & (out['count_portfolio_1000_plus'] > 10)]
    .sort_values('pct_sf_housing_stock_portfolio_1000_plus', ascending=False)
    .head(50)
    .reset_index()
)
for index, row in top.iterrows():
    rank = index + 1  # the index is 0-based after reset_index
    name = row['name']
    state = row['state_abbreviation']
    pct = row['pct_sf_housing_stock_portfolio_1000_plus']  # stored as a fraction
    msg = f"{rank}. {name}, {state}: {pct:.02%}"
    print(msg)

In [None]:
# Summary statistics of county median income, used to choose the bucket edges.
out['median_income'].describe()

In [None]:
def income_buckets(inc):
    """Map a median income value to a coarse income-bucket label.

    Args:
        inc: Income as a number, or a missing value (None / NaN / pd.NA).

    Returns:
        'Unknown' for a missing value; otherwise '< $50,000' (inc <= 50000),
        '$50k to $60k' (50000 < inc <= 60000), '$60k to $70k'
        (60000 < inc <= 70000) or '> $70k' (inc > 70000).
    """
    # NaN compares False against every bound, so without this guard a missing
    # income would fall through to the '> $70k' bucket.
    if pd.isna(inc):
        return 'Unknown'
    if inc <= 50000:
        return '< $50,000'
    # Each earlier return already excludes the lower range, so only the upper
    # bound needs checking from here on.
    if inc <= 60000:
        return '$50k to $60k'
    if inc <= 70000:
        return '$60k to $70k'
    return '> $70k'

# Label every county with its income bucket.
out['inc_buckets'] = out['median_income'].apply(income_buckets)

In [None]:
# Mean 1,000+ portfolio share of SF housing stock per income bucket, using the
# metric column that `out` actually carries for real counties.
out.groupby('inc_buckets')['pct_sf_housing_stock_portfolio_1000_plus'].mean()

In [None]:
# Number of distinct counties (parcl_id values) in each income bucket.
out.groupby('inc_buckets')['parcl_id'].nunique()