# Fort Myers, Tampa Bay Region, 

All Florida Metros: 
- What share of new listings are coming from investors?
- By market and for the State, compared to National Average
- 

In [2]:
import os
import plotly
import numpy as np
import pandas as pd
import plotly.express as px
from datetime import datetime
import plotly.graph_objects as go
from parcllabs import ParclLabsClient
from plotly.subplots import make_subplots
from parcllabs.beta.charting.utils import create_labs_logo_dict
from parcllabs.beta.charting.styling import default_style_config as style_config


In [3]:
client = ParclLabsClient(
    api_key=os.environ.get('PARCL_LABS_API_KEY', "<your Parcl Labs API key if not set as environment variable>"), 
    turbo_mode=True,
    limit=12 # set default limit
)

In [4]:
# in this case, lets look at US market overall
us_market = client.search.markets.retrieve(
    query='United States',
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    limit=1
)

metros = client.search.markets.retrieve(
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    limit=1000,
    location_type='CBSA'
)

markets = pd.concat([us_market, metros])
market_ids = markets['parcl_id'].unique().tolist()
markets['state'] = markets['name'].apply(lambda x: x.split(',')[-1].strip().upper().split('-')[0])
markets['clean_name'] = markets.apply(lambda x: f"{x['name'].split('-')[0].split(',')[0].strip()}, {x['state']}", axis=1)
markets['clean_name'] = markets['clean_name'].replace({'United States Of America, UNITED STATES OF AMERICA': 'USA'})

In [225]:
fl_metro_pids = [
    2899989,
2900417,
2900241,
2900041,
2900213,
2900229,
2899748,
2900198,
2899718,
2900268,
2900192,
2899729,
2899961,
2900423,
2900227,
2899822,
2900416,
2900231,
2900279,
2900354,
2900039,
2900128,
2899882,
2900355,
2899655,
2900204,
2900021,
2900173
]

In [226]:
# fl_pids = markets.loc[markets['state']=='FL']['parcl_id'].tolist()
pids = fl_metro_pids + [us_market['parcl_id'].values[0]]

In [243]:
prop_types = [
    'SINGLE_FAMILY',
    'ALL_PROPERTIES',
    'CONDO'
]

output = []

for ptype in prop_types:

    # look at price cut activity
    price_changes = client.for_sale_market_metrics.for_sale_inventory_price_changes.retrieve(
        parcl_ids=pids,
        start_date='2022-09-01',
        auto_paginate=True,
        property_type=ptype
    )

    inventory = client.for_sale_market_metrics.for_sale_inventory.retrieve(
        parcl_ids=pids,
        start_date='2022-09-01',
        auto_paginate=True,
        property_type=ptype
    )

    price_changes = pd.merge(price_changes, markets[['parcl_id', 'name']], on='parcl_id', how='left')
    price_changes = pd.merge(price_changes, inventory[['parcl_id', 'date', 'for_sale_inventory']], on=['parcl_id', 'date'], how='left')
    price_changes['pct_price_drops'] = price_changes['count_price_drop'] / price_changes['for_sale_inventory']
    price_changes = price_changes.sort_values(['parcl_id', 'name', 'date'])[['parcl_id', 'name', 'pct_price_drops', 'property_type', 'date']]
    output.append(price_changes)

data: {'parcl_id': ['2899989', '2900417', '2900241', '2900041', '2900213', '2900229', '2899748', '2900198', '2899718', '2900268', '2900192', '2899729', '2899961', '2900423', '2900227', '2899822', '2900416', '2900231', '2900279', '2900354', '2900039', '2900128', '2899882', '2900355', '2899655', '2900204', '2900021', '2900173', '5826765'], 'start_date': '2022-09-01', 'property_type': 'SINGLE_FAMILY'}, params: {}
data: {'parcl_id': ['2899989', '2900417', '2900241', '2900041', '2900213', '2900229', '2899748', '2900198', '2899718', '2900268', '2900192', '2899729', '2899961', '2900423', '2900227', '2899822', '2900416', '2900231', '2900279', '2900354', '2900039', '2900128', '2899882', '2900355', '2899655', '2900204', '2900021', '2900173', '5826765'], 'start_date': '2022-09-01', 'property_type': 'SINGLE_FAMILY'}, params: {}
data: {'parcl_id': ['2899989', '2900417', '2900241', '2900041', '2900213', '2900229', '2899748', '2900198', '2899718', '2900268', '2900192', '2899729', '2899961', '2900423'

In [244]:
df = pd.concat(output)
df.head()

Unnamed: 0,parcl_id,name,pct_price_drops,property_type,date
3189,2899655,"Clewiston, Fl",0.140625,SINGLE_FAMILY,2022-09-05
3188,2899655,"Clewiston, Fl",0.238095,SINGLE_FAMILY,2022-09-12
3187,2899655,"Clewiston, Fl",0.213333,SINGLE_FAMILY,2022-09-19
3186,2899655,"Clewiston, Fl",0.243902,SINGLE_FAMILY,2022-09-26
3185,2899655,"Clewiston, Fl",0.242857,SINGLE_FAMILY,2022-10-03


In [266]:
df.loc[df['date']=='2024-10-07'].sort_values('pct_price_drops', ascending=False).pivot(index='name', columns='property_type', values='pct_price_drops').sort_values('ALL_PROPERTIES', ascending=False).to_csv('fl_price_listings_reductions.csv')

In [259]:
mkts = {
    'Tampa-St. Petersburg-Clearwater, Fl': 'Tampa',
    'Jacksonville, Fl': 'Jacksonville',
    'Orlando-Kissimmee-Sanford, Fl': 'Orlando', 
    'Miami-Fort Lauderdale-Pompano Beach, Fl': 'Miami',
    'United States Of America': 'USA'
}

chart = df.loc[(df['name'].isin(mkts.keys())) & (df['property_type']=='ALL_PROPERTIES')].sort_values(['parcl_id', 'date'], ascending=False)
chart['clean_name'] = chart['name'].apply(lambda x: mkts[x])

In [265]:
chart[['parcl_id', 'name', 'property_type', 'date', 'pct_price_drops']].to_csv('fl_price_listings_trend_reductions.csv', index=False)

In [261]:
# Get max date for chart
data = chart.copy(deep=True)
max_date_for_chart = data['date'].max().date()
max_date_for_chart = max_date_for_chart.strftime('%B %d, %Y')

CHART_WIDTH = 1600
CHART_HEIGHT = 800
# Create the line chart using Plotly Express
fig = px.line(
    data,
    x='date',
    y='pct_price_drops',
    color='clean_name',
    line_group='clean_name',
    labels={'pct_price_drops': '% of Inventory with Price Cuts'},
    title=f'Percentage of Inventory with Price Reductions ({max_date_for_chart})'
)

# Update traces to apply specific styles
for trace in fig.data:
    if trace.name == 'USA':
        trace.update(
            line=dict(color='red', width=4),
            opacity=1
        )
    else:
        trace.update(
            line=dict(color='lightblue', dash='dash', width=2),
            opacity=0.8
        )
    # Remove text annotations from traces
    trace.update(
        mode='lines'
    )

# Find the latest date in the dataset
latest_date = max(price_changes['date'])

# Add annotations for each line on the far right
annotations = []
y_positions = []

for trace in fig.data:
    # Get the last y-value for each clean_name
    last_y_value = data[
        (data['clean_name'] == trace.name) &
        (data['date'] == latest_date)
    ]['pct_price_drops'].values[0]
    
    # Only add the annotation if it doesn't overlap with existing annotations
    if not any(abs(last_y_value - y) < 0.005 for y in y_positions):  # Adjust threshold as needed
        annotations.append(dict(
            x=latest_date,
            y=last_y_value,
            xref='x',
            yref='y',
            text=trace.name,
            showarrow=False,
            xanchor='left',
            font=dict(size=12)  # Adjust the font size if needed
        ))
        y_positions.append(last_y_value)

fig.add_layout_image(
        create_labs_logo_dict()
)

# Update layout for axes, title, and other styling
fig.update_layout(
    width=CHART_WIDTH,
    height=CHART_HEIGHT,
    xaxis=dict(
        title='',
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        # tickangle=style_config['tick_angle'],
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        titlefont=style_config['title_font_axis']
    ),
    yaxis=dict(
        title='% Price Reductions',
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        tickfont=style_config['axis_font'],
        zeroline=False,
        tickformat='.0%',
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        titlefont=style_config['title_font_axis']
    ),
    plot_bgcolor=style_config['background_color'],
    paper_bgcolor=style_config['background_color'],
    font=dict(color=style_config['font_color']),
    showlegend=False,  # Remove the legend
    margin=dict(l=40, r=40, t=80, b=40),
    title={
        'y': 0.98,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(size=24)
    },
    annotations=annotations  # Add annotations
)
# save_figure(fig, save_path=f'{ROOT_DIR}/pct_inventory_price_reductions_line_chart.png', 
#             width=CHART_WIDTH, height=CHART_HEIGHT)

fig.show()

### Share of all new listings coming from investors

In [197]:
# get share of all listings coming from investors across FL markets,
# compared to the US overall
property_types = [
    # 'SINGLE_FAMILY',
    # 'CONDO',
    # 'TOWNHOUSE'
    'ALL_PROPERTIES'
]

listings = []
investor_listings = []
for ptype in property_types:
    new_investor_listings = client.investor_metrics.new_listings_for_sale_rolling_counts.retrieve(
        property_type=ptype,
        parcl_ids=pids,
        start_date='2022-09-01',
        auto_paginate=True
    )

    investor_listings.append(new_investor_listings)

    new_listings = client.for_sale_market_metrics.new_listings_rolling_counts.retrieve(
        property_type=ptype,
        parcl_ids=pids,
        start_date='2022-09-01',
        auto_paginate=True
    )

    listings.append(new_listings)

data: {'parcl_id': ['2900128', '2900417', '2900213', '2899989', '2900192', '2899822', '2900041', '2899748', '2900229', '2900241', '2900268', '2900416', '2900173', '2900198', '2899718', '2899729', '5826765'], 'start_date': '2022-09-01', 'property_type': 'ALL_PROPERTIES'}, params: {}
data: {'parcl_id': ['2900128', '2900417', '2900213', '2899989', '2900192', '2899822', '2900041', '2899748', '2900229', '2900241', '2900268', '2900416', '2900173', '2900198', '2899718', '2899729', '5826765'], 'start_date': '2022-09-01', 'property_type': 'ALL_PROPERTIES'}, params: {}


In [217]:
# listings = pd.concat(listings)
tmp = pd.concat(investor_listings)
tmp = pd.merge(tmp, markets[['parcl_id', 'clean_name']], on='parcl_id', how='left')

In [218]:
tmp[['parcl_id', 'clean_name']].drop_duplicates()

Unnamed: 0,parcl_id,clean_name
0,5826765,USA
110,2900417,"Tampa, FL"
220,2900416,"Tallahassee, FL"
330,2900268,"Port St. Lucie, FL"
440,2900241,"Pensacola, FL"
550,2900229,"Palm Bay, FL"
660,2900213,"Orlando, FL"
770,2900198,"Ocala, FL"
880,2900192,"North Port, FL"
990,2900173,"Naples, FL"


In [219]:
new_investor_listings = tmp# .loc[tmp['property_type']=='CONDO']
new_investor_listings['pct_for_sale_market_rolling_7_day'] = new_investor_listings['pct_for_sale_market_rolling_7_day']/100
new_investor_listings['pct_for_sale_market_rolling_30_day'] = new_investor_listings['pct_for_sale_market_rolling_30_day']/100
new_investor_listings['pct_for_sale_market_rolling_60_day'] = new_investor_listings['pct_for_sale_market_rolling_60_day']/100

In [220]:
interest = [
    2900417, # Tampa
    2900192, # north port
    2900213, # Orlando
    2899748, # Deltona
    5826765, # USA
    2899989, # Jacksonville
]

In [221]:
new_investor_listings = new_investor_listings.loc[new_investor_listings['parcl_id'].isin(interest)]
new_investor_listings.head()

Unnamed: 0,parcl_id,date,count_rolling_7_day,count_rolling_30_day,count_rolling_60_day,count_rolling_90_day,pct_for_sale_market_rolling_7_day,pct_for_sale_market_rolling_30_day,pct_for_sale_market_rolling_60_day,pct_for_sale_market_rolling_90_day,property_type,clean_name
0,5826765,2024-10-07,17484.0,68559,141120,216009,0.1459,0.1446,0.1445,14.38,ALL_PROPERTIES,USA
1,5826765,2024-09-30,14928.0,67295,140923,212101,0.1446,0.1434,0.1443,14.33,ALL_PROPERTIES,USA
2,5826765,2024-09-23,17220.0,67907,142321,216041,0.1454,0.1436,0.1442,14.37,ALL_PROPERTIES,USA
3,5826765,2024-09-16,16873.0,67503,142400,216764,0.1433,0.1439,0.1438,14.35,ALL_PROPERTIES,USA
4,5826765,2024-09-09,15952.0,67433,143551,218598,0.1393,0.144,0.1435,14.37,ALL_PROPERTIES,USA


In [222]:
# Get max date for chart
max_date_for_chart = new_investor_listings['date'].max().date()
max_date_for_chart = max_date_for_chart.strftime('%B %d, %Y')
new_investor_listings = new_investor_listings.sort_values(['parcl_id', 'date'])

WINDOW_PERIOD = 60

CHART_WIDTH = 1600
CHART_HEIGHT = 800
# Create the line chart using Plotly Express
fig = px.line(
    new_investor_listings,
    x='date',
    y=f'pct_for_sale_market_rolling_{WINDOW_PERIOD}_day',
    color='clean_name',
    line_group='clean_name',
    labels={f'pct_for_sale_market_rolling_{WINDOW_PERIOD}_day': '% of new listings'},
    title=f"<span style='font-size:20px;'>% of New Listings coming from Investors</span><br><span style='font-size:12px; font-style: italic;'>{max_date_for_chart} - Rolling {WINDOW_PERIOD} Days</span>"
)

# Define the cities to be highlighted in red
highlighted_cities = ['USA'] # , 'Tampa, FL', 'Gainesville, FL', 'Jacksonville, FL']# ['North Port, FL', 'Tampa, FL', 'Lakeland, FL', 'Jacksonville, FL']

# Update traces to apply specific styles
for trace in fig.data:
    if trace.name in highlighted_cities:
        trace.update(
            line=dict(color='red', width=3, dash='dash'),
            opacity=1
        )
    else:
        trace.update(
            line=dict(color='lightblue', dash='dash', width=2),
            opacity=0.8
        )
    # Remove text annotations from traces
    trace.update(
        mode='lines'
    )

# Find the latest date in the dataset
latest_date = max(new_investor_listings['date'])

# Add annotations for each line on the far right
annotations = []
y_positions = []

for trace in fig.data:
    # Get the last y-value for each clean_name
    last_y_value = new_investor_listings[
        (new_investor_listings['clean_name'] == trace.name) &
        (new_investor_listings['date'] == latest_date)
    ][f'pct_for_sale_market_rolling_{WINDOW_PERIOD}_day'].values[0]
    
    # Only add the annotation if it doesn't overlap with existing annotations
    if not any(abs(last_y_value - y) < 0.003 for y in y_positions):  # Adjust threshold as needed
        annotations.append(dict(
            x=latest_date,
            y=last_y_value,
            xref='x',
            yref='y',
            text=trace.name,
            showarrow=False,
            xanchor='left',
            font=dict(size=12)  # Adjust the font size if needed
        ))
        y_positions.append(last_y_value)

fig.add_layout_image(
    create_labs_logo_dict()
)

# Update layout for axes, title, and other styling
fig.update_layout(
    width=CHART_WIDTH,
    height=CHART_HEIGHT,
    xaxis=dict(
        title='',
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        # tickangle=style_config['tick_angle'],
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        titlefont=style_config['title_font_axis']
    ),
    yaxis=dict(
        title='% of Single Family Home Listings',
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        tickfont=style_config['axis_font'],
        zeroline=False,
        tickformat='.0%',
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        titlefont=style_config['title_font_axis']
    ),
    plot_bgcolor=style_config['background_color'],
    paper_bgcolor=style_config['background_color'],
    font=dict(color=style_config['font_color']),
    showlegend=False,  # Remove the legend
    margin=dict(l=40, r=40, t=100, b=40),
    title={
        'y': 0.96,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    annotations=annotations  # Add annotations
)

fig.show()


In [267]:
us_market.head()

Unnamed: 0,parcl_id,country,geoid,state_fips_code,name,state_abbreviation,region,location_type,total_population,median_income,parcl_exchange_market,pricefeed_market,case_shiller_10_market,case_shiller_20_market
0,5826765,USA,,,United States Of America,,,COUNTRY,331097593,75149,1,1,0,0


### Top institutional concentration of homes across the country

In [5]:
# save all files
output_dir = '/Users/jasonlewris/Library/Mobile Documents/com~apple~CloudDocs/Parcl Labs/carol_wsj'

In [6]:
# we need to break this by zip and msa

# first get top 100 metros by population
metros = client.search.markets.retrieve(
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    limit=100,
    location_type='CBSA'
)

us_market = client.search.markets.retrieve(
    query='United States',
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    limit=1
)

In [7]:
# then get all zipcodes for the country
zips = client.search.markets.retrieve(
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    auto_paginate=True,
    limit=1000,
    location_type='ZIP5'
)

In [8]:
markets = pd.concat([us_market, metros])
market_ids = markets['parcl_id'].unique().tolist()

START_DATE = '2024-09-01'

# get housing stock ownership
ownership = client.portfolio_metrics.sf_housing_stock_ownership.retrieve(
    parcl_ids=market_ids,
    auto_paginate=True,
    start_date=START_DATE,
    limit=1
)

data: {'parcl_id': ['5826765', '2900187', '2900078', '2899845', '2899734', '2899967', '2900475', '2900244', '2900128', '2887280', '2899625', '2900245', '2900336', '2900295', '2899753', '2900353', '2900137', '2900332', '2900417', '2899750', '2887292', '2900321', '2900213', '2899841', '2900331', '2900266', '2900315', '2900251', '2887289', '2900049', '2899647', '2900012', '2899671', '2899979', '2899654', '2900174', '2900338', '2900462', '2900275', '2899989', '2900134', '2900205', '2900282', '2900122', '2900292', '2900079', '2900182', '2900329', '2899944', '2899645', '2899611', '2899916', '2900301', '2900436', '2900438', '2900447', '2899715', '2899896', '2900208', '2899636', '2899929', '2899862', '2887291', '2899858', '2900030', '2900116', '2899589', '2899787', '2900181', '2899867', '2900192', '2900223', '2899666', '2899742', '2899840', '2900404', '2899924', '2899822', '2899621', '2899664', '2900070', '2900041', '2899752', '2899854', '2900271', '2900201', '2900391', '2900089', '2900503', '

In [9]:
zip_ownership = client.portfolio_metrics.sf_housing_stock_ownership.retrieve(
    parcl_ids=zips['parcl_id'].tolist(),
    auto_paginate=True,
    start_date=START_DATE,
    limit=1
)

data: {'parcl_id': ['5445224', '5487000', '5364758', '5486793', '5453067', '5507083', '5468931', '5358467', '5486942', '5269001', '5506699', '5453121', '5452957', '5422176', '5358103', '5483467', '5483169', '5281029', '5314866', '5273480', '5495447', '5483329', '5358086', '5453093', '5542503', '5483382', '5273570', '5273788', '5469191', '5486717', '5273596', '5268545', '5358277', '5358465', '5268629', '5273633', '5453015', '5483332', '5414139', '5268698', '5364749', '5358370', '5358371', '5486940', '5364881', '5469187', '5358458', '5358554', '5453076', '5273644', '5421882', '5453084', '5268849', '5314567', '5483566', '5421819', '5486999', '5486907', '5486924', '5358148', '5403258', '5268569', '5483379', '5303125', '5483170', '5507078', '5402837', '5303025', '5445836', '5422158', '5507086', '5281031', '5268826', '5268526', '5473805', '5483251', '5483207', '5542725', '5495226', '5486937', '5414243', '5281135', '5358534', '5421920', '5340440', '5453023', '5483175', '5402456', '5350860', '

KeyboardInterrupt: 

In [51]:
ownership.head()

Unnamed: 0,market_name,parcl_id,as_of_date,count_1000_plus_portfolio,total_homes_owned_in_us,pct_of_1000_plus_portfolio,cumulative_pct_of_1000_plus_portfolio
100,"Atlanta-Sandy Springs-Alpharetta, Ga",2887280,2024-08-01,63200,611388,0.103371,0.103371
83,"Dallas-Fort Worth-Arlington, Tx",2899734,2024-08-01,40076,611388,0.065549,0.168921
61,"Houston-The Woodlands-Sugar Land, Tx",2899967,2024-08-01,32588,611388,0.053302,0.222222
32,"Phoenix-Mesa-Chandler, Az",2900245,2024-08-01,32084,611388,0.052477,0.2747
73,"Charlotte-Concord-Gastonia, Nc-Sc",2899841,2024-08-01,26364,611388,0.043122,0.317821


In [60]:
def prepare_institutional_data(
        data: pd.DataFrame,
        market_df: pd.DataFrame,
        us_ownership: int = 0
) -> pd.DataFrame:
    data = data[['parcl_id', 'date', 'count_portfolio_1000_plus', 'pct_sf_housing_stock_portfolio_1000_plus']]
    data = pd.merge(data, market_df[['parcl_id', 'name']], on='parcl_id', how='left')
    data['pct_sf_housing_stock_portfolio_1000_plus'] = data['pct_sf_housing_stock_portfolio_1000_plus']/100
    # get us count
    data = data.loc[data['parcl_id'] != us_market['parcl_id'].values[0]]
    data['total_homes_owned'] = us_ownership
    data['pct_of_1000_plus_portfolio'] = data['count_portfolio_1000_plus'] / data['total_homes_owned']
    data = data.sort_values('pct_of_1000_plus_portfolio', ascending=False)
    # get cumluative sum on pct_of_1000_plus_portfolio
    data['cum_sum'] = data['pct_of_1000_plus_portfolio'].cumsum()
    data = data[['name', 'parcl_id', 'date', 'pct_sf_housing_stock_portfolio_1000_plus', 'count_portfolio_1000_plus', 'total_homes_owned', 'pct_of_1000_plus_portfolio', 'cum_sum']]
    data = data.rename(columns={
        'name': 'market_name',
        'date': 'as_of_date',
        'count_portfolio_1000_plus': 'count_1000_plus_portfolio',
        'total_homes_owned': 'total_homes_owned_in_us',
        'pct_of_1000_plus_portfolio': 'pct_of_1000_plus_portfolio',
        'cum_sum': 'cumulative_pct_of_1000_plus_portfolio'
    })
    return data

us_ownership = ownership.loc[ownership['parcl_id']==us_market['parcl_id'].values[0]]['count_portfolio_1000_plus'].values[0]
metros_output = prepare_institutional_data(ownership, markets, us_ownership)
zips_output = prepare_institutional_data(zip_ownership, zips, us_ownership)
zips_output = zips_output.merge(zips[['parcl_id', 'state_abbreviation']], on='parcl_id', how='inner')

In [103]:
col_order = [
    'market_name',
    'state_abbreviation',
    'parcl_id',
]

for col in zips_output.columns:
    if col not in col_order:
        col_order.append(col)

zips_output = zips_output[col_order]
zips_to_save = zips_output.loc[zips_output['count_1000_plus_portfolio']> 1000].reset_index(drop=True)
zips_to_save['count_1000_plus_portfolio'] = zips_to_save['count_1000_plus_portfolio'].astype(int)
zips_to_save.to_csv(f'{output_dir}/institutional_ownership_by_zip.csv', index=False)

In [85]:
metros_to_save = metros_output.head(20)
metros_to_save.to_csv(f'{output_dir}/institutional_ownership_by_metro.csv', index=False)

In [101]:
zips_to_save.loc[zips_to_save['pct_sf_housing_stock_portfolio_1000_plus']>0.1]

Unnamed: 0,market_name,state_abbreviation,parcl_id,as_of_date,pct_sf_housing_stock_portfolio_1000_plus,count_1000_plus_portfolio,total_homes_owned_in_us,pct_of_1000_plus_portfolio,cumulative_pct_of_1000_plus_portfolio
1,30253,GA,5314470,2024-08-01,0.1144,2099.0,611388,0.003433,0.007128
9,48205,MI,5566080,2024-08-01,0.1136,1588.0,611388,0.002597,0.030238
11,28214,NC,5348947,2024-08-01,0.1014,1574.0,611388,0.002574,0.035408
22,48505,MI,5565664,2024-08-01,0.1488,1306.0,611388,0.002136,0.061318
27,37086,TN,5473863,2024-08-01,0.1229,1291.0,611388,0.002112,0.071892
35,38016,TN,5495454,2024-08-01,0.1158,1224.0,611388,0.002002,0.088288
39,30248,GA,5314864,2024-08-01,0.1058,1144.0,611388,0.001871,0.096031


### Top institutional activity in key markets

In [10]:
ports = [
    'AMH',
    'TRICON',
    'INVITATION_HOMES',
    'HOME_PARTNERS_OF_AMERICA',
    'PROGRESS_RESIDENTIAL',
    'FIRSTKEY_HOMES',
    'AMHERST'
]

us_market = 5826765

top_institional_markets = [
    2900245, # Phoenix, AZ
    2899967, # Houston, TX
    2899734, # Dallas, TX
    2900417, # Tampa, FL
    2887280, # Atlanta, GA
    2899841, # Charlotte, NC
]

homes = []
for port in ports:

    home = client.property.search.retrieve(
        current_entity_owner_name=port,
        parcl_ids=[us_market],
        property_type='SINGLE_FAMILY'
    )
    homes.append(home)

Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 26.8s (0.04/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 17.5s (0.06/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 38.4s (0.03/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 11.0s (0.09/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 38.6s (0.03/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 22.2s (0.04/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 18.7s (0.05/s) 


In [11]:
homes = pd.concat(homes)
homes.shape

(354426, 27)

In [12]:
homes.groupby('current_entity_owner_name')['parcl_property_id'].nunique()

current_entity_owner_name
AMH                         51851
AMHERST                     40793
FIRSTKEY_HOMES              46689
HOME_PARTNERS_OF_AMERICA    23216
INVITATION_HOMES            77673
PROGRESS_RESIDENTIAL        78920
TRICON                      35284
Name: parcl_property_id, dtype: int64

In [72]:
# pull down property history

history = client.property.events.retrieve(
    parcl_property_ids=homes['parcl_property_id'].tolist(),
    start_date='2024-08-01',
    event_type='LISTING'
)

Processing Parcl Property IDs |████████████████████████████████████████| 354426/354426 [100%] in 5.9s (60246.35/s) 


In [73]:
# total listings out of this cohort
# 1% of the top 7 institutional portfolios in the US
history['parcl_property_id'].nunique()/homes['parcl_property_id'].nunique()

0.011299961063804573

In [74]:
# history = history.drop(['current_entity_owner_name_x', 'current_entity_owner_name_y'], axis=1)
history = pd.merge(history, homes[['parcl_property_id', 'current_entity_owner_name', 'cbsa_parcl_id', 'cbsa']], on='parcl_property_id', how='left')

In [75]:
def calc_first_last_price(data):
    # calculate diff between first and last price for each parcl_property_id
    data = data.sort_values(['parcl_property_id', 'event_date'])
    data['price_diff'] = data.sort_values(['parcl_property_id', 'event_date']).groupby('parcl_property_id')['price'].diff()
    # create a column with the first price for each parcl_property_id
    data['first_price'] = data.groupby('parcl_property_id')['price'].transform('first')
    data['last_price'] = data.groupby('parcl_property_id')['price'].transform('last')
    data['first_date'] = data.groupby('parcl_property_id')['event_date'].transform('first')
    data['last_date'] = data.groupby('parcl_property_id')['event_date'].transform('last')
    data['last_event_name'] = data.groupby('parcl_property_id')['event_name'].transform('last')
    # strip out listing removed parcl_ids
    to_remove_parcl_ids = data.loc[data['last_event_name']=='LISTING_REMOVED']['parcl_property_id'].unique()
    data = data.loc[~data['parcl_property_id'].isin(to_remove_parcl_ids)]
    return data

In [151]:
from typing import List


def prepare_output_data(
        input_data: pd.DataFrame,
        scope='national',
        groupers=List[str],
        cols=List[str]
):
    if scope == 'national':
        data = input_data.copy(deep=True)
        list_changes = data[cols].drop_duplicates()
    if scope == 'cbsa':
        data = input_data.loc[input_data['cbsa_parcl_id'].notnull()].copy(deep=True)
        data['cbsa_parcl_id'] = data['cbsa_parcl_id'].astype(int)
        list_changes = data[cols].drop_duplicates()
    # calculate price declines
    list_changes['pct_price_change'] = (list_changes['last_price'] - list_changes['first_price']) / list_changes['first_price']
    # filter out listings where the absolute price change is greater than 20%
    list_changes = list_changes.loc[abs(list_changes['pct_price_change']) <= 0.2]
    list_changes['price_decline_idx'] = list_changes['pct_price_change'].apply(lambda x: 1 if x != 0 else 0)
    # calculate the percent of all listings by current_entity_owner_name that have seen a price decline. Also calculate the average price decline

    national_agg = list_changes.groupby(groupers).agg(
        num_listings=('parcl_property_id', 'count'),
        num_listings_with_price_decline=('price_decline_idx', 'sum'),
        avg_price_change=('pct_price_change', 'mean')
    )

    national_agg['pct_listings_with_price_decline'] = national_agg['num_listings_with_price_decline'] / national_agg['num_listings']
    national_agg = national_agg.reset_index()
    # break it by operator
    current_list = data.groupby(groupers)['parcl_property_id'].nunique().reset_index()
    current_list = current_list.rename(columns={'parcl_property_id': 'num_active_listings'})
    current_own = homes.groupby(groupers)['parcl_property_id'].nunique().reset_index()
    current_own = current_own.rename(columns={'parcl_property_id': 'num_homes_owned'})
    share_listed = pd.merge(current_own, current_list, on=groupers)
    share_listed['pct_listed'] = share_listed['num_active_listings'] / share_listed['num_homes_owned']
    # merge with national_agg
    share_listed = pd.merge(share_listed, national_agg, on=groupers)
    return share_listed

In [152]:
history = calc_first_last_price(history)
national = prepare_output_data(
    input_data=history,
    scope='national',
    groupers=['current_entity_owner_name'],
    cols=['parcl_property_id', 'current_entity_owner_name', 'first_price', 'last_price', 'first_date', 'last_date']
)
market = prepare_output_data(
    input_data=history,
    scope='cbsa',
    groupers=['cbsa_parcl_id','current_entity_owner_name'],
    cols=['parcl_property_id', 'cbsa_parcl_id', 'current_entity_owner_name', 'first_price', 'last_price', 'first_date', 'last_date']
)

In [153]:

national = national.drop('num_active_listings', axis=1)
national = national[[
    'current_entity_owner_name',
    'num_homes_owned',
    'num_listings',
    'num_listings_with_price_decline',
    'pct_listed',
    'avg_price_change',
    'pct_listings_with_price_decline'
]]

national = national.rename(columns={
    'current_entity_owner_name': 'institutional_investor',
    'pct_listed': 'pct_portfolio_listed'
})

national.to_csv(f'{output_dir}/national_institutional_investor_metrics_oct23.csv', index=False)

In [154]:
market

Unnamed: 0,cbsa_parcl_id,current_entity_owner_name,num_homes_owned,num_active_listings,pct_listed,num_listings,num_listings_with_price_decline,avg_price_change,pct_listings_with_price_decline
0,2887280.0,AMH,5037,47,0.009331,47,17,-0.016209,0.361702
1,2887280.0,AMHERST,6672,74,0.011091,74,39,-0.025673,0.527027
2,2887280.0,FIRSTKEY_HOMES,5993,20,0.003337,20,10,-0.012793,0.500000
3,2887280.0,HOME_PARTNERS_OF_AMERICA,3489,95,0.027228,95,53,-0.015095,0.557895
4,2887280.0,INVITATION_HOMES,11054,34,0.003076,33,13,-0.017431,0.393939
...,...,...,...,...,...,...,...,...,...
618,2900496.0,AMH,69,1,0.014493,1,0,0.000000,0.000000
619,2900499.0,AMH,1,1,1.000000,1,1,-0.026696,1.000000
620,2900499.0,INVITATION_HOMES,1,1,1.000000,1,1,-0.080032,1.000000
621,2900503.0,AMH,792,1,0.001263,1,0,0.000000,0.000000


In [155]:
mkt_breakout = market.groupby('cbsa_parcl_id').agg(
    num_listings=('num_listings', 'sum'),
    num_listings_with_price_decline=('num_listings_with_price_decline', 'sum'),
    avg_price_change=('avg_price_change', 'mean'),
    num_homes_owned=('num_homes_owned', 'sum')
).reset_index()
mkt_breakout = pd.merge(mkt_breakout, markets[['parcl_id', 'name']], left_on='cbsa_parcl_id', right_on='parcl_id', how='left')
top10_markets = [int(pid) for pid in mkt_breakout.sort_values('num_homes_owned', ascending=False).head(10)['cbsa_parcl_id'].unique().tolist()]
market_output = market.loc[market['cbsa_parcl_id'].isin(top10_markets)]
market_output = market_output.merge(homes[['cbsa_parcl_id', 'cbsa']].drop_duplicates(), on='cbsa_parcl_id')

In [156]:
# market_output = market_output.drop('num_active_listings', axis=1)
market_output = market_output[[
    'cbsa_parcl_id',
    'cbsa',
    'current_entity_owner_name',
    'num_homes_owned',
    'num_listings',
    'num_listings_with_price_decline',
    'pct_listed',
    'avg_price_change',
    'pct_listings_with_price_decline'
]]

market_output = market_output.rename(columns={
    'current_entity_owner_name': 'institutional_investor',
    'pct_listed': 'pct_portfolio_listed'
})

market_output['cbsa_parcl_id'] = market_output['cbsa_parcl_id'].astype(int)
market_output.to_csv(f'{output_dir}/top10cbsa_institutional_investor_metrics_oct23.csv', index=False)

market_pivot_pct_listed = market_output.pivot(index=['cbsa'], columns='institutional_investor', values='pct_portfolio_listed')
market_pivot_pct_listed.to_csv(f'{output_dir}/top10cbsa_institutional_investor_pct_portfolio_listed_pivot_oct23.csv')

market_pivot_pct_change = market_output.pivot(index=['cbsa'], columns='institutional_investor', values='avg_price_change')
market_pivot_pct_change.to_csv(f'{output_dir}/top10cbsa_institutional_investor_avg_price_change_pivot_oct23.csv')

market_homes_owned_pivot = market_output.pivot(index=['cbsa'], columns='institutional_investor', values='num_homes_owned')
market_homes_owned_pivot.to_csv(f'{output_dir}/top10cbsa_institutional_investor_homes_owned_pivot_oct23.csv')

In [44]:
# now comp to all listings on the market
# top10_markets

mkt_homes = []

for mkt in top10_markets:
    print(mkt)
    all_listings = client.property.search.retrieve(
            parcl_ids=[mkt],
            property_type='SINGLE_FAMILY',
            event_history_listing_flag=True
    )
    mkt_homes.append(all_listings)

2887280
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 30.8s (0.03/s) 
2900245
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 40.5s (0.02/s) 
2899841
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 29.7s (0.03/s) 
2899734
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 47.3s (0.02/s) 
2900417
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 43.7s (0.02/s) 
2900213
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 6.2s (0.16/s) 
2899967
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 43.7s (0.02/s) 
2899989
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 5.2s (0.19/s) 
2899979
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 5.6s (0.18/s) 
2900174
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in

In [45]:
all_listings = pd.concat(mkt_homes)

# capture all properties
history_mkts = client.property.events.retrieve(
    parcl_property_ids=all_listings['parcl_property_id'].tolist(),
    start_date='2024-08-01',
    event_type='LISTING'
)

Processing Parcl Property IDs |████████████████████████████████████████| 1124014/1124014 [100%] in 1:34.8 (11858.13/s) 


In [95]:
# history = history.drop(['current_entity_owner_name_x', 'current_entity_owner_name_y'], axis=1)
history_mkts = pd.merge(history_mkts, all_listings[['parcl_property_id', 'current_entity_owner_name', 'cbsa_parcl_id', 'cbsa']], on='parcl_property_id', how='left')

history_mkts = calc_first_last_price(history_mkts)

KeyError: 'current_entity_owner_name'

In [157]:
all_markets = prepare_output_data(
    input_data=history_mkts,
    scope='cbsa',
    groupers=['cbsa_parcl_id'],
    cols=['parcl_property_id', 'cbsa_parcl_id', 'first_price', 'last_price', 'first_date', 'last_date']
)

In [158]:
all_markets

Unnamed: 0,cbsa_parcl_id,num_homes_owned,num_active_listings,pct_listed,num_listings,num_listings_with_price_decline,avg_price_change,pct_listings_with_price_decline
0,2887280.0,50772,22505,0.443256,22355,8262,-0.014679,0.369582
1,2899734.0,23050,32854,1.425336,32651,14126,-0.015443,0.432636
2,2899841.0,23126,9159,0.396048,9116,3260,-0.013576,0.357613
3,2899967.0,14820,26963,1.819366,26768,10732,-0.014096,0.400926
4,2899979.0,11328,8430,0.744174,8374,2984,-0.014879,0.356341
5,2899989.0,14401,8689,0.603361,8622,3439,-0.014179,0.398863
6,2900174.0,11155,10831,0.970955,10776,3305,-0.010526,0.3067
7,2900213.0,15463,10207,0.660092,10149,3881,-0.012209,0.382402
8,2900245.0,27285,20502,0.751402,20418,7533,-0.009477,0.368939
9,2900417.0,18210,14156,0.777375,14044,5451,-0.014316,0.388137


In [159]:
all_markets['cbsa_parcl_id'] = all_markets['cbsa_parcl_id'].astype(int)
all_markets = all_markets.merge(all_listings[['cbsa_parcl_id', 'cbsa']].drop_duplicates(), on='cbsa_parcl_id', how='inner')
all_markets.head()

Unnamed: 0,cbsa_parcl_id,num_homes_owned,num_active_listings,pct_listed,num_listings,num_listings_with_price_decline,avg_price_change,pct_listings_with_price_decline,cbsa
0,2887280,50772,22505,0.443256,22355,8262,-0.014679,0.369582,"Atlanta-Sandy Springs-Alpharetta, GA"
1,2899734,23050,32854,1.425336,32651,14126,-0.015443,0.432636,"Dallas-Fort Worth-Arlington, TX"
2,2899841,23126,9159,0.396048,9116,3260,-0.013576,0.357613,"Charlotte-Concord-Gastonia, NC-SC"
3,2899967,14820,26963,1.819366,26768,10732,-0.014096,0.400926,"Houston-The Woodlands-Sugar Land, TX"
4,2899979,11328,8430,0.744174,8374,2984,-0.014879,0.356341,"Indianapolis-Carmel-Anderson, IN"


In [160]:
goat = market_pivot_pct_change.reset_index()
goat.columns.tolist()

['cbsa',
 'AMH',
 'AMHERST',
 'FIRSTKEY_HOMES',
 'HOME_PARTNERS_OF_AMERICA',
 'INVITATION_HOMES',
 'PROGRESS_RESIDENTIAL',
 'TRICON']

In [161]:
final_output = pd.merge(goat, all_markets[['cbsa', 'avg_price_change']], on='cbsa', how='inner')
final_output = final_output.rename(columns={'avg_price_change': 'MKT_PRICE_CHANGE'})
final_output.to_csv(f'{output_dir}/top10cbsa_institutional_investor_avg_price_change_vs_mkt_oct23.csv', index=False)

In [162]:
final_output

Unnamed: 0,cbsa,AMH,AMHERST,FIRSTKEY_HOMES,HOME_PARTNERS_OF_AMERICA,INVITATION_HOMES,PROGRESS_RESIDENTIAL,TRICON,MKT_PRICE_CHANGE
0,"Atlanta-Sandy Springs-Alpharetta, GA",-0.016209,-0.025673,-0.012793,-0.015095,-0.017431,-0.031724,-0.035245,-0.014679
1,"Charlotte-Concord-Gastonia, NC-SC",-0.00869,-0.025399,-0.022436,-0.014526,0.0,-0.014174,-0.018984,-0.013576
2,"Dallas-Fort Worth-Arlington, TX",-0.025936,-0.025619,-0.018881,-0.016661,-0.025503,-0.026731,-0.015197,-0.015443
3,"Houston-The Woodlands-Sugar Land, TX",-0.016826,-0.017772,-0.036551,-0.014267,-0.014739,-0.033974,-0.019869,-0.014096
4,"Indianapolis-Carmel-Anderson, IN",-0.011364,-0.02109,-0.003444,-0.008495,-0.078829,-0.044566,-0.007145,-0.014879
5,"Jacksonville, FL",-0.010136,-0.020914,-0.023178,-0.005969,0.0,-0.045846,0.0,-0.014179
6,"Nashville-Davidson--Murfreesboro--Franklin, TN",-0.0262,-0.019106,,-0.005001,-0.037839,-0.020453,-0.046364,-0.010526
7,"Orlando-Kissimmee-Sanford, FL",-0.013085,-0.018951,-0.008289,-0.012077,-0.016693,-0.028382,0.0,-0.012209
8,"Phoenix-Mesa-Chandler, AZ",-0.019129,-0.022119,-0.010442,-0.013096,-0.013011,-0.026265,0.0,-0.009477
9,"Tampa-St. Petersburg-Clearwater, FL",-0.007056,-0.023783,-0.016123,-0.0175,-0.016424,-0.040117,-0.001235,-0.014316
