# Fort Myers, Tampa Bay Region, 

All Florida Metros: 
- What share of new listings are coming from investors?
- By market and for the State, compared to National Average
- 

In [1]:
import os
import plotly
import numpy as np
import pandas as pd
import plotly.express as px
from datetime import datetime
import plotly.graph_objects as go
from parcllabs import ParclLabsClient
from plotly.subplots import make_subplots
from parcllabs.beta.charting.utils import create_labs_logo_dict
from parcllabs.beta.charting.styling import default_style_config as style_config


In [2]:
client = ParclLabsClient(
    api_key=os.environ.get('PARCL_LABS_API_KEY', "<your Parcl Labs API key if not set as environment variable>"), 
    turbo_mode=True,
    limit=12 # set default limit
)

In [224]:
# in this case, lets look at US market overall
us_market = client.search.markets.retrieve(
    query='United States',
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    limit=1
)

metros = client.search.markets.retrieve(
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    limit=1000,
    location_type='CBSA'
)

markets = pd.concat([us_market, metros])
market_ids = markets['parcl_id'].unique().tolist()
markets['state'] = markets['name'].apply(lambda x: x.split(',')[-1].strip().upper().split('-')[0])
markets['clean_name'] = markets.apply(lambda x: f"{x['name'].split('-')[0].split(',')[0].strip()}, {x['state']}", axis=1)
markets['clean_name'] = markets['clean_name'].replace({'United States Of America, UNITED STATES OF AMERICA': 'USA'})

In [225]:
fl_metro_pids = [
    2899989,
2900417,
2900241,
2900041,
2900213,
2900229,
2899748,
2900198,
2899718,
2900268,
2900192,
2899729,
2899961,
2900423,
2900227,
2899822,
2900416,
2900231,
2900279,
2900354,
2900039,
2900128,
2899882,
2900355,
2899655,
2900204,
2900021,
2900173
]

In [226]:
# fl_pids = markets.loc[markets['state']=='FL']['parcl_id'].tolist()
pids = fl_metro_pids + [us_market['parcl_id'].values[0]]

In [243]:
prop_types = [
    'SINGLE_FAMILY',
    'ALL_PROPERTIES',
    'CONDO'
]

output = []

for ptype in prop_types:

    # look at price cut activity
    price_changes = client.for_sale_market_metrics.for_sale_inventory_price_changes.retrieve(
        parcl_ids=pids,
        start_date='2022-09-01',
        auto_paginate=True,
        property_type=ptype
    )

    inventory = client.for_sale_market_metrics.for_sale_inventory.retrieve(
        parcl_ids=pids,
        start_date='2022-09-01',
        auto_paginate=True,
        property_type=ptype
    )

    price_changes = pd.merge(price_changes, markets[['parcl_id', 'name']], on='parcl_id', how='left')
    price_changes = pd.merge(price_changes, inventory[['parcl_id', 'date', 'for_sale_inventory']], on=['parcl_id', 'date'], how='left')
    price_changes['pct_price_drops'] = price_changes['count_price_drop'] / price_changes['for_sale_inventory']
    price_changes = price_changes.sort_values(['parcl_id', 'name', 'date'])[['parcl_id', 'name', 'pct_price_drops', 'property_type', 'date']]
    output.append(price_changes)

data: {'parcl_id': ['2899989', '2900417', '2900241', '2900041', '2900213', '2900229', '2899748', '2900198', '2899718', '2900268', '2900192', '2899729', '2899961', '2900423', '2900227', '2899822', '2900416', '2900231', '2900279', '2900354', '2900039', '2900128', '2899882', '2900355', '2899655', '2900204', '2900021', '2900173', '5826765'], 'start_date': '2022-09-01', 'property_type': 'SINGLE_FAMILY'}, params: {}
data: {'parcl_id': ['2899989', '2900417', '2900241', '2900041', '2900213', '2900229', '2899748', '2900198', '2899718', '2900268', '2900192', '2899729', '2899961', '2900423', '2900227', '2899822', '2900416', '2900231', '2900279', '2900354', '2900039', '2900128', '2899882', '2900355', '2899655', '2900204', '2900021', '2900173', '5826765'], 'start_date': '2022-09-01', 'property_type': 'SINGLE_FAMILY'}, params: {}
data: {'parcl_id': ['2899989', '2900417', '2900241', '2900041', '2900213', '2900229', '2899748', '2900198', '2899718', '2900268', '2900192', '2899729', '2899961', '2900423'

In [244]:
df = pd.concat(output)
df.head()

Unnamed: 0,parcl_id,name,pct_price_drops,property_type,date
3189,2899655,"Clewiston, Fl",0.140625,SINGLE_FAMILY,2022-09-05
3188,2899655,"Clewiston, Fl",0.238095,SINGLE_FAMILY,2022-09-12
3187,2899655,"Clewiston, Fl",0.213333,SINGLE_FAMILY,2022-09-19
3186,2899655,"Clewiston, Fl",0.243902,SINGLE_FAMILY,2022-09-26
3185,2899655,"Clewiston, Fl",0.242857,SINGLE_FAMILY,2022-10-03


In [266]:
df.loc[df['date']=='2024-10-07'].sort_values('pct_price_drops', ascending=False).pivot(index='name', columns='property_type', values='pct_price_drops').sort_values('ALL_PROPERTIES', ascending=False).to_csv('fl_price_listings_reductions.csv')

In [259]:
mkts = {
    'Tampa-St. Petersburg-Clearwater, Fl': 'Tampa',
    'Jacksonville, Fl': 'Jacksonville',
    'Orlando-Kissimmee-Sanford, Fl': 'Orlando', 
    'Miami-Fort Lauderdale-Pompano Beach, Fl': 'Miami',
    'United States Of America': 'USA'
}

chart = df.loc[(df['name'].isin(mkts.keys())) & (df['property_type']=='ALL_PROPERTIES')].sort_values(['parcl_id', 'date'], ascending=False)
chart['clean_name'] = chart['name'].apply(lambda x: mkts[x])

In [265]:
chart[['parcl_id', 'name', 'property_type', 'date', 'pct_price_drops']].to_csv('fl_price_listings_trend_reductions.csv', index=False)

In [261]:
# Get max date for chart
data = chart.copy(deep=True)
max_date_for_chart = data['date'].max().date()
max_date_for_chart = max_date_for_chart.strftime('%B %d, %Y')

CHART_WIDTH = 1600
CHART_HEIGHT = 800
# Create the line chart using Plotly Express
fig = px.line(
    data,
    x='date',
    y='pct_price_drops',
    color='clean_name',
    line_group='clean_name',
    labels={'pct_price_drops': '% of Inventory with Price Cuts'},
    title=f'Percentage of Inventory with Price Reductions ({max_date_for_chart})'
)

# Update traces to apply specific styles
for trace in fig.data:
    if trace.name == 'USA':
        trace.update(
            line=dict(color='red', width=4),
            opacity=1
        )
    else:
        trace.update(
            line=dict(color='lightblue', dash='dash', width=2),
            opacity=0.8
        )
    # Remove text annotations from traces
    trace.update(
        mode='lines'
    )

# Find the latest date in the dataset
latest_date = max(price_changes['date'])

# Add annotations for each line on the far right
annotations = []
y_positions = []

for trace in fig.data:
    # Get the last y-value for each clean_name
    last_y_value = data[
        (data['clean_name'] == trace.name) &
        (data['date'] == latest_date)
    ]['pct_price_drops'].values[0]
    
    # Only add the annotation if it doesn't overlap with existing annotations
    if not any(abs(last_y_value - y) < 0.005 for y in y_positions):  # Adjust threshold as needed
        annotations.append(dict(
            x=latest_date,
            y=last_y_value,
            xref='x',
            yref='y',
            text=trace.name,
            showarrow=False,
            xanchor='left',
            font=dict(size=12)  # Adjust the font size if needed
        ))
        y_positions.append(last_y_value)

fig.add_layout_image(
        create_labs_logo_dict()
)

# Update layout for axes, title, and other styling
fig.update_layout(
    width=CHART_WIDTH,
    height=CHART_HEIGHT,
    xaxis=dict(
        title='',
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        # tickangle=style_config['tick_angle'],
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        titlefont=style_config['title_font_axis']
    ),
    yaxis=dict(
        title='% Price Reductions',
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        tickfont=style_config['axis_font'],
        zeroline=False,
        tickformat='.0%',
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        titlefont=style_config['title_font_axis']
    ),
    plot_bgcolor=style_config['background_color'],
    paper_bgcolor=style_config['background_color'],
    font=dict(color=style_config['font_color']),
    showlegend=False,  # Remove the legend
    margin=dict(l=40, r=40, t=80, b=40),
    title={
        'y': 0.98,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(size=24)
    },
    annotations=annotations  # Add annotations
)
# save_figure(fig, save_path=f'{ROOT_DIR}/pct_inventory_price_reductions_line_chart.png', 
#             width=CHART_WIDTH, height=CHART_HEIGHT)

fig.show()

### Share of all new listings coming from investors

In [197]:
# get share of all listings coming from investors across FL markets,
# compared to the US overall
property_types = [
    # 'SINGLE_FAMILY',
    # 'CONDO',
    # 'TOWNHOUSE'
    'ALL_PROPERTIES'
]

listings = []
investor_listings = []
for ptype in property_types:
    new_investor_listings = client.investor_metrics.new_listings_for_sale_rolling_counts.retrieve(
        property_type=ptype,
        parcl_ids=pids,
        start_date='2022-09-01',
        auto_paginate=True
    )

    investor_listings.append(new_investor_listings)

    new_listings = client.for_sale_market_metrics.new_listings_rolling_counts.retrieve(
        property_type=ptype,
        parcl_ids=pids,
        start_date='2022-09-01',
        auto_paginate=True
    )

    listings.append(new_listings)

data: {'parcl_id': ['2900128', '2900417', '2900213', '2899989', '2900192', '2899822', '2900041', '2899748', '2900229', '2900241', '2900268', '2900416', '2900173', '2900198', '2899718', '2899729', '5826765'], 'start_date': '2022-09-01', 'property_type': 'ALL_PROPERTIES'}, params: {}
data: {'parcl_id': ['2900128', '2900417', '2900213', '2899989', '2900192', '2899822', '2900041', '2899748', '2900229', '2900241', '2900268', '2900416', '2900173', '2900198', '2899718', '2899729', '5826765'], 'start_date': '2022-09-01', 'property_type': 'ALL_PROPERTIES'}, params: {}


In [217]:
# listings = pd.concat(listings)
tmp = pd.concat(investor_listings)
tmp = pd.merge(tmp, markets[['parcl_id', 'clean_name']], on='parcl_id', how='left')

In [218]:
tmp[['parcl_id', 'clean_name']].drop_duplicates()

Unnamed: 0,parcl_id,clean_name
0,5826765,USA
110,2900417,"Tampa, FL"
220,2900416,"Tallahassee, FL"
330,2900268,"Port St. Lucie, FL"
440,2900241,"Pensacola, FL"
550,2900229,"Palm Bay, FL"
660,2900213,"Orlando, FL"
770,2900198,"Ocala, FL"
880,2900192,"North Port, FL"
990,2900173,"Naples, FL"


In [219]:
new_investor_listings = tmp# .loc[tmp['property_type']=='CONDO']
new_investor_listings['pct_for_sale_market_rolling_7_day'] = new_investor_listings['pct_for_sale_market_rolling_7_day']/100
new_investor_listings['pct_for_sale_market_rolling_30_day'] = new_investor_listings['pct_for_sale_market_rolling_30_day']/100
new_investor_listings['pct_for_sale_market_rolling_60_day'] = new_investor_listings['pct_for_sale_market_rolling_60_day']/100

In [220]:
interest = [
    2900417, # Tampa
    2900192, # north port
    2900213, # Orlando
    2899748, # Deltona
    5826765, # USA
    2899989, # Jacksonville
]

In [221]:
new_investor_listings = new_investor_listings.loc[new_investor_listings['parcl_id'].isin(interest)]
new_investor_listings.head()

Unnamed: 0,parcl_id,date,count_rolling_7_day,count_rolling_30_day,count_rolling_60_day,count_rolling_90_day,pct_for_sale_market_rolling_7_day,pct_for_sale_market_rolling_30_day,pct_for_sale_market_rolling_60_day,pct_for_sale_market_rolling_90_day,property_type,clean_name
0,5826765,2024-10-07,17484.0,68559,141120,216009,0.1459,0.1446,0.1445,14.38,ALL_PROPERTIES,USA
1,5826765,2024-09-30,14928.0,67295,140923,212101,0.1446,0.1434,0.1443,14.33,ALL_PROPERTIES,USA
2,5826765,2024-09-23,17220.0,67907,142321,216041,0.1454,0.1436,0.1442,14.37,ALL_PROPERTIES,USA
3,5826765,2024-09-16,16873.0,67503,142400,216764,0.1433,0.1439,0.1438,14.35,ALL_PROPERTIES,USA
4,5826765,2024-09-09,15952.0,67433,143551,218598,0.1393,0.144,0.1435,14.37,ALL_PROPERTIES,USA


In [222]:
# Get max date for chart
max_date_for_chart = new_investor_listings['date'].max().date()
max_date_for_chart = max_date_for_chart.strftime('%B %d, %Y')
new_investor_listings = new_investor_listings.sort_values(['parcl_id', 'date'])

WINDOW_PERIOD = 60

CHART_WIDTH = 1600
CHART_HEIGHT = 800
# Create the line chart using Plotly Express
fig = px.line(
    new_investor_listings,
    x='date',
    y=f'pct_for_sale_market_rolling_{WINDOW_PERIOD}_day',
    color='clean_name',
    line_group='clean_name',
    labels={f'pct_for_sale_market_rolling_{WINDOW_PERIOD}_day': '% of new listings'},
    title=f"<span style='font-size:20px;'>% of New Listings coming from Investors</span><br><span style='font-size:12px; font-style: italic;'>{max_date_for_chart} - Rolling {WINDOW_PERIOD} Days</span>"
)

# Define the cities to be highlighted in red
highlighted_cities = ['USA'] # , 'Tampa, FL', 'Gainesville, FL', 'Jacksonville, FL']# ['North Port, FL', 'Tampa, FL', 'Lakeland, FL', 'Jacksonville, FL']

# Update traces to apply specific styles
for trace in fig.data:
    if trace.name in highlighted_cities:
        trace.update(
            line=dict(color='red', width=3, dash='dash'),
            opacity=1
        )
    else:
        trace.update(
            line=dict(color='lightblue', dash='dash', width=2),
            opacity=0.8
        )
    # Remove text annotations from traces
    trace.update(
        mode='lines'
    )

# Find the latest date in the dataset
latest_date = max(new_investor_listings['date'])

# Add annotations for each line on the far right
annotations = []
y_positions = []

for trace in fig.data:
    # Get the last y-value for each clean_name
    last_y_value = new_investor_listings[
        (new_investor_listings['clean_name'] == trace.name) &
        (new_investor_listings['date'] == latest_date)
    ][f'pct_for_sale_market_rolling_{WINDOW_PERIOD}_day'].values[0]
    
    # Only add the annotation if it doesn't overlap with existing annotations
    if not any(abs(last_y_value - y) < 0.003 for y in y_positions):  # Adjust threshold as needed
        annotations.append(dict(
            x=latest_date,
            y=last_y_value,
            xref='x',
            yref='y',
            text=trace.name,
            showarrow=False,
            xanchor='left',
            font=dict(size=12)  # Adjust the font size if needed
        ))
        y_positions.append(last_y_value)

fig.add_layout_image(
    create_labs_logo_dict()
)

# Update layout for axes, title, and other styling
fig.update_layout(
    width=CHART_WIDTH,
    height=CHART_HEIGHT,
    xaxis=dict(
        title='',
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        # tickangle=style_config['tick_angle'],
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        titlefont=style_config['title_font_axis']
    ),
    yaxis=dict(
        title='% of Single Family Home Listings',
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        tickfont=style_config['axis_font'],
        zeroline=False,
        tickformat='.0%',
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        titlefont=style_config['title_font_axis']
    ),
    plot_bgcolor=style_config['background_color'],
    paper_bgcolor=style_config['background_color'],
    font=dict(color=style_config['font_color']),
    showlegend=False,  # Remove the legend
    margin=dict(l=40, r=40, t=100, b=40),
    title={
        'y': 0.96,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    annotations=annotations  # Add annotations
)

fig.show()


In [267]:
us_market.head()

Unnamed: 0,parcl_id,country,geoid,state_fips_code,name,state_abbreviation,region,location_type,total_population,median_income,parcl_exchange_market,pricefeed_market,case_shiller_10_market,case_shiller_20_market
0,5826765,USA,,,United States Of America,,,COUNTRY,331097593,75149,1,1,0,0


### Top institutional concentration of homes across the country

In [71]:
# save all files
output_dir = '/Users/jasonlewris/Library/Mobile Documents/com~apple~CloudDocs/Parcl Labs/carol_wsj'

In [8]:
# we need to break this by zip and msa

# first get top 100 metros by population
metros = client.search.markets.retrieve(
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    limit=100,
    location_type='CBSA'
)

us_market = client.search.markets.retrieve(
    query='United States',
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    limit=1
)

In [59]:
# then get all zipcodes for the country
zips = client.search.markets.retrieve(
    sort_by='TOTAL_POPULATION',
    sort_order='DESC',
    auto_paginate=True,
    limit=1000,
    location_type='ZIP5'
)

In [52]:
markets = pd.concat([us_market, metros])
market_ids = markets['parcl_id'].unique().tolist()

# get housing stock ownership
ownership = client.portfolio_metrics.sf_housing_stock_ownership.retrieve(
    parcl_ids=market_ids,
    auto_paginate=True,
    start_date='2024-08-01',
    limit=1
)

data: {'parcl_id': ['5826765', '2900187', '2900078', '2899845', '2899734', '2899967', '2900475', '2900244', '2900128', '2887280', '2899625', '2900245', '2900336', '2900295', '2899753', '2900353', '2900137', '2900332', '2900417', '2899750', '2887292', '2900321', '2900213', '2899841', '2900331', '2900266', '2900315', '2900251', '2887289', '2900049', '2899647', '2900012', '2899671', '2899979', '2899654', '2900174', '2900338', '2900462', '2900275', '2899989', '2900134', '2900205', '2900282', '2900122', '2900292', '2900079', '2900182', '2900329', '2899944', '2899645', '2899611', '2899916', '2900301', '2900436', '2900438', '2900447', '2899715', '2899896', '2900208', '2899636', '2899929', '2899862', '2887291', '2899858', '2900030', '2900116', '2899589', '2899787', '2900181', '2899867', '2900192', '2900223', '2899666', '2899742', '2899840', '2900404', '2899924', '2899822', '2899621', '2899664', '2900070', '2900041', '2899752', '2899854', '2900271', '2900201', '2900391', '2900089', '2900503', '

In [48]:
zip_ownership = client.portfolio_metrics.sf_housing_stock_ownership.retrieve(
    parcl_ids=zips['parcl_id'].tolist(),
    auto_paginate=True,
    start_date='2024-08-01',
    limit=1
)

data: {'parcl_id': ['5445224', '5487000', '5364758', '5486793', '5453067', '5507083', '5468931', '5358467', '5486942', '5269001', '5506699', '5453121', '5452957', '5422176', '5358103', '5483467', '5483169', '5281029', '5314866', '5273480', '5495447', '5483329', '5358086', '5453093', '5542503', '5483382', '5273570', '5273788', '5469191', '5486717', '5273596', '5268545', '5358277', '5358465', '5268629', '5273633', '5453015', '5483332', '5414139', '5268698', '5364749', '5358370', '5358371', '5486940', '5364881', '5469187', '5358458', '5358554', '5453076', '5273644', '5421882', '5453084', '5268849', '5314567', '5483566', '5421819', '5486999', '5486907', '5486924', '5358148', '5403258', '5268569', '5483379', '5303125', '5483170', '5507078', '5402837', '5303025', '5445836', '5422158', '5507086', '5281031', '5268826', '5268526', '5473805', '5483251', '5483207', '5542725', '5495226', '5486937', '5414243', '5281135', '5358534', '5421920', '5340440', '5453023', '5483175', '5402456', '5350860', '

In [51]:
ownership.head()

Unnamed: 0,market_name,parcl_id,as_of_date,count_1000_plus_portfolio,total_homes_owned_in_us,pct_of_1000_plus_portfolio,cumulative_pct_of_1000_plus_portfolio
100,"Atlanta-Sandy Springs-Alpharetta, Ga",2887280,2024-08-01,63200,611388,0.103371,0.103371
83,"Dallas-Fort Worth-Arlington, Tx",2899734,2024-08-01,40076,611388,0.065549,0.168921
61,"Houston-The Woodlands-Sugar Land, Tx",2899967,2024-08-01,32588,611388,0.053302,0.222222
32,"Phoenix-Mesa-Chandler, Az",2900245,2024-08-01,32084,611388,0.052477,0.2747
73,"Charlotte-Concord-Gastonia, Nc-Sc",2899841,2024-08-01,26364,611388,0.043122,0.317821


In [60]:
def prepare_institutional_data(
        data: pd.DataFrame,
        market_df: pd.DataFrame,
        us_ownership: int = 0
) -> pd.DataFrame:
    data = data[['parcl_id', 'date', 'count_portfolio_1000_plus', 'pct_sf_housing_stock_portfolio_1000_plus']]
    data = pd.merge(data, market_df[['parcl_id', 'name']], on='parcl_id', how='left')
    data['pct_sf_housing_stock_portfolio_1000_plus'] = data['pct_sf_housing_stock_portfolio_1000_plus']/100
    # get us count
    data = data.loc[data['parcl_id'] != us_market['parcl_id'].values[0]]
    data['total_homes_owned'] = us_ownership
    data['pct_of_1000_plus_portfolio'] = data['count_portfolio_1000_plus'] / data['total_homes_owned']
    data = data.sort_values('pct_of_1000_plus_portfolio', ascending=False)
    # get cumluative sum on pct_of_1000_plus_portfolio
    data['cum_sum'] = data['pct_of_1000_plus_portfolio'].cumsum()
    data = data[['name', 'parcl_id', 'date', 'pct_sf_housing_stock_portfolio_1000_plus', 'count_portfolio_1000_plus', 'total_homes_owned', 'pct_of_1000_plus_portfolio', 'cum_sum']]
    data = data.rename(columns={
        'name': 'market_name',
        'date': 'as_of_date',
        'count_portfolio_1000_plus': 'count_1000_plus_portfolio',
        'total_homes_owned': 'total_homes_owned_in_us',
        'pct_of_1000_plus_portfolio': 'pct_of_1000_plus_portfolio',
        'cum_sum': 'cumulative_pct_of_1000_plus_portfolio'
    })
    return data

us_ownership = ownership.loc[ownership['parcl_id']==us_market['parcl_id'].values[0]]['count_portfolio_1000_plus'].values[0]
metros_output = prepare_institutional_data(ownership, markets, us_ownership)
zips_output = prepare_institutional_data(zip_ownership, zips, us_ownership)
zips_output = zips_output.merge(zips[['parcl_id', 'state_abbreviation']], on='parcl_id', how='inner')

In [103]:
col_order = [
    'market_name',
    'state_abbreviation',
    'parcl_id',
]

for col in zips_output.columns:
    if col not in col_order:
        col_order.append(col)

zips_output = zips_output[col_order]
zips_to_save = zips_output.loc[zips_output['count_1000_plus_portfolio']> 1000].reset_index(drop=True)
zips_to_save['count_1000_plus_portfolio'] = zips_to_save['count_1000_plus_portfolio'].astype(int)
zips_to_save.to_csv(f'{output_dir}/institutional_ownership_by_zip.csv', index=False)

In [85]:
metros_to_save = metros_output.head(20)
metros_to_save.to_csv(f'{output_dir}/institutional_ownership_by_metro.csv', index=False)

In [101]:
zips_to_save.loc[zips_to_save['pct_sf_housing_stock_portfolio_1000_plus']>0.1]

Unnamed: 0,market_name,state_abbreviation,parcl_id,as_of_date,pct_sf_housing_stock_portfolio_1000_plus,count_1000_plus_portfolio,total_homes_owned_in_us,pct_of_1000_plus_portfolio,cumulative_pct_of_1000_plus_portfolio
1,30253,GA,5314470,2024-08-01,0.1144,2099.0,611388,0.003433,0.007128
9,48205,MI,5566080,2024-08-01,0.1136,1588.0,611388,0.002597,0.030238
11,28214,NC,5348947,2024-08-01,0.1014,1574.0,611388,0.002574,0.035408
22,48505,MI,5565664,2024-08-01,0.1488,1306.0,611388,0.002136,0.061318
27,37086,TN,5473863,2024-08-01,0.1229,1291.0,611388,0.002112,0.071892
35,38016,TN,5495454,2024-08-01,0.1158,1224.0,611388,0.002002,0.088288
39,30248,GA,5314864,2024-08-01,0.1058,1144.0,611388,0.001871,0.096031


### Top institutional activity in key markets

In [273]:
ports = [
    'AMH',
    'TRICON',
    'INVITATION_HOMES',
    'HOME_PARTNERS_OF_AMERICA',
    'PROGRESS_RESIDENTIAL',
    'FIRSTKEY_HOMES',
    'AMHERST'
]

us_market = 5826765

top_institional_markets = [
    2900245, # Phoenix, AZ
    2899967, # Houston, TX
    2899734, # Dallas, TX
    2900417, # Tampa, FL
    2887280, # Atlanta, GA
    2899841, # Charlotte, NC
]

homes = []
for port in ports:

    home = client.property.search.retrieve(
        current_entity_owner_name=port,
        parcl_ids=[us_market],
        property_type='SINGLE_FAMILY'
    )
    homes.append(home)

Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 16.7s (0.06/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 7.0s (0.14/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 7.4s (0.13/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 5.2s (0.19/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 7.1s (0.14/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 6.9s (0.15/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 4.2s (0.24/s) 


In [274]:
homes = pd.concat(homes)
homes.shape

(354652, 27)

In [275]:
homes.groupby('current_entity_owner_name')['parcl_property_id'].nunique()

current_entity_owner_name
AMH                         51847
AMHERST                     40831
FIRSTKEY_HOMES              46722
HOME_PARTNERS_OF_AMERICA    23258
INVITATION_HOMES            77741
PROGRESS_RESIDENTIAL        78992
TRICON                      35261
Name: parcl_property_id, dtype: int64

In [276]:
# pull down property history

history = client.property.events.retrieve(
    parcl_property_ids=homes['parcl_property_id'].tolist(),
    start_date='2024-08-01',
    event_type='LISTING'
)

on 0: Error processing batch ['164517791', '86075090', '199007153', '96872621', '66097094', '67879117', '141985435', '72622160', '85202918', '138289388', '107594495', '127658070', '74900089', '171975458', '94184725', '137436608', '52730307', '142152134', '82623548', '127894205', '68498668', '51846185', '168681725', '156812785', '164318994', '176804006', '110596300', '164591883', '158160499', '158632287', '87223612', '159428402', '154812266', '59769934', '138956984', '93541229', '151279123', '48792053', '167238039', '89734689', '171343532', '167340622', '78887564', '95604778', '86667451', '196674715', '148224315', '155876274', '83549205', '87716561', '92765801', '165418546', '54118327', '163024422', '111037373', '173161150', '172492623', '214530226', '151193622', '170139118', '219580458', '94353316', '57932722', '140305554', '117261219', '80451831', '164324792', '94897250', '152337612', '157250709', '84879060', '58854597', '49687506', '74245557', '50601492', '118665386', '170760575', '1

In [279]:
# total listings out of this cohort
# 1% of the top 7 institutional portfolios in the US
history['parcl_property_id'].nunique()/homes['parcl_property_id'].nunique()

0.010032369759651714

In [328]:
# history = history.drop(['current_entity_owner_name_x', 'current_entity_owner_name_y'], axis=1)
history = pd.merge(history, homes[['parcl_property_id', 'current_entity_owner_name']], on='parcl_property_id', how='left')

In [329]:
# calculate diff between first and last price for each parcl_property_id
history = history.sort_values(['parcl_property_id', 'event_date'])
history['price_diff'] = history.sort_values(['parcl_property_id', 'event_date']).groupby('parcl_property_id')['price'].diff()
# create a column with the first price for each parcl_property_id
history['first_price'] = history.groupby('parcl_property_id')['price'].transform('first')
history['last_price'] = history.groupby('parcl_property_id')['price'].transform('last')
history['first_date'] = history.groupby('parcl_property_id')['event_date'].transform('first')
history['last_date'] = history.groupby('parcl_property_id')['event_date'].transform('last')
history['last_event_name'] = history.groupby('parcl_property_id')['event_name'].transform('last')
history.head(100)

Unnamed: 0,parcl_property_id,event_date,event_type,event_name,price,owner_occupied_flag,new_construction_flag,investor_flag,entity_owner_name,current_owner_flag,transfer_index,true_sale_index,price_diff,first_price,last_price,first_date,last_date,last_event_name,cbsa_parcl_id,current_entity_owner_name
0,48743427,2024-09-05,LISTING,LISTED_SALE,515000.0,0.0,0,1.0,AMHERST,1,6,4,,515000.0,505000.0,2024-09-05,2024-10-01,PRICE_CHANGE,2899734.0,AMHERST
1,48743427,2024-09-20,LISTING,LISTING_REMOVED,515000.0,0.0,0,1.0,AMHERST,1,6,4,0.0,515000.0,505000.0,2024-09-05,2024-10-01,PRICE_CHANGE,2899734.0,AMHERST
2,48743427,2024-09-30,LISTING,LISTED_SALE,515000.0,0.0,0,1.0,AMHERST,1,6,4,0.0,515000.0,505000.0,2024-09-05,2024-10-01,PRICE_CHANGE,2899734.0,AMHERST
3,48743427,2024-10-01,LISTING,PRICE_CHANGE,505000.0,0.0,0,1.0,AMHERST,1,6,4,-10000.0,515000.0,505000.0,2024-09-05,2024-10-01,PRICE_CHANGE,2899734.0,AMHERST
4,48771578,2024-08-08,LISTING,PRICE_CHANGE,399900.0,0.0,0,1.0,PROGRESS_RESIDENTIAL,1,9,3,,399900.0,399900.0,2024-08-08,2024-08-16,PRICE_CHANGE,2900245.0,PROGRESS_RESIDENTIAL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,50002520,2024-09-30,LISTING,LISTED_SALE,329900.0,0.0,0,1.0,AMH,1,2,2,0.0,339900.0,329900.0,2024-08-19,2024-09-30,LISTED_SALE,2899734.0,AMH
96,50007878,2024-09-02,LISTING,LISTED_SALE,1525000.0,0.0,0,1.0,AMH,1,7,3,,1525000.0,1525000.0,2024-09-02,2024-09-02,LISTED_SALE,2900245.0,AMH
97,50010055,2024-09-24,LISTING,LISTED_SALE,410000.0,0.0,0,1.0,HOME_PARTNERS_OF_AMERICA,1,5,4,,410000.0,410000.0,2024-09-24,2024-09-24,LISTED_SALE,2899734.0,HOME_PARTNERS_OF_AMERICA
98,50027799,2024-10-03,LISTING,LISTED_SALE,350000.0,0.0,0,1.0,INVITATION_HOMES,1,5,5,,350000.0,350000.0,2024-10-03,2024-10-03,LISTED_SALE,2900245.0,INVITATION_HOMES


In [330]:
# strip out listing removed parcl_ids
to_remove_parcl_ids = history.loc[history['last_event_name']=='LISTING_REMOVED']['parcl_property_id'].unique()
history = history.loc[~history['parcl_property_id'].isin(to_remove_parcl_ids)]

In [342]:
national_agg.head()

Unnamed: 0,current_entity_owner_name,num_listings,num_listings_with_price_decline,avg_price_change,pct_listings_with_price_decline
0,AMH,306,130,-0.018687,0.424837
1,AMHERST,717,318,-0.017715,0.443515
2,FIRSTKEY_HOMES,254,135,-0.017841,0.531496
3,HOME_PARTNERS_OF_AMERICA,839,347,-0.009216,0.413588
4,INVITATION_HOMES,335,109,-0.004505,0.325373


In [424]:
def prepare_output_data(
        scope='national'
):
    if scope == 'national':
        groupers = ['current_entity_owner_name']
        data = history.copy(deep=True)
        list_changes = data[['parcl_property_id', 'current_entity_owner_name', 'first_price', 'last_price', 'first_date', 'last_date']].drop_duplicates()
    if scope == 'cbsa':
        groupers = ['cbsa_parcl_id','current_entity_owner_name']
        # 
        data = history.loc[history['cbsa_parcl_id'].notnull()]
        data['cbsa_parcl_id'] = data['cbsa_parcl_id'].astype(int)
        list_changes = tmp[['parcl_property_id', 'cbsa_parcl_id', 'current_entity_owner_name', 'first_price', 'last_price', 'first_date', 'last_date']].drop_duplicates()
    # calculate price declines
    list_changes['pct_price_change'] = (list_changes['last_price'] - list_changes['first_price']) / list_changes['first_price']
    list_changes['price_decline_idx'] = list_changes['pct_price_change'].apply(lambda x: 1 if x != 0 else 0)
    # calculate the percent of all listings by current_entity_owner_name that have seen a price decline. Also calculate the average price decline

    national_agg = list_changes.groupby(groupers).agg(
        num_listings=('parcl_property_id', 'count'),
        num_listings_with_price_decline=('price_decline_idx', 'sum'),
        avg_price_change=('pct_price_change', 'mean')
    )

    national_agg['pct_listings_with_price_decline'] = national_agg['num_listings_with_price_decline'] / national_agg['num_listings']
    national_agg = national_agg.reset_index()
    # break it by operator
    current_list = data.groupby(groupers)['parcl_property_id'].nunique().reset_index()
    current_list = current_list.rename(columns={'parcl_property_id': 'num_active_listings'})
    current_own = homes.groupby(groupers)['parcl_property_id'].nunique().reset_index()
    current_own = current_own.rename(columns={'parcl_property_id': 'num_homes_owned'})
    share_listed = pd.merge(current_own, current_list, on=groupers)
    share_listed['pct_listed'] = share_listed['num_active_listings'] / share_listed['num_homes_owned']

    # merge with national_agg
    share_listed = pd.merge(share_listed, national_agg, on=groupers)
    return share_listed

national = prepare_output_data(scope='national')
market = prepare_output_data(scope='cbsa')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [425]:
# save the following files
# national, market, output_pivot_pct_listed

In [426]:

national = national.drop('num_active_listings', axis=1)
national = national[[
    'current_entity_owner_name',
    'num_homes_owned',
    'num_listings',
    'num_listings_with_price_decline',
    'pct_listed',
    'avg_price_change',
    'pct_listings_with_price_decline'
]]

national = national.rename(columns={
    'current_entity_owner_name': 'institutional_investor',
    'pct_listed': 'pct_portfolio_listed'
})

national.to_csv(f'{output_dir}/national_institutional_investor_metrics.csv', index=False)

In [427]:
mkt_breakout = market.groupby('cbsa_parcl_id').agg(
    num_listings=('num_listings', 'sum'),
    num_listings_with_price_decline=('num_listings_with_price_decline', 'sum'),
    avg_price_change=('avg_price_change', 'mean'),
    num_homes_owned=('num_homes_owned', 'sum')
).reset_index()
mkt_breakout = pd.merge(mkt_breakout, markets[['parcl_id', 'clean_name']], left_on='cbsa_parcl_id', right_on='parcl_id', how='left')
top10_markets = mkt_breakout.sort_values('num_homes_owned', ascending=False).head(10)['cbsa_parcl_id'].unique().tolist()
market_output = market.loc[market['cbsa_parcl_id'].isin(top10_markets)]
market_output = market_output.merge(homes[['cbsa_parcl_id', 'cbsa']].drop_duplicates(), on='cbsa_parcl_id')

In [428]:
# market_output = market_output.drop('num_active_listings', axis=1)
market_output = market_output[[
    'cbsa_parcl_id',
    'cbsa',
    'current_entity_owner_name',
    'num_homes_owned',
    'num_listings',
    'num_listings_with_price_decline',
    'pct_listed',
    'avg_price_change',
    'pct_listings_with_price_decline'
]]

market_output = market_output.rename(columns={
    'current_entity_owner_name': 'institutional_investor',
    'pct_listed': 'pct_portfolio_listed'
})

market_output['cbsa_parcl_id'] = market_output['cbsa_parcl_id'].astype(int)
market_output.to_csv(f'{output_dir}/top10cbsa_institutional_investor_metrics.csv', index=False)

In [434]:
market_pivot_pct_listed = market_output.pivot(index=['cbsa'], columns='institutional_investor', values='pct_portfolio_listed')
market_pivot_pct_listed.to_csv(f'{output_dir}/top10cbsa_institutional_investor_pct_portfolio_listed_pivot.csv')

In [436]:
market_homes_owned_pivot = market_output.pivot(index=['cbsa'], columns='institutional_investor', values='num_homes_owned')
market_homes_owned_pivot.to_csv(f'{output_dir}/top10cbsa_institutional_investor_homes_owned_pivot.csv')