# Welcome to the Lab 🥼🧪

## Investor Purchase Price vs. Market Price

In this notebook, we compare the median price investors pay for properties with the median price paid across all purchases in the same market.

We will create a chart like this: 

![Chart](../assets/atlanta_investor_share.png)

**Note:** This notebook works with any of the 70k+ markets supported by the Parcl Labs API.

As a reminder, you can get your Parcl Labs API key [here](https://dashboard.parcllabs.com/signup) to follow along. 

To run this immediately, you can use Google Colab. Remember, you must set your `PARCL_LABS_API_KEY` as a secret. See this [guide](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) for more information.

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ParclLabs/parcllabs-examples/blob/main/python/inspiration/investor_purchase_price_vs_market_price.ipynb)

In [None]:
# Environment setup
import os
import sys
import subprocess
from datetime import datetime

# Colab setup (reached through the "Open in Colab" badge above): when the
# google.colab module is loaded, install the dependencies and read the API key
# from Colab's secret store; otherwise read it from the environment.
if "google.colab" in sys.modules:
    from google.colab import userdata
    # IPython magic; kaleido is presumably needed for the fig.write_image
    # export used later in the notebook.
    %pip install parcllabs plotly kaleido
    api_key = userdata.get('PARCL_LABS_API_KEY')
else:
    # os.getenv returns None when the variable is not set.
    api_key = os.getenv('PARCL_LABS_API_KEY')

In [None]:
import parcllabs
import pandas as pd
import plotly.express as px
from parcllabs import ParclLabsClient

# Show which SDK version is installed so results can be tied to a release.
print(f"Parcl Labs Version: {parcllabs.__version__}")

In [None]:
# Hosted "powered by Parcl Labs" logo assets, keyed by colour variant.
labs_logo_lookup = dict(
    blue='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
    white='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg',
)

# Layout-image settings handed to fig.add_layout_image() for every chart.
# Positions and sizes are in "paper" coordinates; the white logo variant is
# centred horizontally and anchored by its bottom edge at y=1.02.
labs_logo_dict = {
    'source': labs_logo_lookup['white'],
    'xref': "paper",
    'yref': "paper",
    'x': 0.5,
    'y': 1.02,
    'sizex': 0.15,
    'sizey': 0.15,
    'xanchor': "center",
    'yanchor': "bottom",
}

def build_chart(
    name,
    data,
    height=900,
    width=1600,
):
    """Plot investor vs. all-buyer median purchase prices, save and show it.

    Draws one smoothed line per value of the ``type`` column, applies the
    dark theme and the logo, writes the figure to ``<output_dir>/<name>.png``
    and finally displays it.

    Relies on the notebook-level names ``px``, ``labs_logo_dict`` and
    ``output_dir``.

    Args:
        name: Market name; used in the chart title and the PNG file name.
        data: Frame passed to ``px.line``; it must provide the ``date``,
            ``price_median_sales`` and ``type`` columns.
        height: Figure and export height in pixels (default 900).
        width: Figure and export width in pixels (default 1600).
    """
    fig = px.line(
        data,
        x='date',
        y='price_median_sales',
        color='type',  # one line per buyer type
        title=f'Investor Purchases vs. All Purchases (Median Purchase Price): {name}',
        labels={'price_median_sales': 'Median Purchase Price ($)'},
        line_shape='spline',  # smooth the lines
    )

    fig.add_layout_image(labs_logo_dict)

    fig.update_layout(
        margin=dict(l=0, r=0, t=110, b=0),
        height=height,
        width=width,
        legend=dict(
            x=0.01,
            y=0.99,
            traceorder="normal",
            xanchor='left',
            yanchor='top',
            title='Buyer Type',
            font=dict(size=12),
            bgcolor='rgba(0, 0, 0, 0)',
        ),
        title={
            'y': 0.98,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24),
        },
        plot_bgcolor='#080D16',
        paper_bgcolor='#080D16',
        font=dict(color='#FFFFFF'),
        # Gridlines are configured here once; the figure has a single x and
        # a single y axis, so no separate update_xaxes/update_yaxes pass is
        # needed.
        xaxis=dict(
            title_text='',
            showgrid=True,
            gridwidth=1,
            gridcolor='LightGrey',
            tickangle=-45,
            tickfont=dict(size=12),
        ),
        yaxis=dict(
            title_text='Median Purchase Price ($)',
            showgrid=True,
            gridwidth=1,
            gridcolor='LightGrey',
            tickfont=dict(size=12),
        ),
    )

    fig.update_traces(
        line=dict(width=3),
        mode='lines+markers',
        marker=dict(size=8, symbol='circle', line=dict(width=2, color='#080D16'))
    )

    # Fixed colours per buyer type; traces with any other name keep the
    # colour plotly assigned to them.
    color_map = {
        'Investor': '#448CF2',
        'All': '#FFFFFF',
    }
    for trace in fig.data:
        if trace.name in color_map:
            trace.line.color = color_map[trace.name]
            trace.marker.color = color_map[trace.name]

    # A path separator inside the market name would otherwise be read as a
    # (non-existent) sub-directory of output_dir.
    file_stem = name.replace(os.sep, '-').replace('/', '-')
    fig.write_image(
        os.path.join(output_dir, f'{file_stem}.png'),
        width=width,
        height=height,
    )

    # Show the plot
    fig.show()

def format_names(nme):
    """Shorten a market name such as 'A-B, Xx-Yy' to 'A, XX'.

    The text before the first comma supplies the metro: its first
    hyphen-separated part, then the first '/'-separated part of that. The
    text after the last comma supplies the state: upper-cased, first
    hyphen-separated part.
    """
    pieces = nme.split(',')

    state_codes = pieces[-1].strip().upper().split('-')
    state = state_codes[0]

    primary_city = pieces[0].split('-')[0].strip()
    primary_city = primary_city.split('/')[0].strip()

    return f"{primary_city}, {state}"

In [None]:
# Output location: charts are written to
# ./graphics/investor-vs-market-buying-<today's date>/
save_dir = './graphics'
new_dir_name = f"investor-vs-market-buying-{datetime.today().date()}"
output_dir = os.path.join(save_dir, new_dir_name)

# exist_ok=True makes re-running the cell a no-op and avoids the
# check-then-create race of testing os.path.exists first.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# initialize client with the API key resolved during environment setup
# NOTE(review): `limit=100` is passed straight to the SDK — presumably the
# default page size for requests; confirm against the SDK documentation.
client = ParclLabsClient(api_key=api_key, limit=100)

In [None]:
# Look up the national market by searching for 'United States'.
usa = client.search_markets.retrieve(
    query='United States',
    as_dataframe=True
)

# Keep the id and display name of the first search hit.
# NOTE(review): assumes the first row is the national market — confirm.
usa_parcl_id = usa['parcl_id'].values[0]
us_name = usa['name'].values[0]

In [None]:
# Candidate markets: CBSA-type markets sorted by total population, requested
# with limit=50.
# NOTE(review): the sort direction is whatever the API defaults to —
# presumably largest first; confirm.
mkts = client.search_markets.retrieve(
    as_dataframe=True,
    location_type='CBSA',
    sort_by='TOTAL_POPULATION',
    params={'limit': 50}
)

# Parcl IDs of the candidate markets, used for the bulk request that follows.
mkts_parcl_ids = mkts['parcl_id'].tolist()

In [None]:
# Investor housing-stock ownership for every candidate market.
# NOTE(review): limit=1 requests a single record per market — presumably the
# most recent one; confirm.
results = client.investor_metrics_housing_stock_ownership.retrieve_many(
    as_dataframe=True,
    params={'limit': 1},
    parcl_ids=mkts_parcl_ids
)
# NOTE: this is not the last expression in the cell, so Jupyter does not
# display it.
results.head()

# Same metric for the national market.
us_results = client.investor_metrics_housing_stock_ownership.retrieve(
    as_dataframe=True,
    params={'limit': 1},
    parcl_id=usa_parcl_id
)

# Last expression in the cell: displayed as the cell output.
us_results

In [None]:
def print_text(pid, name, date='2024-03-01', market_data=None, ownership=None):
    """Print how investor purchase prices compare with the whole market.

    Prints the heading, the investor ownership share, the investor and
    market median purchase prices, and the investor discount or premium
    relative to the market price.

    Args:
        pid: Parcl ID of the market to summarise.
        name: Heading printed above the summary.
        date: Value matched against the ``date`` column of both frames.
            Defaults to '2024-03-01'.
        market_data: Frame with ``parcl_id``, ``date``, ``type`` and
            ``price_median_sales`` columns. Defaults to the notebook-level
            ``mkt_data``.
        ownership: Frame with ``parcl_id``, ``date`` and ``pct_ownership``
            columns. Defaults to the notebook-level ``results``.

    Raises:
        IndexError: If either frame has no row for ``pid`` on ``date``.
    """
    # Fall back to the notebook-level frames so existing two-argument calls
    # keep working unchanged.
    if market_data is None:
        market_data = mkt_data
    if ownership is None:
        ownership = results

    def _first_value(frame, mask, column, what):
        # Return the first matching value, or fail with a readable message
        # instead of a bare "index 0 is out of bounds".
        matches = frame.loc[mask, column].values
        if len(matches) == 0:
            raise IndexError(f"no {what} found for parcl_id={pid} on {date}")
        return matches[0]

    on_date = (market_data['parcl_id'] == pid) & (market_data['date'] == date)
    investor_sales = _first_value(
        market_data,
        on_date & (market_data['type'] == 'Investor'),
        'price_median_sales',
        'investor purchase price',
    )
    all_sales = _first_value(
        market_data,
        on_date & (market_data['type'] == 'All'),
        'price_median_sales',
        'market purchase price',
    )
    pct_ownership = _first_value(
        ownership,
        (ownership['parcl_id'] == pid) & (ownership['date'] == date),
        'pct_ownership',
        'investor ownership',
    )

    # Negative means investors paid less than the market median.
    pct_diff = (investor_sales - all_sales) / all_sales
    direction = 'Discount' if pct_diff < 0 else 'Premium'
    pct_diff_str = f"{abs(pct_diff):.02%}"

    # pct_ownership is divided by 100 before being rendered with the "%"
    # format, which multiplies by 100 again.
    investor_ownership_str = f"{pct_ownership / 100:.02%}"

    print(f'{name}')
    print('')
    print(f"Total Investor Market Ownership: {investor_ownership_str}")
    print(f"Investor Purchase Price: ${investor_sales:,.0f}")
    print(f"Market Purchase Price: ${all_sales:,.0f}")
    print(f"Investors Buy at {pct_diff_str} {direction}")

# NOTE(review): print_text reads the notebook-level `mkt_data`, which is only
# assigned in a later cell (from prepare_data). Run that cell first, or this
# call raises NameError on a fresh kernel.
print_text(2899753, 'NY')

In [None]:
# Scratch check of the percentage maths and ".02%" formatting used in
# print_text, with sample numbers; the cell evaluates to '-59.17%'.
diff = (98000-240000)/240000
f"{diff:.02%}"

In [None]:
# Parcl IDs of (at most) the 50 rows of `results` with the highest
# pct_ownership, highest first.
ids = results.sort_values('pct_ownership', ascending=False).head(50)['parcl_id'].tolist()

In [None]:
# Fetch housing event prices from START_DATE onward, twice over:
# investor-specific (investor_metrics_housing_event_prices) and market-wide
# (market_metrics_housing_event_prices), first for the national market and
# then for the selected markets in `ids`.

START_DATE = '2021-03-01'

# National market: investor purchases
usa_investor_prices = client.investor_metrics_housing_event_prices.retrieve(
    parcl_id=usa_parcl_id,
    start_date=START_DATE,
    as_dataframe=True
)

# National market: all purchases
usa_prices = client.market_metrics_housing_event_prices.retrieve(
    parcl_id=usa_parcl_id,
    start_date=START_DATE,
    as_dataframe=True
)


# Selected markets: investor purchases
mkt_investor_prices = client.investor_metrics_housing_event_prices.retrieve_many(
    parcl_ids=ids,
    start_date=START_DATE,
    as_dataframe=True
)

# Selected markets: all purchases
mkt_prices = client.market_metrics_housing_event_prices.retrieve_many(
    parcl_ids=ids,
    start_date=START_DATE,
    as_dataframe=True
)

In [None]:
def prepare_data(
    investors,
    all_purchases
):
    """Stack investor and all-buyer prices into one long frame.

    Neither input frame is modified; the ``type`` label is added to copies.

    Args:
        investors: Frame with ``parcl_id``, ``date`` and
            ``price_median_acquisitions`` columns.
        all_purchases: Frame with ``parcl_id``, ``date`` and
            ``price_median_sales`` columns.

    Returns:
        A frame with the columns ``parcl_id``, ``date``,
        ``price_median_sales`` and ``type``: the ``all_purchases`` rows
        (type 'All') followed by the ``investors`` rows (type 'Investor',
        with ``price_median_acquisitions`` renamed to
        ``price_median_sales``). Row indexes are kept as they were in the
        inputs.
    """
    columns = ['parcl_id', 'date', 'price_median_sales', 'type']

    # rename() and assign() both return new frames, so the caller's data is
    # left untouched.
    investor_rows = investors.rename(
        columns={'price_median_acquisitions': 'price_median_sales'}
    ).assign(type='Investor')
    market_rows = all_purchases.assign(type='All')

    return pd.concat([market_rows[columns], investor_rows[columns]], axis=0)

# NOTE: `usa` is rebound here — from the market-search result to the combined
# investor/all-buyer price frame for the national market.
usa = prepare_data(usa_investor_prices, usa_prices)
# Combined investor/all-buyer price frame for the selected markets.
mkt_data = prepare_data(mkt_investor_prices, mkt_prices)

In [None]:
# Attach market names (inner join on parcl_id) and shorten them to
# "Metro, ST".
# NOTE: not idempotent — re-running this cell merges a second `name` column,
# after which `mkt_data['name']` no longer exists.
mkt_data = pd.merge(mkts[['parcl_id', 'name']], mkt_data, on='parcl_id')
mkt_data['name'] = mkt_data['name'].apply(format_names)
mkt_data['name'].unique()

In [None]:
# National chart. `usa` is the combined US price frame returned by
# prepare_data; the name `out` exists only inside that function.
build_chart(us_name, usa)

In [None]:
# First 25 distinct (parcl_id, name) pairs — a lookup table from market id to
# formatted name.
tmp = mkt_data[['parcl_id', 'name']].drop_duplicates().head(25)
tmp

In [None]:
# Print the summary for one market, headed by its formatted name from `tmp`.
# The lookup raises IndexError if `pid` is not among the rows kept in `tmp`.
pid = 2900266
print_text(pid, tmp.loc[tmp['parcl_id']==pid]['name'].values[0])

In [None]:
# Build, save and show one chart per formatted market name. Rows are selected
# by name, so markets that format to the same name would share a chart.
for name in mkt_data['name'].unique():
    data = mkt_data.loc[mkt_data['name']==name]
    build_chart(name, data)

In [None]:
# Quick check of the name formatter; evaluates to 'San Antonio, TX'.
format_names('San Antonio-New Braunfels, Tx')