# Welcome to the Lab 🥼🧪

Exploratory data analysis of price feeds, portfolio construction, seasonality, etc. 

In [None]:
import os
import sys
import json
import requests
import subprocess
from datetime import datetime, timedelta
from urllib.request import urlopen

import parcllabs
import numpy as np
import pandas as pd
import seaborn as sns
from prophet import Prophet
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from parcllabs import ParclLabsClient
from pypfopt import EfficientFrontier, risk_models, expected_returns

# Read the Parcl Labs API key from the environment; this is None when the
# PARCL_LABS_API_KEY variable is not set.
api_key = os.environ.get('PARCL_LABS_API_KEY')
print(f"Parcl Labs Version: {parcllabs.__version__}")

In [None]:
# Add the parent of the current working directory to the import path so the
# `charting` package imported below can be resolved.
# NOTE(review): presumably the notebook is run from a sub-folder of the repo
# that contains `charting/` — confirm the expected working directory.
cur_dir = os.getcwd()
chart_dir = os.path.join(cur_dir, '..')
sys.path.append(chart_dir)

# Helper that builds the logo image dict later passed to fig.add_layout_image.
from charting.utils import create_labs_logo_dict


In [None]:
# Initialize the Parcl Labs client with the API key read from the
# PARCL_LABS_API_KEY environment variable (api_key is None if it is unset).
client = ParclLabsClient(api_key)

In [None]:
# Notebook configuration: maps a short feed label to a feed identifier string.
pf_options = {
    'rental': 'rental_price_feed',
    'pricefeed': 'price_feed'
}

# Feed type selected for this run.
# NOTE(review): the cells visible in this section use 'price_feed' and
# client.price_feed directly and never read PF_TYPE — confirm it is still needed.
PF_TYPE = pf_options['pricefeed']

In [None]:
# Retrieve up to 14 markets, sorted by the PARCL_EXCHANGE_MARKET field in
# descending order.
markets = client.search_markets.retrieve(
    as_dataframe=True,
    sort_by='PARCL_EXCHANGE_MARKET',
    sort_order='DESC',
    params={
        'limit': 14
    }
)

# Explicitly add the market with parcl_id 5352987 (named `miami` here).
miami = client.search_markets.retrieve(
    parcl_id=5352987,
    as_dataframe=True
)

# Combine both results. A market that is already part of the first query
# would otherwise appear twice (and be charted twice by the per-market loop),
# so keep one row per parcl_id and rebuild a unique 0..n-1 index.
markets = (
    pd.concat([markets, miami], ignore_index=True)
    .drop_duplicates(subset='parcl_id')
    .reset_index(drop=True)
)
markets

In [None]:
# Retrieve the price feed of every selected market from START_DATE onward.
START_DATE = '2019-01-01'
feeds = client.price_feed.retrieve_many(
    parcl_ids=markets['parcl_id'].tolist(),
    start_date=START_DATE,
    as_dataframe=True,
    params={'limit': 1000},  # expand the limit to 1000, these are daily series
    auto_paginate=True, # auto paginate to get all the data - WARNING: ~6k credits can be used in one parcl price feed. Change the START_DATE to a more recent date to reduce the number of credits used
)

In [None]:
# Retrieve housing event counts for the same markets; later cells read the
# `sales` and `new_listings_for_sale` columns from this frame.
# NOTE(review): unlike the price feed request, auto_paginate is not passed
# here — confirm a single page of 1000 rows covers the requested date range.
demand = client.market_metrics_housing_event_counts.retrieve_many(
    parcl_ids=markets['parcl_id'].tolist(),
    start_date=START_DATE,
    as_dataframe=True,
    params={'limit': 1000},  # expand the limit to 1000; NOTE(review): later cells label these counts as monthly, not daily — confirm
)

In [None]:
def calculate_stats(
    mf: pd.DataFrame
):
    """Summarise a price feed: 52-week range, latest value and YoY change.

    Args:
        mf: Price feed rows with a datetime ``date`` column and a numeric
            ``price_feed`` column. Rows may be in any order; the frame is
            not modified.

    Returns:
        A tuple ``(high_52w, low_52w, last_value, yoy_change_delta,
        yoy_change_pct, yoy_sign)`` where:

        * ``high_52w`` / ``low_52w`` are the max / min ``price_feed`` over the
          52 weeks ending at the latest date,
        * ``last_value`` is the ``price_feed`` on the latest date,
        * ``yoy_change_delta`` / ``yoy_change_pct`` compare ``last_value``
          with the value 52 weeks earlier (pct is a fraction, not percent);
          both are NaN when the history does not reach back 52 weeks,
        * ``yoy_sign`` is ``'-'`` for a negative delta and ``''`` otherwise.

    Raises:
        IndexError: If ``mf`` has no rows.
    """
    # Order chronologically so "last row" and "latest date" always agree,
    # even if the caller passes unsorted data. A stable sort keeps the
    # original order of rows that share a date.
    ordered = mf.sort_values('date', kind='stable')
    dates = ordered['date']
    prices = ordered['price_feed']

    last_date = dates.max()
    one_year_ago = last_date - pd.DateOffset(weeks=52)

    # 52-week high / low over the trailing window
    trailing = prices[dates >= one_year_ago]
    high_52w = trailing.max()
    low_52w = trailing.min()

    # Last value
    last_value = prices.iloc[-1]

    # Reference value for the Year-over-Year change: the exact date if
    # present, otherwise the closest earlier observation.
    exact_match = prices[dates == one_year_ago]
    if not exact_match.empty:
        one_year_ago_value = exact_match.iloc[0]
    else:
        earlier = prices[dates < one_year_ago]
        # With less than 52 weeks of history there is nothing to compare
        # against; report NaN instead of raising.
        one_year_ago_value = earlier.iloc[-1] if not earlier.empty else float('nan')

    yoy_change_delta = last_value - one_year_ago_value
    yoy_change_pct = yoy_change_delta / one_year_ago_value
    yoy_sign = '-' if yoy_change_delta < 0 else ''
    return high_52w, low_52w, last_value, yoy_change_delta, yoy_change_pct, yoy_sign

def create_ticker(name):
    """Turn a market name into a short ticker-style label.

    Known place names are replaced by fixed abbreviations and the word
    'City' is dropped. Names without a known abbreviation are returned
    unchanged apart from whitespace normalisation.

    Args:
        name: Market name, e.g. ``'Atlanta City'``.

    Returns:
        The ticker string, e.g. ``'ATL'``, without leading, trailing or
        repeated whitespace.
    """
    # Applied in order. 'Miami Beach' must be rewritten before 'Miami',
    # otherwise it would become 'MIA Beach'.
    replacements = (
        ('City', ''),
        ('New York', 'NYC'),
        ('Los Angeles', 'LAX'),
        ('Atlanta', 'ATL'),
        ('Las Vegas', 'LAS'),
        ('San Diego', 'SAN'),
        ('Boston', 'BOS'),
        ('Miami Beach', 'MIABCH'),
        ('San Francisco', 'SFO'),
        ('Denver', 'DEN'),
        ('Chicago', 'CHI'),
        ('Washington', 'DC'),
        ('Austin', 'ATX'),
        ('United States Of America', 'USA'),
        ('Kings County', 'BRKLYN'),
        ('Miami', 'MIA'),
    )
    for old, new in replacements:
        name = name.replace(old, new)

    # Dropping 'City' leaves stray spaces behind ('Atlanta City' -> 'ATL ');
    # collapse and trim them so the ticker is clean in chart labels.
    return ' '.join(name.split())

def format_names(nme):
    """Return a display-friendly version of a market name.

    Three names are rewritten ('United States Of America', 'Kings County',
    'Washington City'); any other text is returned unchanged.
    """
    display_overrides = {
        'United States Of America': 'USA',
        'Kings County': 'Brooklyn, NY',
        'Washington City': 'Washington, DC',
    }
    for raw_name, display_name in display_overrides.items():
        nme = nme.replace(raw_name, display_name)
    return nme

# Derive the chart labels for every market from its `name` column:
# `ticker` is the short code and `clean_name` the display name.
markets['ticker'] = markets['name'].apply(create_ticker)
markets['clean_name'] = markets['name'].apply(format_names)

In [None]:
# Define the function to build the combined line chart
def build_combined_line_chart(
    data_main: pd.DataFrame,
    data_secondary: pd.DataFrame,
    title_main: str = None,
    x_axis_title: str = None,
    save_path: str = None,
    value_name_main: str = None,
    value_name_secondary: str = None,
    ticker_msg: str = None,
    volume_msg: str = None,
    pricefeed_msg: str = None,
    last_pf_date: str = None,
    msg: str = None
):
    """Draw a two-panel dark chart: a price line above a volume bar chart.

    The top panel plots ``value_name_main`` over time as a line whose
    segments are green where ``daily_return`` is >= 0 and red otherwise.
    The bottom panel plots ``value_name_secondary`` as bars coloured by the
    ``volColor`` column, overlaid with a 6-period rolling mean. Text
    annotations and the module-level ``labs_logo_dict`` image are placed
    around the panels. The figure is optionally written to ``save_path`` and
    then shown. Only the top and bottom panels are drawn; there is no third
    (listings) panel.

    Args:
        data_main: Frame with ``date``, ``daily_return`` and
            ``value_name_main`` columns. Not modified.
        data_secondary: Frame with ``date``, ``volColor`` and
            ``value_name_secondary`` columns. Not modified.
        title_main: Accepted for backward compatibility; currently unused.
        x_axis_title: Accepted for backward compatibility; currently unused.
        save_path: If truthy, the image file to write. Missing parent
            directories are created.
        value_name_main: Column of ``data_main`` plotted in the top panel.
        value_name_secondary: Column of ``data_secondary`` plotted as bars.
        ticker_msg: Text shown at the top left, above ``last_pf_date``.
        volume_msg: Text shown at the top left of the bottom panel.
        pricefeed_msg: Text shown at the top left of the top panel.
        last_pf_date: Text shown at the top left, above the top panel.
        msg: Statistics text shown at the top right.

    Returns:
        None. The figure is displayed with ``fig.show()``.

    Raises:
        IndexError: If ``data_secondary`` has no rows (the last moving
            average value cannot be read).
        NameError: If ``labs_logo_dict`` has not been defined at call time.
    """
    # The notebook's import cell does not import make_subplots, so bring it
    # into scope here.
    from plotly.subplots import make_subplots

    HEIGHT_MAIN = 500
    HEIGHT_SECONDARY = 200
    TOTAL_HEIGHT = HEIGHT_MAIN + HEIGHT_SECONDARY
    WIDTH = 1200
    GRID_COLOR = 'rgba(255, 255, 255, 0.2)'  # Grid line color with opacity
    AXIS_COLOR = 'rgba(255, 255, 255, 0.7)'  # Axis line color with opacity

    # Work on copies: callers usually pass filtered slices of larger frames,
    # and the column assignments below must not leak back into them.
    data_main = data_main.copy()
    data_secondary = data_secondary.copy()
    data_main['date'] = pd.to_datetime(data_main['date'])
    data_secondary['date'] = pd.to_datetime(data_secondary['date'])

    # Both panels share the same explicit x range so they line up.
    date_range = [
        min(data_main['date'].min(), data_secondary['date'].min()),
        max(data_main['date'].max(), data_secondary['date'].max()),
    ]

    # Create subplots: 2 rows, 1 column
    fig = make_subplots(
        rows=2, cols=1,
        shared_xaxes=False,
        row_heights=[0.8, 0.2],
        vertical_spacing=0.05
    )

    # Colour each line segment by the sign of the daily return at its end
    # point. Segments of one colour are collected into a single trace and
    # separated by None (a gap), so the figure holds at most two line traces
    # instead of one trace per day.
    dates = data_main['date'].tolist()
    values = data_main[value_name_main].tolist()
    is_gain = (data_main['daily_return'] >= 0).tolist()
    segments = {'green': ([], []), 'red': ([], [])}
    for i in range(1, len(dates)):
        xs, ys = segments['green' if is_gain[i] else 'red']
        xs.extend((dates[i - 1], dates[i], None))
        ys.extend((values[i - 1], values[i], None))

    for color, (xs, ys) in segments.items():
        if not xs:
            continue
        fig.add_trace(
            go.Scatter(
                x=xs,
                y=ys,
                mode='lines',
                line=dict(width=3, color=color),
                connectgaps=False,  # keep the None separators as gaps
                showlegend=False
            ),
            row=1, col=1
        )

    # Plot the secondary data as a bar chart
    fig.add_trace(
        go.Bar(
            x=data_secondary['date'],
            y=data_secondary[value_name_secondary],
            name='Sales Volume',
            marker=dict(color=data_secondary['volColor'])  # Use volColor for the bar chart colors
        ),
        row=2, col=1
    )

    # Add the 6-period rolling mean of the secondary series
    data_secondary['sales_ma'] = data_secondary[value_name_secondary].rolling(window=6).mean()
    ma_last = data_secondary['sales_ma'].iloc[-1]
    fig.add_trace(
        go.Scatter(
            x=data_secondary['date'],
            y=data_secondary['sales_ma'],
            mode='lines',
            name='6-Month Moving Average',
            line=dict(width=2, color='#FFA500'),  # Orange for the moving average line
            showlegend=False
        ),
        row=2, col=1
    )

    def x_axis_style():
        # Shared x-axis styling for both panels; vertical grid lines are shown.
        return dict(
            title_text='',
            showgrid=True,
            gridcolor=GRID_COLOR,
            tickangle=0,
            tickfont=dict(size=14),
            linecolor=AXIS_COLOR,
            linewidth=1,
            tickformat='`%y',
            range=date_range,
        )

    def y_axis_style(series):
        # Shared y-axis styling; the range spans the data plus 10 units of
        # headroom at the top.
        return dict(
            showgrid=True,
            gridwidth=0.5,
            gridcolor=GRID_COLOR,
            tickfont=dict(size=14),
            ticksuffix='',
            zeroline=False,
            linecolor=AXIS_COLOR,
            linewidth=1,
            side='right',
            ticks='outside',
            rangemode='normal',
            range=[series.min(), series.max() + 10]
        )

    # Update layout
    fig.update_layout(
        height=TOTAL_HEIGHT,
        width=WIDTH,
        plot_bgcolor='#000000',  # Dark background for better contrast
        paper_bgcolor='#000000',  # Dark background for the paper
        font=dict(color='#FFFFFF'),
        xaxis=x_axis_style(),
        xaxis2=x_axis_style(),
        yaxis=y_axis_style(data_main[value_name_main]),
        yaxis2=y_axis_style(data_secondary[value_name_secondary]),
        hovermode='x unified',  # Unified hover mode for better interactivity
        hoverlabel=dict(
            bgcolor='#1F1F1F',
            font_size=14,
            font_family="Rockwell"
        ),
        showlegend=False,  # Legend hidden; series are labelled by annotations
        margin=dict(
            l=10,  # Left margin
            r=10,  # Right margin
            b=10,  # Bottom margin
            t=50   # Top margin
        ),
    )

    def annotate(text, x, y, xanchor, yanchor=None):
        # Place white, arrow-less text at paper coordinates (x, y).
        spec = dict(
            text=text,
            x=x,
            y=y,
            xref='paper',
            yref='paper',
            xanchor=xanchor,
            showarrow=False,
            font=dict(size=14, color='#FFFFFF')
        )
        if yanchor is not None:
            spec['yanchor'] = yanchor
        fig.add_annotation(spec)

    annotate(msg, 1, 1.04, 'right', 'top')           # stats, top right
    annotate(pricefeed_msg, 0.003, 0.99, 'left')     # top-left of the price panel
    annotate(volume_msg, 0.003, 0.15, 'left')        # top-left of the volume panel
    annotate(f'--- MA(6) {ma_last:,.2f}', 0.003, 0.12, 'left')
    annotate(last_pf_date, 0, 1.04, 'left', 'top')
    annotate(ticker_msg, 0, 1.07, 'left', 'top')

    # Draw a white border around both panels
    fig.update_xaxes(
        showline=True,
        linewidth=2,
        linecolor='#FFFFFF',
        mirror=True
    )
    fig.update_yaxes(
        showline=True,
        linewidth=2,
        linecolor='#FFFFFF',
        mirror=True,
        ticks="outside"
    )

    # Add Parcl Labs logo (module-level dict, resolved at call time)
    fig.add_layout_image(
        labs_logo_dict
    )

    if save_path:
        # write_image does not create missing folders.
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        fig.write_image(save_path, width=WIDTH, height=TOTAL_HEIGHT)

    # Show the plot
    fig.show()

# Logo image settings read by build_combined_line_chart through the
# module-level name `labs_logo_dict`; this must be defined before that
# function is called.
# NOTE(review): x=1 / y=1.07 with right/top anchors presumably places the logo
# above the top-right corner of the plot — confirm against create_labs_logo_dict.
labs_logo_dict = create_labs_logo_dict(
    src='labs',
    y=1.07,
    x=1,
    xanchor='right',
    yanchor='top',
    sizex=0.15,
    sizey=0.15,
)




In [None]:
# Build the price-feed vs. sales-volume chart for a single market.
pid = 5373892

# Copy the per-market slices so the column assignments below do not write
# into filtered views of `feeds` / `demand` (avoids SettingWithCopyWarning).
mf = feeds[feeds['parcl_id'] == pid].copy()
df = demand[demand['parcl_id'] == pid].copy()
# `supply` mirrors `df`; it is prepared for a listings panel that
# build_combined_line_chart does not draw.
supply = demand[demand['parcl_id'] == pid].copy()

# Parse dates first so the sort is chronological, then compute daily returns
mf['date'] = pd.to_datetime(mf['date'])
mf = mf.sort_values('date')
last_pricefeed_value = mf['price_feed'].iloc[-1]
last_pricefeed_ts = mf['date'].iloc[-1]
# Built from parts instead of strftime('%-d'), which is platform-specific.
last_pricefeed_date = f"{last_pricefeed_ts.day} {last_pricefeed_ts.strftime('%B')}"
last_pf_date = f"{last_pricefeed_ts.day}-{last_pricefeed_ts.strftime('%B-%Y')}"
mf['daily_return'] = mf['price_feed'].pct_change()
mf = mf.dropna()  # Remove rows with NaN values resulting from pct_change calculation

# Monthly price return (last / first value of the month - 1) decides the
# colour of that month's volume bar.
mf['month'] = mf['date'].dt.to_period('M')
monthly_return = mf.groupby('month')['price_feed'].agg(lambda x: x.iloc[-1] / x.iloc[0] - 1).reset_index(name='volColor')

df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date')
supply['date'] = pd.to_datetime(supply['date'])
supply = supply.sort_values('date')
last_sales_value = df['sales'].iloc[-1]
last_sales_month = df['date'].iloc[-1]
last_supply_value = supply['new_listings_for_sale'].iloc[-1]
last_supply_month = supply['date'].iloc[-1]
df['month'] = df['date'].dt.to_period('M')
supply['month'] = supply['date'].dt.to_period('M')
df = df.merge(monthly_return, on='month')
supply = supply.merge(monthly_return, on='month')

# Turn the monthly return into a bar colour: green for >= 0, red otherwise
df['volColor'] = np.where(df['volColor'] >= 0, 'green', 'red')
supply['volColor'] = np.where(supply['volColor'] >= 0, 'green', 'red')
ticker = markets[markets['parcl_id'] == pid]['ticker'].values[0]
name = markets[markets['parcl_id'] == pid]['clean_name'].values[0]

high_52w, low_52w, last_value, yoy_change_delta, yoy_change_pct, yoy_sign = calculate_stats(mf)
msg = f"<b>52w High</b> ${high_52w:.2f} <b>52w Low</b> ${low_52w:.2f} <b>Last</b> ${last_value:.2f} <b>YoY Change</b> {round(yoy_change_delta, 2)} ({yoy_change_pct*100:.02f}%)"
ticker_msg = f"<b>{ticker}</b> {name} Parcl Exchange"
# Label the volume with the month of the latest sales observation instead of
# a hard-coded month.
last_sales_label = last_sales_month.strftime('%b')
volume_msg = f'Sales Volume (Monthly) {last_sales_value:,} ({last_sales_label})'
pricefeed_msg = f'{ticker} (Daily) ${last_pricefeed_value} ({last_pricefeed_date})'

build_combined_line_chart(
    data_main=mf,
    data_secondary=df,
    title_main='Price per Square Foot',
    value_name_main='price_feed',
    value_name_secondary='sales',
    ticker_msg=ticker_msg,
    msg=msg,
    volume_msg=volume_msg,
    pricefeed_msg=pricefeed_msg,
    last_pf_date=last_pf_date,
    save_path=f'../graphics/pf_vs_volume_charts/{name}_pf_vs_volume.png'
)


In [None]:
# Build and save the price-feed vs. sales-volume chart for every market.
for pid in markets['parcl_id']:

    # Copy the per-market slices so the column assignments below do not write
    # into filtered views of `feeds` / `demand` (avoids SettingWithCopyWarning).
    mf = feeds[feeds['parcl_id'] == pid].copy()
    df = demand[demand['parcl_id'] == pid].copy()
    # `supply` mirrors `df`; it is prepared for a listings panel that
    # build_combined_line_chart does not draw.
    supply = demand[demand['parcl_id'] == pid].copy()

    # One market without data must not abort the whole batch.
    if mf.empty or df.empty:
        print(f'Skipping parcl_id {pid}: no price feed or housing event rows')
        continue

    # Parse dates first so the sort is chronological, then compute daily returns
    mf['date'] = pd.to_datetime(mf['date'])
    mf = mf.sort_values('date')
    last_pricefeed_value = mf['price_feed'].iloc[-1]
    last_pricefeed_ts = mf['date'].iloc[-1]
    # Built from parts instead of strftime('%-d'), which is platform-specific.
    last_pricefeed_date = f"{last_pricefeed_ts.day} {last_pricefeed_ts.strftime('%B')}"
    last_pf_date = f"{last_pricefeed_ts.day}-{last_pricefeed_ts.strftime('%B-%Y')}"
    mf['daily_return'] = mf['price_feed'].pct_change()
    mf = mf.dropna()  # Remove rows with NaN values resulting from pct_change calculation

    # Monthly price return (last / first value of the month - 1) decides the
    # colour of that month's volume bar.
    mf['month'] = mf['date'].dt.to_period('M')
    monthly_return = mf.groupby('month')['price_feed'].agg(lambda x: x.iloc[-1] / x.iloc[0] - 1).reset_index(name='volColor')

    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')
    supply['date'] = pd.to_datetime(supply['date'])
    supply = supply.sort_values('date')
    last_sales_value = df['sales'].iloc[-1]
    last_sales_month = df['date'].iloc[-1]
    last_supply_value = supply['new_listings_for_sale'].iloc[-1]
    last_supply_month = supply['date'].iloc[-1]
    df['month'] = df['date'].dt.to_period('M')
    supply['month'] = supply['date'].dt.to_period('M')
    df = df.merge(monthly_return, on='month')
    supply = supply.merge(monthly_return, on='month')

    # Nothing left to plot once NaN rows are dropped or no sales month
    # overlaps the price feed.
    if mf.empty or df.empty:
        print(f'Skipping parcl_id {pid}: no overlapping price feed and sales data')
        continue

    # Turn the monthly return into a bar colour: green for >= 0, red otherwise
    df['volColor'] = np.where(df['volColor'] >= 0, 'green', 'red')
    supply['volColor'] = np.where(supply['volColor'] >= 0, 'green', 'red')
    ticker = markets[markets['parcl_id'] == pid]['ticker'].values[0]
    name = markets[markets['parcl_id'] == pid]['clean_name'].values[0]

    high_52w, low_52w, last_value, yoy_change_delta, yoy_change_pct, yoy_sign = calculate_stats(mf)
    msg = f"<b>52w High</b> ${high_52w:.2f} <b>52w Low</b> ${low_52w:.2f} <b>Last</b> ${last_value:.2f} <b>YoY Change</b> {round(yoy_change_delta, 2)} ({yoy_change_pct*100:.02f}%)"
    ticker_msg = f"<b>{ticker}</b> {name} Parcl Exchange"
    # Label the volume with the month of the latest sales observation instead
    # of a hard-coded month.
    last_sales_label = last_sales_month.strftime('%b')
    volume_msg = f'Sales Volume (Monthly) {last_sales_value:,} ({last_sales_label})'
    pricefeed_msg = f'{ticker} (Daily) ${last_pricefeed_value} ({last_pricefeed_date})'

    build_combined_line_chart(
        data_main=mf,
        data_secondary=df,
        title_main='Price per Square Foot',
        value_name_main='price_feed',
        value_name_secondary='sales',
        ticker_msg=ticker_msg,
        msg=msg,
        volume_msg=volume_msg,
        pricefeed_msg=pricefeed_msg,
        last_pf_date=last_pf_date,
        save_path=f'../graphics/pf_vs_volume_charts/{name}_pf_vs_volume.png'
    )
