# Welcome to the Lab 🥼🧪

Price feed vs. rental price feed analysis: how closely sales and rental prices track each other, and which one leads.

In [None]:
import os
import sys
import json
import requests
import subprocess
from datetime import datetime

import parcllabs
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from parcllabs import ParclLabsClient

# Read the API key from the environment; the value is None when the variable is unset.
api_key = os.environ.get('PARCL_LABS_API_KEY')
print(f"Parcl Labs Version: {parcllabs.__version__}")

In [None]:
# Initialize the Parcl Labs client with the key read from PARCL_LABS_API_KEY.
# NOTE(review): `api_key` is None when that variable is unset; presumably the
# client rejects that here or on the first request — confirm.
client = ParclLabsClient(api_key)

In [None]:
# Notebook configuration: short feed label -> feed identifier.
pf_options = dict(
    rental='rental_price_feed',
    pricefeed='price_feed',
)

# Feed selected for this run of the notebook.
PF_TYPE = pf_options['rental']

In [None]:
# Retrieve the markets that are currently tradable on the Parcl Exchange.
# NOTE(review): sorting by PARCL_EXCHANGE_MARKET in descending order presumably
# lists the exchange markets first, so the 14-row limit keeps exactly those
# markets — confirm, and raise the limit when more markets are listed.
market_df = client.search_markets.retrieve(
    sort_by='PARCL_EXCHANGE_MARKET',
    sort_order='DESC',
    as_dataframe=True,
    params={'limit': 14},  # 14 markets were available when this was written
)

# IDs of the returned markets, reused by the feed requests further down.
parcl_ids = market_df['parcl_id'].tolist()
market_df.head()

In [None]:
# Retrieve the price feed and the rental price feed for these markets,
# starting at START_DATE.
START_DATE = '2020-01-01'
feeds = client.price_feed.retrieve_many(
    parcl_ids=parcl_ids,
    start_date=START_DATE,
    as_dataframe=True,
    params={'limit': 1000},  # expand the limit to 1000, these are daily series
    auto_paginate=True
)
    
# WARNING: with auto pagination a single feed can consume ~6k credits.
# Move START_DATE to a more recent date to reduce the number of credits used.
rentals = client.rental_price_feed.retrieve_many(
    parcl_ids=parcl_ids,
    start_date=START_DATE,
    as_dataframe=True,
    params={'limit': 1000},  # expand the limit to 1000, these are daily series
    auto_paginate=True,  # keep requesting pages until all the data is retrieved
)

In [None]:
# Markets kept for the comparison, identified by parcl_id
# (presumably the ones that have a rental price feed — verify).
rental_ids = [5826765, 5387853, 5306725]

# Apply the same ID filter to both feeds.
feeds = feeds[feeds['parcl_id'].isin(rental_ids)]
rentals = rentals[rentals['parcl_id'].isin(rental_ids)]

In [None]:
# Attach the market name to every row of both feeds. Price feed rows without
# a matching market are kept (left join); rental rows without one are dropped
# (inner join).
market_names = market_df[['parcl_id', 'name']]
feeds = pd.merge(feeds, market_names, on='parcl_id', how='left')
rentals = pd.merge(rentals, market_names, on=['parcl_id'], how='inner')


In [None]:
# Give the rental identifier columns their own names so they stay distinct
# from the price feed columns.
rental_column_names = {'parcl_id': 'rental_parcl_id', 'name': 'rental_name'}
rentals = rentals.rename(columns=rental_column_names)
# Tag rental market names with '(R)'; build_corr_matrix relies on this marker
# to tell rental columns apart from price feed columns.
rentals['rental_name'] = rentals['rental_name'] + '(R)'

In [None]:
def build_corr_matrix(
        feeds,
        rentals,
        title: str='Correlation Matrix Heatmap (Rental vs Price Feed since `20)',
        output_title: str='Median Correlation Coefficient (`20)',
        rr: bool=False
):
    """Plot a correlation heatmap and return the median correlation per column.

    Args:
        feeds: Long-format price feed frame with `date`, `name` and
            `price_feed` columns. Not read when `rr` is True.
        rentals: Long-format rental frame with `date`, `rental_name` and
            `rental_price_feed` columns.
        title: Title drawn above the heatmap.
        output_title: Name of the median column in the returned frame.
        rr: When True, correlate the rental markets with each other instead
            of with the price feeds.

    Returns:
        DataFrame with one row per heatmap column and that column's median
        correlation stored under `output_title`.
    """
    # One column per rental market, one row per date.
    rental_wide = rentals.pivot(index='date', columns='rental_name', values='rental_price_feed')

    if rr:
        # Rental vs. rental: the diagonal (each market against itself) is part
        # of this matrix and therefore of the medians returned below.
        heatmap_data = rental_wide.corr()
    else:
        price_wide = feeds.pivot(index='date', columns='name', values='price_feed')
        # Put both feeds side by side on the date index, then correlate every pair.
        full_corr = pd.concat([price_wide, rental_wide], axis=1).corr()

        # Rental columns carry the '(R)' marker; every other label is a price feed.
        price_rows = [label for label in full_corr.index if '(R)' not in label]
        rental_cols = [label for label in full_corr.columns if '(R)' in label]
        heatmap_data = full_corr.loc[price_rows, rental_cols]

    # Draw the heatmap on a fixed -1..1 colour scale.
    plt.figure(figsize=(20, 15))
    sns.heatmap(heatmap_data, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title(title)
    plt.show()

    medians = heatmap_data.median()
    return medians.reset_index(name=output_title)

In [None]:
# Price feed vs. rental correlation over the full retrieved history.
corr1 = build_corr_matrix(feeds=feeds, rentals=rentals)
# Standard deviation of each rental feed over the same period.
rental_feed_by_market = rentals.groupby('rental_name')['rental_price_feed']
std = rental_feed_by_market.std().reset_index(name='std_since_20')

In [None]:
# Rental vs. rental correlation over the full history; the price feeds are
# not read in this mode, so None is passed for them.
rr_corr1 = build_corr_matrix(
    feeds=None,
    rentals=rentals,
    rr=True,
    title='Correlation Matrix Heatmap (Rental vs Rental since `20)',
    output_title='Median Correlation Coefficient (Rental since `20)',
)

In [None]:
# Preview the rental-vs-rental median correlations.
rr_corr1.head()

In [None]:
# Restrict both feeds to observations after the cutoff date.
CUTOFF_DATE = '2023-05-01'
feeds_yr = feeds.loc[feeds['date'] > CUTOFF_DATE]
rentals_yr = rentals.loc[rentals['date'] > CUTOFF_DATE]

# Same statistics as for the full history, computed on the recent window.
std_yr = (
    rentals_yr.groupby('rental_name')['rental_price_feed']
    .std()
    .reset_index(name='std_since_23')
)
corr2 = build_corr_matrix(
    feeds_yr,
    rentals_yr,
    title='Correlation Matrix Heatmap (Rental vs Price Feed since `23)',
    output_title='Median Correlation Coefficient (`23)',
)

In [None]:
# Rental vs. rental correlation on the recent window only.
rr_corr2 = build_corr_matrix(
    feeds=None,
    rentals=rentals_yr,
    rr=True,
    title='Correlation Matrix Heatmap (Rental vs Rental since `23)',
    output_title='Median Correlation Coefficient (Rental since `23)',
)

In [None]:
# corr1 and corr2 label their market column 'index'; join them on it, then
# give the column its real name.
out = corr2.merge(corr1, on='index', how='inner').rename(columns={'index': 'rental_name'})

# Add the remaining per-market statistics, all keyed by rental_name.
for stats in (std, rr_corr1, rr_corr2, std_yr):
    out = out.merge(stats, on='rental_name', how='inner')

# Positive values: the full-history correlation is higher than the recent one.
coef_since_20 = out['Median Correlation Coefficient (`20)']
coef_since_23 = out['Median Correlation Coefficient (`23)']
out['Diff in Median Correlation Coefficients'] = coef_since_20 - coef_since_23
out = out.sort_values('Diff in Median Correlation Coefficients', ascending=True)
out.head()

In [None]:
# Switch to presentation-friendly column names. The difference column is
# created under its final name, so it needs no entry here (the former 'diff'
# key never matched a column and was silently ignored by rename).
out = out.rename(columns={
    'rental_name': 'Rental Market',
    'std_since_20': 'Standard Deviation (Since `20)',
    'std_since_23': 'Standard Deviation (Since `23)',
})

# Display the report columns in reading order.
out[[
    'Rental Market',
    'Median Correlation Coefficient (`20)',
    'Median Correlation Coefficient (`23)',
    'Diff in Median Correlation Coefficients',
    'Median Correlation Coefficient (Rental since `20)',
    'Median Correlation Coefficient (Rental since `23)',
    'Standard Deviation (Since `20)',
    'Standard Deviation (Since `23)',
]]

In [None]:
# Pair every price feed observation with the rental observation of the same
# market and date.
rental_obs = rentals.rename(columns={'rental_parcl_id': 'parcl_id'})
rental_obs = rental_obs[['date', 'parcl_id', 'rental_price_feed']]
df = feeds.merge(rental_obs, on=['date', 'parcl_id'], how='inner')

In [None]:
# Exploratory look at the rental series moved forward by a fixed number of rows.
# `df` stacks several markets, so the shift is applied per market: a plain
# column-wide shift would pull one market's values into another market's rows.
# Rows are shifted in their current order; `df` is not sorted by date here.
SHIFT_ROWS = 361
tmp = df.copy(deep=True)
tmp['rental_price_feed'] = tmp.groupby('parcl_id')['rental_price_feed'].shift(-SHIFT_ROWS)
tmp.head()

In [None]:
# Inspect the end of the shifted frame; a negative shift leaves NaN in the
# trailing rental values.
tmp.tail()

In [None]:
# Order the paired observations chronologically, then slice out two markets.
df = df.sort_values('date')
is_usa = df['name'] == 'United States Of America'
is_boston = df['name'] == 'Boston City'
pf = df[is_usa]
r = df[is_boston]

In [None]:

# Hosted "powered by Parcl Labs" logo assets, keyed by colour variant.
labs_logo_lookup = dict(
    blue='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
    white='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg',
)

# Layout image shared by the charts: the white logo, positioned in paper
# coordinates, horizontally centred, with its bottom edge at y=1.02.
labs_logo_dict = {
    'source': labs_logo_lookup['white'],
    'xref': 'paper',
    'yref': 'paper',
    'x': 0.5,
    'y': 1.02,
    'sizex': 0.15,
    'sizey': 0.15,
    'xanchor': 'center',
    'yanchor': 'bottom',
}

In [None]:
# Normalize time series
def normalize_time_series(time_series):
    """Center a series on zero by subtracting its mean.

    Only the level is removed; the spread of the values is left unchanged.
    """
    return time_series - np.mean(time_series)

# Calculate cross-correlation
def calculate_cross_correlation(ts1, ts2, max_lag):
    """Calculate the Pearson cross-correlation of two series over a range of lags.

    For a lag ``k >= 0`` the value ``ts1[t]`` is paired with ``ts2[t + k]``;
    for ``k < 0`` the value ``ts1[t - k]`` is paired with ``ts2[t]``. A peak at
    a positive lag therefore means ``ts1`` leads ``ts2``.

    Args:
        ts1: First series (any 1-D sequence, including a pandas Series).
        ts2: Second series, same length as ``ts1``.
        max_lag: Largest shift, in samples, tested in both directions.

    Returns:
        Tuple ``(lags, cross_correlation)``: the integer array
        ``-max_lag..max_lag`` and a list with one correlation per lag.
        The correlation is 0.0 wherever it is undefined: fewer than two
        overlapping samples, a constant window, or NaN in the data.

    Raises:
        ValueError: If the two series do not have the same length.
    """
    # Work on plain arrays so slicing is positional whatever the input's index.
    first = np.asarray(ts1, dtype=float)
    second = np.asarray(ts2, dtype=float)
    if first.shape != second.shape:
        raise ValueError(
            f"ts1 and ts2 must have the same length, got {first.shape} and {second.shape}"
        )

    size = len(first)
    lags = np.arange(-max_lag, max_lag + 1)
    cross_correlation = []
    for lag in lags:
        overlap = size - abs(lag)
        if overlap < 2:
            # A correlation needs at least two paired samples.
            cross_correlation.append(0.0)
            continue

        if lag >= 0:
            first_window = first[:overlap]
            second_window = second[lag:]
        else:
            first_window = first[-lag:]
            second_window = second[:overlap]

        # A zero-variance window makes corrcoef divide by zero and return NaN;
        # silence that warning and report the undefined value as 0.0, so a
        # later argmax over the result is not hijacked by NaN.
        with np.errstate(divide='ignore', invalid='ignore'):
            corr = np.corrcoef(first_window, second_window)[0, 1]
        cross_correlation.append(float(corr) if np.isfinite(corr) else 0.0)
    return lags, cross_correlation

In [None]:
def cross_corr_plot(
        lags, cross_corr, max_corr_lag, title: str='Cross-Correlation Plot', max_cross_corr: float=0.5
):
    """Show the cross-correlation curve with the best lag marked.

    Args:
        lags: Lag values for the x-axis.
        cross_corr: Correlation at each lag, for the y-axis.
        max_corr_lag: Lag at which the dashed vertical marker is drawn.
        title: Chart title.
        max_cross_corr: Correlation at `max_corr_lag`; shown, rounded to two
            decimals, in the marker annotation.
    """
    white = '#FFFFFF'
    background = '#1e1e1e'
    axis_line_color = 'rgba(255, 255, 255, 0.7)'

    # The correlation curve itself.
    curve = go.Scatter(
        x=lags,
        y=cross_corr,
        mode='lines+markers',
        marker=dict(size=6, color=white),
        line=dict(width=3, color=white),
        name='Cross-Correlation',
    )
    fig = go.Figure()
    fig.add_trace(curve)

    # Dashed marker at the lag with the highest correlation.
    peak_label = f'Max Correlation Lag: {max_corr_lag} ({round(max_cross_corr, 2)})'
    fig.add_vline(
        x=max_corr_lag,
        line=dict(color='red', dash='dash'),
        annotation_text=peak_label,
        annotation_position='top right',
    )

    # Dark theme layout; the logo image comes from the notebook-level constant.
    layout = {
        'height': 800,
        'width': 1600,
        'title': {
            'text': title,
            'y': 0.97,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=28, color=white),
        },
        'title_font': dict(size=28, color=white),
        'plot_bgcolor': background,
        'paper_bgcolor': background,
        'font': dict(color=white),
        'xaxis': dict(
            title='Lag (Days)',
            showgrid=False,
            tickfont=dict(size=14),
            linecolor=axis_line_color,
            linewidth=1,
        ),
        'yaxis': dict(
            title='Cross-Correlation',
            showgrid=True,
            gridwidth=0.5,
            gridcolor='rgba(255, 255, 255, 0.2)',
            tickfont=dict(size=14),
            zeroline=False,
            linecolor=axis_line_color,
            linewidth=1,
        ),
        'hovermode': 'x unified',
        'hoverlabel': dict(
            bgcolor='#1F1F1F',
            font_size=14,
            font_family="Rockwell",
        ),
        'legend': dict(
            x=0.95,
            y=0.01,
            xanchor='right',
            yanchor='bottom',
            font=dict(size=14, color=white),
            bgcolor='rgba(0, 0, 0, 0.5)',
        ),
        'images': [labs_logo_dict],
    }
    fig.update_layout(**layout)

    fig.show()

In [None]:

def build_dual_axis_chart(
        market_name: str, 
        data: pd.DataFrame,
        price_series: str = 'price_feed',
        rental_series: str = 'rental_price_feed',
        lag_value: int = None
    ):
    """Plot a price and a rental series on two y-axes, save the chart and show it.

    Args:
        market_name: Label used in the chart title and in the output file name.
        data: Frame with a `date` column plus the `price_series` and
            `rental_series` columns.
        price_series: Column drawn on the left axis. Must be one of the keys of
            the label tables below ('price_feed' or 'rental_price_feed').
        rental_series: Column drawn on the right axis; same constraint.
        lag_value: Shift the caller already applied to the rental column.
            It is only used for labelling (legend, title, file name); this
            function does not shift any data. None means no shift.

    Side effects:
        Writes a PNG to ../../graphics/<price_series>/<YYYY-MM-DD>/ (relative
        to the working directory), creating the directory when needed, and
        displays the figure. A lagged chart gets a `_lag_<lag_value>` suffix so
        it does not overwrite the unlagged chart written on the same day.

    Raises:
        KeyError: If `price_series` or `rental_series` is not a known label key.
    """
    # Axis titles per series.
    series_format = {
        'price_feed': 'Price per Square Foot ($)',
        'rental_price_feed': 'Rental Price per Square Foot ($)',
    }

    # Legend / title names per series.
    titles_format = {
        'price_feed': "Sales",
        'rental_price_feed': 'Rentals'
    }

    if lag_value is not None:
        titles_format['rental_price_feed'] = f'Rentals ({lag_value} Day Shift)'

    # 16:9 canvas, used for both the on-screen figure and the exported PNG.
    HEIGHT = 1080
    WIDTH = 1920

    fig = go.Figure()

    # Add primary y-axis trace for price series
    fig.add_trace(go.Scatter(
        x=data['date'],
        y=data[price_series],
        mode='lines+markers',
        line=dict(width=3, color='#FFFFFF'),  # White color for price series
        marker=dict(size=6, color='#FFFFFF', symbol='circle'),
        name=titles_format[price_series],
    ))

    # Add secondary y-axis trace for rental price series
    fig.add_trace(go.Scatter(
        x=data['date'],
        y=data[rental_series],
        mode='lines+markers',
        line=dict(width=3, color='#FF4500'),  # Orange-red color for rental series
        marker=dict(size=6, color='#FF4500', symbol='square'),
        name=titles_format[rental_series],
        yaxis='y2'
    ))

    fig.add_layout_image(labs_logo_dict)

    fig.update_layout(
        margin=dict(l=100, r=100, t=150, b=100),
        height=HEIGHT,
        width=WIDTH,
        title={
            'text': f'{market_name}: {titles_format[price_series]} vs. {titles_format[rental_series]}',
            'y': 0.94,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=28, color='#FFFFFF'),
        },
        plot_bgcolor='#1e1e1e',
        paper_bgcolor='#1e1e1e',
        font=dict(color='#FFFFFF'),
        xaxis=dict(
            title_text='',
            showgrid=False,
            tickangle=-45,
            tickfont=dict(size=14),
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        yaxis=dict(
            title_text=series_format[price_series],
            showgrid=True,
            gridwidth=0.5,
            gridcolor='rgba(255, 255, 255, 0.2)',
            tickfont=dict(size=14),
            tickprefix='$',
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        yaxis2=dict(
            title_text=series_format[rental_series],
            showgrid=False,
            tickfont=dict(size=14),
            tickprefix='$',
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1,
            overlaying='y',
            side='right',
            tickformat=".2f"  # Two decimals on the rental axis ticks
        ),
        hovermode='x unified',
        hoverlabel=dict(
            bgcolor='#1F1F1F',
            font_size=14,
            font_family="Rockwell"
        ),
        legend=dict(
            x=0.99,  # Position legend in the bottom right corner
            y=0.01,
            xanchor='right',
            yanchor='bottom',
            font=dict(size=14, color='#FFFFFF'),
            bgcolor='rgba(0, 0, 0, 0.5)'
        )
    )

    # One output folder per series and per day.
    root = f'../../graphics/{price_series}'
    timestamp = datetime.now().strftime('%Y-%m-%d')
    path = os.path.join(root, timestamp)
    os.makedirs(path, exist_ok=True)

    # Keep lagged and unlagged charts of the same market in separate files.
    file_stem = f'{market_name}_{price_series}'
    if lag_value is not None:
        file_stem = f'{file_stem}_lag_{lag_value}'

    fig.write_image(os.path.join(path, f'{file_stem}.png'), width=WIDTH, height=HEIGHT)
    fig.show()


In [None]:
# Lead/lag analysis between the sales and rental feeds of a single market.
name = 'United States Of America'
title_name = 'USA'
s = df.loc[df['name'] == name, ['date', 'price_feed', 'rental_price_feed']].sort_values('date')
ts_1 = s['price_feed']
ts_2 = s['rental_price_feed']

# Remove each series' mean before correlating.
time_series_1 = normalize_time_series(ts_1)
time_series_2 = normalize_time_series(ts_2)

# Largest shift, in rows, tested in either direction.
max_lag = 720

# Calculate cross-correlation
lags, cross_corr = calculate_cross_correlation(time_series_1, time_series_2, max_lag)

# Find the lag with the maximum correlation. The NaN-aware variants are used
# because a plain argmax/max would return the first NaN if any lag produced one.
max_corr_lag = lags[np.nanargmax(cross_corr)]
max_corr = np.nanmax(cross_corr)

# Interpret the result. A positive lag pairs sales with later rentals; a lag
# of 0 needs its own wording, otherwise `title` would be undefined below.
if max_corr_lag > 0:
    relation = f"Sales lead Rentals by {max_corr_lag} days."
elif max_corr_lag < 0:
    relation = f"Rentals lead Sales by {-max_corr_lag} days."
else:
    relation = "Sales and Rentals move together (no lag)."
title = f"{title_name} ({round(max_corr, 2)} Cross-Correlation): {relation}"

cross_corr_plot(lags, cross_corr, max_corr_lag, title=title, max_cross_corr=max_corr)

In [None]:
# Chart the two series as observed; lag_value keeps its default of None.
build_dual_axis_chart(market_name=title_name, data=s)

In [None]:
# Re-align the rental series by the best lag found above, drop the rows left
# without a value, and chart the aligned series.
rental_shift = -max_corr_lag
aligned_rentals = s['rental_price_feed'].shift(rental_shift)
tmp = s.assign(rental_price_feed=aligned_rentals).dropna()

build_dual_axis_chart(title_name, tmp, lag_value=rental_shift)