In [None]:
%pip install --upgrade parcllabs==1.10.0 nbformat

In [2]:
from typing import Union
import os
import pandas as pd
import numpy as np
import plotly.express as px
from datetime import datetime
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from parcllabs import ParclLabsClient
from parcllabs.beta.charting.styling import SIZE_CONFIG
from parcllabs.beta.ts_stats import TimeSeriesAnalysis
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from parcllabs.beta.charting.utils import create_labs_logo_dict
from parcllabs.beta.charting.utils import (
    create_labs_logo_dict,
    save_figure,
    )
from parcllabs.beta.charting.styling import default_style_config as style_config


# Instantiate the Parcl Labs API client used by every query in this notebook.
# The key is read from the PARCL_LABS_API_KEY environment variable; the
# placeholder string is only passed along when that variable is not set.
client = ParclLabsClient(
    api_key=os.getenv(
        'PARCL_LABS_API_KEY',
        "<your Parcl Labs API key if not set as environment variable>",
    ),
    turbo_mode=True,  # turbo mode switched on
    limit=1000,
)

In [3]:
# parcl_id of the USA market; it is appended to the metro parcl_ids when the price feed is queried
usa_market_id = 5826765
# Base folder for every CSV/PNG written by this notebook (paths below are built with os.path.join on it)
ROOT_DIR = "../../../outputs/price_analysis_2024" # change this to the directory where you want to save the outputs

In [4]:
# Search the CBSA (Core Based Statistical Area) markets, sorted by the
# PRICEFEED_MARKET field in descending order, returning at most 100 rows.
metros = client.search.markets.retrieve(
    location_type='CBSA',
    sort_by='PRICEFEED_MARKET',
    sort_order='DESC',
    limit=100,
)

In [5]:
# Keep the metros whose pricefeed_market flag equals 1, then list their distinct parcl_ids
has_pricefeed = metros['pricefeed_market'] == 1
metros_pricefeed = metros.loc[has_pricefeed].copy(deep=True)
metros_pricefeed_id_list = metros_pricefeed['parcl_id'].drop_duplicates().tolist()

In [6]:
# Build a parcl_id -> display-name lookup for the price-feed metros.
# Two columns are derived from the raw 'name' string:
#   state : the text after the last comma, stripped and upper-cased, cut at its first '-'
#   Metro : the text before the first '-' (and before the first ','), stripped,
#           followed by ", " and the state computed above
# Illustration of the string logic only: 'a-b, xx-yy' gives state 'XX' and Metro 'a, XX'.
# NOTE(review): assumes every 'name' is a string; a missing value would make .split fail.
market_name = (metros_pricefeed
               .copy(deep=True)
               .loc[:,['parcl_id','name']]
               # state: last comma-separated piece, normalised, first '-'-separated token
               .assign(state=lambda df: df['name'].apply(lambda x: x.split(',')[-1].strip().upper().split('-')[0]))
               # Metro: row-wise (axis=1) because it needs both 'name' and the new 'state' column
               .assign(Metro=lambda df: df.apply(
            lambda x: f"{x['name'].split('-')[0].split(',')[0].strip()}, {x['state']}", axis=1
               ))
)

In [None]:
# Date window requested from the price feed endpoint
START_DATE = '2020-03-01'
END_DATE = '2024-12-02'

# Sales price feed for every price-feed metro plus the USA market.
# These are daily series, hence the page limit of 1000 and auto pagination.
# WARNING: ~6k credits can be used in one parcl price feed. Move START_DATE to
# a more recent date to reduce the number of credits used.
sales_price_feeds = client.price_feed.price_feed.retrieve(
    parcl_ids=[*metros_pricefeed_id_list, usa_market_id],
    start_date=START_DATE,
    end_date=END_DATE,
    auto_paginate=True,
    limit=1000,
)

In [8]:
# For every market, run the time series analysis twice: once measuring change
# since 3/1/2020 and once since 1/1/2024 (year to date). Each result is
# flattened to a one-row frame tagged with the market's parcl_id.
all_rows, year_to_date_rows = [], []
for parcl_id in sales_price_feeds['parcl_id'].unique().tolist():
    market_prices = sales_price_feeds.loc[sales_price_feeds['parcl_id'] == parcl_id].copy(deep=True)
    analysis = TimeSeriesAnalysis(market_prices, 'date', 'price_feed', freq='M')
    for since_date, collector in (('3/1/2020', all_rows), ('1/1/2024', year_to_date_rows)):
        stats_row = pd.json_normalize(analysis.calculate_changes(change_since_date=since_date))
        stats_row['parcl_id'] = parcl_id
        collector.append(stats_row)

In [9]:
# Stack the per-market rows computed against 3/1/2020 and keep only the
# percent-change measures shown in the table, under their display names.
change_column_labels = {
    '1_month.percent_change': 'MoM',
    '12_month.percent_change': 'YoY',
    'change_since_date.percent_change': 'Since March 2020',
    'peak_to_current.percent_change': 'Appreciation_since_peak',
}
data_for_table_change = pd.concat(all_rows)
data_for_table_change = (
    data_for_table_change[[*change_column_labels, 'parcl_id']]
    .rename(columns=change_column_labels)
)


In [10]:
# Repeat with the year-to-date rows: the change measured since 1/1/2024 becomes the YTD column
year_to_date_data = pd.concat(year_to_date_rows)
year_to_date_data = (year_to_date_data.copy(deep=True)
                     .loc[:, ['change_since_date.percent_change', 'parcl_id']]
                     .rename(columns={'change_since_date.percent_change': 'YTD'})
                     )

# Combine both sets of statistics with the metro display names.
# The name lookup is a left join: a parcl_id without a name (the USA market is
# not part of market_name) ends up with a missing Metro, which is labelled 'USA'.
final_table_data = (
    data_for_table_change.copy(deep=True)
    .merge(year_to_date_data.copy(deep=True), how='inner', on='parcl_id')
    .merge(market_name.copy(deep=True), how='left', on='parcl_id')
    .loc[:, ['parcl_id', 'Metro', 'MoM', 'YoY', 'Appreciation_since_peak', 'Since March 2020', 'YTD']]
    .assign(
        Metro=lambda x: np.where(
            x['Metro'].isna(), 'USA',
            # shorten the one metro whose label contains a '/'
            np.where(x['Metro'] == 'Louisville/Jefferson County, KY', 'Louisville, KY', x['Metro'])
        )
    )
    .sort_values(by='YTD', ascending=False)
)

# to_csv does not create missing folders, so make sure ROOT_DIR exists first
os.makedirs(ROOT_DIR, exist_ok=True)
# save the table to a csv
final_table_data.to_csv(os.path.join(ROOT_DIR, 'final_table_data_pf.csv'), index=False)


In [11]:
# Order the table from the highest to the lowest YTD change
final_table_data = final_table_data.sort_values('YTD', ascending=False)

In [None]:
# Colour scales used to shade the numeric cells of the summary table
def get_cell_color(val, column_type='default'):
    """Return the background colour (hex string) for one table cell.

    Parameters
    ----------
    val : scalar
        Numeric cell value, compared against fractional thresholds
        (for example ``0.05``). Missing values map to ``'white'``.
    column_type : str, default 'default'
        Which colour scale to use: ``'since_march'``, ``'appreciation'`` or
        one of ``'MoM'``, ``'YTD'``, ``'YoY'``. Anything else gives ``'white'``.

    Returns
    -------
    str
        A hex colour code, or ``'white'`` when no scale applies.
    """
    if pd.isna(val):
        return 'white'

    # Shared red -> neutral -> blue palette
    soft_red, light_red, neutral = '#c72a2c', '#F5B7B1', '#FDF2E9'
    light_blue, soft_blue = '#AED6F1', '#85C1E9'

    # Every scale is an ordered sequence of (exclusive upper bound, colour);
    # a bound of None marks the catch-all bucket at the top of the scale.
    if column_type == 'since_march':
        scale = (
            (0.3, '#D6EAF8'),
            (0.45, '#AED6F1'),
            (0.55, '#5DADE2'),
            (0.6, '#3498DB'),
            (None, '#0852c9'),
        )
    elif column_type in ('MoM', 'YTD', 'YoY'):
        scale = (
            (-0.02, soft_red),
            (-0.01, light_red),
            (0.01, neutral),
            (0.05, light_blue),
            (None, soft_blue),
        )
    elif column_type == 'appreciation':
        # everything from -0.10 up to (but excluding) zero is neutral
        scale = (
            (-0.15, soft_red),
            (-0.10, light_red),
            (0.0, neutral),
            (None, soft_blue),
        )
    else:
        return 'white'

    for upper_bound, shade in scale:
        if upper_bound is None or val < upper_bound:
            return shade
    return 'white'

# Font used by every text element drawn below
plt.rcParams.update({'font.family': 'DejaVu Sans', 'font.size': 12})

# Table geometry: one row per entry of final_table_data, six columns
nrows = final_table_data.shape[0]
ncols = 6  # number of columns

# Canvas whose height grows with the number of table rows
fig, ax = plt.subplots(figsize=(14, len(final_table_data) * 0.5))
ax.set_xlim(0, ncols + 0.5)
ax.set_ylim(0, nrows + 1)  # one extra unit above the rows, where the headers are drawn

# x anchor of each column's text and the width of each column's background box
positions = [0.25, 1.75, 2.75, 3.75, 4.75, 5.75]
column_widths = [1.3, 1.0, 1.0, 1.0, 1.0, 1.0]

# Draw the table body: a plain label for the Metro column and, for every
# numeric column, a shaded rectangle with the value printed as a percentage.
columns = ['Metro', 'MoM', 'YTD', 'YoY', 'Appreciation_since_peak', 'Since March 2020']

# table column -> colour scale understood by get_cell_color
color_scale_by_column = {
    'MoM': 'MoM',
    'YTD': 'YTD',
    'YoY': 'YoY',
    'Appreciation_since_peak': 'appreciation',
    'Since March 2020': 'since_march',
}
# backgrounds on which the text is switched to white
dark_backgrounds = ['#3498DB', '#21618C', '#F1948A', '#4335A7', '#FF4C4C', '#0852c9', '#c72a2c']

for i in range(nrows):
    for j, column in enumerate(columns):
        value = final_table_data[column].iloc[i]

        if j == 0:
            # Metro column: left-aligned name, no background box
            text_label = f"{value}"
            text_color, ha, fontsize = 'black', 'left', 14
        else:
            # Numeric column: shaded box first, then the text on top of it
            color = get_cell_color(value, color_scale_by_column.get(column, 'default'))
            ax.add_patch(
                patches.Rectangle(
                    (positions[j] - 0.45, i),
                    column_widths[j],
                    1.0,
                    facecolor=color,
                    edgecolor='none',
                    alpha=0.75,
                    zorder=1,
                )
            )
            text_label = f"{value:.1%}"
            text_color = 'white' if color in dark_backgrounds else 'black'
            ha, fontsize = 'center', 12

        ax.annotate(
            text=text_label,
            xy=(positions[j], i + .5),
            ha=ha,
            va='center',
            color=text_color,
            weight='medium',
            fontsize=fontsize,
            zorder=2,
        )

# Column headers, written just above the top row (first one left-aligned like the Metro names)
column_names = ['Metro\nMarket', 'MoM', 'YTD', 'YoY',
                'Change\nfrom\npeak', 'Since\nMarch\n2020']
for index, col_name in enumerate(column_names):
    ax.annotate(
        text=col_name,
        xy=(positions[index], nrows + .25),
        ha='center' if index else 'left',
        va='bottom',
        weight='bold',
        fontsize=14,
        linespacing=1.1,
    )

# Horizontal rules span the full x range of the axes
x_left, x_right = ax.get_xlim()

# Solid rules above the first row and below the last one
for y_level in (nrows, 0):
    ax.plot([x_left, x_right], [y_level, y_level],
            lw=1.5, color='black', marker='', zorder=4)

# Dotted separators between consecutive rows
for y_level in range(1, nrows):
    ax.plot([x_left, x_right], [y_level, y_level],
            lw=0.5, color='#404040', ls=':', zorder=3, marker='')

# Hide the axes frame and ticks; the table is made only of patches, lines and text
ax.set_axis_off()

# Title above the header row
plt.title(
    "Price Evolution in America's Metro Areas (Pricefeed Available)",
    pad=20,
    y=1.02,
    weight='bold',
    fontsize=16,
)

# Logo: read the PNG and wrap it in a small, slightly transparent image box
path_logo = '../../../outputs/powered-by-parcllabs-api-logo-blue.png'
logo = plt.imread(path_logo)
imagebox = OffsetImage(logo, zoom=0.05, alpha=0.9)

# Anchor the box by its lower-right corner at (0.98, -0.02) in axes-fraction coordinates
logo_position = AnnotationBbox(
    imagebox,
    (0.98, -0.02),
    xycoords='axes fraction',
    box_alignment=(1, 0),
    frameon=False,
)
ax.add_artist(logo_position)

# Adjust layout
plt.tight_layout()

# Write the table to disk on a white, opaque background, then display it
table_png_path = os.path.join(ROOT_DIR, 'table_evolution_prices_updated.png')
plt.savefig(
    table_png_path,
    dpi=250,
    bbox_inches='tight',
    facecolor='white',
    transparent=False,
)

plt.show()


In [11]:
# Attach the metro display names to the daily price feed rows (left join on parcl_id)
feeds_with_names = sales_price_feeds.copy(deep=True).merge(
    market_name.copy(deep=True), how='left', on='parcl_id'
)

# Shorten the one label that contains a '/', then label rows without a metro name as 'USA'
metro_labels = np.where(
    feeds_with_names['Metro'] == 'Louisville/Jefferson County, KY',
    'Louisville, KY',
    feeds_with_names['Metro'],
)
metro_labels = np.where(feeds_with_names['Metro'].isna(), 'USA', metro_labels)

sales_price_feeds_wmane = feeds_with_names.assign(Metro=metro_labels)


In [None]:
# One line chart per market, saved as a PNG under ROOT_DIR/charts_pricefeed.

# Chart size in pixels. Set once, before the loop, so the values exist even if
# the loop body never runs; the USA comparison chart later in the notebook
# reads the same two names.
CHART_WIDTH = 1600
CHART_HEIGHT = 800

# Make sure the output folder exists before the first figure is written
os.makedirs(os.path.join(ROOT_DIR, 'charts_pricefeed'), exist_ok=True)

# create a list and iterate
list_parcls = sales_price_feeds_wmane['parcl_id'].unique()
for parcl in list_parcls:
    # Filter the market once and keep only the columns the chart needs
    chart_data = (
        sales_price_feeds_wmane
        .loc[sales_price_feeds_wmane['parcl_id'] == parcl, ['date', 'price_feed', 'Metro']]
        .copy(deep=True)
    )
    name = chart_data['Metro'].values[0]
    # Latest date in the series (not referenced again in this cell)
    max_date_for_chart = chart_data['date'].max().date()

    # Create the line chart using Plotly Express.
    # The labels key must match the plotted column name ('price_feed') to take effect.
    fig = px.line(
        chart_data,
        x='date',
        y='price_feed',
        color='Metro',
        labels={'price_feed': f'Parcl Labs Price Feed (PLPF) for {name}'},
        title=f'Parcl Labs Price Feed for {name}'
    )

    # Update traces to apply uniform styles and hide legend for individual traces
    for trace in fig.data:
        trace.update(
            line=dict(
                color='lightblue',  # same colour for every trace
                width=2
            ),
            opacity=0.8,
            mode='lines',  # lines only, no markers or text
            showlegend=False  # Disable legend for traces
        )
    fig.add_layout_image(
        create_labs_logo_dict()
    )

    # Update layout.
    # Axis title fonts are set through title=dict(text=..., font=...): the older
    # `titlefont` shortcut is deprecated and rejected by recent plotly releases.
    fig.update_layout(
        width=CHART_WIDTH,
        height=CHART_HEIGHT,
        xaxis=dict(
            title=dict(text='', font=style_config['title_font_axis']),
            showgrid=style_config['showgrid'],
            gridwidth=style_config['gridwidth'],
            gridcolor=style_config['grid_color'],
            linecolor=style_config['line_color_axis'],
            linewidth=style_config['linewidth'],
        ),
        yaxis=dict(
            title=dict(
                text='Price Per Square Foot (USD)',
                font=style_config['title_font_axis'],
            ),
            showgrid=style_config['showgrid'],
            gridwidth=style_config['gridwidth'],
            gridcolor=style_config['grid_color'],
            tickfont=style_config['axis_font'],
            zeroline=False,
            tickprefix='$',  # Prefix values with a dollar sign
            tickformat=',.0f',  # Use thousands separator
            linecolor=style_config['line_color_axis'],
            linewidth=style_config['linewidth'],
        ),
        plot_bgcolor=style_config['background_color'],
        paper_bgcolor=style_config['background_color'],
        font=dict(color=style_config['font_color']),
        showlegend=True,  # layout-level switch; each trace above opts out individually
        legend=dict(
            orientation='v',  # Vertical legend
            x=0.8,  # legend anchor at 80% of the plot width...
            y=0.8,  # ...and 80% of the plot height
            xanchor='left',
            yanchor='bottom',
            font=dict(size=12),
        ),
        margin=dict(l=40, r=40, t=80, b=40),
        title={
            'y': 0.98,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24)
        },
        legend_title=None  # Remove legend title
    )

    save_figure(fig, os.path.join(ROOT_DIR, f'charts_pricefeed/{name}_price_feed_line_chart.png'),
                width=CHART_WIDTH, height=CHART_HEIGHT)


In [None]:
# Monthly view of the USA price feed: median price per calendar month, its
# month-over-month change, and (for 2023 onwards) how many months of each
# year showed a decline.
usa_daily = sales_price_feeds_wmane.loc[sales_price_feeds_wmane['Metro'] == 'USA']

# Median daily price within each monthly period
monthly_median = (
    usa_daily['price_feed']
    .groupby(usa_daily['date'].dt.to_period('M').rename('month'))
    .median()
    .reset_index()
)
# The change is computed on the full history so the first kept month has a predecessor
monthly_median['price_feed_pct_change'] = monthly_median['price_feed'].pct_change()
monthly_median['year'] = monthly_median['month'].dt.year

# Keep the months after 2022 and flag the ones with a negative change
usa_data = monthly_median.loc[monthly_median['year'] > 2022].copy()
usa_data['is_negative'] = usa_data['price_feed_pct_change'] < 0
usa_data['negative_months_in_year'] = usa_data.groupby('year')['is_negative'].transform('sum')


In [26]:
# Daily USA price feed from 2023 onwards, expressed as percent change from
# the first observation of each calendar year.
usa_rows = sales_price_feeds_wmane.loc[sales_price_feeds_wmane['Metro'] == 'USA'].copy(deep=True)
usa_rows['year'] = usa_rows['date'].dt.year

usa_data_2023_2024 = usa_rows.loc[usa_rows['year'] > 2022].sort_values(by='date', ascending=True)

# Baseline: earliest available price of each year (January 1st only when that day is in the data)
usa_data_2023_2024['jan_1_value'] = (
    usa_data_2023_2024.groupby('year')['price_feed'].transform('first')
)
# Percentage change relative to that baseline
usa_data_2023_2024['pct_change_from_jan'] = (
    usa_data_2023_2024['price_feed'] / usa_data_2023_2024['jan_1_value'] - 1
) * 100


In [None]:
def _yearly_peak(frame, year):
    """Return (date, price) of the highest price_feed value recorded in `year`.

    When several rows share the maximum, the first one in frame order is used.
    """
    in_year = frame.loc[frame['year'] == year]
    top_price = in_year['price_feed'].max()
    first_top_date = in_year.loc[in_year['price_feed'] == top_price, 'date'].iloc[0]
    return first_top_date, top_price


# Peak of the USA price feed in each of the two years
max_date_2023, max_price_2023 = _yearly_peak(usa_data_2023_2024, 2023)
max_date_2024, max_price_2024 = _yearly_peak(usa_data_2023_2024, 2024)

print(f"Date of maximum price in 2023: {max_date_2023.strftime('%Y-%m-%d')} at ${max_price_2023:.2f}")
print(f"Date of maximum price in 2024: {max_date_2024.strftime('%Y-%m-%d')} at ${max_price_2024:.2f}")

In [None]:
# Data for the 2023 vs 2024 comparison: daily USA price feed from 2023
# onwards as percent change from the first observation of each year, plus a
# month-day key so both years can share one x axis.
is_usa = sales_price_feeds_wmane['Metro'] == 'USA'
usa_rows = sales_price_feeds_wmane.loc[is_usa].copy(deep=True)
usa_rows['year'] = usa_rows['date'].dt.year

usa_data_2023_2024 = usa_rows.loc[usa_rows['year'] > 2022].sort_values(by='date', ascending=True)

# Baseline: earliest available price of each year (January 1st only when that day is in the data)
first_of_year = usa_data_2023_2024.groupby('year')['price_feed'].transform('first')

usa_data_2023_2024 = (
    usa_data_2023_2024
    .assign(
        jan_1_value=first_of_year,
        pct_change_from_jan=(usa_data_2023_2024['price_feed'] / first_of_year - 1) * 100,
        # 'MM-DD' string, identical for the same calendar day of either year
        plot_date=usa_data_2023_2024['date'].dt.strftime('%m-%d'),
    )
    .sort_values(['year', 'plot_date'])
)
# Tick labels for the x axis: the 'MM-01' key of each month maps to its abbreviation
month_labels = {
    f'{month_number:02d}-01': abbreviation
    for month_number, abbreviation in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
        start=1,
    )
}

# One line per year, plotted against the shared month-day key
fig = px.line(
    usa_data_2023_2024,
    x='plot_date',
    y='pct_change_from_jan',
    color='year',
    title='USA Price Feed Changes YTD: 2023 vs 2024',
    labels={'pct_change_from_jan': 'Change since January 1st (%)', 'plot_date': 'Date'},
)

# Same line width, opacity and draw mode for every trace
fig.update_traces(line=dict(width=2), opacity=0.8, mode='lines')

# Sorted month-day keys present in the data, and the subset that falls on the 1st of a month
all_dates = sorted(usa_data_2023_2024['plot_date'].unique())
month_ticks = [plot_date for plot_date in all_dates if plot_date[-3:] == '-01']

# Update layout.
# The x axis is categorical: every month-day key is a category, kept in sorted
# order, and only the keys found in month_labels (first day of each month)
# receive a visible tick label.
# Axis title fonts are set through title=dict(text=..., font=...): the older
# `titlefont` shortcut is deprecated and rejected by recent plotly releases.
fig.update_layout(
    width=CHART_WIDTH,
    height=CHART_HEIGHT,
    xaxis=dict(
        title=dict(text='', font=style_config['title_font_axis']),
        showgrid=False,
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
        tickangle=0,
        type='category',
        categoryorder='array',
        categoryarray=all_dates,
        tickmode='array',
        ticktext=[month_labels.get(date, '') for date in all_dates],
        tickvals=all_dates
    ),
    yaxis=dict(
        title=dict(
            text='Change since January 1st (%)',
            font=style_config['title_font_axis'],
        ),
        showgrid=style_config['showgrid'],
        gridwidth=style_config['gridwidth'],
        gridcolor=style_config['grid_color'],
        griddash='dot',
        tickfont=style_config['axis_font'],
        zeroline=True,
        tickprefix='',
        ticksuffix='%',
        tickformat='.1f',
        linecolor=style_config['line_color_axis'],
        linewidth=style_config['linewidth'],
    ),
    plot_bgcolor=style_config['background_color'],
    paper_bgcolor=style_config['background_color'],
    font=dict(color=style_config['font_color']),
    showlegend=True,
    legend=dict(
        orientation='h',  # horizontal legend
        x=0.95,           # near the right edge
        y=0.997,          # near the top edge
        xanchor='right',  # Align right edge of legend
        yanchor='top',    # Align top edge of legend
        font=dict(size=12),
    ),
    margin=dict(l=40, r=40, t=80, b=40),
    title={
        'y': 0.98,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(size=24)
    },
    legend_title=None
)

# Dotted vertical guide at the first day of every month. On the categorical
# x axis a line is placed by the position of its key within all_dates.
position_of = {}
for position, plot_date in enumerate(all_dates):
    position_of.setdefault(plot_date, position)

for month_tick in month_ticks:
    fig.add_vline(
        x=position_of[month_tick],
        line_dash="dot",
        line_color=style_config['grid_color'],
        line_width=style_config['gridwidth'],
    )

# Save the comparison chart next to the per-market charts, then display it
comparison_png_path = os.path.join(
    ROOT_DIR, 'charts_pricefeed/usa_price_feed_comparison_2023_2024_pct_change.png'
)
save_figure(fig, comparison_png_path, width=CHART_WIDTH, height=CHART_HEIGHT)
fig.show()