# Welcome to the Lab 🥼🧪
## How do I retrieve and download all-cash trends?

**Note:** This notebook works with any of the 70k+ markets available in the Parcl Labs API.

As a reminder, you can get your Parcl Labs API key [here](https://dashboard.parcllabs.com/signup) to follow along.

To run this immediately, you can use Google Colab. Remember, you must set your `PARCL_LABS_API_KEY` as a secret. See this [guide](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) for more information.

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ParclLabs/parcllabs-examples/blob/main/python/inspiration/all_cash_analysis.ipynb)

In [1]:
import os
import sys
import json
import subprocess
from datetime import datetime
from urllib.request import urlopen

# Colab setup (used when the notebook is opened via the "Open in Colab" badge above).
# Colab is detected by checking whether its module is already loaded in this kernel.
if "google.colab" in sys.modules:
    from google.colab import userdata
    # Install the client and charting dependencies into the running kernel.
    %pip install parcllabs plotly kaleido
    # Read the API key from the Colab secret named PARCL_LABS_API_KEY.
    api_key = userdata.get('PARCL_LABS_API_KEY')
else:
    # Outside Colab, read the key from the environment; os.getenv returns None when it is unset.
    api_key = os.getenv('PARCL_LABS_API_KEY')

In [2]:
import parcllabs
import pandas as pd
import plotly.express as px
from parcllabs import ParclLabsClient

# Print the installed parcllabs client version so results can be tied to a release.
print(f"Parcl Labs Version: {parcllabs.__version__}")

Parcl Labs Version: 0.2.1


In [3]:
# Initialize the Parcl Labs client with the API key resolved in the setup cell
client = ParclLabsClient(api_key)

In [4]:
import pandas as pd
from datetime import datetime, timedelta

import plotly.express as px
import plotly.graph_objects as go

# Hosted "Powered by Parcl Labs API" logo variants, keyed by color scheme
labs_logo_lookup = dict(
    blue='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
    white='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg',
)

# Charting constant: layout-image settings for the white logo, positioned in
# paper coordinates so it sits centered just above the plot area.
labs_logo_dict = {
    'source': labs_logo_lookup['white'],
    'xref': "paper",
    'yref': "paper",
    'x': 0.5,        # horizontally centered
    'y': 1.04,       # slightly above the top edge of the plot area
    'sizex': 0.15,
    'sizey': 0.15,
    'xanchor': "center",
    'yanchor': "bottom",
}

def build_chart(name, data):
    """Render, save, and show a dark-themed price feed line chart for one market.

    The series is drawn white where ``price_feed`` is at or above the series
    median and blue where it is below; a dotted horizontal line marks the median.

    Args:
        name: Market display name; used in the chart title and the output file name.
        data: DataFrame with ``date`` and ``price_feed`` columns, already in the
            order the points should be plotted.

    Side effects:
        Writes ``../graphics/pricefeeds/{name}_price_feed.png`` (creating the
        directory if needed) and displays the figure with ``fig.show()``.
    """
    HEIGHT = 900
    WIDTH = 1600
    ABOVE_MEDIAN_COLOR = '#FFFFFF'
    BELOW_MEDIAN_COLOR = '#57A3FF'
    OUTPUT_DIR = '../graphics/pricefeeds'

    # Calculate median price_feed
    median_price_feed = data['price_feed'].median()

    fig = go.Figure()

    # Classify every row once, then emit one trace per consecutive run of rows
    # on the same side of the median. Slicing is positional, so it does not
    # depend on the frame's index labels being unique. Rows with a missing
    # price compare as "below" and render as gaps in the line.
    is_above = (data['price_feed'] >= median_price_feed).to_numpy()
    n_rows = len(data)
    run_start = 0
    for run_stop in range(1, n_rows + 1):
        if run_stop < n_rows and is_above[run_stop] == is_above[run_start]:
            continue  # still inside the current run
        segment_df = data.iloc[run_start:run_stop]
        color = ABOVE_MEDIAN_COLOR if is_above[run_start] else BELOW_MEDIAN_COLOR
        fig.add_trace(go.Scatter(
            x=segment_df['date'],
            y=segment_df['price_feed'],
            mode='lines',
            line=dict(width=2, color=color),
            showlegend=False
        ))
        run_start = run_stop

    # Add horizontal line for median price feed value
    fig.add_shape(
        type="line",
        x0=data['date'].min(),
        y0=median_price_feed,
        x1=data['date'].max(),
        y1=median_price_feed,
        line=dict(
            color="#FFFFFF",
            width=1,
            dash="dot",  # Small dots for the median line
        ),
        opacity=1  # Fully opaque
    )

    fig.add_layout_image(labs_logo_dict)

    fig.update_layout(
        margin=dict(l=0, r=0, t=110, b=0),
        height=HEIGHT,
        width=WIDTH,
        title={
            'text': f'Price Feed (Price per Square Foot $): {name}',
            'y': 0.99,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=28, color='#FFFFFF'),
        },
        plot_bgcolor='#000000',  # Dark background for better contrast
        paper_bgcolor='#000000',  # Dark background for the paper
        font=dict(color='#FFFFFF'),
        xaxis=dict(
            title_text='',
            showgrid=False,  # Disable vertical grid lines
            tickangle=-45,
            tickfont=dict(size=14),
            linecolor='rgba(255, 255, 255, 0.7)',  # Axis line color with opacity
            linewidth=1  # Axis line width
        ),
        yaxis=dict(
            title_text='Price per Square Foot ($)',
            showgrid=True,
            gridwidth=0.5,  # Horizontal grid line width
            gridcolor='rgba(255, 255, 255, 0.2)',  # Horizontal grid line color with opacity
            tickfont=dict(size=14),
            tickprefix='$',  # Add dollar sign to y-axis labels
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',  # Axis line color with opacity
            linewidth=1  # Axis line width
        ),
        hovermode='x unified',  # Unified hover mode for better interactivity
        hoverlabel=dict(
            bgcolor='#1F1F1F',
            font_size=14,
            font_family="Rockwell"
        )
    )

    # Save a static copy, then show the plot. The export fails if the target
    # directory is missing, so create it first.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    fig.write_image(os.path.join(OUTPUT_DIR, f'{name}_price_feed.png'), width=WIDTH, height=HEIGHT)
    fig.show()

def format_names(nme):
    """Shorten a market name to ``"<first metro>, <FIRST STATE>"``.

    The metro is the text before the first comma, cut at the first ``-`` and
    then at the first ``/``. The state is the text after the last comma,
    upper-cased and cut at the first ``-``.
    """
    pieces = nme.split(',')

    # State: last comma-separated piece, keeping only the first hyphenated code.
    state, _, _ = pieces[-1].strip().upper().partition('-')

    # Metro: first piece, keeping only the first hyphen- and slash-delimited name.
    first_hyphenated, _, _ = pieces[0].partition('-')
    metro, _, _ = first_hyphenated.strip().partition('/')

    return f"{metro.strip()}, {state}"


def calculate_percent_changes(data, raw=False):
    """Compute price feed changes over several lookback windows.

    Windows are measured back from the latest date in ``data``: 30 days,
    6 months (180 days), 1 year (365 days), 5 years (5 * 365 days), and the
    full history (first row after sorting by date). For each window the
    reference price is the last observation on or before the window start.

    Args:
        data: DataFrame with ``date`` and ``price_feed`` columns. It is not
            modified; date parsing and sorting happen on a copy.
        raw: When True, return changes as fractions (0.05 for +5%). When
            False, return formatted strings such as ``"5.00% 📈"``.

    Returns:
        dict mapping each window label to its change. A value is None when
        ``data`` is empty, when no observation exists on or before the window
        start, or when the reference price is zero (change undefined).
    """
    labels = (
        '% Change (30 Day)',
        '% Change (6 mo)',
        '% Change (YoY)',
        '% Change (5 yr)',
        '% Change (Since `10)',
    )

    # Parse and sort on a copy so the caller's frame is left untouched.
    ordered = (
        data.assign(date=pd.to_datetime(data['date']))
        .sort_values(by='date')
        .reset_index(drop=True)
    )

    # Nothing to compare against: report every window as unavailable.
    if ordered.empty:
        return {label: None for label in labels}

    # Get the start and current price_feed values
    start_price = ordered.iloc[0]['price_feed']
    current_price = ordered.iloc[-1]['price_feed']

    # Define the date ranges relative to the most recent observation
    now = ordered.iloc[-1]['date']
    five_years_ago = now - timedelta(days=5*365)
    one_year_ago = now - timedelta(days=365)
    six_months_ago = now - timedelta(days=6*30)
    thirty_days_ago = now - timedelta(days=30)

    # Helper function to get the latest price on or before a specific date
    def get_price_at_date(date):
        on_or_before = ordered[ordered['date'] <= date]
        if on_or_before.empty:
            return None
        return on_or_before.iloc[-1]['price_feed']

    # Calculate percent changes
    def percent_change(old, new, raw=raw):
        if old is None or new is None:
            return None
        if old == 0:
            # Relative change from a zero base is undefined.
            return None
        change = (new - old) / old
        if raw:
            return change
        emoji = '📈' if change > 0 else '📉'
        return f"{change * 100:.2f}% {emoji}"

    reference_prices = (
        get_price_at_date(thirty_days_ago),
        get_price_at_date(six_months_ago),
        get_price_at_date(one_year_ago),
        get_price_at_date(five_years_ago),
        start_price,
    )

    changes = {
        label: percent_change(reference, current_price, raw=raw)
        for label, reference in zip(labels, reference_prices)
    }

    return changes


In [54]:
# Parcl IDs of the markets to analyze
poll_markets = [
    5290147, # charlotte
    5381001, # Dallas
    5352987, # Miami City
    5352995, # Tampa
    5384705, # Seattle
]

# Look up each market's metadata, one request per Parcl ID
outputs = []

for pid in poll_markets:
    results = client.search_markets.retrieve(
        parcl_id=pid,
        as_dataframe=True
    )

    outputs.append(results)

market_df = pd.concat(outputs)
parcl_ids = market_df['parcl_id'].tolist()

# All-cash transaction metrics, capped at 2024-03-01
cash = client.market_metrics_all_cash.retrieve_many(
    parcl_ids=parcl_ids,
    as_dataframe=True,
    end_date='2024-03-01',
    params={'limit': 1000},  # expand the limit to 1000; the rows previewed below are monthly
)


# Housing event counts
events = client.market_metrics_housing_event_counts.retrieve_many(
    parcl_ids=parcl_ids,
    as_dataframe=True,
    params={'limit': 1000}
)


# Housing stock
mix = client.market_metrics_housing_stock.retrieve_many(
    parcl_ids=parcl_ids,
    as_dataframe=True,
    params={'limit': 1000}
)

# Housing event prices. Bound to its own name: previously this result was
# assigned to `investors` and immediately overwritten by the next call.
event_prices = client.market_metrics_housing_event_prices.retrieve_many(
    parcl_ids=parcl_ids,
    as_dataframe=True,
    params={'limit': 1000}
)


# Investor housing event counts
investors = client.investor_metrics_housing_event_counts.retrieve_many(
    parcl_ids=parcl_ids,
    as_dataframe=True,
    params={'limit': 1000}
)


# Investor housing stock ownership
investor_ownership = client.investor_metrics_housing_stock_ownership.retrieve_many(
    parcl_ids=parcl_ids,
    as_dataframe=True,
    params={'limit': 1000}
)

cash.head()

|████████████████████████████████████████| 5/5 [100%] in 0.6s (8.50/s) 
|████████████████████████████████████████| 5/5 [100%] in 0.6s (8.58/s) 
|████████████████████████████████████████| 5/5 [100%] in 0.6s (8.88/s) 
|████████████████████████████████████████| 5/5 [100%] in 0.6s (8.92/s) 
|████████████████████████████████████████| 5/5 [100%] in 0.6s (8.55/s) 


Unnamed: 0,date,count,pct_all_cash,parcl_id
0,2024-03-01,456,27.55,5290147
1,2024-02-01,360,24.86,5290147
2,2024-01-01,393,27.2,5290147
3,2023-12-01,404,25.81,5290147
4,2023-11-01,419,27.15,5290147


In [38]:
# Preview the first rows of the housing stock data
mix.head()

Unnamed: 0,date,single_family,condo,townhouse,other,all_properties,parcl_id,pct_sfh,pct_condo
0,2024-04-01,201477,94914,30115,31251,357757,5290147,0.563167,0.265303
1,2024-03-01,201448,94862,30095,31171,357576,5290147,0.563371,0.265292
2,2024-02-01,201415,94820,30073,31082,357390,5290147,0.563572,0.265312
3,2024-01-01,201391,94784,30055,30980,357210,5290147,0.563789,0.265345
4,2023-12-01,201363,94756,30033,30879,357031,5290147,0.563993,0.2654


In [86]:
# Market inspected in the cells that follow
name = 'Seattle City'
# Resolve the market name to its Parcl ID (first match)
ids = market_df.loc[market_df['name'].eq(name), 'parcl_id'].tolist()[0]

# Share of all properties held by each property type
mix['pct_sfh'] = mix['single_family'].div(mix['all_properties'])
mix['pct_condo'] = mix['condo'].div(mix['all_properties'])
mix['pct_townhouse'] = mix['townhouse'].div(mix['all_properties'])

mix[mix['parcl_id'].eq(ids)].head()


Unnamed: 0,date,single_family,condo,townhouse,other,all_properties,parcl_id,pct_sfh,pct_condo,pct_townhouse
256,2024-04-01,133454,107781,26652,24583,292470,5384705,0.4563,0.36852,0.091127
257,2024-03-01,133424,107735,26629,24534,292322,5384705,0.456428,0.368549,0.091095
258,2024-02-01,133393,107694,26615,24472,292174,5384705,0.456553,0.368595,0.091093
259,2024-01-01,133364,107636,26598,24414,292012,5384705,0.456707,0.368601,0.091085
260,2023-12-01,133331,107578,26587,24385,291881,5384705,0.456799,0.368568,0.091088


In [87]:
# Net listings: new for-sale listings minus sales (positive means listings outpaced sales)
events['delta'] = events['new_listings_for_sale'].sub(events['sales'])
events[events['parcl_id'].eq(ids)].head(20)

Unnamed: 0,date,sales,new_listings_for_sale,new_rental_listings,parcl_id,delta
256,2024-04-01,973,1272,5223,5384705,299
257,2024-03-01,1217,1078,5279,5384705,-139
258,2024-02-01,1166,920,4486,5384705,-246
259,2024-01-01,860,772,5857,5384705,-88
260,2023-12-01,1024,294,4079,5384705,-730
261,2023-11-01,1013,549,4407,5384705,-464
262,2023-10-01,1129,1063,4705,5384705,-66
263,2023-09-01,1119,1135,4873,5384705,16
264,2023-08-01,1342,891,5642,5384705,-451
265,2023-07-01,1258,952,5905,5384705,-306


In [88]:
# Rows for the selected market where new for-sale listings exceeded sales
events[events['parcl_id'].eq(ids) & events['delta'].gt(0)]

Unnamed: 0,date,sales,new_listings_for_sale,new_rental_listings,parcl_id,delta
256,2024-04-01,973,1272,5223,5384705,299
263,2023-09-01,1119,1135,4873,5384705,16


In [89]:
# All-cash metrics for the selected market
cash[cash['parcl_id'].eq(ids)].head(20)

Unnamed: 0,date,count,pct_all_cash,parcl_id
252,2024-03-01,185,15.2,5384705
253,2024-02-01,209,17.92,5384705
254,2024-01-01,136,15.81,5384705
255,2023-12-01,142,13.87,5384705
256,2023-11-01,157,15.5,5384705
257,2023-10-01,173,15.32,5384705
258,2023-09-01,157,14.03,5384705
259,2023-08-01,193,14.38,5384705
260,2023-07-01,182,14.47,5384705
261,2023-06-01,271,17.28,5384705


In [93]:
# Net investor selling: dispositions minus acquisitions (positive means more sold than bought)
investors['delta'] = investors['dispositions'].sub(investors['acquisitions'])
investors[investors['parcl_id'].eq(ids)].head(20)

Unnamed: 0,date,acquisitions,dispositions,new_listings_for_sale,new_rental_listings,parcl_id,delta
256,2024-04-01,6,81,103,108,5384705,75
257,2024-03-01,38,97,109,111,5384705,59
258,2024-02-01,81,100,84,103,5384705,19
259,2024-01-01,62,95,59,104,5384705,33
260,2023-12-01,68,117,31,84,5384705,49
261,2023-11-01,69,91,70,93,5384705,22
262,2023-10-01,85,76,92,108,5384705,-9
263,2023-09-01,80,105,104,153,5384705,25
264,2023-08-01,89,113,73,143,5384705,24
265,2023-07-01,93,110,68,149,5384705,17


In [91]:
# Rows for the selected market where investor dispositions exceeded acquisitions
investors[investors['parcl_id'].eq(ids) & investors['delta'].gt(0)]

Unnamed: 0,date,acquisitions,dispositions,new_listings_for_sale,new_rental_listings,parcl_id,delta
256,2024-04-01,6,81,103,108,5384705,75
257,2024-03-01,38,97,109,111,5384705,59
258,2024-02-01,81,100,84,103,5384705,19
259,2024-01-01,62,95,59,104,5384705,33
260,2023-12-01,68,117,31,84,5384705,49
261,2023-11-01,69,91,70,93,5384705,22
263,2023-09-01,80,105,104,153,5384705,25
264,2023-08-01,89,113,73,143,5384705,24
265,2023-07-01,93,110,68,149,5384705,17
266,2023-06-01,103,161,115,140,5384705,58


In [92]:
# Investor ownership metrics for the selected market
investor_ownership[investor_ownership['parcl_id'].eq(ids)].head(20)

Unnamed: 0,date,count,pct_ownership,parcl_id
256,2024-04-01,14623,5.0,5384705
257,2024-03-01,14619,5.0,5384705
258,2024-02-01,14639,5.01,5384705
259,2024-01-01,14638,5.01,5384705
260,2023-12-01,14643,5.02,5384705
261,2023-11-01,14680,5.03,5384705
262,2023-10-01,14696,5.04,5384705
263,2023-09-01,14693,5.04,5384705
264,2023-08-01,14705,5.05,5384705
265,2023-07-01,14726,5.05,5384705


In [197]:
# Build one summary row per market: percent changes over each window plus volatility.
# NOTE(review): `price_feeds` and `volatility` are not defined in any cell visible in
# this notebook — confirm where they are loaded before relying on Restart & Run All.
all_data = []

for pid in price_feeds.sort_values('name')['parcl_id'].unique():
    data = price_feeds[price_feeds['parcl_id'].eq(pid)].sort_values('date')

    # Apply display-name overrides for specific markets
    name = data['name'].iloc[0]
    for original_label, display_label in (
        ('Kings County', 'Brooklyn County'),
        ('Washington City', 'Washington, DC'),
    ):
        name = name.replace(original_label, display_label)

    # Per-market chart is currently disabled: build_chart(name, data)
    changes = calculate_percent_changes(data, raw=True)
    print(name)
    vol = volatility[volatility['parcl_id'].eq(pid)]['pct_volatility'].iloc[0]

    # Text summary of the market
    for label, pct in changes.items():
        print(f"{label}: {pct}\n")
    print(f"Annualized Volatility: {vol:.02%}\n")
    print('Trade today on: @parcl')

    # Single-row frame holding this market's metrics
    row = pd.DataFrame(changes, index=[0]).assign(name=name, volatility=vol)
    all_data.append(row)

Charlotte City
% Change (30 Day): 0.04157649409022811

% Change (6 mo): 0.07495060561807412

% Change (YoY): 0.11782571798651134

% Change (5 yr): 0.7968839747271683

% Change (Since `10): 1.9794047619047621

Annualized Volatility: 1.53%

Trade today on: @parcl
Dallas City
% Change (30 Day): 0.04773399781965445

% Change (6 mo): 0.1034567597490467

% Change (YoY): 0.10350200934962686

% Change (5 yr): 0.5363973736797032

% Change (Since `10): 1.9250000000000003

Annualized Volatility: 3.13%

Trade today on: @parcl
Miami City
% Change (30 Day): -0.001546219668617004

% Change (6 mo): 0.05009794138300627

% Change (YoY): 0.026129509913684186

% Change (5 yr): 0.8430526725480021

% Change (Since `10): 2.2658045977011496

Annualized Volatility: 1.04%

Trade today on: @parcl
Seattle City
% Change (30 Day): 0.009881895139268291

% Change (6 mo): 0.041632926099355864

% Change (YoY): 0.0354536704873567

% Change (5 yr): 0.19020416373327984

% Change (Since `10): 1.128453237410072

Annualized 

In [198]:
# Stack the one-row frames into a single summary table. ignore_index=True gives
# each market a unique row label instead of repeating 0 for every row.
output = pd.concat(all_data, ignore_index=True)
output.head()

Unnamed: 0,% Change (30 Day),% Change (6 mo),% Change (YoY),% Change (5 yr),% Change (Since `10),name,volatility
0,0.041576,0.074951,0.117826,0.796884,1.979405,Charlotte City,0.0153
0,0.047734,0.103457,0.103502,0.536397,1.925,Dallas City,0.0313
0,-0.001546,0.050098,0.02613,0.843053,2.265805,Miami City,0.0104
0,0.009882,0.041633,0.035454,0.190204,1.128453,Seattle City,0.0117
0,-0.010726,0.014912,0.031424,0.899403,2.724025,Tampa City,0.0248


In [201]:
import plotly.graph_objects as go
import pandas as pd

# Table input: markets ranked by full-history change, with display-friendly labels
df = (
    output
    .sort_values('% Change (Since `10)', ascending=False)
    .rename(columns={'volatility': "Annualized Volatility"})
    .assign(name=lambda frame: frame['name'].replace({'United States Of America': 'USA'}))
)

# Function to format the percentage with arrows
def format_percent(value, show_arrow=True):
    """Format a fractional change as bold HTML percent text, optionally with an arrow.

    Args:
        value: Change as a fraction (0.05 for +5%). None or NaN marks a
            missing value, e.g. a window longer than the available history.
        show_arrow: Append an up/down arrow for positive/negative values.

    Returns:
        ``"<b>…</b>"`` string; ``"<b>N/A</b>"`` when the value is missing.
    """
    # Missing values cannot be formatted with the percent spec; show a placeholder.
    if pd.isna(value):
        return "<b>N/A</b>"

    formatted_value = f"{value:6.2%}"  # Padded to a minimum width of 6 characters
    if value > 0 and show_arrow:
        return f"<b>{formatted_value} ⬆️</b>"  # Up arrow
    elif value < 0 and show_arrow:
        return f"<b>{formatted_value} ⬇️</b>"  # Down arrow
    else:
        return f"<b>{formatted_value}</b>"

# Define the function to scale the color based on the value
def color_scale(value, min_val, max_val):
    """Map a value to an ``rgb(r,g,b)`` cell color.

    Negative values are solid red. Non-negative values are green, with the
    green channel scaled from 100 to 255 by the value's position between
    ``min_val`` and ``max_val``.

    Args:
        value: The cell value; None or NaN marks a missing value.
        min_val: Smallest value in the column.
        max_val: Largest value in the column.

    Returns:
        CSS ``rgb(...)`` string. Missing values get black, matching the
        table's non-metric cells.
    """
    if pd.isna(value):
        return 'rgb(0,0,0)'
    if value < 0:
        return 'rgb(255,0,0)'

    span = max_val - min_val
    if pd.isna(span) or span == 0:
        # All values in the column are equal (e.g. a single market), so every
        # value is the column maximum: use full intensity instead of dividing by zero.
        normalized = 1.0
    else:
        # Clamp so a value outside [min_val, max_val] cannot push the channel past 255.
        normalized = min(max((value - min_val) / span, 0.0), 1.0)

    g = int(100 + 155 * normalized)  # Starts from 100 to ensure readability
    return f'rgb(0,{g},0)'

# Prepare cell text and fill colors for the table: one inner list per column of
# `df`, in `df.columns` order.
colors = []
formatted_data = []

for col in df.columns:
    column_values = df[col]

    if col == 'name':
        # Market names: bold text on black
        cell_text = [f"<b>{value}</b>" for value in column_values]
        cell_colors = ['#000000'] * len(column_values)
    elif col == 'Annualized Volatility':
        # Volatility: bold percent on black, no arrow or color scale
        cell_text = [f"<b>{value:.2%}</b>" for value in column_values]
        cell_colors = ['#000000'] * len(column_values)
    else:
        # Percent-change columns: arrowed text, colored relative to the column's range
        min_val = column_values.min()
        max_val = column_values.max()
        cell_text = [format_percent(value) for value in column_values]
        cell_colors = [color_scale(value, min_val, max_val) for value in column_values]

    formatted_data.append(cell_text)
    colors.append(cell_colors)

# Define headers and table layout
column_headers = ['% Change (30 Day)', '% Change (6 mo)', '% Change (YoY)', '% Change (5 yr)', '% Change (Since `10)', 'Annualized Volatility']

# Displayed columns, left to right. Cell text and fill colors are both looked up
# through this one list so each column's colors always line up with its values,
# whatever order the columns have in `df`.
table_columns = ['name'] + column_headers

fig = go.Figure(data=[go.Table(
    header=dict(values=['<b>Market</b>'] + [f"<b>{header}</b>" for header in column_headers],
                fill_color='#000000',  # Black for header
                font=dict(color='#FFFFFF', size=12),
                align='center',
                height=30),
    cells=dict(values=[formatted_data[df.columns.get_loc(col)] for col in table_columns],
               fill=dict(color=[colors[df.columns.get_loc(col)] for col in table_columns]),
               font=dict(color='#FFFFFF', size=12),
               align='center',
               height=30)
)])

# Add the logo image
# The table needs a slightly larger, lower logo than the line charts. Use a
# table-specific dict instead of rebinding `labs_logo_lookup` / `labs_logo_dict`,
# which `build_chart` reads and which would otherwise change for every chart
# drawn after this cell runs.
table_logo_dict = dict(
    source=labs_logo_lookup['white'],
    xref="paper",
    yref="paper",
    x=0.5,
    y=1.01,
    sizex=0.2,
    sizey=0.2,
    xanchor="center",
    yanchor="bottom"
)
fig.add_layout_image(table_logo_dict)

# Exported image size in pixels
w = 1250
h = 300

# Update layout and display the figure
fig.update_layout(
    title={
        'text': 'Proposed Parcl Exchange Trading Markets Review',
        'y': 0.94,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    title_font_color='#FFFFFF',
    width=w,  # Increase the width for wider cells
    height=h,
    paper_bgcolor='#080D16',
    margin=dict(l=10, r=10, t=100, b=10)
)

# Save a dated static copy. The export fails if the target directory is
# missing, so create it first.
output_dir = '../graphics/pricefeeds'
os.makedirs(output_dir, exist_ok=True)
fig.write_image(os.path.join(output_dir, f'comp_table_price_feed-{datetime.now().date()}.png'), width=w, height=h)
fig.show()



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def calculate_seasonal_probability(df, market_column='parcl_id', date_column='date', price_column='price_feed'):
    """Estimate, per market, how often each day of the year had a positive return.

    For every market the rows are sorted by date, the period-over-period
    return of ``price_column`` is computed, and the returns are grouped by day
    of year. The probability for a day is the share of its defined returns
    that are positive; rows with no defined return (such as a market's first
    observation) are excluded rather than counted as non-positive.

    Args:
        df: DataFrame holding one row per market and date. It is not modified.
        market_column: Column identifying the market.
        date_column: Column with dates (anything ``pd.to_datetime`` accepts).
        price_column: Column with the price series.

    Returns:
        dict mapping each market (in order of first appearance) to a Series
        indexed by day of year (index name ``DayOfYear``) with the probability
        of a positive return; NaN where a day has no defined returns.
    """
    # Parse dates and derive the grouping key on a copy, leaving the caller's frame intact.
    dates = pd.to_datetime(df[date_column])
    working = df.assign(**{date_column: dates, 'DayOfYear': dates.dt.dayofyear})

    # Create a dictionary to store probability data for each market
    market_probabilities = {}

    # Iterate over each market
    for market in working[market_column].unique():
        market_data = working[working[market_column] == market].sort_values(date_column)

        # Returns between consecutive observations
        returns = market_data[price_column].pct_change().rename('Return')

        # Share of defined returns that are positive, per day of the year
        probability_positive = returns.groupby(market_data['DayOfYear']).apply(
            lambda x: x.dropna().gt(0).mean()
        )

        # Store the results in the dictionary
        market_probabilities[market] = probability_positive

    return market_probabilities

def plot_seasonal_probability(market_probabilities):
    """Plot each market's positive-return probability against day of year.

    Args:
        market_probabilities: Mapping of market identifier to a Series whose
            index is plotted on the x-axis and whose values on the y-axis.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    # One line per market, labeled for the legend
    for market in market_probabilities:
        series = market_probabilities[market]
        ax.plot(series.index, series.values, label=f'Market {market}')

    ax.set_xlabel('Day of Year')
    ax.set_ylabel('Probability')
    ax.set_title('Daily Probability of Positive Returns')
    ax.legend()
    plt.show()

# Example usage
# NOTE(review): `price_feeds` is not defined in any cell visible in this notebook —
# confirm where it is loaded. The call below relies on the function's default column
# names: `parcl_id`, `date`, and `price_feed`.
market_probabilities = calculate_seasonal_probability(price_feeds)
plot_seasonal_probability(market_probabilities)
