# Welcome to the Lab 🥼🧪
## How do I retrieve and download price feeds for historical backtesting?

In this notebook, we will retrieve all price feeds, back to 2011, for markets currently tradeable on the [Parcl Exchange](https://app.parcl.co/collection/active-markets).

**Note:** This notebook works with any of the dozens of daily price feeds available.

As a reminder, you can get your Parcl Labs API key [here](https://dashboard.parcllabs.com/signup) to follow along.

To run this immediately, you can use Google Colab. Remember, you must set your `PARCL_LABS_API_KEY` as a secret. See this [guide](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) for more information.

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ParclLabs/parcllabs-examples/blob/main/python/introduction/price_feed.ipynb)

In [None]:
import os
import sys
import json
import subprocess
from datetime import datetime
from urllib.request import urlopen

# Colab setup (when the notebook is opened via the one-click badge above)
if "google.colab" in sys.modules:
    # Running inside Google Colab: install the dependencies and read the key from Colab secrets
    from google.colab import userdata
    %pip install parcllabs plotly kaleido
    api_key = userdata.get('PARCL_LABS_API_KEY')
else:
    # Anywhere else: read the key from the environment (None when the variable is unset)
    api_key = os.getenv('PARCL_LABS_API_KEY')

In [None]:
import parcllabs
import pandas as pd
import plotly.express as px
from parcllabs import ParclLabsClient

# Print the installed parcllabs package version
print(f"Parcl Labs Version: {parcllabs.__version__}")

In [None]:
# Initialize the Parcl Labs client with the API key loaded in the setup cell.
# NOTE(review): api_key is None when PARCL_LABS_API_KEY is not set outside Colab;
# check the key first if the client rejects requests.
client = ParclLabsClient(api_key)

In [None]:
import pandas as pd
from datetime import datetime, timedelta

import plotly.express as px
import plotly.graph_objects as go

# URLs of the "powered by Parcl Labs API" logo, keyed by color variant
labs_logo_lookup = {
    'blue': 'https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
    'white': 'https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg'
}

# Set charting constants
# Layout-image spec that build_chart passes to fig.add_layout_image();
# x/y and sizex/sizey are expressed in "paper" coordinates.
labs_logo_dict = dict(
    source=labs_logo_lookup['white'],
    xref="paper",
    yref="paper",
    x=0.5,  # Centering the logo below the title
    y=1.04,  # Adjust this value to position the logo just below the title
    sizex=0.15, 
    sizey=0.15,
    xanchor="center",
    yanchor="bottom"
)

def build_chart(name, data):
    """Plot one market's price feed against its median, save a PNG, and show it.

    The series is drawn as consecutive line segments: white while the price
    feed is at or above its median, blue while it is below. A dotted
    horizontal line marks the median. Rows whose price_feed is missing compare
    as "not above" and are therefore grouped with the below-median runs.

    Args:
        name: Market label used in the chart title and in the output file name.
        data: DataFrame with `date` and `price_feed` columns, already ordered
            the way the line should be drawn.

    Side effects:
        Writes `<name>_price_feed.png` under `../graphics/pricefeeds`
        (creating the directory when it does not exist) and displays the figure.
    """
    HEIGHT = 900
    WIDTH = 1600
    ABOVE_COLOR = '#FFFFFF'
    BELOW_COLOR = '#57A3FF'
    OUTPUT_DIR = '../graphics/pricefeeds'

    # Calculate median price_feed
    median_price_feed = data['price_feed'].median()

    fig = go.Figure()

    # Split data into continuous runs on the same side of the median.
    # A new run id starts wherever the above/below flag differs from the previous row
    # (the first row's diff is NaN, which also counts as "different").
    above_median = data['price_feed'] >= median_price_feed
    run_ids = above_median.astype(int).diff().ne(0).cumsum()

    # Group on the raw id array so row position, not index labels, drives the split
    for _, segment_df in data.groupby(run_ids.to_numpy(), sort=False):
        is_above = segment_df['price_feed'].iloc[0] >= median_price_feed
        fig.add_trace(go.Scatter(
            x=segment_df['date'],
            y=segment_df['price_feed'],
            mode='lines',
            line=dict(width=2, color=ABOVE_COLOR if is_above else BELOW_COLOR),
            showlegend=False
        ))

    # Add horizontal line for median price feed value
    fig.add_shape(
        type="line",
        x0=data['date'].min(),
        y0=median_price_feed,
        x1=data['date'].max(),
        y1=median_price_feed,
        line=dict(
            color="#FFFFFF",
            width=1,
            dash="dot",  # Small dots for the median line
        ),
        opacity=1  # Fully opaque
    )

    fig.add_layout_image(labs_logo_dict)

    fig.update_layout(
        margin=dict(l=0, r=0, t=110, b=0),
        height=HEIGHT,
        width=WIDTH,
        title={
            'text': f'Price Feed (Price per Square Foot $): {name}',
            'y': 0.99,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=28, color='#FFFFFF'),
        },
        plot_bgcolor='#000000',  # Dark background for better contrast
        paper_bgcolor='#000000',  # Dark background for the paper
        font=dict(color='#FFFFFF'),
        xaxis=dict(
            title_text='',
            showgrid=False,  # Disable vertical grid lines
            tickangle=-45,
            tickfont=dict(size=14),
            linecolor='rgba(255, 255, 255, 0.7)',  # Axis line color with opacity
            linewidth=1  # Axis line width
        ),
        yaxis=dict(
            title_text='Price per Square Foot ($)',
            showgrid=True,
            gridwidth=0.5,  # Horizontal grid line width
            gridcolor='rgba(255, 255, 255, 0.2)',  # Horizontal grid line color with opacity
            tickfont=dict(size=14),
            tickprefix='$',  # Add dollar sign to y-axis labels
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',  # Axis line color with opacity
            linewidth=1  # Axis line width
        ),
        hovermode='x unified',  # Unified hover mode for better interactivity
        hoverlabel=dict(
            bgcolor='#1F1F1F',
            font_size=14,
            font_family="Rockwell"
        )
    )

    # Save a static PNG (making sure the target directory exists), then show the plot
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    fig.write_image(os.path.join(OUTPUT_DIR, f'{name}_price_feed.png'), width=WIDTH, height=HEIGHT)
    fig.show()

def format_names(nme):
    """Shorten a market name to "<metro>, <STATE>".

    The metro part is the text before the first comma, cut at the first '-'
    and then at the first '/', with surrounding whitespace removed. The state
    part is the text after the last comma, trimmed, upper-cased, and cut at
    the first '-'. When `nme` has no comma, both parts derive from the whole
    string.
    """
    before_comma = nme.partition(',')[0]
    after_comma = nme.rpartition(',')[2]

    state = after_comma.strip().upper().partition('-')[0]

    first_metro = before_comma.partition('-')[0].strip()
    metro = first_metro.partition('/')[0].strip()

    return "{}, {}".format(metro, state)


def calculate_percent_changes(data, raw=False):
    """Compute price feed changes over several look-back windows.

    The latest observation is compared with the most recent observation on or
    before 30 days, 180 days, 365 days and 5*365 days earlier, and with the
    first observation in the series.

    Args:
        data: DataFrame with a `date` column (anything `pd.to_datetime`
            accepts) and a numeric `price_feed` column. It is not modified.
        raw: When True, return each change as a fraction (0.10 for +10%).
            When False, return a string such as "10.00% 📈".

    Returns:
        dict mapping each window label to its change. A value is None when the
        series has no observation old enough for that window, when the
        baseline price is 0, or when `data` is empty.
    """
    labels_to_lookback = {
        '% Change (30 Day)': timedelta(days=30),
        '% Change (6 mo)': timedelta(days=6 * 30),
        '% Change (YoY)': timedelta(days=365),
        '% Change (5 yr)': timedelta(days=5 * 365),
    }
    since_start_label = '% Change (Since `10)'

    # assign() returns a new frame, so the caller's `date` column keeps its dtype
    history = data.assign(date=pd.to_datetime(data['date']))
    history = history.sort_values(by='date').reset_index(drop=True)

    if history.empty:
        return {label: None for label in (*labels_to_lookback, since_start_label)}

    start_price = history['price_feed'].iloc[0]
    current_price = history['price_feed'].iloc[-1]
    now = history['date'].iloc[-1]  # windows are anchored on the latest observation, not today

    def price_on_or_before(cutoff):
        """Return the latest price dated at or before `cutoff`, or None if there is none."""
        earlier = history.loc[history['date'] <= cutoff, 'price_feed']
        return None if earlier.empty else earlier.iloc[-1]

    def percent_change(old, new):
        """Return the change from `old` to `new` (fraction or formatted), or None if undefined."""
        if old is None or new is None or old == 0:
            return None
        change = (new - old) / old
        if raw:
            return change
        emoji = '📈' if change > 0 else '📉'
        return f"{change * 100:.2f}% {emoji}"

    changes = {
        label: percent_change(price_on_or_before(now - lookback), current_price)
        for label, lookback in labels_to_lookback.items()
    }
    changes[since_start_label] = percent_change(start_price, current_price)

    return changes


In [None]:
# Fetch the markets that are currently tradeable on the Parcl Exchange.
# NOTE(review): sorting by PARCL_EXCHANGE_MARKET descending presumably lists exchange
# markets first, so the first `limit` rows are the tradeable ones — confirm against the API docs.
market_df = client.search_markets.retrieve(
    sort_by='PARCL_EXCHANGE_MARKET',
    sort_order='DESC',
    as_dataframe=True,
    # query='Tampa',
    params={'limit': 14},  # expand the default limit to 14, as of this writing, 14 markets are available
)

# Keep the parcl_ids of the markets we are interested in
parcl_ids = market_df['parcl_id'].tolist()

# Request price feed history for these markets starting at START_DATE
START_DATE = '2010-01-01'

price_feeds = client.price_feed.retrieve_many(
    parcl_ids=parcl_ids,
    start_date=START_DATE,
    as_dataframe=True,
    params={'limit': 1000},  # expand the limit to 1000, these are daily series
    auto_paginate=True, # auto paginate to get all the data - WARNING: ~6k credits can be used in one parcl price feed. Change the START_DATE to a more recent date to reduce the number of credits used
)

# Preview the first rows
price_feeds.head()

In [None]:
# Retrieve price feed volatility for the same markets; the `pct_volatility` column
# is reported further down as "Annualized Volatility".

volatility = client.price_feed_volatility.retrieve_many(
    parcl_ids=parcl_ids,
    start_date='2024-01-01',
    as_dataframe=True,
    params={'limit': 1},  # most recent volatility (assumes the API returns newest rows first — TODO confirm)
)

# Preview the first rows
volatility.head()

In [None]:
# Attach each market's name to its price feed rows (left join on parcl_id).
# Run this cell once per retrieval: merging again onto a frame that already has
# `name` yields suffixed name_x / name_y columns instead.
price_feeds = price_feeds.merge(market_df[['parcl_id', 'name']], on='parcl_id', how='left')
price_feeds

In [None]:
# One single-row DataFrame per market (percent changes, name, volatility)
all_data = []

# Visit markets in alphabetical order of their name
for pid in price_feeds.sort_values('name')['parcl_id'].unique():
    # This market's rows in chronological order
    data = price_feeds.loc[price_feeds['parcl_id'] == pid].sort_values('date')
    # Swap in the display names used for these two markets
    name = data['name'].iloc[0].replace('Kings County', 'Brooklyn County').replace('Washington City', 'Washington, DC')
    # build_chart(name, data)  # uncomment to render and save a chart per market
    changes = calculate_percent_changes(data, raw=True)
    print(name)
    # First volatility row for this market; raises IndexError when the market has none
    vol = volatility.loc[volatility['parcl_id'] == pid]['pct_volatility'].iloc[0]
    for k, v in changes.items():
        print(f"{k}: {v}\n")
    print(f"Annualized Volatility: {vol:.02%}\n")
    print('Trade today on: @parcl')
    # create row
    row = pd.DataFrame(changes, index=[0])
    row['name'] = name
    row['volatility'] = vol
    all_data.append(row)

In [None]:
# Stack the per-market rows into one summary frame
# (each row keeps index label 0 from its single-row source frame)
output = pd.concat(all_data)
output.head()

In [None]:
import plotly.graph_objects as go
import pandas as pd

# Build the table frame from `output`: largest change since the start of the series first,
# with the volatility column renamed to its display header
df = output.sort_values('% Change (Since `10)', ascending=False).rename(columns={'volatility': "Annualized Volatility"})
# Shorten the 'United States Of America' label to 'USA'
df['name'] = df['name'].replace({'United States Of America': 'USA'})

# Function to format the percentage with arrows
def format_percent(value, show_arrow=True):
    """Render a fractional change as bold HTML text for a table cell.

    Args:
        value: Change expressed as a fraction (0.1234 for 12.34%), or a
            missing value (None / NaN) when the change could not be computed.
        show_arrow: When True, append an up arrow for positive values and a
            down arrow for negative ones; zero never gets an arrow.

    Returns:
        A string such as "<b>12.34% ⬆️</b>", or "<b>N/A</b>" for a missing value.
    """
    # Windows without enough history come through as None/NaN; show a placeholder
    # instead of failing inside the format spec.
    if pd.isna(value):
        return "<b>N/A</b>"

    formatted_value = f"{value:6.2%}"  # Padded to a minimum width of 6 characters
    if show_arrow and value > 0:
        arrow = " ⬆️"  # Up arrow
    elif show_arrow and value < 0:
        arrow = " ⬇️"  # Down arrow
    else:
        arrow = ""
    return f"<b>{formatted_value}{arrow}</b>"

# Define the function to scale the color based on the value
def color_scale(value, min_val, max_val):
    """Pick a cell fill color for a fractional change.

    Negative values are solid red. Non-negative values are green, scaled
    linearly from 100 (at `min_val`) to 255 (at `max_val`) on the green
    channel.

    Args:
        value: The change to color, or a missing value (None / NaN).
        min_val: Smallest value in the column.
        max_val: Largest value in the column.

    Returns:
        An 'rgb(r,g,b)' string. Missing values get black, 'rgb(0,0,0)'.
    """
    if pd.isna(value):
        return 'rgb(0,0,0)'
    if value < 0:
        return 'rgb(255,0,0)'

    if pd.isna(min_val) or pd.isna(max_val) or max_val == min_val:
        # No usable range to scale against (e.g. a single row): use the base green
        normalized = 0.0
    else:
        normalized = (value - min_val) / (max_val - min_val)
        # Keep the green channel inside 100..255 even for out-of-range inputs
        normalized = min(max(normalized, 0.0), 1.0)

    green = int(100 + 155 * normalized)  # Starts from 100 to ensure readability
    return f'rgb(0,{green},0)'

# Prepare data and colors for the table.
# colors[i] and formatted_data[i] hold the per-row fill colors and display strings
# for the column at position i of df.columns.
colors = [[] for _ in range(len(df.columns))]
formatted_data = [[] for _ in range(len(df.columns))]

for col_idx, col in enumerate(df.columns):
    if col == 'name':
        formatted_data[col_idx] = [f"<b>{value}</b>" for value in df[col]]
        colors[col_idx] = ['#000000'] * len(df)  # Black for name column
    elif col == 'Annualized Volatility':
        formatted_data[col_idx] = [f"<b>{value:.2%}</b>" for value in df[col]]
        colors[col_idx] = ['#000000'] * len(df)  # Black for volatility column
    else:
        # Percent-change column: shade each cell relative to the column's own range
        min_val = df[col].min()
        max_val = df[col].max()
        formatted_data[col_idx] = [format_percent(value) for value in df[col]]
        colors[col_idx] = [color_scale(value, min_val, max_val) for value in df[col]]

# Define headers and table layout
column_headers = ['% Change (30 Day)', '% Change (6 mo)', '% Change (YoY)', '% Change (5 yr)', '% Change (Since `10)', 'Annualized Volatility']

# Cell values and fill colors are both looked up by column name so they stay
# aligned with the displayed columns regardless of the order of df.columns.
fig = go.Figure(data=[go.Table(
    header=dict(values=['<b>Market</b>'] + [f"<b>{header}</b>" for header in column_headers],
                fill_color='#000000',  # Black for header
                font=dict(color='#FFFFFF', size=12),
                align='center',
                height=30),
    cells=dict(values=[formatted_data[df.columns.get_loc('name')]] +
               [formatted_data[df.columns.get_loc(col)] for col in column_headers],
               fill=dict(color=[['#000000'] * len(df)] +
                         [colors[df.columns.get_loc(col)] for col in column_headers]),
               font=dict(color='#FFFFFF', size=12),
               align='center',
               height=30)
)])

# Add the logo image (this reassigns the module-level logo settings with table-specific placement)
labs_logo_lookup = {
    'blue': 'https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
    'white': 'https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg'
}
labs_logo_dict = dict(
    source=labs_logo_lookup['white'],
    xref="paper",
    yref="paper",
    x=0.5,
    y=1.01,
    sizex=0.2,
    sizey=0.2,
    xanchor="center",
    yanchor="bottom"
)
fig.add_layout_image(labs_logo_dict)

w = 1250
h = 600

# Update layout and display the figure
fig.update_layout(
    title={
        'text': 'Parcl Exchange Trading Markets Review',
        'y': 0.94,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    title_font_color='#FFFFFF',
    width=w,  # Increase the width for wider cells
    height=h,
    paper_bgcolor='#080D16',
    margin=dict(l=10, r=10, t=100, b=10)
)

# Save a dated PNG, creating the output directory first so write_image cannot fail on a missing path
output_dir = '../graphics/pricefeeds'
os.makedirs(output_dir, exist_ok=True)
fig.write_image(os.path.join(output_dir, f'comp_table_price_feed-{datetime.now().date()}.png'), width=w, height=h)
fig.show()



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def calculate_seasonal_probability(df, market_column='parcl_id', date_column='date', price_column='price_feed'):
    """Estimate, per market, how often each day of the year had a positive return.

    For every market the rows are ordered by date, period-over-period returns
    are computed from the price column, and the share of positive returns is
    taken for each day of the year. The first row of a market has no defined
    return and is left out of the counts.

    Args:
        df: DataFrame holding the market, date and price columns. It is not
            modified.
        market_column: Name of the column identifying the market.
        date_column: Name of the date column (anything `pd.to_datetime` accepts).
        price_column: Name of the price column.

    Returns:
        dict mapping each market value to a Series named 'Return', indexed by
        'DayOfYear', whose values are the fraction of positive returns.
    """
    # Work on a private copy of the needed columns so the caller's frame is left untouched
    frame = df[[market_column, date_column, price_column]].copy()
    frame[date_column] = pd.to_datetime(frame[date_column])

    # Create a dictionary to store probability data for each market
    market_probabilities = {}

    for market in frame[market_column].unique():
        market_data = frame[frame[market_column] == market].sort_values(date_column)

        # Calculate returns between consecutive observations
        returns = market_data[price_column].pct_change()
        day_of_year = market_data[date_column].dt.dayofyear.rename('DayOfYear')

        # Skip undefined returns so they are not counted as non-positive days
        defined = returns.notna()
        is_positive = (returns[defined] > 0).rename('Return')

        # Mean of a boolean series is the share of True values
        market_probabilities[market] = is_positive.groupby(day_of_year[defined]).mean()

    return market_probabilities

def plot_seasonal_probability(market_probabilities):
    """Plot the day-of-year probability curve of every market on one figure.

    Args:
        market_probabilities: Mapping of market identifier to a Series whose
            index is plotted on the x axis and whose values are plotted on
            the y axis.
    """
    plt.figure(figsize=(12, 6))

    # One labelled line per market
    for market_id in market_probabilities:
        series = market_probabilities[market_id]
        plt.plot(series.index, series.values, label='Market {}'.format(market_id))

    plt.title('Daily Probability of Positive Returns')
    plt.xlabel('Day of Year')
    plt.ylabel('Probability')
    plt.legend()
    plt.show()

# Example usage: compute the per-market seasonal probabilities from the
# price feeds retrieved earlier in this notebook, then plot them
market_probabilities = calculate_seasonal_probability(price_feeds)
plot_seasonal_probability(market_probabilities)
