# Welcome to the Lab 🥼🧪

Exploratory data analysis of price feeds, portfolio construction, seasonality, etc. 

In [None]:
import os
import sys
import json
import requests
import subprocess
from datetime import datetime, timedelta
from urllib.request import urlopen

import parcllabs
import numpy as np
import pandas as pd
import seaborn as sns
from prophet import Prophet
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from parcllabs import ParclLabsClient
from pypfopt import EfficientFrontier, risk_models, expected_returns

# Read the Parcl Labs credential from the environment (None when the variable is unset).
api_key = os.environ.get('PARCL_LABS_API_KEY')
print(f"Parcl Labs Version: {parcllabs.__version__}")

In [None]:
# Initialize the Parcl Labs client with the API key read from the environment above.
# NOTE(review): api_key is None when PARCL_LABS_API_KEY is unset — presumably requests
# then fail to authenticate; confirm against the SDK.
client = ParclLabsClient(api_key)

In [None]:
# Notebook configuration: short feed label -> name of the series it refers to.
pf_options = dict(
    rental='rental_price_feed',
    pricefeed='price_feed',
)

# Feed type selected for this run.
PF_TYPE = pf_options['rental']

In [None]:
# Search for markets, sorted descending on PARCL_EXCHANGE_MARKET.
# NOTE(review): presumably the descending sort lists exchange-tradable markets first, so
# the first 14 rows are the tradable ones — confirm against the API docs.
market_df = client.search_markets.retrieve(
    sort_by='PARCL_EXCHANGE_MARKET',
    sort_order='DESC',
    as_dataframe=True,
    params={'limit': 14},  # raise the limit to 14; 14 markets were available when this was written
)

# Keep the ids for the feed queries; the last expression previews the table.
parcl_ids = market_df['parcl_id'].tolist()
market_df.head()

In [None]:
# Retrieve the price feeds from START_DATE onward for the selected markets.
START_DATE = '2020-01-01'
feeds = client.price_feed.retrieve_many(
    parcl_ids=parcl_ids,
    start_date=START_DATE,
    as_dataframe=True,
    params={'limit': 1000},  # expand the limit to 1000, these are daily series
    auto_paginate=True,  # auto paginate to get all the data - WARNING: ~6k credits can be used in one parcl price feed. Change the START_DATE to a more recent date to reduce the number of credits used
)

# Same query against the rental price feed endpoint; the credit warning above applies here too.
rentals = client.rental_price_feed.retrieve_many(
    parcl_ids=parcl_ids,
    start_date=START_DATE,
    as_dataframe=True,
    params={'limit': 1000},  # expand the limit to 1000, these are daily series
    auto_paginate=True,  # auto paginate to get all the data
)

In [None]:
# Attach the human-readable market name to every feed row.
# Price feeds keep all of their rows (left join); rentals keep only rows with a known market (inner join).
market_names = market_df[['parcl_id', 'name']]
feeds = feeds.merge(market_names, how='left', on='parcl_id')
rentals = rentals.merge(market_names, how='inner', on=['parcl_id'])


In [None]:
# Rental columns get their own names so they cannot be confused with the price feed columns.
rentals = rentals.rename(columns={'parcl_id': 'rental_parcl_id', 'name': 'rental_name'})
# Suffix every rental market label with '(R)' to tell it apart from the price feed of the same market.
rentals['rental_name'] += '(R)'

In [None]:
def build_corr_matrix(
        feeds,
        rentals,
        title: str='Correlation Matrix Heatmap (Rental vs Price Feed since `20)',
        output_title: str='Median Correlation Coefficient (`20)',
        rr: bool=False
):
    """
    Draw a correlation heatmap and return the per-column median correlation.

    :param feeds: Long-format price feed frame with ``date``, ``name`` and ``price_feed``
        columns. Not used when ``rr`` is True.
    :param rentals: Long-format rental frame with ``date``, ``rental_name`` and
        ``rental_price_feed`` columns.
    :param title: Title drawn above the heatmap.
    :param output_title: Name given to the median column in the returned frame.
    :param rr: When True, correlate the rental markets with each other; otherwise
        correlate price feeds (rows) with rental feeds (columns).
    :return: Frame with one row per heatmap column and its median correlation.
    """
    rental_wide = rentals.pivot(index='date', columns='rental_name', values='rental_price_feed')

    if rr:
        heatmap_data = rental_wide.corr()
    else:
        price_wide = feeds.pivot(index='date', columns='name', values='price_feed')
        # Correlate every series with every other one, then keep only the
        # price-feed rows against the rental columns (rental labels contain '(R)').
        full_corr = pd.concat([price_wide, rental_wide], axis=1).corr()
        price_rows = [label for label in full_corr.index if '(R)' not in label]
        rental_cols = [label for label in full_corr.columns if '(R)' in label]
        heatmap_data = full_corr.loc[price_rows, rental_cols]

    plt.figure(figsize=(20, 15))
    sns.heatmap(heatmap_data, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title(title)
    plt.show()

    return heatmap_data.median().reset_index(name=output_title)

In [None]:
# Correlate every price feed with every rental feed over the full window.
corr1 = build_corr_matrix(feeds=feeds, rentals=rentals)

# Per-market standard deviation of the rental feed over the same window.
rental_feed_by_market = rentals.groupby('rental_name')['rental_price_feed']
std = rental_feed_by_market.std().reset_index(name='std_since_20')

In [None]:
# Rental-vs-rental correlations over the full window (no price feeds needed).
rr_corr1 = build_corr_matrix(
    feeds=None,
    rentals=rentals,
    rr=True,
    title='Correlation Matrix Heatmap (Rental vs Rental since `20)',
    output_title='Median Correlation Coefficient (Rental since `20)',
)

In [None]:
# Preview the first rows of the rental-vs-rental median correlations.
rr_corr1.head()

In [None]:
# Restrict both feeds to observations dated after the cutoff and repeat the analysis.
recent_cutoff = '2023-05-01'
feeds_yr = feeds.loc[feeds['date'] > recent_cutoff]
rentals_yr = rentals.loc[rentals['date'] > recent_cutoff]

# Per-market standard deviation of the rental feed inside the recent window.
std_yr = (
    rentals_yr
    .groupby('rental_name')['rental_price_feed']
    .std()
    .reset_index(name='std_since_23')
)
corr2 = build_corr_matrix(
    feeds_yr,
    rentals_yr,
    title='Correlation Matrix Heatmap (Rental vs Price Feed since `23)',
    output_title='Median Correlation Coefficient (`23)',
)

In [None]:
# Rental-vs-rental correlations restricted to the recent window.
rr_corr2 = build_corr_matrix(
    feeds=None,
    rentals=rentals_yr,
    rr=True,
    title='Correlation Matrix Heatmap (Rental vs Rental since `23)',
    output_title='Median Correlation Coefficient (Rental since `23)',
)

In [None]:
# Start from the two price-vs-rental summaries, keyed by the rental market label.
out = corr2.merge(corr1, how='inner', on='index').rename(columns={'index': 'rental_name'})

# Fold in the remaining per-market summaries one at a time.
for summary in (std, rr_corr1, rr_corr2, std_yr):
    out = out.merge(summary, how='inner', on='rental_name')

# Full-window median minus recent-window median, sorted from smallest to largest.
out['Diff in Median Correlation Coefficients'] = (
    out['Median Correlation Coefficient (`20)'] - out['Median Correlation Coefficient (`23)']
)
out = out.sort_values('Diff in Median Correlation Coefficients', ascending=True)
out.head()

In [None]:
# Give the summary columns presentation-friendly names. The difference column already
# carries its final name from the cell that computes it, so it needs no entry here.
out = out.rename(columns={
    'rental_name': 'Rental Market',
    'std_since_20': 'Standard Deviation (Since `20)',
    'std_since_23': 'Standard Deviation (Since `23)',
})

# Final table, in reading order: correlations first, then dispersion.
out[[
    'Rental Market',
    'Median Correlation Coefficient (`20)',
    'Median Correlation Coefficient (`23)',
    'Diff in Median Correlation Coefficients',
    'Median Correlation Coefficient (Rental since `20)',
    'Median Correlation Coefficient (Rental since `23)',
    'Standard Deviation (Since `20)',
    'Standard Deviation (Since `23)',
]]

In [None]:
# Put the price and rental value of each market side by side, one row per (date, market).
rental_values = rentals.rename(columns={'rental_parcl_id': 'parcl_id'})[['date', 'parcl_id', 'rental_price_feed']]
df = pd.merge(feeds, rental_values, how='inner', on=['date', 'parcl_id'])

In [None]:

# Logo artwork, keyed by colour variant.
labs_logo_lookup = dict(
    blue='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
    white='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg',
)

# Layout-image settings shared by the charts: the white logo, positioned in paper
# coordinates, horizontally centred and anchored by its bottom edge at y=1.04.
labs_logo_dict = {
    'source': labs_logo_lookup['white'],
    'xref': "paper",
    'yref': "paper",
    'x': 0.5,
    'y': 1.04,
    'sizex': 0.15,
    'sizey': 0.15,
    'xanchor': "center",
    'yanchor': "bottom",
}

def build_dual_axis_chart(
        market_name: str,
        data: pd.DataFrame,
        price_series: str = 'price_feed',
        rental_series: str = 'rental_price_feed'
    ):
    """
    Plot a price series and a rental series for one market on two y-axes.

    The price series is drawn against the left axis and the rental series against a
    right-hand axis overlaid on it. The figure is written as a PNG to
    ``../../graphics/<price_series>/<YYYY-MM-DD>/<market_name>_<price_series>.png``
    and then displayed. Uses the module-level ``labs_logo_dict`` for the logo.

    :param market_name: Label used in the chart title and the output file name.
    :param data: Frame with a ``date`` column plus the two series columns.
    :param price_series: Column plotted on the left axis; must be a key of ``series_format``.
    :param rental_series: Column plotted on the right axis; must be a key of ``series_format``.
    :raises KeyError: If a series name has no entry in ``series_format`` or a column
        is missing from ``data``.
    """

    # Display label for each supported series.
    series_format = {
        'price_feed': 'Price per Square Foot ($)',
        'rental_price_feed': 'Rental Price per Square Foot ($)',
    }

    HEIGHT = 900
    WIDTH = 1600

    fig = go.Figure()

    # Add primary y-axis trace for price series
    fig.add_trace(go.Scatter(
        x=data['date'],
        y=data[price_series],
        mode='lines',
        line=dict(width=2, color='#FFFFFF'),  # White color for price series
        name=series_format[price_series],
    ))

    # Add secondary y-axis trace for rental price series
    fig.add_trace(go.Scatter(
        x=data['date'],
        y=data[rental_series],
        mode='lines',
        line=dict(width=2, color='#0000FF'),  # Blue color for rental series
        name=series_format[rental_series],
        yaxis='y2'
    ))

    fig.add_layout_image(labs_logo_dict)

    fig.update_layout(
        margin=dict(l=0, r=0, t=110, b=0),
        height=HEIGHT,
        width=WIDTH,
        title={
            'text': f'{series_format[price_series]} and {series_format[rental_series]}: {market_name}',
            'y': 0.99,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=28, color='#FFFFFF'),
        },
        plot_bgcolor='#000000',
        paper_bgcolor='#000000',
        font=dict(color='#FFFFFF'),
        xaxis=dict(
            title_text='',
            showgrid=False,
            tickangle=-45,
            tickfont=dict(size=14),
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        yaxis=dict(
            title_text=series_format[price_series],
            showgrid=True,
            gridwidth=0.5,
            gridcolor='rgba(255, 255, 255, 0.2)',
            tickfont=dict(size=14),
            tickprefix='$',
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        yaxis2=dict(
            title_text=series_format[rental_series],
            showgrid=False,
            tickfont=dict(size=14),
            tickprefix='$',
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1,
            overlaying='y',
            side='right',
            tickformat=".2f"  # Show two decimals on the rental axis
        ),
        hovermode='x unified',
        hoverlabel=dict(
            bgcolor='#1F1F1F',
            font_size=14,
            font_family="Rockwell"
        ),
        legend=dict(
            x=0.95,  # Position legend in the bottom right corner
            y=0.01,
            xanchor='right',
            yanchor='bottom',
            font=dict(size=14, color='#FFFFFF'),
            bgcolor='rgba(0, 0, 0, 0.5)'
        )
    )

    # One output folder per series and per day; exist_ok avoids the
    # check-then-create race of testing for the folder first.
    root = f'../../graphics/{price_series}'
    timestamp = datetime.now().strftime('%Y-%m-%d')
    path = os.path.join(root, timestamp)
    os.makedirs(path, exist_ok=True)

    fig.write_image(os.path.join(path, f'{market_name}_{price_series}.png'), width=WIDTH, height=HEIGHT)
    fig.show()



# Chart the national series.
usa_rows = df['name'] == 'United States Of America'
build_dual_axis_chart('USA', df[usa_rows])


In [None]:
def fetch_mortgage_rates(api_key):
    """
    Download the MORTGAGE30US series from the FRED API as a DataFrame.

    :param api_key: FRED API key.
    :return: One row per observation, with ``date`` parsed to datetimes and ``value``
        converted to numbers (NaN where the value is not numeric).
    :raises RuntimeError: If the request does not return HTTP 200, the body is not
        valid JSON, or the payload has no ``observations`` entry.
    """
    # Passing the query through ``params`` lets requests URL-encode the key, and the
    # timeout keeps an unresponsive server from hanging the notebook.
    response = requests.get(
        'https://api.stlouisfed.org/fred/series/observations',
        params={'series_id': 'MORTGAGE30US', 'api_key': api_key, 'file_type': 'json'},
        timeout=30,
    )

    # Check the status before parsing: an error response is not guaranteed to be JSON.
    if response.status_code != 200:
        raise RuntimeError(f"Error fetching data from FRED API (HTTP {response.status_code})")

    try:
        data = response.json()
    except ValueError as err:
        raise RuntimeError("Error fetching data from FRED API: response is not valid JSON") from err

    if 'observations' not in data:
        raise RuntimeError("Error fetching data from FRED API")

    # Convert the JSON data to a DataFrame
    df = pd.DataFrame(data['observations'])
    df['date'] = pd.to_datetime(df['date'])
    # FRED marks missing observations with the string "."; coerce those to NaN
    # instead of failing the whole conversion.
    df['value'] = pd.to_numeric(df['value'], errors='coerce')

    return df

# Pull the mortgage-rate series using the FRED key from the environment.
FRED_API_KEY = os.environ.get('FRED_API_KEY')
mortgage_rates_df = fetch_mortgage_rates(api_key=FRED_API_KEY)


In [None]:
def build_dual_axis_chart(
        market_name: str,
        data: pd.DataFrame,
        mortgage_data: pd.DataFrame,
        price_series: str = 'price_feed',
        rental_series: str = 'rental_price_feed',
        mortgage_series: str = 'mortgage_rate'
    ):
    """
    Plot price and rental series for one market over a backdrop of mortgage-rate bars.

    The price series uses the left axis, the rental series a right-hand axis, and the
    mortgage rate a third, unlabeled axis. The figure is written as a PNG to
    ``../../graphics/<price_series>/<YYYY-MM-DD>/<market_name>_<price_series>.png``
    and then displayed. Uses the module-level ``labs_logo_dict`` for the logo.

    :param market_name: Label used in the chart title and the output file name.
    :param data: Frame with a ``date`` column plus the price and rental columns.
    :param mortgage_data: Frame with a ``date`` column plus the mortgage column.
    :param price_series: Column plotted on the left axis; must be a key of ``series_format``.
    :param rental_series: Column plotted on the right axis; must be a key of ``series_format``.
    :param mortgage_series: Column drawn as bars; must be a key of ``series_format``.
    :raises KeyError: If a series name has no entry in ``series_format`` or a column
        is missing from its frame.
    """

    # Display label for each supported series.
    series_format = {
        'price_feed': 'Price per Square Foot ($)',
        'rental_price_feed': 'Rental Price per Square Foot ($)',
        'mortgage_rate': 'Mortgage Rate (%)',
    }

    HEIGHT = 900
    WIDTH = 1600

    # Keep the bars in the lower half of the plot by giving their axis twice the
    # span of the data. The values themselves stay untouched so the hover label
    # reports the real rate. Falls back to auto-ranging when there is no positive peak.
    mortgage_peak = mortgage_data[mortgage_series].max()
    if pd.notna(mortgage_peak) and mortgage_peak > 0:
        mortgage_axis_range = [0, 2 * mortgage_peak]
    else:
        mortgage_axis_range = None

    fig = go.Figure()

    # Add primary y-axis trace for price series
    fig.add_trace(go.Scatter(
        x=data['date'],
        y=data[price_series],
        mode='lines',
        line=dict(width=2, color='#FFFFFF'),  # White color for price series
        name=series_format[price_series],
    ))

    # Add secondary y-axis trace for rental price series
    fig.add_trace(go.Scatter(
        x=data['date'],
        y=data[rental_series],
        mode='lines',
        line=dict(width=2, color='#0000FF'),  # Blue color for rental series
        name=series_format[rental_series],
        yaxis='y2'
    ))

    # Add bar trace for mortgage rates in translucent gray on its own axis
    fig.add_trace(go.Bar(
        x=mortgage_data['date'],
        y=mortgage_data[mortgage_series],
        marker=dict(color='gray', opacity=0.2),
        name=series_format[mortgage_series],
        yaxis='y3'
    ))

    fig.add_layout_image(labs_logo_dict)

    fig.update_layout(
        margin=dict(l=0, r=0, t=110, b=0),
        height=HEIGHT,
        width=WIDTH,
        title={
            'text': f'Rental Rates vs. Home Values against 30-Year Fixed: {market_name}',
            'y': 0.99,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=28, color='#FFFFFF'),
        },
        plot_bgcolor='#000000',
        paper_bgcolor='#000000',
        font=dict(color='#FFFFFF'),
        xaxis=dict(
            title_text='',
            showgrid=False,
            tickangle=-45,
            tickfont=dict(size=14),
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        yaxis=dict(
            title_text=series_format[price_series],
            showgrid=True,
            gridwidth=0.5,
            gridcolor='rgba(255, 255, 255, 0.2)',
            tickfont=dict(size=14),
            tickprefix='$',
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        yaxis2=dict(
            title_text=series_format[rental_series],
            showgrid=False,
            tickfont=dict(size=14),
            tickprefix='$',
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1,
            overlaying='y',
            side='right',
            tickformat=".2f"
        ),
        yaxis3=dict(
            title_text='',
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            overlaying='y',
            side='right',
            anchor='x',
            position=0.9,
            range=mortgage_axis_range,
        ),
        hovermode='x unified',
        hoverlabel=dict(
            bgcolor='#1F1F1F',
            font_size=14,
            font_family="Rockwell"
        ),
        legend=dict(
            x=0.95,  # Position legend in the bottom right corner
            y=0.01,
            xanchor='right',
            yanchor='bottom',
            font=dict(size=14, color='#FFFFFF'),
            bgcolor='rgba(0, 0, 0, 0.5)'
        )
    )

    # One output folder per series and per day; exist_ok avoids the
    # check-then-create race of testing for the folder first.
    # NOTE(review): the file name does not mention the mortgage series, so it replaces
    # any image saved the same day for the same market and price series.
    root = f'../../graphics/{price_series}'
    timestamp = datetime.now().strftime('%Y-%m-%d')
    path = os.path.join(root, timestamp)
    os.makedirs(path, exist_ok=True)

    fig.write_image(os.path.join(path, f'{market_name}_{price_series}.png'), width=WIDTH, height=HEIGHT)
    fig.show()


# Chart the national series against mortgage rates from the start of 2020 onward.
mortgage_since_2020 = mortgage_rates_df.loc[mortgage_rates_df['date']>='1/1/2020']
build_dual_axis_chart(
    'USA',
    df[df['name'] == 'United States Of America'],
    mortgage_since_2020.rename(columns={'date': 'date', 'value': 'mortgage_rate'}),
)


In [None]:
# Chronological order first, then pull out the national rows and the Boston rows.
df = df.sort_values('date')
pf = df[df['name'] == 'United States Of America']
r = df[df['name'] == 'Boston City']

In [None]:
# Take a closer look at the national series: price and rental values ordered by date.
is_usa = df['name'] == 'United States Of America'
s = df.loc[is_usa, ['date', 'price_feed', 'rental_price_feed']].sort_values('date')
# s = pd.merge(pf[['date', 'price_feed']], r[['date', 'rental_price_feed']], on='date', how='inner')
ts_1, ts_2 = s['price_feed'], s['rental_price_feed']

In [None]:
def normalize_time_series(time_series):
    """Return the series shifted so that its mean is 0 (the scale is left unchanged)."""
    return time_series - np.mean(time_series)

# Center both series on zero before the entropy and lag analysis.
time_series_1, time_series_2 = map(normalize_time_series, (ts_1, ts_2))

In [None]:
def maxdist(x_i, x_j):
    """Return the Chebyshev distance: the largest absolute element-wise difference."""
    gaps = np.absolute(x_i - x_j)
    return np.amax(gaps)

def phi(m, r, time_series_1, time_series_2, epsilon=1e-10):
    """
    Calculate the phi value for given embedding dimension m and tolerance r.

    Every length-``m`` window of the first series is compared with every length-``m``
    window of the second series. A pair matches when its Chebyshev distance (largest
    absolute element-wise difference) is at most ``r``. The result is the mean, over
    the windows of the first series, of the log of the matching fraction.

    :param m: Embedding dimension (window length).
    :param r: Absolute tolerance for two windows to match.
    :param time_series_1: First time series (array-like).
    :param time_series_2: Second time series (array-like), at least as long as the first.
    :param epsilon: Small value added to each matching fraction to avoid log(0).
    :return: The phi value.
    :raises ValueError: If the first series is shorter than ``m`` or the second series
        is shorter than the first.
    """
    # Plain arrays make the slicing below purely positional, whatever index the input carries.
    series_1 = np.asarray(time_series_1, dtype=float)
    series_2 = np.asarray(time_series_2, dtype=float)

    n_windows = len(series_1) - m + 1
    if n_windows < 1:
        raise ValueError(f"time_series_1 has {len(series_1)} points, fewer than m={m}")
    if len(series_2) < len(series_1):
        raise ValueError("time_series_2 must be at least as long as time_series_1")

    X = np.array([series_1[i:i + m] for i in range(n_windows)])
    Y = np.array([series_2[i:i + m] for i in range(n_windows)])

    # For each window of X, measure the Chebyshev distance to all windows of Y in one
    # vectorized step; the mean of the boolean mask is the matching fraction.
    C = np.array([np.mean(np.max(np.abs(Y - window), axis=1) <= r) for window in X])

    C += epsilon  # Add a small value to avoid log(0)
    return np.sum(np.log(C)) / n_windows

def cross_apen(time_series_1, time_series_2, m, r, epsilon=1e-10):
    """
    Calculate the Cross Approximate Entropy between two time series.

    Computed as ``phi`` at embedding dimension ``m`` minus ``phi`` at ``m + 1``, with
    the tolerance scaled by the standard deviation of the first series.

    :param time_series_1: First time series (array-like).
    :param time_series_2: Second time series (array-like).
    :param m: Embedding dimension.
    :param r: Tolerance, as a fraction of the standard deviation of ``time_series_1``.
    :param epsilon: Small value to avoid log(0).
    :return: Cross-ApEn value.
    """
    # Scale into a new local rather than ``r *= ...`` so that a caller-supplied array
    # tolerance is never modified in place.
    tolerance = r * np.std(time_series_1)

    phi_m = phi(m, tolerance, time_series_1, time_series_2, epsilon)
    phi_m_plus_1 = phi(m + 1, tolerance, time_series_1, time_series_2, epsilon)
    return phi_m - phi_m_plus_1

# Parameter grid: window lengths (rows) against tolerance fractions (columns).
embedding_dimensions = [30, 60, 90]
tolerances = [0.1, 0.2, 0.3]

cross_apen_matrix = np.zeros((len(embedding_dimensions), len(tolerances)))

# Fill the grid one row (embedding dimension) at a time.
for row, m in enumerate(embedding_dimensions):
    cross_apen_matrix[row] = [
        cross_apen(time_series_1, time_series_2, m, tolerance)
        for tolerance in tolerances
    ]


# m = 60 # Embedding dimension
# r = 0.2  # Tolerance (20% of the standard deviation)

# cross_apen_value = cross_apen(time_series_1, time_series_2, m, r)
# print("Cross-ApEn:", cross_apen_value)


In [None]:
# Plot the Cross-ApEn grid as a colour-coded matrix
fig, ax = plt.subplots(figsize=(10, 8))
cax = ax.matshow(cross_apen_matrix, cmap='viridis')

# Add color bar
fig.colorbar(cax)

# Pin one tick to every matrix column/row before labelling it, so each label sits on
# its own cell regardless of which tick positions matplotlib would pick by default.
ax.set_xticks(range(len(tolerances)))
ax.set_xticklabels([str(r) for r in tolerances])
ax.set_yticks(range(len(embedding_dimensions)))
ax.set_yticklabels([str(m) for m in embedding_dimensions])
ax.set_xlabel('Tolerance (r)')
ax.set_ylabel('Embedding Dimension (m)')
ax.set_title('Cross-ApEn Values')

# Display the plot
plt.show()

In [None]:
def calculate_cross_correlation(ts1, ts2, max_lag):
    """
    Correlate two series at every integer lag from -max_lag to +max_lag.

    At a positive lag, element ``t`` of ``ts1`` is paired with element ``t + lag`` of
    ``ts2``; at a negative lag the roles are reversed. A lag that leaves fewer than
    two overlapping points gets a correlation of 0.

    :param ts1: First series (sliceable, e.g. array or Series).
    :param ts2: Second series, same length as the first.
    :param max_lag: Largest absolute lag to evaluate.
    :return: Tuple ``(lags, correlations)``: the lag array and a list with one
        correlation per lag.
    """
    def _corr_at(lag):
        # Trim opposite ends of the two series so the remaining parts line up.
        if lag > 0:
            left, right = ts1[:-lag], ts2[lag:]
        elif lag < 0:
            left, right = ts1[-lag:], ts2[:lag]
        else:
            left, right = ts1, ts2

        if len(left) > 1 and len(right) > 1:
            return np.corrcoef(left, right)[0, 1]
        return 0

    lags = np.arange(-max_lag, max_lag + 1)
    return lags, [_corr_at(lag) for lag in lags]

# Scan lags of up to 720 steps in either direction.
max_lag = 720
lags, cross_corr = calculate_cross_correlation(time_series_1, time_series_2, max_lag)

# Lag at which the correlation peaks.
max_corr_lag = lags[np.argmax(cross_corr)]

# Correlation against lag, with the peak marked by a dashed line.
plt.figure(figsize=(10, 6))
plt.plot(lags, cross_corr, marker='o')
plt.axvline(x=max_corr_lag, color='r', linestyle='--', label=f'Max Correlation Lag: {max_corr_lag}')
plt.title('Cross-Correlation between Time Series (US Housing vs. Rents)')
plt.xlabel('Lag')
plt.ylabel('Cross-Correlation')
plt.legend()
plt.show()

# Report which series leads: a positive lag means series 1 moves first.
if max_corr_lag == 0:
    message = "The time series are synchronized with no lag."
elif max_corr_lag > 0:
    message = f"Time series 1 leads time series 2 by {max_corr_lag} time units."
else:
    message = f"Time series 2 leads time series 1 by {-max_corr_lag} time units."
print(message)

In [None]:
# Period-over-period percentage changes of the national price and rental series.
ticker1, ticker2 = 'price_feed', 'rental_price_feed'
returns = s.loc[:, [ticker1, ticker2]].pct_change().dropna()
returns

In [None]:

# Wide frames: one row per date, one column per market.
data = pd.pivot_table(feeds, index='date', columns='name', values='price_feed')
data_rentals = pd.pivot_table(rentals, index='date', columns='rental_name', values='rental_price_feed')
data_rentals.head()

In [None]:
# Order the rows chronologically so pct_change compares each date with the one before it.
data = data.sort_index()
# Simple returns per market; dropna removes every date on which any market has no value.
returns = data.pct_change().dropna()
# Show (number of dates kept, number of markets).
returns.shape

In [None]:
# Expected returns and covariance estimated from the price levels, with 365 periods per year.
# NOTE(review): exp_cov is presumably an exponentially weighted covariance — confirm in the PyPortfolioOpt docs.
mu = expected_returns.mean_historical_return(data, frequency=365)
S = risk_models.exp_cov(data, frequency=365)

# Market-neutral optimisation for a 20% target return, with each weight bounded to [-1, 1].
# The max-Sharpe alternative is left commented out below.
ef = EfficientFrontier(mu, S, weight_bounds=(-1, 1))
weights = ef.efficient_return(target_return=0.2, market_neutral=True)
# weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()

print("Optimized Weights:", cleaned_weights)

# Calculate the performance of the optimized portfolio
performance = ef.portfolio_performance(verbose=True)

In [None]:
# Display the cleaned portfolio weights.
cleaned_weights

In [None]:
# Annualize the daily figures on a 365-day year (the feeds are daily series).
trading_days_per_year = 365
daily_mean = returns.mean()
daily_std = returns.std()
annualized_return = (1 + daily_mean) ** trading_days_per_year - 1
annualized_std = daily_std * np.sqrt(trading_days_per_year)

# Sharpe ratio per market, against an assumed 1% risk-free rate.
risk_free_rate = 0.01
sharpe_ratio = (annualized_return - risk_free_rate) / annualized_std

for label, figures in (
    ("Annualized Return:", annualized_return),
    ("Annualized Std Dev:", annualized_std),
    ("Sharpe Ratio:", sharpe_ratio),
):
    print(label, figures)


In [None]:
# Calculate cumulative returns of the optimized portfolio.
# The weights are matched to the return columns by market name, so the result does not
# depend on the dictionary and the columns being in the same order.
weight_by_market = pd.Series(cleaned_weights)
portfolio_returns = (returns * weight_by_market).sum(axis=1)
cumulative_returns = (1 + portfolio_returns).cumprod() - 1

# Plot the cumulative returns
plt.figure(figsize=(10, 6))
plt.plot(cumulative_returns, label='Optimized Portfolio')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.title('Cumulative Returns of the Optimized Portfolio')
plt.legend()
plt.show()

In [None]:
# Compound each market's returns the same way as the portfolio, (1 + r).cumprod() - 1,
# so the individual curves and the portfolio curve are directly comparable.
market_cumulative = (1 + returns).cumprod() - 1
pd.merge(market_cumulative.reset_index(), cumulative_returns.reset_index(name='Weighted Portfolio'), on='date', how='inner').plot(x='date', figsize=(15, 10))

In [None]:

def build_chart(market_name: str, data: pd.DataFrame, pf_type: str = 'price_feed'):
    """
    Plot cumulative returns of individual markets against the weighted portfolio.

    Every column other than ``date`` and ``Weighted Portfolio`` becomes one translucent
    line; the portfolio is drawn on top in red. Values are multiplied by 100 and shown
    as percentages. The figure is written as a PNG to
    ``../../graphics/<pf_type>/<YYYY-MM-DD>/<market_name>_<pf_type>.png`` and then displayed.

    :param market_name: Label used in the chart title and the output file name.
    :param data: Frame with a ``date`` column, a ``Weighted Portfolio`` column and one
        column per market.
    :param pf_type: Feed label used only to build the output folder and file name.
    :raises KeyError: If ``data`` has no ``date`` or ``Weighted Portfolio`` column.
    """

    HEIGHT = 900
    WIDTH = 1600

    fig = go.Figure()

    # Plotly's default qualitative palette; colors repeat once the palette is exhausted.
    colors = px.colors.qualitative.Plotly

    # Add trace for the individual asset cumulative returns
    for i, column in enumerate(data.columns):
        if column != 'date' and column != 'Weighted Portfolio':
            color = colors[i % len(colors)]  # Cycle through the color list
            fig.add_trace(go.Scatter(
                x=data['date'],
                y=data[column] * 100,  # Convert to percentage
                mode='lines',
                line=dict(width=2, color=color),
                opacity=0.7,
                name=column
            ))

    # Add the logo image (local settings: slightly larger and lower than the shared ones)
    labs_logo_lookup = {
        'blue': 'https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
        'white': 'https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg'
    }
    labs_logo_dict = dict(
        source=labs_logo_lookup['white'],
        xref="paper",
        yref="paper",
        x=0.5,
        y=1.01,
        sizex=0.2,
        sizey=0.2,
        xanchor="center",
        yanchor="bottom"
    )
    fig.add_layout_image(labs_logo_dict)

    # Add trace for the weighted portfolio cumulative returns
    fig.add_trace(go.Scatter(
        x=data['date'],
        y=data['Weighted Portfolio'] * 100,  # Convert to percentage
        mode='lines',
        line=dict(width=3, color='red'),
        opacity=1.0,
        name='Weighted Portfolio'
    ))

    fig.update_layout(
        margin=dict(l=0, r=0, t=110, b=0),
        height=HEIGHT,
        width=WIDTH,
        title={
            'text': f'Cumulative Returns: {market_name}',
            'y': 0.99,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=28, color='#FFFFFF'),
        },
        plot_bgcolor='#000000',
        paper_bgcolor='#000000',
        font=dict(color='#FFFFFF'),
        xaxis=dict(
            title_text='',
            showgrid=False,
            tickangle=-45,
            tickfont=dict(size=14),
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        yaxis=dict(
            title_text='Cumulative Returns (%)',
            showgrid=True,
            gridwidth=0.5,
            gridcolor='rgba(255, 255, 255, 0.2)',
            tickfont=dict(size=14),
            ticksuffix='%',
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        hovermode='x unified',
        hoverlabel=dict(
            bgcolor='#1F1F1F',
            font_size=14,
            font_family="Rockwell"
        ),
        legend=dict(
            x=0,
            y=1,
            traceorder="normal",
            font=dict(
                size=12,
                color="white"
            ),
            bgcolor="rgba(0,0,0,0)"
        )
    )

    # One output folder per feed type and per day; exist_ok avoids the
    # check-then-create race of testing for the folder first.
    root = f'../../graphics/{pf_type}'
    timestamp = datetime.now().strftime('%Y-%m-%d')
    path = os.path.join(root, timestamp)
    os.makedirs(path, exist_ok=True)

    # Save the plot
    fig.write_image(os.path.join(path, f'{market_name}_{pf_type}.png'), width=WIDTH, height=HEIGHT)

    # Show the plot
    fig.show()

# Chart every market against the weighted portfolio.
# Each market's returns are compounded the same way as the portfolio,
# (1 + r).cumprod() - 1, so all curves on the chart are directly comparable.
market_cumulative = (1 + returns).cumprod() - 1
merged_data = pd.merge(market_cumulative.reset_index(), cumulative_returns.reset_index(name='Weighted Portfolio'), on='date', how='inner')
build_chart("Individual Markets vs. Weighted Portfolio", merged_data)


In [None]:
# Seasonality analysis
def fit_prophet_model(data):
    """
    Fit a Prophet model with only yearly seasonality enabled and predict in-sample.

    :param data: Frame passed straight to ``Prophet.fit``.
    :return: Tuple ``(model, forecast)``, where ``forecast`` covers the fitted dates
        only (no future periods are added).
    """
    model = Prophet(daily_seasonality=False, weekly_seasonality=False, yearly_seasonality=True)
    model.fit(data)
    in_sample_dates = model.make_future_dataframe(periods=0)
    return model, model.predict(in_sample_dates)

def seasonality_strength(forecast):
    """
    Return the share of the fitted variance carried by the yearly component.

    Both variances use the same (population, ddof=0) estimator, so the ratio is not
    skewed by mixing numpy's and pandas' different default degrees of freedom.

    :param forecast: Frame with ``yearly`` and ``yhat`` columns.
    :return: Variance of ``yearly`` divided by variance of ``yhat``.
    """
    seasonal_variance = np.var(forecast['yearly'].values)
    total_variance = np.var(forecast['yhat'].values)
    return seasonal_variance / total_variance

# Names of the rental markets; each one is a column of the wide rental frame.
time_series_list = data_rentals.columns.tolist()

# Seasonality strength per market.
seasonality_strengths = {}

for market_name in time_series_list:
    # Prophet expects the dates in 'ds' and the values in 'y'.
    history = data_rentals[market_name].reset_index(name='y').rename(columns={'date': 'ds'})
    model, forecast = fit_prophet_model(history)
    seasonality_strengths[f'{market_name}'] = seasonality_strength(forecast)

# One row per market, strongest seasonality first.
seasonality_df = pd.DataFrame.from_dict(
    seasonality_strengths, orient='index', columns=['Seasonality Strength']
).sort_values(by='Seasonality Strength', ascending=False)

# Horizontal bars; the y-axis is inverted so the first row is drawn at the top.
plt.figure(figsize=(10, 6))
plt.barh(seasonality_df.index, seasonality_df['Seasonality Strength'], color='skyblue')
plt.title('Price Feed Rental Ranking of Time Series by Seasonality Strength')
plt.xlabel('Seasonality Strength')
plt.ylabel('Time Series')
plt.gca().invert_yaxis()
plt.show()