# Welcome to the Lab 🥼🧪

Exploratory data analysis of price feeds, portfolio construction, seasonality, etc. 

In [None]:
import os
import sys
import json
import requests
import subprocess
from datetime import datetime, timedelta
from urllib.request import urlopen

import parcllabs
import numpy as np
import pandas as pd
import seaborn as sns
from prophet import Prophet
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from parcllabs import ParclLabsClient
from pypfopt import EfficientFrontier, risk_models, expected_returns

# Read the Parcl Labs API key from the environment (None when the variable is
# unset) and report which version of the SDK this notebook is running against.
api_key = os.environ.get('PARCL_LABS_API_KEY')
print(f"Parcl Labs Version: {parcllabs.__version__}")

In [None]:
# Initialize the Parcl Labs client with the API key read from the environment.
# Every API request in the cells below goes through this object.
client = ParclLabsClient(api_key)

In [None]:
# Retrieve the markets that are available to trade on the Parcl Exchange.
# The search is sorted by PARCL_EXCHANGE_MARKET in descending order so that
# exchange-listed markets come first (presumably a boolean-like flag — verify
# against the API docs).
# NOTE(review): nothing here filters on that flag; if fewer than 15 markets are
# exchange-listed the remaining rows would presumably be non-exchange markets.
market_df = client.search_markets.retrieve(
    sort_by='PARCL_EXCHANGE_MARKET',
    sort_order='DESC',
    as_dataframe=True,
    params={'limit': 15},  # request up to 15 rows; 14 markets were reported available when this was written
)

In [None]:
# Retrieve the price feed of every market selected above, going back to
# START_DATE (2010-01-01).
# WARNING: with auto-pagination ~6k credits can be used by a single Parcl price
# feed. Change START_DATE to a more recent date to reduce the credits used.
START_DATE = '2010-01-01'
feeds = client.price_feed.retrieve_many(
    parcl_ids=market_df['parcl_id'].tolist(),
    start_date=START_DATE,
    as_dataframe=True,
    params={'limit': 1000},  # expand the limit to 1000, these are daily series
    auto_paginate=True, # keep requesting pages until all the data is retrieved
)

In [None]:
# Attach each market's name to its price-feed rows. The left join on parcl_id
# keeps every feed row even if a name is missing.
feeds = feeds.merge(market_df[['parcl_id', 'name']], on='parcl_id', how='left')

In [None]:
# Convert the date column to pandas datetimes so the pivoted frame is indexed by
# real timestamps (needed for the year-based slicing below).
feeds['date'] = pd.to_datetime(feeds['date'])
# Reshape long -> wide: one row per date, one column per market name, cell
# values are the price_feed levels.
data = feeds.pivot(index='date', columns='name', values='price_feed')
# Period-over-period percentage changes from 2017 onwards. dropna() removes
# every row containing a NaN, which includes the first row (no prior price).
market_returns = data.loc['2017':].pct_change().dropna()

In [None]:
# Display the market returns table as the cell output.
market_returns

In [None]:
import cvxpy as cp

# Split data into in-sample (2010 through 2016) and out-of-sample (2017 onwards).
# Slicing a DatetimeIndex with a year string is inclusive of that whole year, so
# the in-sample slice has to stop at '2016'; stopping at '2017' would put all of
# 2017 into both sets and leak out-of-sample prices into the initial fit.
in_sample_data = data.loc[:'2016']
out_of_sample_data = data.loc['2017':]

# Accumulators filled by rolling_window_optimization: one record per rebalance
# window (weights + performance) and one record per traded day (portfolio return).
performance_records = []
daily_performance = []

def optimize_portfolio(data, target_return=0.2):
    """Optimize a market-neutral portfolio and rescale it to unit gross exposure.

    Expected returns (mean historical) and an exponentially weighted covariance
    matrix are estimated from ``data`` with PyPortfolioOpt, an efficient-return
    optimization is solved with weights bounded to [-1, 1], and the cleaned
    weights are divided by the sum of their absolute values.

    Args:
        data: Price DataFrame with one column per asset; passed unchanged to
            ``expected_returns.mean_historical_return`` and
            ``risk_models.exp_cov``.
        target_return: Target return handed to
            ``EfficientFrontier.efficient_return``.

    Returns:
        A ``(normalized_weights, performance)`` tuple. ``normalized_weights``
        maps asset name to weight, with absolute weights summing to 1.
        ``performance`` is whatever ``ef.portfolio_performance`` returns for
        those weights (presumably expected return, volatility and Sharpe
        ratio — confirm against the PyPortfolioOpt docs). The summary is also
        printed because ``verbose=True``.

    Raises:
        ValueError: If every cleaned weight is zero, so the portfolio cannot be
            normalized.
    """
    # frequency=365 annualizes assuming one observation per calendar day.
    mu = expected_returns.mean_historical_return(data, frequency=365)
    S = risk_models.exp_cov(data, frequency=365)
    ef = EfficientFrontier(mu, S, weight_bounds=(-1, 1))
    # Solving stores the raw weights on `ef`; only the cleaned version is used.
    ef.efficient_return(target_return=target_return, market_neutral=True)
    cleaned_weights = ef.clean_weights()

    # Normalize to unit gross exposure (sum of |w| == 1). A market-neutral
    # portfolio has net weight ~0, so dividing by the plain sum is not an option.
    sum_abs_weights = sum(abs(w) for w in cleaned_weights.values())
    if sum_abs_weights == 0:
        # Dividing by zero here would either raise or fill the weights with
        # NaN/inf that silently poison every downstream return.
        raise ValueError("Cannot normalize portfolio: all optimized weights are zero")
    normalized_weights = {k: v / sum_abs_weights for k, v in cleaned_weights.items()}

    # Report performance for the normalized portfolio rather than the raw solution.
    ef.weights = list(normalized_weights.values())
    performance = ef.portfolio_performance(verbose=True)

    return normalized_weights, performance

# Optimize the initial weights using in-sample data.
# optimize_portfolio also prints the performance summary (it calls
# portfolio_performance with verbose=True).
initial_weights, initial_performance = optimize_portfolio(in_sample_data)

# Walk-forward backtest: re-optimize the weights once per `window_size`-day window
def rolling_window_optimization(out_of_sample_data, initial_weights, window_size=30):
    """Re-optimize the portfolio once per window and record its daily returns.

    For each consecutive window of ``window_size`` calendar days, weights are
    estimated from the module-level ``data`` frame using only prices dated
    strictly BEFORE the window starts, then applied to the daily returns inside
    the window. Fitting on prices from the window being scored would be
    look-ahead bias and would overstate performance.

    Results are appended to the module-level lists ``daily_performance`` (one
    dict per traded day) and ``performance_records`` (one dict per window,
    dated at the window end). Nothing is returned.

    Args:
        out_of_sample_data: Price DataFrame indexed by date, one column per
            asset, covering the period to trade.
        initial_weights: Asset -> weight mapping, used only as a fallback when
            fewer than two price rows precede a window, so no fit is possible.
            In that case the window's ``performance`` is recorded as ``None``.
        window_size: Window length in calendar days. A trailing window holding
            fewer than ``window_size`` rows is not traded.
    """
    if out_of_sample_data.empty:
        return

    current_weights = initial_weights
    end_date = out_of_sample_data.index[-1]

    # Compute returns once over the whole period. Calling pct_change() per
    # window would turn the first row of every window into NaN and silently
    # drop one trading day per window.
    all_returns = out_of_sample_data.pct_change()

    current_date = out_of_sample_data.index[0]
    while current_date <= end_date:
        window_end_date = min(current_date + pd.DateOffset(days=window_size), end_date)
        out_window_data = out_of_sample_data.loc[current_date:window_end_date]

        if len(out_window_data) < window_size:
            break

        # Only information available before the window opens may be used.
        history = data.loc[data.index < current_date]
        if len(history) >= 2:  # at least two prices are needed to form a return
            current_weights, performance = optimize_portfolio(history)
        else:
            performance = None

        # Rows containing a NaN (e.g. the very first day) are skipped.
        window_returns = all_returns.loc[current_date:window_end_date].dropna()
        for date, returns in window_returns.iterrows():
            daily_return = sum(
                weight * returns[asset] for asset, weight in current_weights.items()
            )
            daily_performance.append({'date': date, 'daily_return': daily_return})

        performance_records.append({
            'date': window_end_date,
            **current_weights,
            'performance': performance,
        })

        current_date = window_end_date + pd.DateOffset(days=1)

# Execute the backtest; it fills performance_records and daily_performance
rolling_window_optimization(out_of_sample_data, initial_weights)

# Tabulate the collected records
daily_performance_df = pd.DataFrame(daily_performance)
performance_df = pd.DataFrame(performance_records)

# Keep only the per-market weight columns, indexed by date, for plotting
performance_df = performance_df.set_index('date')
performance_df = performance_df.drop(columns='performance')

# One line per market showing how its weight changes over time
plt.figure(figsize=(24, 12))
for column in performance_df:
    plt.plot(performance_df.index, performance_df[column], label=column)

plt.xlabel('Date')
plt.ylabel('Weight')
plt.title('Asset Weights Over Time')
plt.legend()
plt.show()


In [None]:
# Index the daily portfolio returns by date (done in place).
# NOTE: re-running this cell on its own fails because 'date' is no longer a
# column after the first run.
daily_performance_df.set_index('date', inplace=True)

In [None]:
import matplotlib.pyplot as plt

# Compound the daily portfolio returns into a cumulative return series
growth_factors = 1 + daily_performance_df['daily_return']
cumulative_returns = growth_factors.cumprod() - 1

# Draw the cumulative return curve
plt.figure(figsize=(10, 6))
plt.plot(cumulative_returns, label='Optimized Portfolio')
plt.title('Cumulative Returns of the Optimized Portfolio')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.show()

In [None]:

# Compare every market's cumulative return with the weighted portfolio.
# Market returns are compounded (cumprod of 1 + r, minus 1) exactly like
# `cumulative_returns`, so all lines are on the same basis. A plain cumsum of
# simple returns is not a cumulative return and is not comparable.
pd.merge(
    ((1 + market_returns).cumprod() - 1).reset_index(),
    cumulative_returns.reset_index(name='Weighted Portfolio'),
    on='date',
    how='inner',
).plot(x='date', figsize=(15, 10))

In [None]:

def build_chart(market_name: str, data: pd.DataFrame, pf_type: str = 'price_feed'):
    """Plot cumulative returns per market against the weighted portfolio.

    The figure is written as a PNG to
    ``../../graphics/<pf_type>/<YYYY-MM-DD>/<market_name>_<pf_type>.png``
    (relative to the current working directory; the directory is created if
    needed) and then displayed.

    Args:
        market_name: Text used in the chart title and in the output file name.
        data: Frame with a ``'date'`` column, a ``'Weighted Portfolio'`` column
            and one further column per individual series. Values are
            fractional returns; they are multiplied by 100 for display.
        pf_type: Name of the output sub-directory and file-name suffix.

    Raises:
        KeyError: If ``data`` lacks the ``'date'`` or ``'Weighted Portfolio'``
            column.
    """
    HEIGHT = 900
    WIDTH = 1600

    fig = go.Figure()

    # Plotly's default qualitative palette; colours repeat once there are more
    # columns than palette entries.
    colors = px.colors.qualitative.Plotly

    # Add trace for the individual asset cumulative returns
    for i, column in enumerate(data.columns):
        if column in ('date', 'Weighted Portfolio'):
            continue
        fig.add_trace(go.Scatter(
            x=data['date'],
            y=data[column] * 100,  # Convert to percentage
            mode='lines',
            line=dict(width=2, color=colors[i % len(colors)]),
            opacity=0.7,
            name=column
        ))

    # Add the logo image (the white variant suits the black background)
    labs_logo_lookup = {
        'blue': 'https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
        'white': 'https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg'
    }
    labs_logo_dict = dict(
        source=labs_logo_lookup['white'],
        xref="paper",
        yref="paper",
        x=0.5,
        y=1.01,
        sizex=0.2,
        sizey=0.2,
        xanchor="center",
        yanchor="bottom"
    )
    fig.add_layout_image(labs_logo_dict)

    # Add trace for the weighted portfolio cumulative returns (added last and
    # drawn thicker so it stands out from the individual markets)
    fig.add_trace(go.Scatter(
        x=data['date'],
        y=data['Weighted Portfolio'] * 100,  # Convert to percentage
        mode='lines',
        line=dict(width=3, color='red'),
        opacity=1.0,
        name='Weighted Portfolio'
    ))

    # NOTE: a second layout image pointing at the template placeholder
    # "path_to_your_logo_image.png" used to be added here. That file does not
    # exist, so it was dropped; the real logo is added above.

    fig.update_layout(
        margin=dict(l=0, r=0, t=110, b=0),
        height=HEIGHT,
        width=WIDTH,
        title={
            'text': f'Cumulative Returns: {market_name}',
            'y': 0.99,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=28, color='#FFFFFF'),
        },
        plot_bgcolor='#000000',
        paper_bgcolor='#000000',
        font=dict(color='#FFFFFF'),
        xaxis=dict(
            title_text='',
            showgrid=False,
            tickangle=-45,
            tickfont=dict(size=14),
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        yaxis=dict(
            title_text='Cumulative Returns (%)',
            showgrid=True,
            gridwidth=0.5,
            gridcolor='rgba(255, 255, 255, 0.2)',
            tickfont=dict(size=14),
            ticksuffix='%',
            zeroline=False,
            linecolor='rgba(255, 255, 255, 0.7)',
            linewidth=1
        ),
        hovermode='x unified',
        hoverlabel=dict(
            bgcolor='#1F1F1F',
            font_size=14,
            font_family="Rockwell"
        ),
        legend=dict(
            x=0,
            y=1,
            traceorder="normal",
            font=dict(
                size=12,
                color="white"
            ),
            bgcolor="rgba(0,0,0,0)"
        )
    )

    # Output goes into a per-day folder; exist_ok avoids the check-then-create race
    root = f'../../graphics/{pf_type}'
    timestamp = datetime.now().strftime('%Y-%m-%d')
    path = os.path.join(root, timestamp)
    os.makedirs(path, exist_ok=True)

    # Save the plot (static export presumably needs an image backend such as
    # kaleido installed — confirm for the plotly version in use)
    fig.write_image(os.path.join(path, f'{market_name}_{pf_type}.png'), width=WIDTH, height=HEIGHT)

    # Show the plot
    fig.show()

# Example usage
# `market_returns` (daily market returns) and `cumulative_returns` (compounded
# portfolio returns) are computed in earlier cells. The market returns are
# compounded the same way as the portfolio so every line on the chart is a true
# cumulative return.
merged_data = pd.merge(
    ((1 + market_returns).cumprod() - 1).reset_index(),
    cumulative_returns.reset_index(name='Weighted Portfolio'),
    on='date',
    how='inner',
)
build_chart("Individual Markets vs. Weighted Portfolio", merged_data)


In [None]:
# Preview the first rows of the merged frame that was passed to build_chart.
merged_data.head()

In [None]:
# seasonality analysis
from prophet import Prophet

In [None]:
# Fit a Prophet model with yearly seasonality only and predict over the history
def fit_prophet_model(data):
    """Fit Prophet on ``data`` and return the model with its in-sample forecast.

    Only the yearly seasonal component is enabled; weekly and daily seasonality
    are switched off. ``periods=0`` means the prediction frame adds no future
    periods beyond the fitted history.

    Args:
        data: Frame passed unchanged to ``Prophet.fit``.

    Returns:
        A ``(model, forecast)`` tuple.
    """
    prophet = Prophet(
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=True,
    )
    prophet.fit(data)
    history_frame = prophet.make_future_dataframe(periods=0)
    return prophet, prophet.predict(history_frame)

# Function to calculate seasonality strength
def seasonality_strength(forecast):
    """Return the variance of the yearly component relative to the fitted values.

    Both variances use the same estimator (pandas ``Series.var``, ddof=1).
    Mixing it with numpy's ``ndarray.var`` (ddof=0) would scale the ratio by
    (n - 1) / n.

    Args:
        forecast: Frame with a ``'yearly'`` column (seasonal component) and a
            ``'yhat'`` column (fitted values).

    Returns:
        ``var(yearly) / var(yhat)``. The result is NaN or infinite when
        ``yhat`` has zero variance.
    """
    seasonal_variance = forecast['yearly'].var()
    total_variance = forecast['yhat'].var()
    return seasonal_variance / total_variance

# Names of the market columns in `data`; each column is one price-feed series
time_series_list = data.columns.tolist()

# Dictionary to store seasonality strengths, keyed by market name
seasonality_strengths = {}

# Process each time series
for market_name in time_series_list:
    # Prophet expects a two-column frame: 'ds' (timestamp) and 'y' (value)
    prophet_input = data[market_name].reset_index(name='y').rename(columns={'date': 'ds'})
    model, forecast = fit_prophet_model(prophet_input)
    seasonality_strengths[market_name] = seasonality_strength(forecast)

# Convert to DataFrame for easier handling
seasonality_df = pd.DataFrame.from_dict(seasonality_strengths, orient='index', columns=['Seasonality Strength'])

# Rank the time series by seasonality strength
seasonality_df = seasonality_df.sort_values(by='Seasonality Strength', ascending=False)

# Plot the results; the y-axis is inverted so the strongest market is on top
plt.figure(figsize=(10, 6))
plt.barh(seasonality_df.index, seasonality_df['Seasonality Strength'], color='skyblue')
plt.xlabel('Seasonality Strength')
plt.ylabel('Time Series')
plt.title('Price Feed Ranking of Time Series by Seasonality Strength')
plt.gca().invert_yaxis()
plt.show()