In [None]:
# %%
# Import necessary libraries for data handling, model utilization, and visualization
import numpy as np
import pandas as pd
import yfinance as yf  # For collecting financial data
import matplotlib.pyplot as plt
from typing import List, Dict, Optional, Tuple
from datetime import datetime, timedelta
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from scipy.optimize import minimize
from collections import deque
# Import the custom Model class
from Model import Model
import logging
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(message)s')

# Set the random seed for reproducibility across numpy and tensorflow
np.random.seed(123)
tf.random.set_seed(123)

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# newer releases no longer ship the old names, so pick whichever spelling this
# installation provides instead of raising at notebook start-up.
_DARKGRID_STYLE = next(
    (
        style_name
        for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
        if style_name in plt.style.available
    ),
    'default',
)
plt.style.use(_DARKGRID_STYLE)

# Define the tickers and date range with consideration of trading days
TICKERS = ['AGG', 'DBC', 'VTI', '^VIX']#, 'VEA', 'EEM', 'VNQ', 'IWM']

# Approximate number of trading days per year (useful for annualizing returns)
TRADING_DAYS_PER_YEAR = 252
# Define transaction cost rate
C = 0.0001  # 0.01%

# Confirm setup
print("Setup complete: libraries imported, random seed set, and tickers defined.")

In [None]:
# %%
# Data Collection Step
# Objective: Fetch historical adjusted close prices for defined tickers and date range

# Download data using yfinance for the specified tickers and date range
def get_data(tickers, start_date, end_date):
    """
    Retrieves historical adjusted close prices plus a daily risk-free rate.

    Parameters:
    - tickers: List of stock ticker symbols
    - start_date: Start date for historical data
    - end_date: End date for historical data

    Returns:
    - DataFrame with one adjusted-close column per ticker (in the order given
      in `tickers`) and a final 'Risk_Free_Rate_Daily' column holding the
      ^IRX yield converted to a daily decimal rate.
    """
    # auto_adjust=False keeps the 'Adj Close' column available on yfinance
    # releases whose default is auto_adjust=True (which drops that column).
    asset_data = yf.download(
        tickers, start=start_date, end=end_date, auto_adjust=False
    )['Adj Close']

    # yfinance is not guaranteed to return the columns in the requested order;
    # restore the caller's order so weights can be labelled with `tickers`.
    if isinstance(asset_data, pd.DataFrame) and not isinstance(tickers, str):
        ordered_columns = [ticker for ticker in tickers if ticker in asset_data.columns]
        asset_data = asset_data[ordered_columns]
    asset_data = asset_data.dropna()

    # Fetch risk-free rate (^IRX) which represents the 13-week Treasury Bill
    rf_ticker = '^IRX'
    rf_data = yf.download(
        rf_ticker, start=start_date, end=end_date, auto_adjust=False
    )['Adj Close']
    # Some yfinance versions return a one-column frame for a single ticker;
    # reduce it to a Series so it can be renamed and joined below.
    if isinstance(rf_data, pd.DataFrame):
        rf_data = rf_data.iloc[:, 0]
    rf_data = rf_data.resample('D').ffill()  # Forward-fill to get daily rates
    rf_data = rf_data / 100 / TRADING_DAYS_PER_YEAR  # Convert annual percentage to daily decimal
    rf_data = rf_data.dropna()

    # Combine asset data and risk-free rates
    data = asset_data.join(rf_data.rename('Risk_Free_Rate_Daily'), how='left')
    # Assign the filled column back: an in-place fillna on a column selection
    # is chained assignment and may not update `data` under Copy-on-Write.
    data['Risk_Free_Rate_Daily'] = data['Risk_Free_Rate_Daily'].ffill()

    return data

# Fetch the data and display a quick preview
data = get_data(TICKERS, '2006-01-01', '2024-11-26')
print("Data fetched successfully. Sample data:")
print(data.head())

# Confirm data spans the expected range and has the expected number of assets.
# 'Risk_Free_Rate_Daily' is a helper column added by get_data, not an asset,
# so it is excluded from the count.
asset_columns = [column for column in data.columns if column != 'Risk_Free_Rate_Daily']
print(f"Data covers {len(data)} trading days with {len(asset_columns)} assets.")

In [None]:
# %%
def preprocess_data(data, rolling_window=50):
    """
    Smooth a price table with a rolling mean and derive smoothed returns.

    Parameters:
    - data: DataFrame of historical prices, one column per series
    - rolling_window: Number of rows averaged by each rolling mean

    Returns:
    - normalized_data: Rolling-mean prices divided by their first complete
      row, so every column starts at 1
    - smoothed_returns: Rolling mean of the row-over-row percentage changes
    """
    # Rows without a full window come out as NaN and are discarded.
    rolling_prices = data.rolling(window=rolling_window).mean()
    smoothed_prices = rolling_prices.dropna()

    pct_changes = data.pct_change()
    smoothed_returns = pct_changes.rolling(window=rolling_window).mean().dropna()

    # Rebase every column on its first smoothed value.
    base_row = smoothed_prices.iloc[0]
    normalized_data = smoothed_prices.div(base_row)

    return normalized_data, smoothed_returns


# Smooth and rebase the downloaded data, then preview both resulting tables
normalized_data, smoothed_returns = preprocess_data(data)
for banner, frame in (
    ("Data preprocessing complete. Sample normalized data:", normalized_data),
    ("\nSample daily returns:", smoothed_returns),
):
    print(banner)
    print(frame.head())


In [None]:
class Portfolio:
    """Simulated portfolio tracking value, target weights and pending trading costs."""

    def __init__(self, initial_cash: float, asset_names: List[str], transaction_cost: float = 0.0001):
        # Static configuration
        self.asset_names = asset_names
        self.transaction_cost_rate = transaction_cost
        self.initial_cash = initial_cash

        # Current state: all cash, no positions, no cost waiting to be charged
        self.current_value = initial_cash
        self.current_weights = np.zeros(len(asset_names))
        self.transaction_cost = 0.0  # cost of the latest rebalance, charged on the next value update

        # Per-day records
        self.portfolio_values = []
        self.weights_history = []
        self.dates = []

    def rebalance(self, new_weights: np.array):
        """Adopt `new_weights` and book the proportional cost of the turnover."""
        if self.weights_history:
            # Later rebalances pay on the change relative to the current weights.
            traded = np.abs(new_weights - self.current_weights)
        else:
            # Nothing recorded yet: the whole allocation counts as traded.
            traded = np.abs(new_weights)
        cost_fraction = self.transaction_cost_rate * np.sum(traded)
        self.transaction_cost = cost_fraction * self.current_value

        self.current_weights = new_weights.copy()
        self.weights_history.append(self.current_weights.copy())

    def update_portfolio_value(self, asset_returns: np.array, current_date: pd.Timestamp):
        """Apply one period of asset returns, charge any pending cost and record the result."""
        weighted_return = np.dot(self.current_weights, asset_returns)
        grown_value = self.current_value * (1 + weighted_return)
        self.current_value = grown_value - self.transaction_cost
        self.transaction_cost = 0.0  # the pending cost has now been paid

        self.portfolio_values.append(self.current_value)
        self.dates.append(current_date)

    def get_portfolio_values(self):
        """Return the list of recorded portfolio values."""
        return self.portfolio_values


In [None]:
def calculate_metrics(portfolio_values, risk_free_rates, periods_per_year=None):
    """
    Calculates performance metrics for the portfolio, incorporating historical risk-free rates.

    Parameters:
    - portfolio_values: Sequence of portfolio values, one per period (at least two).
    - risk_free_rates: Array of per-period risk-free rates; only the first
      len(portfolio_values) - 1 entries are used.
    - periods_per_year: Number of periods used for annualisation. Defaults to
      the module-level TRADING_DAYS_PER_YEAR when omitted.

    Returns:
    - metrics: Dictionary containing various performance metrics.

    Raises:
    - ValueError: if fewer than two portfolio values are supplied.
    """
    if periods_per_year is None:
        periods_per_year = TRADING_DAYS_PER_YEAR

    # Every observation is used. The previous version silently discarded the
    # final portfolio value and left the trailing points out of the drawdown.
    values = np.asarray(portfolio_values, dtype=float)
    if values.ndim != 1 or values.size < 2:
        raise ValueError("portfolio_values must contain at least two observations")

    # Convert portfolio values to per-period returns
    portfolio_returns = np.diff(values) / values[:-1]

    # Number of return observations
    N = len(portfolio_returns)

    # Ensure risk_free_rates aligns with portfolio_returns
    risk_free_rates = np.asarray(risk_free_rates, dtype=float)[:N]

    # Compute excess returns
    excess_returns = portfolio_returns - risk_free_rates
    annualization = np.sqrt(periods_per_year)

    # Sharpe Ratio: mean excess return over the volatility of actual returns.
    # A perfectly flat series has no volatility, so the ratio is undefined.
    mean_excess_return = np.mean(excess_returns)
    std_dev = np.std(portfolio_returns)
    sharpe_ratio = mean_excess_return / std_dev * annualization if std_dev > 0 else np.nan

    # Sortino Ratio: same numerator, dispersion of the negative excess returns only
    downside_returns = excess_returns[excess_returns < 0]
    downside_std_dev = np.std(downside_returns) if len(downside_returns) > 0 else 0
    sortino_ratio = (mean_excess_return / downside_std_dev) * annualization if downside_std_dev != 0 else np.nan

    # Maximum Drawdown over the full value path
    cumulative_max = np.maximum.accumulate(values)
    drawdowns = (cumulative_max - values) / cumulative_max
    max_drawdown = np.max(drawdowns)

    # Expected return (annualized)
    cumulative_return = values[-1] / values[0]
    annualized_return = cumulative_return ** (periods_per_year / N) - 1
    print(f"cumulative_return: {cumulative_return}, annualized_return: {annualized_return}")

    # Standard deviation of returns (annualized)
    annualized_std = std_dev * annualization

    # Percentage of positive returns
    positive_returns = portfolio_returns[portfolio_returns > 0]
    negative_returns = portfolio_returns[portfolio_returns < 0]
    percentage_positive = len(positive_returns) / N * 100

    # Average profit / average loss (profit/loss ratio)
    average_profit = np.mean(positive_returns) if len(positive_returns) > 0 else 0
    average_loss = np.mean(negative_returns) if len(negative_returns) > 0 else 0
    profit_loss_ratio = (average_profit / -average_loss) if average_loss != 0 else np.nan

    metrics = {
        "Annualized Return": annualized_return,
        "Annualized Std Dev": annualized_std,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Max Drawdown": max_drawdown,
        "% Positive Returns": percentage_positive,
        "Profit/Loss Ratio": profit_loss_ratio
    }

    return metrics


In [None]:
# %%
def equal_weighted_strategy(returns):
    """
    Builds the weight vector of an equal-weighted portfolio.

    Parameters:
    - returns: DataFrame with one column per asset; only its column count is used.

    Returns:
    - equal_weights: Numpy array holding 1 / number_of_assets for every asset.
    """
    _, asset_count = returns.shape
    return np.full(asset_count, 1.0) / asset_count

# Define function to get MV weights
def mean_variance_optimized_strategy(returns, expected_rf):
    """
    Creates a long-only mean-variance portfolio by maximizing the Sharpe Ratio.

    Parameters:
    - returns: DataFrame of daily returns for each asset.
    - expected_rf: Expected risk-free rate subtracted from the mean asset
      returns, expressed in the same units as `returns`.

    Returns:
    - optimized_weights: Numpy array of non-negative weights that sum to 1.
      Equal weights are returned when the inputs cannot support an
      optimization (fewer than two rows, NaN/inf statistics) or when the
      optimizer output is unusable.
    """
    num_assets = returns.shape[1]
    equal_weights = np.ones(num_assets) / num_assets

    # A covariance matrix needs at least two observations.
    if len(returns) < 2:
        return equal_weights

    mean_returns = (returns.mean() - expected_rf).to_numpy(dtype=float)
    cov_matrix = returns.cov().to_numpy(dtype=float)
    if not (np.all(np.isfinite(mean_returns)) and np.all(np.isfinite(cov_matrix))):
        return equal_weights

    def neg_sharpe(weights):
        portfolio_return = np.dot(weights, mean_returns)
        portfolio_std = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
        return -portfolio_return / portfolio_std

    # Constraints: Weights must sum to 1, and each weight must be between 0 and 1
    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1},)
    bounds = tuple((0, 1) for _ in range(num_assets))

    result = minimize(neg_sharpe, equal_weights.copy(), bounds=bounds, constraints=constraints)

    # The solver can leave tiny negative weights; clip them and renormalise so
    # the allocation still sums to exactly 1 after clipping.
    optimized_weights = np.maximum(result.x, 0)
    total_weight = optimized_weights.sum()
    if not np.isfinite(total_weight) or total_weight <= 0:
        return equal_weights
    return optimized_weights / total_weight

# Define function to get MD weights
def maximum_diversification(returns):
    """
    Perform maximum diversification optimization based on the given returns.

    Parameters:
    - returns: DataFrame of daily returns for each asset.

    Returns:
    - optimal_weights: Array of non-negative weights summing to 1 that
      maximize the diversification ratio (weighted average asset volatility
      divided by portfolio volatility). Equal weights are returned when the
      inputs cannot support an optimization (fewer than two rows, NaN/inf
      statistics) or when the optimizer output is unusable.
    """
    num_assets = returns.shape[1]
    equal_weights = np.ones(num_assets) / num_assets

    # Volatility and covariance estimates need at least two observations.
    if len(returns) < 2:
        return equal_weights

    # Asset volatilities (standard deviation of each asset's returns)
    asset_volatilities = returns.std().to_numpy(dtype=float)
    # Covariance matrix of returns
    cov_matrix = returns.cov().to_numpy(dtype=float)
    if not (np.all(np.isfinite(asset_volatilities)) and np.all(np.isfinite(cov_matrix))):
        return equal_weights

    def neg_diversification_ratio(weights):
        # Weighted average of the individual asset volatilities
        weighted_volatility = np.dot(weights, asset_volatilities)
        # Volatility of the combined portfolio
        portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
        # Negated because the optimizer minimizes
        return -(weighted_volatility / portfolio_volatility)

    # Constraints: weights sum to 1, and each weight between 0 and 1 (long-only portfolio)
    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1},)
    bounds = tuple((0, 1) for _ in range(num_assets))

    # Start from the equal allocation
    result = minimize(neg_diversification_ratio, equal_weights.copy(), bounds=bounds, constraints=constraints)

    # Same post-processing as the mean-variance strategy: clip the solver's
    # tiny negative weights and renormalise so the weights sum to exactly 1.
    optimal_weights = np.maximum(result.x, 0)
    total_weight = optimal_weights.sum()
    if not np.isfinite(total_weight) or total_weight <= 0:
        return equal_weights
    return optimal_weights / total_weight


In [None]:
def average_metrics(metrics_list):
    """
    Averages every metric across a list of metric dictionaries.

    The metric names are taken from the first dictionary; each of them is
    looked up in every dictionary of the list and the values are averaged
    with np.mean.
    """
    metric_names = metrics_list[0].keys()
    return {
        name: np.mean([entry[name] for entry in metrics_list])
        for name in metric_names
    }


In [None]:
def plot_performance_comparison(portfolio_histories, periods, strategies):
    """
    Draws a grid of portfolio-value curves: one row per strategy, one column per period.

    Parameters:
    - portfolio_histories: Dictionary mapping each strategy name to a list of
      per-period histories; each history provides 'dates' and 'values'.
    - periods: List of 4-tuples (training_start, training_end, testing_start,
      testing_end); the testing dates are shown in the subplot titles.
    - strategies: List of strategy names to plot.
    """
    row_count = len(strategies)
    column_count = len(periods)

    # squeeze=False always yields a 2-D axes array, so single-row and
    # single-column grids are indexed the same way as full grids.
    fig, axes = plt.subplots(
        row_count,
        column_count,
        figsize=(5*column_count, 3*row_count),
        sharex=False,
        sharey=False,
        squeeze=False,
    )
    fig.suptitle('Portfolio Performance Comparison (Raw Values)', fontsize=16, y=0.92)

    for row, strategy in enumerate(strategies):
        for column, (_, _, testing_start, testing_end) in enumerate(periods):
            panel = axes[row, column]
            record = portfolio_histories[strategy][column]

            # Raw portfolio values over the testing window
            panel.plot(record['dates'], record['values'], label=f"{strategy}")
            panel.set_title(f"{strategy} | {testing_start} to {testing_end}")
            panel.set_xlabel('Date')
            panel.set_ylabel('Portfolio Value ($)')
            panel.legend()
            panel.grid(True)

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()


In [None]:
def plot_weights_comparison(portfolio_histories, periods, strategies, asset_names):
    """
    Draws a grid of weight trajectories: one row per strategy, one column per
    period, and one labelled line per asset inside each subplot.

    Parameters:
    - portfolio_histories: Dictionary mapping each strategy name to a list of
      per-period histories; each history provides 'dates' and 'weights'.
    - periods: List of 4-tuples (training_start, training_end, testing_start,
      testing_end); the testing dates are shown in the subplot titles.
    - strategies: List of strategy names to plot.
    - asset_names: Column labels for the weight vectors, in weight order.
    """
    num_strategies, num_periods = len(strategies), len(periods)

    # squeeze=False keeps `axes` two-dimensional for every grid shape.
    fig, axes = plt.subplots(
        num_strategies,
        num_periods,
        figsize=(5*num_periods, 3*num_strategies),
        sharex=False,
        sharey=False,
        squeeze=False,
    )
    fig.suptitle('Portfolio Weights Comparison', fontsize=16, y=0.92)

    # np.ndindex walks the grid row by row, like a nested strategy/period loop.
    for strat_idx, period_idx in np.ndindex(num_strategies, num_periods):
        strategy = strategies[strat_idx]
        _, _, testing_start, testing_end = periods[period_idx]
        panel = axes[strat_idx, period_idx]

        record = portfolio_histories[strategy][period_idx]
        # One row per recorded date, one column per asset
        weights_df = pd.DataFrame(
            np.array(record['weights']),
            index=record['dates'],
            columns=asset_names,
        )

        for asset in asset_names:
            panel.plot(weights_df.index, weights_df[asset], label=asset)

        panel.set_title(f"{strategy} | {testing_start} to {testing_end}")
        panel.set_xlabel('Date')
        panel.set_ylabel('Weight')
        panel.legend(fontsize='small', ncol=2)
        panel.grid(True)

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()


In [None]:
# %%
# Walk-forward schedule, one row per period:
# (training_start, training_end, testing_start, testing_end)
periods = [
    ('2009-01-01', '2010-12-31', '2011-01-01', '2012-12-31'),
    ('2011-01-01', '2012-12-31', '2013-01-01', '2014-12-31'),
    ('2013-01-01', '2014-12-31', '2015-01-01', '2016-12-31'),
    ('2015-01-01', '2016-12-31', '2017-01-01', '2018-12-31'),
    ('2017-01-01', '2018-12-31', '2019-01-01', '2020-12-31'),
    ('2019-01-01', '2020-12-31', '2021-01-01', '2022-12-31'),
    ('2021-01-01', '2022-12-31', '2023-01-01', '2024-11-25'),
]

# Column-wise views of the schedule, kept as separate lists
training_start_dates, training_end_dates, testing_start_dates, testing_end_dates = (
    list(column) for column in zip(*periods)
)

# Per-strategy containers for performance metrics
lstm_metrics, mvo_metrics, md_metrics, ew_metrics = [], [], [], []

# Per-strategy containers for portfolio histories
portfolio_histories = {strategy_name: [] for strategy_name in ('LSTM', 'MVO', 'MD', 'EW')}

initial_cash = 100000

In [None]:
def _simulate_strategy(portfolio, daily_returns, rebalancing_frequency, compute_weights):
    """Step `portfolio` through `daily_returns`, rebalancing on a fixed schedule.

    `compute_weights(idx)` is called for the first row and then whenever at
    least `rebalancing_frequency` rows have passed since the last rebalance;
    on all other rows the previous weights are repeated in the history.
    """
    last_rebalance_day = None
    for idx, current_date in enumerate(daily_returns.index):
        if last_rebalance_day is None or (idx - last_rebalance_day) >= rebalancing_frequency:
            portfolio.rebalance(compute_weights(idx))
            last_rebalance_day = idx
        else:
            # Maintain previous weights
            portfolio.weights_history.append(portfolio.weights_history[-1])
        portfolio.update_portfolio_value(daily_returns.iloc[idx].values, current_date)


def _lookback_returns(returns_full, position, window_size, label, current_date):
    """Return the `window_size` rows of `returns_full` that precede `position`.

    When fewer than `window_size` rows precede `position`, a notice is printed
    and all rows before `position` are returned instead.
    """
    # The guard must be `>= window_size`: with `window_size - 1` the slice
    # start becomes -1, which iloc reads as "last row" and yields no data.
    if position >= window_size:
        return returns_full.iloc[position - window_size:position]
    print(f"Not enough data for {label} window on {current_date}")
    return returns_full.iloc[:position]


def _record_results(portfolio, history_store, metrics_store, risk_free_rates):
    """Append the portfolio's history and its performance metrics to the given lists."""
    values = portfolio.get_portfolio_values()
    history_store.append({
        'dates': portfolio.dates,
        'values': values,
        'weights': portfolio.weights_history
    })
    metrics_store.append(calculate_metrics(values, risk_free_rates))


def run_backtest(data, periods, initial_cash, C, rebalancing_frequency=1, lstm_window_size=50, mvo_md_window_size=50, rolling_window=None):
    """
    Backtests the LSTM, MVO, MD and EW strategies over each period.

    Parameters:
    - data: DataFrame of asset prices plus a 'Risk_Free_Rate_Daily' column,
      indexed by date.
    - periods: Iterable of (training_start, training_end, testing_start,
      testing_end) tuples.
    - initial_cash: Starting value of every simulated portfolio.
    - C: Transaction cost rate passed to each Portfolio.
    - rebalancing_frequency: Number of testing rows between rebalances for the
      LSTM, MVO and MD strategies (EW always rebalances every 252 rows).
    - lstm_window_size: Number of rows fed to the model for each allocation.
    - mvo_md_window_size: Number of past return rows used by MVO and MD.
    - rolling_window: If truthy, training data starts at training_start;
      otherwise all data up to training_end is used.

    Returns:
    - (portfolio_histories, lstm_metrics, mvo_metrics, md_metrics, ew_metrics)
      where portfolio_histories maps each strategy name to a list of
      {'dates', 'values', 'weights'} dicts and each metrics list holds one
      calculate_metrics result per processed period.
    """
    rf_column = 'Risk_Free_Rate_Daily'
    portfolio_histories = {
        'LSTM': [],
        'MVO': [],
        'MD': [],
        'EW': []
    }
    lstm_metrics = []
    mvo_metrics = []
    md_metrics = []
    ew_metrics = []

    for period in periods:
        training_start, training_end, testing_start, testing_end = period
        print(f"\nProcessing period: Training up to {training_end}, Testing from {testing_start} to {testing_end}")

        # Split the data into training and testing sets
        if rolling_window:
            training_data = data.loc[training_start:training_end].copy()
        else:
            training_data = data.loc[:training_end].copy()
        testing_data = data.loc[testing_start:testing_end].copy()

        # Ensure there is enough data for testing
        if len(testing_data) < 1:
            print("Not enough data for testing period")
            continue

        testing_returns = testing_data.pct_change().fillna(0).drop(rf_column, axis=1)
        rf_testing = data.loc[testing_start:testing_end, rf_column].values

        def new_portfolio():
            # Every strategy starts the period from the same cash and costs.
            return Portfolio(
                initial_cash,
                asset_names=testing_returns.columns.tolist(),
                transaction_cost=C
            )

        # -------------------------------------------
        # LSTM Model
        # -------------------------------------------
        lstm_model = Model()
        lstm_model.train(training_data)
        portfolio_lstm = new_portfolio()

        # Prepend the last 'lstm_window_size' training rows so the first
        # testing day already has a full input window.
        combined_data = pd.concat(
            [training_data.tail(lstm_window_size), testing_data],
            ignore_index=False
        )
        context_returns = combined_data.drop(rf_column, axis=1).pct_change().fillna(0)
        model_inputs = pd.concat([combined_data, context_returns], axis=1)

        # Label the columns in their actual physical order (prices and the
        # risk-free rate as laid out in `data`, then returns). The values fed
        # to the model are unchanged; only the labels are made truthful.
        # NOTE(review): confirm this column order matches what Model.train
        # builds from the training data.
        model_inputs.columns = (
            [name if name == rf_column else f"{name}_price" for name in combined_data.columns]
            + [f"{name}_return" for name in context_returns.columns]
        )

        def lstm_weights(idx):
            # Rows [idx, idx + window) are the window immediately preceding
            # testing day `idx`, so the current day is never part of the input.
            input_sequence = model_inputs.iloc[idx:idx + lstm_window_size].values
            return lstm_model.predict_allocation(input_sequence)

        _simulate_strategy(portfolio_lstm, testing_returns, rebalancing_frequency, lstm_weights)
        _record_results(portfolio_lstm, portfolio_histories['LSTM'], lstm_metrics, rf_testing)

        # -------------------------------------------
        # Shared inputs for the window-based optimizers (MVO and MD)
        # -------------------------------------------
        # Full return history up to the end of the testing period
        returns_full_testing = data.loc[:testing_end].pct_change().dropna()
        returns_full_testing = returns_full_testing.drop(rf_column, axis=1)
        # Returns only for the testing period
        returns_testing = data.loc[testing_start:testing_end].pct_change().dropna()
        returns_testing = returns_testing.drop(rf_column, axis=1)
        # Position of each testing date inside returns_full_testing
        testing_indices = returns_full_testing.index.get_indexer_for(returns_testing.index)

        def lookback(idx, label):
            return _lookback_returns(
                returns_full_testing,
                testing_indices[idx],
                mvo_md_window_size,
                label,
                returns_testing.index[idx],
            )

        # -------------------------------------------
        # Mean-Variance Optimization (MVO) Model
        # -------------------------------------------
        def mvo_weights(idx):
            input_data = lookback(idx, 'MVO')
            # Expected risk-free rate over the same look-back window
            expected_rf = data.loc[input_data.index, rf_column].mean()
            return mean_variance_optimized_strategy(input_data, expected_rf)

        portfolio_mv = new_portfolio()
        _simulate_strategy(portfolio_mv, returns_testing, rebalancing_frequency, mvo_weights)
        _record_results(portfolio_mv, portfolio_histories['MVO'], mvo_metrics, rf_testing)

        # -------------------------------------------
        # Maximum Diversification (MD) Model
        # -------------------------------------------
        def md_weights(idx):
            return maximum_diversification(lookback(idx, 'MD'))

        portfolio_md = new_portfolio()
        _simulate_strategy(portfolio_md, returns_testing, rebalancing_frequency, md_weights)
        _record_results(portfolio_md, portfolio_histories['MD'], md_metrics, rf_testing)

        # -------------------------------------------
        # Equal Weighted (EW) Strategy
        # -------------------------------------------
        weights_ew = equal_weighted_strategy(returns_full_testing)
        portfolio_ew = new_portfolio()
        # Rebalance yearly (every 252 testing rows), independent of rebalancing_frequency
        _simulate_strategy(portfolio_ew, testing_returns, 252, lambda idx: weights_ew)
        _record_results(portfolio_ew, portfolio_histories['EW'], ew_metrics, rf_testing)

    # Return the portfolio histories and metrics
    return portfolio_histories, lstm_metrics, mvo_metrics, md_metrics, ew_metrics


In [None]:
# Backtest the initial ticker set: daily rebalancing, 50-row look-back windows
_backtest_settings = {
    'rebalancing_frequency': 1,
    'lstm_window_size': 50,
    'mvo_md_window_size': 50,
}
(
    portfolio_histories,
    lstm_metrics,
    mvo_metrics,
    md_metrics,
    ew_metrics,
) = run_backtest(data, periods, initial_cash, C, **_backtest_settings)

In [None]:
# Average each strategy's per-period metrics across all periods
lstm_avg_metrics, mvo_avg_metrics, md_avg_metrics, ew_avg_metrics = (
    average_metrics(per_period)
    for per_period in (lstm_metrics, mvo_metrics, md_metrics, ew_metrics)
)

In [None]:
# Report the cross-period averages, one strategy after another
for heading, averaged in (
    ("\nAverage Metrics for LSTM Model:", lstm_avg_metrics),
    ("\nAverage Metrics for MVO Strategy:", mvo_avg_metrics),
    ("\nAverage Metrics for MD Strategy:", md_avg_metrics),
):
    print(heading)
    for metric_name, metric_value in averaged.items():
        print(f"{metric_name}: {metric_value:.4f}")

# The equal-weighted averages are recomputed from ew_metrics at this point
print("\nAverage Metrics for Equal Weighted Strategy:")
for metric_name, metric_value in average_metrics(ew_metrics).items():
    print(f"{metric_name}: {metric_value:.4f}")

In [None]:
strategies = ['LSTM','MVO', 'MD', 'EW']
plot_performance_comparison(portfolio_histories, periods, strategies)

# Per-period breakdown: every strategy's metrics, period by period
_period_sections = (
    ("LSTM Metrics:", lstm_metrics),
    ("MVO Metrics:", mvo_metrics),
    ("MD Metrics:", md_metrics),
    ("EW Metrics:", ew_metrics),
)
for period_number in range(1, len(mvo_metrics) + 1):
    print(f"Period {period_number}")
    for heading, per_period in _period_sections:
        print(heading)
        for metric_name, metric_value in per_period[period_number - 1].items():
            print(f"{metric_name}: {metric_value:.4f}")
    print("\n")

In [None]:
strategies = ['LSTM','MVO', 'MD']
# Label the weights with the asset columns of `data`: the portfolios are built
# from those columns, whose order is not guaranteed to match TICKERS.
asset_names = [column for column in data.columns if column != 'Risk_Free_Rate_Daily']
plot_weights_comparison(portfolio_histories, periods, strategies, asset_names)

In [None]:
# Second experiment: large-cap technology stocks, same backtest settings
TICKERS = ['AAPL','MSFT','AMZN','GOOG']
data = get_data(TICKERS, '2006-01-01', '2024-11-26')
_backtest_settings = {
    'rebalancing_frequency': 1,
    'lstm_window_size': 50,
    'mvo_md_window_size': 50,
}
(
    portfolio_histories,
    lstm_metrics,
    mvo_metrics,
    md_metrics,
    ew_metrics,
) = run_backtest(data, periods, initial_cash, C, **_backtest_settings)

In [None]:
# Average each strategy's per-period metrics across all periods
lstm_avg_metrics, mvo_avg_metrics, md_avg_metrics, ew_avg_metrics = (
    average_metrics(per_period)
    for per_period in (lstm_metrics, mvo_metrics, md_metrics, ew_metrics)
)

# Report the averages, one strategy after another
for heading, averaged in (
    ("\nAverage Metrics for LSTM Model:", lstm_avg_metrics),
    ("\nAverage Metrics for MVO Strategy:", mvo_avg_metrics),
    ("\nAverage Metrics for MD Strategy:", md_avg_metrics),
):
    print(heading)
    for metric_name, metric_value in averaged.items():
        print(f"{metric_name}: {metric_value:.4f}")

# The equal-weighted averages are recomputed from ew_metrics at this point
print("\nAverage Metrics for Equal Weighted Strategy:")
for metric_name, metric_value in average_metrics(ew_metrics).items():
    print(f"{metric_name}: {metric_value:.4f}")

In [None]:
# Plot the performance comparison
strategies = ['LSTM','MVO', 'MD', 'EW']
plot_performance_comparison(portfolio_histories, periods, strategies)

# Print the metrics for each period
_period_sections = (
    ("LSTM Metrics:", lstm_metrics),
    ("MVO Metrics:", mvo_metrics),
    ("MD Metrics:", md_metrics),
    ("EW Metrics:", ew_metrics),
)
for period_number in range(1, len(mvo_metrics) + 1):
    print(f"Period {period_number}")
    for heading, per_period in _period_sections:
        print(heading)
        for metric_name, metric_value in per_period[period_number - 1].items():
            print(f"{metric_name}: {metric_value:.4f}")
    print("\n")

# Plot the weights comparison.
# Label the weights with the asset columns of `data`: the portfolios are built
# from those columns, whose order is not guaranteed to match TICKERS (this
# ticker list is not in alphabetical order).
strategies = ['LSTM','MVO', 'MD']
asset_names = [column for column in data.columns if column != 'Risk_Free_Rate_Daily']
plot_weights_comparison(portfolio_histories, periods, strategies, asset_names)

In [None]:
# Third experiment: a basket of real-estate tickers, same backtest settings
TICKERS = ['O', 'AMT', 'SPG', 'PLD', 'VTR']
data = get_data(TICKERS, '2006-01-01', '2024-11-26')
_backtest_settings = {
    'rebalancing_frequency': 1,
    'lstm_window_size': 50,
    'mvo_md_window_size': 50,
}
(
    portfolio_histories,
    lstm_metrics,
    mvo_metrics,
    md_metrics,
    ew_metrics,
) = run_backtest(data, periods, initial_cash, C, **_backtest_settings)

In [None]:
# Average each strategy's per-period metrics across all periods
lstm_avg_metrics, mvo_avg_metrics, md_avg_metrics, ew_avg_metrics = (
    average_metrics(per_period)
    for per_period in (lstm_metrics, mvo_metrics, md_metrics, ew_metrics)
)

# Report the averages, one strategy after another
for heading, averaged in (
    ("\nAverage Metrics for LSTM Model:", lstm_avg_metrics),
    ("\nAverage Metrics for MVO Strategy:", mvo_avg_metrics),
    ("\nAverage Metrics for MD Strategy:", md_avg_metrics),
):
    print(heading)
    for metric_name, metric_value in averaged.items():
        print(f"{metric_name}: {metric_value:.4f}")

# The equal-weighted averages are recomputed from ew_metrics at this point
print("\nAverage Metrics for Equal Weighted Strategy:")
for metric_name, metric_value in average_metrics(ew_metrics).items():
    print(f"{metric_name}: {metric_value:.4f}")

In [None]:
# Plot the performance comparison
strategies = ['LSTM','MVO', 'MD', 'EW']
plot_performance_comparison(portfolio_histories, periods, strategies)

# Print the metrics for each period
_period_sections = (
    ("LSTM Metrics:", lstm_metrics),
    ("MVO Metrics:", mvo_metrics),
    ("MD Metrics:", md_metrics),
    ("EW Metrics:", ew_metrics),
)
for period_number in range(1, len(mvo_metrics) + 1):
    print(f"Period {period_number}")
    for heading, per_period in _period_sections:
        print(heading)
        for metric_name, metric_value in per_period[period_number - 1].items():
            print(f"{metric_name}: {metric_value:.4f}")
    print("\n")

# Plot the weights comparison.
# Label the weights with the asset columns of `data`: the portfolios are built
# from those columns, whose order is not guaranteed to match TICKERS (this
# ticker list is not in alphabetical order).
strategies = ['LSTM','MVO', 'MD']
asset_names = [column for column in data.columns if column != 'Risk_Free_Rate_Daily']
plot_weights_comparison(portfolio_histories, periods, strategies, asset_names)