In [2]:
import pandas as pd
import numpy as np

import datetime
import os, sys
import importlib

import utils
importlib.reload(utils)

from utils import plot_series, plot_series_with_names, plot_series_bar
from utils import plot_dataframe
from utils import get_universe_adjusted_series, scale_weights_to_one, scale_to_book_long_short
from utils import generate_portfolio, backtest_portfolio
from utils import match_implementations

import plotly.graph_objects as go

In [3]:
# This directory can be used if you're working on a Kaggle Notebook inside the competition
# Change the directory as per your requirements if you're working somewhere else
data_dir = "/kaggle/input/qrt-quant-quest-iit-bombay-2025/"

features = pd.read_parquet(os.path.join(data_dir, "features.parquet"))

universe = pd.read_parquet(os.path.join(data_dir, "universe.parquet"))
 
returns = pd.read_parquet(os.path.join(data_dir, "returns.parquet"))

In [4]:
train_features = features.loc["2005":"2015"]
validation_features = features.loc["2016":"2018"]
test_features = features.loc["2019"]

train_universe = universe.loc["2005":"2015"]
validation_universe = universe.loc["2016":"2018"]
test_universe = universe.loc["2019"]

train_returns = returns.loc["2005":"2015"]
validation_returns = returns.loc["2016":"2018"]
test_returns = returns.loc["2019"]

In [5]:
def backtest_portfolio_new(portfolio: pd.DataFrame, returns: pd.DataFrame, universe: pd.DataFrame, plot_: bool, print_: bool):

    """
    Computes performance metrics from a given portfolio DataFrame.

    This function calculates the net Sharpe ratio, along with other portfolio metrics,
    ensuring that certain constraints are met. It performs checks for:
    - Shape alignment of input DataFrames.
    - portfolio weights only in stocks that are part of the universe.
    - Dollar neutrality.
    - Unit capital constraint.
    - Maximum weight constraint.

    Parameters:
    ----------
    portfolio : pd.DataFrame
        DataFrame representing portfolio over time.
    returns : pd.DataFrame
        DataFrame containing stock returns corresponding to the portfolio.
    universe : pd.DataFrame
        Boolean DataFrame indicating whether a stock is part of the investable universe.
    plot_: bool
        Boolean Flag which decides whether to plot the cumulative PnL
    print_: bool
        Boolean Flag which decides whether to print the metrics of backtest

    Raises:
    ------
    ValueError
        If the input DataFrames do not have matching shapes.
        If portfolio contain stocks that are not in the universe.
        If the portfolio violates the dollar neutrality constraint.
        If the unit capital constraint is violated.
        If the maximum weight constraint is exceeded.

    Returns
    -------
    tuple
        - Net Sharpe Ratio (rounded to 3 decimal places).
        - Pandas Series containing gross PnL over time.

    Additional Outputs:
    -------------------
    - Prints the gross and net Sharpe ratios.
    - Prints the turnover percentage.
    - Plots cumulative Gross and Net PnL.

    Notes:
    ------
    - The turnover is calculated as the average traded capital divided by the average book value.
    - The gross and net Sharpe ratios are annualized using a factor of √252.
    - The net PnL accounts for trading costs (assumed to be 0.01% per unit traded).
    """

    universe = universe.astype(bool)
    
    if not (portfolio.shape == returns.shape == universe.shape):
        raise ValueError("Shapes of portfolio, returns and universe are not algined")

    if ((portfolio.replace(0, np.nan))[~universe].notna().sum().sum() != 0):
        raise ValueError("Your portfolio are present for a stock not present in the universe")

    if ((portfolio.sum(1).abs() > 0.01).sum() > 0):
        raise ValueError("Dollar Neutral Constraint is violated")
    
    if (((portfolio.abs().sum(1) - 1) > 0.01).sum() > 0):
        raise ValueError("Unit Capital Constraint is violated")

    if ((portfolio.abs().max(1) > 0.1).sum() > 0):
        raise ValueError("Maximum Weight Constraint is violated")

    portfolio = portfolio.fillna(0)

    rets = returns.fillna(0)

    gross_pnl = (portfolio * rets).sum(axis=1)

    traded = portfolio.diff(1).abs().sum(axis=1).fillna(0)

    net_pnl = gross_pnl - traded * 1e-4

    gross_sharpe_ratio = (gross_pnl.mean() / gross_pnl.std()) * np.sqrt(252)

    net_sharpe_ratio = (net_pnl.mean() / net_pnl.std()) * np.sqrt(252)

    if print_:
        print("Gross Sharpe Ratio: ", round(gross_sharpe_ratio, 3))
        print("Net Sharpe Ratio: ", round(net_sharpe_ratio, 3))

    return round(net_sharpe_ratio, 3), gross_pnl

In [6]:
def normalize_signal(feature, universe_boolean):
    """
    Normalize a feature signal for a given universe.
    """
    signal = feature.shift(1)  # Avoid lookahead bias
    signal = signal.where(universe_boolean, np.nan)
    signal = signal.rank(axis=1, method="min", ascending=True)
    signal = signal.sub(signal.mean(axis=1), axis=0)
    signal = signal.div(signal.abs().sum(axis=1), axis=0)
    return signal

def generate_portfolio_vectorized(features, universe, weights):
    """
    Generate portfolio weights using a vectorized approach.
    """
    universe_boolean = universe.astype(bool)
    
    # Combine signals into a portfolio
    portfolio_weights = pd.DataFrame(0, index=features.index, columns=features.columns.get_level_values(1).unique())
    
    for i, feature_name in enumerate(features.columns.levels[0]):
        feature_data = features[feature_name]
        normalized_signal = normalize_signal(feature_data, universe_boolean)
        portfolio_weights += normalized_signal * weights[i]
    
    # Enforce dollar neutrality and normalize weights to sum to 1 per day
    portfolio_weights = portfolio_weights.sub(portfolio_weights.mean(axis=1), axis=0)
    portfolio_weights = portfolio_weights.div(portfolio_weights.abs().sum(axis=1), axis=0)
    
    return portfolio_weights.fillna(0)


In [15]:
initial_weights = {
   'accumulation_distribution_index': -10.0,
   'aroon': -0.09222290817164171,
   'average_true_range': -10.0,
   'chaikin_money_flow': -10.0,
   'chande_momentum_oscillator': 3.318011135314672,
   'commodity_channel_index': 10.0,
   'ease_of_movement': 5.821824288780287,
   'ichimoku': -10.0,
   'know_sure_thing': 10.0,
   'macd': -3.867991947270643,
   'on_balance_volume': 1.8025161396173899,
   'relative_strength_index': 0.6147723614862542,
   'stochastic_oscillator': -10.0,
   'trend_1_3': 10.0,
   'trend_20_60': -10.0,
   'trend_5_20': -10.0,
   'trix': -0.8681094745966318,
   'ultimate_oscillator': 1.3625158710488225,
   'volatility_20': 10.0,
   'volatility_60': -10.0,
   'volume': 10.0,
   'williams_r': -6.987324378181491
}

# Define Bayesian Optimization function
def optimize_signal_weights_bayesian(features_train, features_validation, universe_train, universe_validation, returns_train, returns_validation):
    
    def sharpe_ratio_objective(**weights):
        weight_array = np.array([weights[key] for key in sorted(weights.keys())])
        
        # Generate portfolio weights for training set
        portfolio_weights_train = generate_portfolio_vectorized(features_train, universe_train, weight_array)
        
        # Backtest on training data
        sr_train, _ = backtest_portfolio_new(portfolio_weights_train, returns_train, universe_train,False,False)
        # print(f"Debug: Sharpe ratio calculated: {sr_train}")
        # Validate on validation set
        portfolio_weights_validation = generate_portfolio_vectorized(features_validation, universe_validation, weight_array)
        sr_validation, _ = backtest_portfolio_new(portfolio_weights_validation, returns_validation, universe_validation,False,False)
        print(f"Train Sharpe Ratio: {sr_train}, Validation Sharpe Ratio: {sr_validation}")
        # Add penalties to prevent overfitting
        alpha = 0.7  # Weight for training Sharpe ratio (adjust as needed)
        combined_sharpe_ratio = alpha * sr_train + (1 - alpha) * sr_validation
        
        # Add penalties to prevent overfitting
        penalty_l2 = np.sum(np.square(weight_array)) * combined_sharpe_ratio * 0.0005
        penalty_l1 = np.sum(np.abs(weight_array)) * combined_sharpe_ratio * 0.001
        
        result = combined_sharpe_ratio * 10 - penalty_l2 - penalty_l1
        print(f"Debug: L2 penalty: {penalty_l2}")
        print(f"Debug: L1 penalty: {penalty_l1}")
        
        print(f"Debug: Objective function value (with penalty): {result}")
        print("-" * 50)
        return result
    
    pbounds = {feature: (-20, 20) for feature in features_train.columns.levels[0]}
    
    optimizer = BayesianOptimization(
        f=sharpe_ratio_objective,
        pbounds=pbounds,
        random_state=42
    )
    
    optimizer.maximize(init_points=20, n_iter=250)
    
    optimal_weights = optimizer.max['params']
    
    return optimal_weights

# Run Bayesian Optimization using train/validation split
optimal_weights = optimize_signal_weights_bayesian(
    train_features,
    validation_features,
    train_universe,
    validation_universe,
    train_returns,
    validation_returns
)

# Evaluate on test set
portfolio_weights_test = generate_portfolio_vectorized(test_features, test_universe, np.array(list(optimal_weights.values())))
sr_test, _ = backtest_portfolio_new(portfolio_weights_test, test_returns, test_universe)

print("Optimal Weights:", optimal_weights)
print("Test Set Sharpe Ratio:", sr_test)

|   iter    |  target   | accumu... |   aroon   | averag... | chaiki... | chande... | commod... | ease_o... | ichimoku  | know_s... |   macd    | on_bal... | relati... | stocha... | trend_1_3 | trend_... | trend_... |   trix    | ultima... | volati... | volati... |  volume   | willia... |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Train Sharpe Ratio: -0.538, Validation Sharpe Ratio: -0.432
Debug: L2 penalty: -0.8006102263438701
Debug: L1 penalty: -0.11917956704397133
Debug: Objective function value (with penalty): -4.142210206612157
--------------------------------------------------
| [39m1        [39m | [39m-4.142   [39m | [39m-5.018   [39m | [39m18.03    [39m | [39m9.28     [39m | [39m3.946    [39m | [39m-13.76   [39m | [39m-1

TypeError: backtest_portfolio_new() missing 2 required positional arguments: 'plot_' and 'print_'

In [16]:
optimal_weights = 

{'accumulation_distribution_index': -20.0,
 'aroon': 1.8475694801189855,
 'average_true_range': -20.0,
 'chaikin_money_flow': 5.898065359965044,
 'chande_momentum_oscillator': -9.354101362538275,
 'commodity_channel_index': 20.0,
 'ease_of_movement': 8.08903939705811,
 'ichimoku': -20.0,
 'know_sure_thing': -7.25558502973287,
 'macd': -5.873723274987229,
 'on_balance_volume': -11.932050044738435,
 'relative_strength_index': 11.688643060816238,
 'stochastic_oscillator': -4.979467865847553,
 'trend_1_3': -1.9952749754513066,
 'trend_20_60': -20.0,
 'trend_5_20': 5.072363909076035,
 'trix': 9.352609868081357,
 'ultimate_oscillator': -3.046442913727308,
 'volatility_20': 1.8633451855091139,
 'volatility_60': -2.248940148121792,
 'volume': 20.0,
 'williams_r': -20.0}

In [19]:
portfolio_weights_test = generate_portfolio_vectorized(test_features, test_universe, np.array(list(optimal_weights.values())))
sr_test, _ = backtest_portfolio_new(portfolio_weights_test, test_returns, test_universe,True,True)

Gross Sharpe Ratio:  -0.33
Net Sharpe Ratio:  -0.817


In [21]:
selected_features = [
    'on_balance_volume', 'trend_1_3', 'macd', 'ichimoku',
    'ultimate_oscillator',
    'chaikin_money_flow', 'volume', 'know_sure_thing',
    'chande_momentum_oscillator', 'average_true_range',
    'volatility_20', 'trend_5_20'
]
train_features = features.loc["2005":"2015"]
validation_features = features.loc["2016":"2018"]
test_features = features.loc["2019"]

train_universe = universe.loc["2005":"2015"]
validation_universe = universe.loc["2016":"2018"]
test_universe = universe.loc["2019"]

train_returns = returns.loc["2005":"2015"]
validation_returns = returns.loc["2016":"2018"]
test_returns = returns.loc["2019"]

# Filter your training and validation feature sets to include only selected features
train_features = train_features[selected_features]
validation_features = validation_features[selected_features]
test_features = test_features[selected_features]  # Test set for evaluation

In [8]:
from bayes_opt import BayesianOptimization
import numpy as np
import matplotlib.pyplot as plt
from joblib import Parallel, delayed

In [None]:
initial_weights = {
   'accumulation_distribution_index': -10.0,
   'aroon': -0.09222290817164171,
   'average_true_range': -10.0,
   'chaikin_money_flow': -10.0,
   'chande_momentum_oscillator': 3.318011135314672,
   'commodity_channel_index': 10.0,
   'ease_of_movement': 5.821824288780287,
   'ichimoku': -10.0,
   'know_sure_thing': 10.0,
   'macd': -3.867991947270643,
   'on_balance_volume': 1.8025161396173899,
   'relative_strength_index': 0.6147723614862542,
   'stochastic_oscillator': -10.0,
   'trend_1_3': 10.0,
   'trend_20_60': -10.0,
   'trend_5_20': -10.0,
   'trix': -0.8681094745966318,
   'ultimate_oscillator': 1.3625158710488225,
   'volatility_20': 10.0,
   'volatility_60': -10.0,
   'volume': 10.0,
   'williams_r': -6.987324378181491
}

# Define Bayesian Optimization function
def optimize_signal_weights_bayesian(features_train, features_validation, universe_train, universe_validation, returns_train, returns_validation):
    
    def sharpe_ratio_objective(**weights):
        weight_array = np.array([weights[key] for key in sorted(weights.keys())])
        
        # Generate portfolio weights for training set
        portfolio_weights_train = generate_portfolio_vectorized(features_train, universe_train, weight_array)
        
        # Backtest on training data
        sr_train, _ = backtest_portfolio_new(portfolio_weights_train, returns_train, universe_train,False,False)
        # print(f"Debug: Sharpe ratio calculated: {sr_train}")
        # Validate on validation set
        portfolio_weights_validation = generate_portfolio_vectorized(features_validation, universe_validation, weight_array)
        sr_validation, _ = backtest_portfolio_new(portfolio_weights_validation, returns_validation, universe_validation,False,False)
        print(f"Train Sharpe Ratio: {sr_train}, Validation Sharpe Ratio: {sr_validation}")
        # Add penalties to prevent overfitting
        alpha = 0.5  # Weight for training Sharpe ratio (adjust as needed)
        combined_sharpe_ratio = alpha * sr_train + (1 - alpha) * sr_validation
        
        # Add penalties to prevent overfitting
        penalty_l2 = np.sum(np.square(weight_array)) * combined_sharpe_ratio * 0.0005
        penalty_l1 = np.sum(np.abs(weight_array)) * combined_sharpe_ratio * 0.001
        
        result = combined_sharpe_ratio * 10 - penalty_l2 - penalty_l1
        print(f"Debug: L2 penalty: {penalty_l2}")
        print(f"Debug: L1 penalty: {penalty_l1}")
        
        print(f"Debug: Objective function value (with penalty): {result}")
        print("-" * 50)
        return result
    
    pbounds = {feature: (-20, 20) for feature in features_train.columns.levels[0]}
    
    optimizer = BayesianOptimization(
        f=sharpe_ratio_objective,
        pbounds=pbounds,
        random_state=42
    )
    
    optimizer.maximize(init_points=20, n_iter=250)
    
    optimal_weights = optimizer.max['params']
    
    return optimal_weights

# Run Bayesian Optimization using train/validation split
optimal_weights = optimize_signal_weights_bayesian(
    train_features,
    validation_features,
    train_universe,
    validation_universe,
    train_returns,
    validation_returns
)

# Evaluate on test set
portfolio_weights_test = generate_portfolio_vectorized(test_features, test_universe, np.array(list(optimal_weights.values())))
sr_test, _ = backtest_portfolio_new(portfolio_weights_test, test_returns, test_universe,True,True)

print("Optimal Weights:", optimal_weights)
print("Test Set Sharpe Ratio:", sr_test)

|   iter    |  target   | accumu... |   aroon   | averag... | chaiki... | chande... | commod... | ease_o... | ichimoku  | know_s... |   macd    | on_bal... | relati... | stocha... | trend_1_3 | trend_... | trend_... |   trix    | ultima... | volati... | volati... |  volume   | willia... |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Train Sharpe Ratio: -0.538, Validation Sharpe Ratio: -0.432
Debug: L2 penalty: -0.7670801259912623
Debug: L1 penalty: -0.11418824578491918
Debug: Objective function value (with penalty): -3.9687316282238188
--------------------------------------------------
| [39m1        [39m | [39m-3.969   [39m | [39m-5.018   [39m | [39m18.03    [39m | [39m9.28     [39m | [39m3.946    [39m | [39m-13.76   [39m | [39m-