"""
The evaluation API requires that you set up a server which will respond to inference requests.
We have already defined the server; you just need write the predict function.
When we evaluate your submission on the hidden test set the client defined in `default_gateway` will run in a different container
with direct access to the hidden test set and hand off the data timestep by timestep.

Your code will always have access to the published copies of the copmetition files.
"""

In [1]:
import os

import pandas as pd
import polars as pl
import numpy as np
from math import floor, ceil
import copy

import kaggle_evaluation.default_inference_server
import importlib

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

import warnings

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import time
import shutil

In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x1f303095670>

# Read the entire train file

In [3]:
comp_dataset_df = pd.read_csv("./kaggle/input/hull-tactical-market-prediction/train complete.csv")
comp_dataset_df.head(3)

Unnamed: 0,date_id,D1,D2,D3,D4,D5,D6,D7,D8,D9,...,V3,V4,V5,V6,V7,V8,V9,forward_returns,risk_free_rate,market_forward_excess_returns
0,0,0,0,0,1,1,0,0,0,1,...,,,,,,,,-0.002421,0.000301,-0.003038
1,1,0,0,0,1,1,0,0,0,1,...,,,,,,,,-0.008495,0.000303,-0.009114
2,2,0,0,0,1,0,0,0,0,1,...,,,,,,,,-0.009624,0.000301,-0.010243


# Transformers for data transformation

## Transformers for cleaning data

In [4]:
class CleanerTransformer(BaseEstimator, TransformerMixin):
    
    def __init__(self):
        pass
        
    def fit(self, X):

        self.col_mean_dict = {}

        #For each column, determine the mean value.
        for col_ind in range(X.shape[1]):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=RuntimeWarning)
                col_mean = np.nanmean(X[:, col_ind])
                
            if np.isnan(col_mean):
                col_mean = 0.0
            self.col_mean_dict[col_ind] = col_mean
            
        return self
        
    def transform(self, X):

        X_copy = X.copy().astype(float)

        for col_ind in range(X_copy.shape[1]):
            nan_mask = np.isnan(X_copy[:, col_ind])
            X_copy[nan_mask, col_ind] = self.col_mean_dict[col_ind]

        
        return X_copy

## Transformer for creating latent features

In [5]:
class LatentTransformer(BaseEstimator, TransformerMixin):
    
    def __init__(self, num_sing_vals):
        self.num_sing_vals = num_sing_vals
        
    def fit(self, X, y=None):

        U, s, Vt = np.linalg.svd(X)
        
        self.U_ld = U[:, :self.num_sing_vals]
        self.S_ld = s[0:self.num_sing_vals].reshape((1, self.num_sing_vals)) * np.eye(self.num_sing_vals)
        self.Vt_ld = Vt[0:self.num_sing_vals, :]

        return self
        
    def transform(self, X):

        latent = np.dot(X, self.Vt_ld.T)
        
        return latent

# Functions to Train the model

In [6]:
MIN_INVESTMENT = 0
MAX_INVESTMENT = 2

class ParticipantVisibleError(Exception):
    pass

In [7]:
def smoothed_score(forward_returns_ten: torch.Tensor, risk_free_rate_ten: torch.Tensor, signal_pred_ten: torch.Tensor) -> torch.Tensor:
    """
    Calculates a smoothed version of the competition metric.

    This metric penalizes strategies that take on significantly more volatility than the underlying market.

    Returns:
        float: The calculated adjusted Sharpe ratio.
    """a
    eps = 1e-7
    # Clip positions to allowed range
    signal_pred = torch.clamp(signal_pred_ten, MIN_INVESTMENT, MAX_INVESTMENT)

    #Strategy returns
    strategy_returns = risk_free_rate_ten * (1 - signal_pred) + signal_pred * forward_returns_ten

    # Calculate strategy's Sharpe ratio
    strategy_excess_returns = strategy_returns - risk_free_rate_ten
    strategy_excess_returns = torch.clamp(strategy_excess_returns, min=-0.999) #<--New line
    #strategy_excess_cumulative = (1 + strategy_excess_returns).prod() <--Existing line
    strategy_excess_cumulative = torch.log1p(strategy_excess_returns).mean() #<-- New line
    strategy_mean_excess_return = torch.expm1(strategy_excess_cumulative) #<-- New line
    strategy_std = torch.std(strategy_returns, unbiased = False) + eps

    trading_days_per_yr = torch.tensor(252, dtype=torch.float32)
    if strategy_std == 0:
        raise ParticipantVisibleError('Division by zero, strategy std is zero')
    sharpe = strategy_mean_excess_return / strategy_std * torch.sqrt(trading_days_per_yr)
    strategy_volatility = strategy_std * torch.sqrt(trading_days_per_yr) * 100.0

    # Calculate market return and volatility
    market_excess_returns = forward_returns_ten - risk_free_rate_ten
    market_excess_returns = torch.clamp(market_excess_returns, min=-0.999) #<--New line
    #market_excess_cumulative = (1 + market_excess_returns).prod()
    market_excess_cumulative = torch.log1p(market_excess_returns).mean() #<--New line
    market_mean_excess_return = torch.expm1(market_excess_cumulative) #<-- New line
    market_std = torch.std(forward_returns_ten, unbiased = False) + eps
    
    market_volatility = market_std * torch.sqrt(trading_days_per_yr) * torch.tensor(100.0, dtype=torch.float32)

    if market_volatility == 0:
        raise ParticipantVisibleError('Division by zero, market std is zero')

    # Calculate the volatility penalty
    excess_vol = torch.relu(strategy_volatility / market_volatility - 1.2)
    vol_penalty = 1 + excess_vol

    # Calculate the return penalty
    return_gap = torch.relu((market_mean_excess_return - strategy_mean_excess_return) * torch.tensor(100.0, dtype=torch.float32) * trading_days_per_yr)
    return_penalty = 1 + (return_gap**2) / torch.tensor(100.0, dtype=torch.float32)

    # Adjust the Sharpe ratio by the volatility and return penalty
    adjusted_sharpe = sharpe / (vol_penalty * return_penalty)
    adjusted_sharpe = torch.clamp(adjusted_sharpe, max = 1_000_000)
    
    return adjusted_sharpe

In [8]:
def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    """
    Calculates a custom evaluation metric (volatility-adjusted Sharpe ratio).

    This metric penalizes strategies that take on significantly more volatility
    than the underlying market.

    Returns:
        float: The calculated adjusted Sharpe ratio.
    """

    if not pd.api.types.is_numeric_dtype(submission['prediction']):
        raise ParticipantVisibleError('Predictions must be numeric')

    solution = solution
    solution['position'] = submission['prediction']

    if solution['position'].max() > MAX_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].max()} exceeds maximum of {MAX_INVESTMENT}')
    if solution['position'].min() < MIN_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].min()} below minimum of {MIN_INVESTMENT}')

    solution['strategy_returns'] = solution['risk_free_rate'] * (1 - solution['position']) + solution['position'] * solution['forward_returns']

    # Calculate strategy's Sharpe ratio
    strategy_excess_returns = solution['strategy_returns'] - solution['risk_free_rate']
    strategy_excess_cumulative = (1 + strategy_excess_returns).prod()
    strategy_mean_excess_return = (strategy_excess_cumulative) ** (1 / len(solution)) - 1
    strategy_std = solution['strategy_returns'].std()

    trading_days_per_yr = 252
    if strategy_std == 0:
        raise ParticipantVisibleError('Division by zero, strategy std is zero')
    
    sharpe = strategy_mean_excess_return / strategy_std * np.sqrt(trading_days_per_yr)
    
    strategy_volatility = float(strategy_std * np.sqrt(trading_days_per_yr) * 100)

    # Calculate market return and volatility
    market_excess_returns = solution['forward_returns'] - solution['risk_free_rate']
    market_excess_cumulative = (1 + market_excess_returns).prod()
    market_mean_excess_return = (market_excess_cumulative) ** (1 / len(solution)) - 1
    market_std = solution['forward_returns'].std()

    market_volatility = float(market_std * np.sqrt(trading_days_per_yr) * 100)

    if market_volatility == 0:
        raise ParticipantVisibleError('Division by zero, market std is zero')

    # Calculate the volatility penalty
    excess_vol = max(0, strategy_volatility / market_volatility - 1.2) if market_volatility > 0 else 0
    vol_penalty = 1 + excess_vol

    # Calculate the return penalty
    return_gap = max(
        0,
        (market_mean_excess_return - strategy_mean_excess_return) * 100 * trading_days_per_yr,
    )
    return_penalty = 1 + (return_gap**2) / 100
    
    # Adjust the Sharpe ratio by the volatility and return penalty
    adjusted_sharpe = sharpe / (vol_penalty * return_penalty)
    return min(float(adjusted_sharpe), 1_000_000)

In [9]:
class SimpleDataset(Dataset):
    def __init__(self, data_ten, forward_rets_ten, risk_free_rate_ten):
        self.data_ten = data_ten
        self.forward_rets_ten = forward_rets_ten
        self.risk_free_rate_ten = risk_free_rate_ten
        
    def __len__(self):
        return self.data_ten.shape[0]

    def __getitem__(self, idx):
        x = self.data_ten[idx] 
        fr = self.forward_rets_ten[idx]
        rf = self.risk_free_rate_ten[idx]
        
        return x, fr, rf, idx

In [10]:
class SimpleFeedForward(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, output_dim):
        
        super(SimpleFeedForward, self).__init__()
        self.model = nn.Sequential(nn.Linear(input_dim, hidden_dim),
                                  nn.Tanh(),
                                  nn.Linear(hidden_dim, output_dim))

    def forward(self, x):

        return self.model(x)

In [11]:
def train_model(lt_train_ten, forward_rets_ten, risk_free_rate_ten, lt_val_ten, forward_rets_val_ten, risk_free_rate_val_ten, lt_input_dim, lt_hidden_dim, output_dim, batch_size, shuffle, num_epochs = None, patience = 2, verbose = False):

    global lt_model

    start_time = time.time()

    #Datset to create training sequences based on training data.
    lt_dataset = SimpleDataset(lt_train_ten, forward_rets_ten, risk_free_rate_ten)
    lt_dataloader = DataLoader(lt_dataset, batch_size=batch_size, shuffle=shuffle)
    
    lt_model = SimpleFeedForward(input_dim=lt_input_dim, hidden_dim=lt_hidden_dim, output_dim=output_dim)
    
    total_count = 0
    for name, param in lt_model.named_parameters():
            
        count = param.numel()
        total_count += count
    
    #Datset to create validation sequences based on valiaation data.
    if lt_val_ten is not None:
        
        lt_val_dataset = SimpleDataset(lt_val_ten, forward_rets_val_ten, risk_free_rate_val_ten)
        lt_val_dataloader = DataLoader(lt_val_dataset, batch_size=batch_size, shuffle=False)
        val_batch, val_batch_fr_ten, val_batch_rf_ten, val_idx_ten = next(iter(lt_val_dataloader))

    optimizer = torch.optim.Adam(list(lt_model.parameters()), lr=0.00001) 
    
    total_loss = 0
    total_adj_sharpe = 0
    total_val_adj_sharpe = 0
    
    NUM_EPOCHS = 50

    early_stop_count = 0
    best_val_adj_sharpe = -np.inf
    adj_sharpe_best_epoch = -np.inf
    best_epoch = 0
    best_val_preds_ten = None
    best_model_state = None
    
    for epoch in range(1, NUM_EPOCHS+1):

        for name, p in lt_model.named_parameters():
            if not torch.isfinite(p).all():
                print("Bad parameter:", name)
        
        if num_epochs is not None:
            if epoch > num_epochs:
                best_model_state = copy.deepcopy(lt_model.state_dict())
                break
        
        epoch_loss = 0.0
        epoch_adj_sharpe = 0.0
    
        all_preds = []
        batch_count = 0
        for batch, batch_fr_ten, batch_rf_ten, idx_ten in lt_dataloader:
            
            optimizer.zero_grad()

            #Signal predictions using the long-term model.
            lt_preds_ten = lt_model(batch)
            final_preds_ten = torch.sigmoid(lt_preds_ten) * 2
            
            #Compute loss as negative smoothed_score (maximize Sharpe)
            batch_loss = -smoothed_score(
                forward_returns_ten=batch_fr_ten,
                risk_free_rate_ten=batch_rf_ten,
                signal_pred_ten=final_preds_ten
            )
            
            submission = pd.DataFrame()
            submission['prediction'] = final_preds_ten.detach().numpy().flatten()
            
            solution = pd.DataFrame()
            solution['forward_returns'] = batch_fr_ten.detach().numpy().flatten()
            solution['risk_free_rate'] = batch_rf_ten.detach().numpy().flatten()
            
            batch_adj_sharpe = round(score(solution, submission, None), 3)

            epoch_loss += batch_loss.item()
            epoch_adj_sharpe += batch_adj_sharpe
            
            batch_loss.backward()
            batch_count += 1

            optimizer.step()
        
        mean_loss_for_epoch = round(epoch_loss/batch_count, 3)
        mean_adj_sharpe_for_epoch = round(epoch_adj_sharpe/batch_count, 3)

        if lt_val_ten is not None:

            #Signal predictions on the validation data using the long-term model.
            lt_val_preds_ten = lt_model(val_batch)
            
            final_val_preds_ten = torch.sigmoid(lt_val_preds_ten) * 2
            
            val_submission = pd.DataFrame()
            val_submission['prediction'] = final_val_preds_ten.detach().numpy().flatten()
            val_solution = pd.DataFrame()
            val_solution['forward_returns'] = val_batch_fr_ten.detach().numpy().flatten()
            val_solution['risk_free_rate'] = val_batch_rf_ten.detach().numpy().flatten()
            
            val_adj_sharpe = round(score(val_solution, val_submission, None), 3)

            #Update the best validation score so far.
            if best_val_adj_sharpe < val_adj_sharpe:
               
                best_val_adj_sharpe = val_adj_sharpe
                adj_sharpe_best_epoch = mean_adj_sharpe_for_epoch
                early_stop_count = 0
                best_epoch = epoch
                best_val_preds_ten = final_val_preds_ten
                best_model_state = copy.deepcopy(lt_model.state_dict())
    
            else:
                if early_stop_count > patience:
                    break
    
                else:
                    early_stop_count += 1
            
            if verbose == True:
                print(f"Epoch {epoch}: Mean Training Loss = {mean_loss_for_epoch}, Training mean adj sharpe ratio = {mean_adj_sharpe_for_epoch}, Validation mean adj sharpe ratio = {val_adj_sharpe}")
    
    time_taken = round((time.time() - start_time)/60, 3)
    if verbose == True:
        print(f"Best epoch: {best_epoch}")
        print(f"Best mean adjusted sharpe ratio (Train): {adj_sharpe_best_epoch}")
        print(f"Best mean adjusted sharpe ratio (Val): {best_val_adj_sharpe}")
        print(f"Time taken = {time_taken} minutes")
    
    load_result = lt_model.load_state_dict(best_model_state)
    # Optional sanity check
    assert len(load_result.missing_keys) == 0
    assert len(load_result.unexpected_keys) == 0
    return lt_model, total_count, adj_sharpe_best_epoch, best_val_adj_sharpe, time_taken, best_epoch, best_val_preds_ten

In [12]:
def train_model_wrapper(lt_train_ten, forward_rets_ten, risk_free_rate_ten, lt_val_ten, forward_rets_val_ten, risk_free_rate_val_ten, lt_input_dim, lt_hidden_dim, output_dim, batch_size, shuffle, num_epochs = None, patience = 2, verbose = False, retrain_freq = None):

    time_taken = 0
    tot_num_epochs = 0
    best_train_preds_ten = None
    best_val_preds_ten = None
    
    if ((retrain_freq is None) or (lt_val_ten is None) or (lt_val_ten.shape[0] == retrain_freq)):
        lt_model, param_count, adj_sharpe_best_epoch, best_val_adj_sharpe, time_taken, best_epoch, best_val_preds_ten = train_model(lt_train_ten, forward_rets_ten, risk_free_rate_ten, lt_val_ten, forward_rets_val_ten, risk_free_rate_val_ten, lt_input_dim, lt_hidden_dim, output_dim, batch_size, shuffle, num_epochs = num_epochs, patience = patience, verbose = verbose)

    #Retrain frequency has been meaningfully defined.
    else:

        lt_combined_ten = torch.concat([lt_train_ten, lt_val_ten], axis = 0)
        comb_forward_rets_ten = torch.concat([forward_rets_ten, forward_rets_val_ten], axis = 0)
        comb_risk_free_rate_ten = torch.concat([risk_free_rate_ten, risk_free_rate_val_ten], axis = 0)
        
        lt_train_subset_start = 0
        lt_train_subset_end = lt_train_ten.shape[0] - 1
        
        lt_val_subset_start = lt_train_subset_end + 1
        lt_val_subset_end = lt_val_subset_start + retrain_freq - 1

        retrain_count = ceil(lt_combined_ten.shape[0]/retrain_freq)
        while lt_val_subset_start < lt_combined_ten.shape[0]:
            
            lt_train_subset_ten = lt_combined_ten[lt_train_subset_start:(lt_train_subset_end+1)]
            forward_returns_subset_ten = comb_forward_rets_ten[lt_train_subset_start:(lt_train_subset_end+1)]
            risk_free_rate_subset_ten = comb_risk_free_rate_ten[lt_train_subset_start:(lt_train_subset_end+1)]
            
            lt_val_subset_ten  = lt_combined_ten[lt_val_subset_start:(lt_val_subset_end+1)]
            forward_rets_val_subset_ten = comb_forward_rets_ten[lt_val_subset_start:(lt_val_subset_end+1)]
            risk_free_rate_val_subset_ten = comb_risk_free_rate_ten[lt_val_subset_start:(lt_val_subset_end+1)]

            lt_model, param_count, _, _, curr_time_taken, curr_num_epochs, curr_val_preds_ten = train_model(lt_train_subset_ten, forward_returns_subset_ten, risk_free_rate_subset_ten, lt_val_subset_ten, forward_rets_val_subset_ten, risk_free_rate_val_subset_ten, lt_input_dim, lt_hidden_dim, output_dim, batch_size, shuffle, num_epochs = num_epochs, patience = patience, verbose = verbose)
            
            time_taken += curr_time_taken
            tot_num_epochs += curr_num_epochs
            if best_val_preds_ten is None:
                best_val_preds_ten = curr_val_preds_ten
            else:
                best_val_preds_ten = torch.concat([best_val_preds_ten, curr_val_preds_ten])
            
            lt_train_subset_start += retrain_freq
            lt_train_subset_end += retrain_freq
            
            lt_val_subset_start += retrain_freq
            lt_val_subset_end += retrain_freq

        time_taken /= retrain_count
        best_epoch = tot_num_epochs/retrain_count

    if lt_val_ten is not None:
        val_solution_df = pd.DataFrame()
        val_solution_df['forward_returns'] = pd.Series(forward_rets_val_ten.detach().numpy().flatten())
        val_solution_df['risk_free_rate'] = pd.Series(risk_free_rate_val_ten.detach().numpy().flatten())
    
        val_submission_df = pd.DataFrame()
        val_submission_df['prediction'] = pd.Series(best_val_preds_ten.detach().numpy().flatten())
        
        best_val_adj_sharpe = round(score(val_solution_df, val_submission_df, None), 3)
    else:
        best_val_adj_sharpe = None

    return lt_model, param_count, 0, best_val_adj_sharpe, time_taken, best_epoch

## Scale the features

In [13]:
def scale(col_ser):
    
    global col_number
    min_val = col_ser.min()
    max_val = col_ser.max()
    
    return (col_ser - min_val)/(max_val - min_val)

In [14]:
comp_ds_scaled_df = comp_dataset_df.apply(scale, axis = 0)
comp_ds_scaled_df['date_id'] = comp_dataset_df['date_id']

In [15]:
comp_ds_scaled_df.head(3)

Unnamed: 0,date_id,D1,D2,D3,D4,D5,D6,D7,D8,D9,...,V3,V4,V5,V6,V7,V8,V9,forward_returns,risk_free_rate,market_forward_excess_returns
0,0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,...,,,,,,,,0.464247,0.949355,0.462739
1,1,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,...,,,,,,,,0.38872,0.955531,0.387855
2,2,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,...,,,,,,,,0.374671,0.950587,0.373946


# Run TSCV to determine the best hyperparameters

In [16]:
def run_tscv(train_size, val_size, retrain_freq, input_dim, hidden_dim, comp_dataset_df, batch_size, shuffle, max_folds = None, verbose = False):

    total_size = comp_dataset_df.shape[0]
    num_possible_folds = floor((total_size - train_size)/val_size)
    num_actual_folds = min(num_possible_folds, max_folds)

    if verbose == True:
        print(f'Folds to be created = {num_actual_folds}')

    fold_no = 0
    
    mean_adj_sharpe_across_fold = 0
    mean_val_adj_sharpe_across_fold = 0
    mean_time_taken_across_fold = 0
    mean_num_epochs = 0
    
    #Create as many folds as possible.
    while fold_no < num_actual_folds:

        if verbose == True:
            print(f'Fold index = {fold_no}')

        if max_folds is not None:
            if fold_no >= max_folds:
                break

        if fold_no == 0:
            #Initialise the train_set1 indices
            train_set_start_index = comp_dataset_df.shape[0] - num_actual_folds * val_size - train_size
            train_set_end_index = train_set_start_index + train_size - 1
            train_start_date = comp_dataset_df.iloc[train_set_start_index, 0]
            train_end_date = comp_dataset_df.iloc[train_set_end_index, 0]
            
            #Initialising the validation indices.
            val_start_index = train_set_end_index + 1
            val_end_index = val_start_index + val_size - 1
            val_start_date = comp_dataset_df.iloc[val_start_index, 0]
            val_end_date = comp_dataset_df.iloc[val_end_index, 0]
            
        else:
            train_set_start_index = train_set_start_index + val_size
            train_set_end_index = train_set_end_index + val_size
            train_start_date = comp_dataset_df.iloc[train_set_start_index, 0]
            train_end_date = comp_dataset_df.iloc[train_set_end_index, 0]
            
            val_start_index = val_start_index + val_size
            val_end_index = val_end_index + val_size
            val_start_date = comp_dataset_df.iloc[val_start_index, 0]
            val_end_date = comp_dataset_df.iloc[val_end_index, 0]

        if verbose == True:
            print(f'Training test fold: {fold_no}, Training dates: {train_start_date} - {train_end_date}, Validation Dates: {val_start_date} - {val_end_date}')
        
        comp_dataset_df['lagged_forward_returns'] = comp_dataset_df['forward_returns'].shift(1)
        comp_dataset_df['lagged_risk_free_rate'] = comp_dataset_df['risk_free_rate'].shift(1)
        comp_dataset_df['lagged_market_forward_excess_returns'] = comp_dataset_df['market_forward_excess_returns'].shift(1)
    
        train_set_df = comp_dataset_df.iloc[train_set_start_index:train_set_end_index+1, :]
        val_set_df = comp_dataset_df.iloc[val_start_index:val_end_index+1, :]

        #We will then train the model here.
        forward_returns_ten = torch.tensor(train_set_df['forward_returns'].values, dtype=torch.float32)
        risk_free_rate_ten = torch.tensor(train_set_df['risk_free_rate'].values, dtype=torch.float32)
        
        forward_rets_val_ten = torch.tensor(val_set_df['forward_returns'].values, dtype=torch.float32)
        risk_free_rate_val_ten = torch.tensor(val_set_df['risk_free_rate'].values, dtype=torch.float32)

        train_set_df = train_set_df.drop(['forward_returns', 'risk_free_rate', 'market_forward_excess_returns'], axis = 1)
        val_set_df = val_set_df.drop(['forward_returns', 'risk_free_rate', 'market_forward_excess_returns'], axis = 1)

        pipeline = Pipeline([('cleaner', CleanerTransformer()),
                             ('latent', LatentTransformer(input_dim))])
        
        with warnings.catch_warnings():

            warnings.filterwarnings(
                "ignore",
                category=FutureWarning,
                module="sklearn.pipeline"
            )
            pipeline = pipeline.fit(train_set_df.values)

            lt_train_latent_ten = torch.tensor(pipeline.transform(train_set_df.values), dtype=torch.float32)
            lt_val_latent_ten = torch.tensor(pipeline.transform(val_set_df.values), dtype=torch.float32)

            #Replace infinite values.
            if torch.isinf(lt_val_latent_ten).any():
                finite_vals = lt_val_latent_ten[torch.isfinite(lt_val_latent_ten)]
                max_val = finite_vals.max()
                min_val = finite_vals.min()
                lt_val_latent_ten[lt_val_latent_ten == float('inf')] = max_val
                lt_val_latent_ten[lt_val_latent_ten == float('-inf')] = min_val

        _, param_count, adj_sharpe_best_epoch, best_val_adj_sharpe, time_taken, num_epochs = train_model_wrapper(lt_train_latent_ten, forward_returns_ten, risk_free_rate_ten, lt_val_latent_ten, forward_rets_val_ten, risk_free_rate_val_ten, input_dim, hidden_dim, 1, batch_size, shuffle, num_epochs = 20, patience = 2, verbose = False, retrain_freq = retrain_freq)
                                                                                     
        mean_adj_sharpe_across_fold += adj_sharpe_best_epoch
        mean_val_adj_sharpe_across_fold += best_val_adj_sharpe
        mean_time_taken_across_fold += time_taken
        mean_num_epochs += num_epochs
        
        fold_no += 1

        if verbose == True:
            print("---------------------------------------------------------------")

    mean_adj_sharpe_across_fold = round(mean_adj_sharpe_across_fold/fold_no, 3)
    mean_val_adj_sharpe_across_fold = round(mean_val_adj_sharpe_across_fold/fold_no, 3)
    mean_time_taken_across_fold = round(mean_time_taken_across_fold/fold_no, 3)
    mean_num_epochs = int(round(mean_num_epochs/fold_no, 0))

    if verbose == True:
        print(f"Number of folds = {fold_no}")
        print(f'Mean adjusted sharpe ratio (Training) across folds = {mean_adj_sharpe_across_fold}')
        print(f'Mean adjusted sharpe ratio (Validation) across folds = {mean_val_adj_sharpe_across_fold}')
        print(f'Mean time taken across folds = {mean_time_taken_across_fold}')
        print(f'Mean number of epochs across folds = {mean_num_epochs}')
    
    return param_count, mean_adj_sharpe_across_fold, mean_val_adj_sharpe_across_fold, mean_time_taken_across_fold, mean_num_epochs

In [17]:
def do_grid_search(train_size_list, val_size_list, retrain_freq_list, input_dim_list, hidden_dim_list, max_folds = None, verbose = False):

    best_mean_adj_sharpe_across_fold = None
    best_mean_val_adj_sharpe_across_fold = None
    best_train_size = None
    best_input_dim = None
    best_hidden_dim = None
    best_param_count = None
    best_val_size = None
    best_mean_time = None
    best_num_epochs = None
    best_retrain_freq = None
    
    for val_size in val_size_list:
        
        for train_size in train_size_list:
        
            for hidden_dim in hidden_dim_list:
                
                for input_dim in input_dim_list:

                    for retrain_freq in retrain_freq_list:

                        batch_size = val_size
                
                        param_count, mean_adj_sharpe_across_fold, mean_val_adj_sharpe_across_fold, mean_time_taken_across_fold, num_epochs = run_tscv(train_size, val_size, retrain_freq, input_dim, hidden_dim, comp_dataset_df, max_folds = max_folds, batch_size=batch_size, shuffle = False, verbose = False)
    
                        if verbose == True:
                            print(f'Validation size = {val_size}, Train size = {train_size}, Hidden dim = {hidden_dim}, Input dimension: {input_dim}, Retrain freq = {retrain_freq}, Parameter count = {param_count}')
                            print(f'Training adjusted sharpe = {mean_adj_sharpe_across_fold}, Validation adjusted sharpe = {mean_val_adj_sharpe_across_fold}, Time = {mean_time_taken_across_fold}, number of epochs = {num_epochs}')
                            print('----------------------------------------------------------------')
                        
                        if ((best_mean_adj_sharpe_across_fold is None) or (best_mean_val_adj_sharpe_across_fold is None)):
                            best_mean_adj_sharpe_across_fold = mean_adj_sharpe_across_fold
                            best_mean_val_adj_sharpe_across_fold = mean_val_adj_sharpe_across_fold
                            best_train_size = train_size
                            best_input_dim = input_dim
                            best_hidden_dim = hidden_dim
                            best_param_count = param_count
                            best_val_size = val_size
                            best_mean_time = mean_time_taken_across_fold
                            best_num_epochs = num_epochs
                            best_retrain_freq = retrain_freq
        
                        elif ((best_mean_val_adj_sharpe_across_fold == mean_val_adj_sharpe_across_fold) & (best_param_count > param_count)):
                            best_mean_adj_sharpe_across_fold = mean_adj_sharpe_across_fold
                            best_mean_val_adj_sharpe_across_fold = mean_val_adj_sharpe_across_fold
                            best_train_size = train_size
                            best_input_dim = input_dim
                            best_hidden_dim = hidden_dim
                            best_param_count = param_count
                            best_val_size = val_size
                            best_mean_time = mean_time_taken_across_fold
                            best_num_epochs = num_epochs
                            best_retrain_freq = retrain_freq
                            
                        elif best_mean_val_adj_sharpe_across_fold < mean_val_adj_sharpe_across_fold:
                            best_mean_adj_sharpe_across_fold = mean_adj_sharpe_across_fold
                            best_mean_val_adj_sharpe_across_fold = mean_val_adj_sharpe_across_fold
                            best_train_size = train_size
                            best_input_dim = input_dim
                            best_hidden_dim = hidden_dim
                            best_param_count = param_count
                            best_val_size = val_size
                            best_mean_time = mean_time_taken_across_fold
                            best_num_epochs = num_epochs
                            best_retrain_freq = retrain_freq
            
                        else:
                            pass
        print("--------------------------------------------------------------------------------")   

    print(f'\nBest validation size = {best_val_size}, Best train size = {best_train_size}, Best hidden dimension = {best_hidden_dim}, Best input dimension = {best_input_dim}, Best retrain Freq = {retrain_freq}, Best parameter count = {best_param_count}, Training adjusted sharpe = {best_mean_adj_sharpe_across_fold}, Validation adjusted sharpe = {best_mean_val_adj_sharpe_across_fold}, Best time = {best_mean_time}, Best number of epochs = {best_num_epochs}')

    return best_val_size, best_train_size, best_hidden_dim, best_input_dim, retrain_freq, best_param_count, best_mean_adj_sharpe_across_fold, best_mean_val_adj_sharpe_across_fold, best_mean_time, best_num_epochs

## Run TSCV for Short-Term Model

In [18]:
S_TRAIN_SIZE_LIST = sorted([int(252/3), int(252/4), int(252/5), int(252/6), int(252/7)], reverse = False)
S_VAL_SIZE_LIST = [180]
S_RETRAIN_FREQ_LIST = [5, 10, 15, 20]

#Model hyper-parameters.
S_INPUT_DIM_LIST = [2, 3, 4]
S_HIDDEN_DIM_LIST = [6, 8, 10, 12]

S_MAX_FOLDS = 10

s_best_val_size, s_best_train_size, s_best_hidden_dim, s_best_input_dim, s_retrain_freq, s_best_param_count, s_best_mean_adj_sharpe_across_fold, s_best_mean_val_adj_sharpe_across_fold, s_best_mean_time, s_best_num_epochs = do_grid_search(S_TRAIN_SIZE_LIST, S_VAL_SIZE_LIST, S_RETRAIN_FREQ_LIST, S_INPUT_DIM_LIST, S_HIDDEN_DIM_LIST, S_MAX_FOLDS, True)

Validation size = 180, Train size = 36, Hidden dim = 6, Input dimension: 2, Retrain freq = 5, Parameter count = 25
Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.616, Time = 0.001, number of epochs = 3
----------------------------------------------------------------
Validation size = 180, Train size = 36, Hidden dim = 6, Input dimension: 2, Retrain freq = 10, Parameter count = 25
Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.897, Time = 0.001, number of epochs = 2
----------------------------------------------------------------
Validation size = 180, Train size = 36, Hidden dim = 6, Input dimension: 2, Retrain freq = 15, Parameter count = 25
Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.765, Time = 0.001, number of epochs = 2
----------------------------------------------------------------
Validation size = 180, Train size = 36, Hidden dim = 6, Input dimension: 2, Retrain freq = 20, Parameter count = 25
Training adjusted sharpe = 0.0, V

In [19]:
#Best validation size = 180, Best train size = 84, Best hidden dimension = 9, Best input dimension = 3, Best retrain Freq = 9, Best parameter count = 46, Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.934, Best time = 0.001, Best number of epochs = 2
#Best validation size = 180, Best train size = 63, Best hidden dimension = 7, Best input dimension = 2, Best retrain Freq = 18, Best parameter count = 29, Training adjusted sharpe = 0.0, Validation adjusted sharpe = 1.072, Best time = 0.001, Best number of epochs = 3
#Best validation size = 180, Best train size = 84, Best hidden dimension = 9, Best input dimension = 3, Best retrain Freq = 36, Best parameter count = 46, Training adjusted sharpe = 0.0, Validation adjusted sharpe = 1.145, Best time = 0.001, Best number of epochs = 2
#Best validation size = 180, Best train size = 126, Best hidden dimension = 6, Best input dimension = 2, Best retrain Freq = 90, Best parameter count = 25, Training adjusted sharpe = 0.0, Validation adjusted sharpe = 1.014, Best time = 0.001, Best number of epochs = 1
#Best validation size = 180, Best train size = 126, Best hidden dimension = 6, Best input dimension = 2, Best retrain Freq = 90, Best parameter count = 25, Training adjusted sharpe = 0.0, Validation adjusted sharpe = 1.014, Best time = 0.001, Best number of epochs = 1
#Best validation size = 180, Best train size = 84, Best hidden dimension = 8, Best input dimension = 4, Best retrain Freq = 20, Best parameter count = 49, Training adjusted sharpe = 0.0, Validation adjusted sharpe = 1.092, Best time = 0.001, Best number of epochs = 1


## Run TSCV for Long-Term Model

In [20]:
L_TRAIN_SIZE_LIST = sorted([252*6, 252*5, 252*4, 252*3, 252 * 2], reverse = False)
L_VAL_SIZE_LIST = [180]
L_RETRAIN_FREQ_LIST = [36, 90, 180]

#Model hyper-parameters.
L_INPUT_DIM_LIST = [2, 3, 4, 5]
L_HIDDEN_DIM_LIST = [13, 15, 17]

L_MAX_FOLDS = 5
print(f'Maximum training/test folds = {L_MAX_FOLDS}')

l_best_val_size, l_best_train_size, l_best_hidden_dim, l_best_input_dim, l_retrain_freq, l_best_param_count, l_best_mean_adj_sharpe_across_fold, l_best_mean_val_adj_sharpe_across_fold, l_best_mean_time, l_best_num_epochs = do_grid_search(L_TRAIN_SIZE_LIST, L_VAL_SIZE_LIST, L_RETRAIN_FREQ_LIST, L_INPUT_DIM_LIST, L_HIDDEN_DIM_LIST, L_MAX_FOLDS, True)

Maximum training/test folds = 5
Validation size = 180, Train size = 504, Hidden dim = 13, Input dimension: 2, Retrain freq = 36, Parameter count = 53
Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.595, Time = 0.001, number of epochs = 1
----------------------------------------------------------------
Validation size = 180, Train size = 504, Hidden dim = 13, Input dimension: 2, Retrain freq = 90, Parameter count = 53
Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.421, Time = 0.001, number of epochs = 1
----------------------------------------------------------------
Validation size = 180, Train size = 504, Hidden dim = 13, Input dimension: 2, Retrain freq = 180, Parameter count = 53
Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.628, Time = 0.003, number of epochs = 2
----------------------------------------------------------------
Validation size = 180, Train size = 504, Hidden dim = 13, Input dimension: 3, Retrain freq = 36, Parameter co

In [21]:
#Best validation size = 180, Best train size = 1512, Best hidden dimension = 17, Best input dimension = 4, Best retrain Freq = 180, Best parameter count = 103, Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.985, Best time = 0.001, Best number of epochs = 1

## Run TSCV for mixed Model

In [18]:
class SignalCombiner(nn.Module):
    
    def __init__(self, lt_pipeline, lt_model, st_pipeline, st_model):
        super().__init__()

        self.lt_pipeline = lt_pipeline
        self.lt_model = lt_model

        self.st_pipeline = st_pipeline
        self.st_model = st_model

        # Unconstrained scalar
        self.alpha = nn.Parameter(torch.tensor(0.0))  # sigmoid(0)=0.5

    def update_lt_model(self, lt_pipeline, lt_model):
        self.lt_pipeline = lt_pipeline
        self.lt_model = lt_model

    def update_st_model(self, st_pipeline, st_model):
        self.st_pipeline = st_pipeline
        self.st_model = st_model
    
    def forward(self, x):

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                category=FutureWarning,
                module="sklearn.pipeline"
            )
            lt_x = torch.tensor(self.lt_pipeline.transform(x.detach().cpu().numpy()), dtype=torch.float32)
            st_x = torch.tensor(self.st_pipeline.transform(x.detach().cpu().numpy()), dtype=torch.float32)        
        
        with torch.no_grad():
            lt_signal = torch.sigmoid(self.lt_model(lt_x)) * 2
            st_signal = torch.sigmoid(self.st_model(st_x)) * 2

        w = torch.sigmoid(self.alpha)  # w ∈ (0,1)
        combined = w * lt_signal + (1 - w) * st_signal
        
        return combined

In [19]:
def train_comb_model(lt_pipeline, lt_model, st_pipeline, st_model, train_ten, forward_rets_ten, risk_free_rate_ten, val_ten, forward_rets_val_ten, risk_free_rate_val_ten, batch_size, shuffle, num_epochs, patience, verbose):

    start_time = time.time()
    
    #Datset to create training sequences based on training data.
    train_dataset = SimpleDataset(train_ten, forward_rets_ten, risk_free_rate_ten)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)
    
    model = SignalCombiner(lt_pipeline, lt_model, st_pipeline, st_model)
    
    #Datset to create validation sequences based on validation data.
    if val_ten is not None:
        
        val_dataset = SimpleDataset(val_ten, forward_rets_val_ten, risk_free_rate_val_ten)
        val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        val_batch, val_batch_fr_ten, val_batch_rf_ten, val_idx_ten = next(iter(val_dataloader))

    optimizer = torch.optim.Adam(list(model.parameters()), lr=0.00001) 
    
    total_loss = 0
    total_adj_sharpe = 0
    total_val_adj_sharpe = 0
    
    NUM_EPOCHS = 50

    early_stop_count = 0
    best_val_adj_sharpe = -np.inf
    adj_sharpe_best_epoch = -np.inf
    best_epoch = 0
    best_val_preds_ten = None
    best_model_state = None
    
    for epoch in range(1, NUM_EPOCHS+1):

        for name, p in model.named_parameters():
            if not torch.isfinite(p).all():
                print("Bad parameter:", name)
        
        if num_epochs is not None:
            if epoch > num_epochs:
                best_model_state = copy.deepcopy(model.state_dict())
                break
        
        epoch_loss = 0.0
        epoch_adj_sharpe = 0.0
    
        all_preds = []
        batch_count = 0
        for batch, batch_fr_ten, batch_rf_ten, idx_ten in train_dataloader:
            
            optimizer.zero_grad()
            
            #Signal predictions using the long-term model.
            preds_ten = model(batch)
            
            #Compute loss as negative smoothed_score (maximize Sharpe)
            batch_loss = -smoothed_score(
                forward_returns_ten=batch_fr_ten,
                risk_free_rate_ten=batch_rf_ten,
                signal_pred_ten=preds_ten
            )
            
            submission = pd.DataFrame()
            submission['prediction'] = preds_ten.detach().numpy().flatten()
            
            solution = pd.DataFrame()
            solution['forward_returns'] = batch_fr_ten.detach().numpy().flatten()
            solution['risk_free_rate'] = batch_rf_ten.detach().numpy().flatten()
            
            batch_adj_sharpe = round(score(solution, submission, None), 3)

            epoch_loss += batch_loss.item()
            epoch_adj_sharpe += batch_adj_sharpe
            
            batch_loss.backward()
            batch_count += 1

            optimizer.step()
        
        mean_loss_for_epoch = round(epoch_loss/batch_count, 3)
        mean_adj_sharpe_for_epoch = round(epoch_adj_sharpe/batch_count, 3)

        if val_ten is not None:

            #Signal predictions on the validation data using the long-term model.
            val_preds_ten = model(val_batch)
            
            val_submission = pd.DataFrame()
            val_submission['prediction'] = val_preds_ten.detach().numpy().flatten()
            val_solution = pd.DataFrame()
            val_solution['forward_returns'] = val_batch_fr_ten.detach().numpy().flatten()
            val_solution['risk_free_rate'] = val_batch_rf_ten.detach().numpy().flatten()
            
            val_adj_sharpe = round(score(val_solution, val_submission, None), 3)

            #Update the best validation score so far.
            if best_val_adj_sharpe < val_adj_sharpe:
               
                best_val_adj_sharpe = val_adj_sharpe
                adj_sharpe_best_epoch = mean_adj_sharpe_for_epoch
                early_stop_count = 0
                best_epoch = epoch
                best_val_preds_ten = val_preds_ten
                best_model_state = copy.deepcopy(model.state_dict())
    
            else:
                if early_stop_count > patience:
                    break
    
                else:
                    early_stop_count += 1
            
            if verbose == True:
                print(f"Epoch {epoch}: Mean Training Loss = {mean_loss_for_epoch}, Training mean adj sharpe ratio = {mean_adj_sharpe_for_epoch}, Validation mean adj sharpe ratio = {val_adj_sharpe}")
    
    time_taken = round((time.time() - start_time)/60, 3)
    if verbose == True:
        print(f"Best epoch: {best_epoch}")
        print(f"Best mean adjusted sharpe ratio (Train): {adj_sharpe_best_epoch}")
        print(f"Best mean adjusted sharpe ratio (Val): {best_val_adj_sharpe}")
        print(f"Time taken = {time_taken} minutes")
    
    load_result = model.load_state_dict(best_model_state)
    # Optional sanity check
    assert len(load_result.missing_keys) == 0
    assert len(load_result.unexpected_keys) == 0
    return model, 0, adj_sharpe_best_epoch, best_val_adj_sharpe, time_taken, best_epoch, best_val_preds_ten

In [20]:
def train_comb_model_wrapper(lt_pipeline, lt_model, st_pipeline, st_model, train_ten, forward_rets_ten, risk_free_rate_ten, val_ten, forward_rets_val_ten, risk_free_rate_val_ten, batch_size, shuffle, num_epochs = None, patience = 2, verbose = False, retrain_freq = None):

    time_taken = 0
    tot_num_epochs = 0
    best_train_preds_ten = None
    best_val_preds_ten = None
    
    if ((retrain_freq is None) or (val_ten is None) or (val_ten.shape[0] == retrain_freq)):
        model, param_count, adj_sharpe_best_epoch, best_val_adj_sharpe, time_taken, best_epoch, best_val_preds_ten = train_comb_model(lt_pipeline, lt_model, st_pipeline, st_model, train_ten, forward_rets_ten, risk_free_rate_ten, val_ten, forward_rets_val_ten, risk_free_rate_val_ten, batch_size, shuffle, num_epochs, patience, verbose)

    #Retrain frequency has been meaningfully defined.
    else:

        combined_ten = torch.concat([train_ten, val_ten], axis = 0)
        comb_forward_rets_ten = torch.concat([forward_rets_ten, forward_rets_val_ten], axis = 0)
        comb_risk_free_rate_ten = torch.concat([risk_free_rate_ten, risk_free_rate_val_ten], axis = 0)
        
        train_subset_start = 0
        train_subset_end = train_ten.shape[0] - 1
        
        val_subset_start = train_subset_end + 1
        val_subset_end = val_subset_start + retrain_freq - 1

        retrain_count = ceil(combined_ten.shape[0]/retrain_freq)
        while val_subset_start < combined_ten.shape[0]:
            
            train_subset_ten = combined_ten[train_subset_start:(train_subset_end+1)]
            forward_returns_subset_ten = comb_forward_rets_ten[train_subset_start:(train_subset_end+1)]
            risk_free_rate_subset_ten = comb_risk_free_rate_ten[train_subset_start:(train_subset_end+1)]
            
            val_subset_ten = combined_ten[val_subset_start:(val_subset_end+1)]
            forward_rets_val_subset_ten = comb_forward_rets_ten[val_subset_start:(val_subset_end+1)]
            risk_free_rate_val_subset_ten = comb_risk_free_rate_ten[val_subset_start:(val_subset_end+1)]

            model, param_count, _, _, curr_time_taken, curr_num_epochs, curr_val_preds_ten = train_comb_model(lt_pipeline, lt_model, st_pipeline, st_model, train_subset_ten, forward_returns_subset_ten, risk_free_rate_subset_ten, val_subset_ten, forward_rets_val_subset_ten, risk_free_rate_val_subset_ten, batch_size, shuffle, num_epochs, patience, verbose)
            
            time_taken += curr_time_taken
            tot_num_epochs += curr_num_epochs
            if best_val_preds_ten is None:
                best_val_preds_ten = curr_val_preds_ten
            else:
                best_val_preds_ten = torch.concat([best_val_preds_ten, curr_val_preds_ten])
            
            train_subset_start += retrain_freq
            train_subset_end += retrain_freq
            
            val_subset_start += retrain_freq
            val_subset_end += retrain_freq

        time_taken /= retrain_count
        best_epoch = tot_num_epochs/retrain_count

    if val_ten is not None:
        val_solution_df = pd.DataFrame()
        val_solution_df['forward_returns'] = pd.Series(forward_rets_val_ten.detach().numpy().flatten())
        val_solution_df['risk_free_rate'] = pd.Series(risk_free_rate_val_ten.detach().numpy().flatten())
    
        val_submission_df = pd.DataFrame()
        val_submission_df['prediction'] = pd.Series(best_val_preds_ten.detach().numpy().flatten())
        
        best_val_adj_sharpe = round(score(val_solution_df, val_submission_df, None), 3)
    else:
        best_val_adj_sharpe = None

    return model, param_count, 0, best_val_adj_sharpe, time_taken, best_epoch

In [21]:
def run_comb_model_tscv(train_size, lt_train_size, st_train_size, val_size, retrain_freq, comp_dataset_df, max_folds, batch_size, shuffle = False, verbose = False):

    max_train_size = max(train_size, lt_train_size, st_train_size)
    
    total_size = comp_dataset_df.shape[0]
    num_possible_folds = floor((total_size - max_train_size)/val_size)
    num_actual_folds = min(num_possible_folds, max_folds)

    if verbose == True:
        print(f'Folds to be created = {num_actual_folds}')

    comp_dataset_df['lagged_forward_returns'] = comp_dataset_df['forward_returns'].shift(1)
    comp_dataset_df['lagged_risk_free_rate'] = comp_dataset_df['risk_free_rate'].shift(1)
    comp_dataset_df['lagged_market_forward_excess_returns'] = comp_dataset_df['market_forward_excess_returns'].shift(1)
    
    mean_adj_sharpe_across_fold = 0
    mean_val_adj_sharpe_across_fold = 0
    mean_time_taken_across_fold = 0
    mean_num_epochs = 0
    
    fold_no = 0
    
    #Create as many folds as possible.
    while fold_no < num_actual_folds:

        if verbose == True:
            print(f'Fold index = {fold_no}')

        if max_folds is not None:
            if fold_no >= max_folds:
                break

        if fold_no == 0:
            
            #Initialise the indices for the largest train size.
            max_fold_start_index = comp_dataset_df.shape[0] - num_actual_folds * val_size - max_train_size
            max_fold_end_index = max_fold_start_index + max_train_size - 1

        else:

            #Update the indices for the largest train size.
            max_fold_start_index = max_fold_start_index + val_size
            max_fold_end_index = max_fold_end_index + val_size

        lt_train_set_start_index = max_fold_end_index - lt_train_size + 1
        lt_train_end_index = lt_train_set_start_index + lt_train_size - 1
        lt_train_start_date = comp_dataset_df.iloc[lt_train_set_start_index, 0]
        lt_train_end_date = comp_dataset_df.iloc[lt_train_end_index, 0]

        st_train_set_start_index = max_fold_end_index - st_train_size + 1
        st_train_set_end_index = st_train_set_start_index + st_train_size - 1
        st_train_start_date = comp_dataset_df.iloc[st_train_set_start_index, 0]
        st_train_end_date = comp_dataset_df.iloc[st_train_set_end_index, 0]

        train_set_start_index = max_fold_end_index - train_size + 1
        train_set_end_index = train_set_start_index + train_size - 1
        train_start_date = comp_dataset_df.iloc[train_set_start_index, 0]
        train_end_date = comp_dataset_df.iloc[train_set_end_index, 0]

        val_start_index = max_fold_end_index + 1
        val_end_index = val_start_index + val_size - 1 
        val_start_date = comp_dataset_df.iloc[val_start_index, 0]
        val_end_date = comp_dataset_df.iloc[val_end_index, 0]

        if verbose == True:
            print(f'Fold: {fold_no}')
            print(f'Long term model - Training dates: {lt_train_start_date} - {lt_train_end_date}')
            print(f'Short term model - Training dates: {st_train_start_date} - {st_train_end_date}')
            print(f'Combined model - Training dates: {train_start_date} - {train_end_date}, Validation Dates: {val_start_date} - {val_end_date}')

        #We will then train the long-term model here.
        lt_train_set_df = comp_dataset_df.iloc[lt_train_set_start_index:(lt_train_end_index+1), :]
        lt_forward_returns_ten = torch.tensor(lt_train_set_df['forward_returns'].values, dtype=torch.float32)
        lt_risk_free_rate_ten = torch.tensor(lt_train_set_df['risk_free_rate'].values, dtype=torch.float32)
        lt_train_set_df = lt_train_set_df.drop(['forward_returns', 'risk_free_rate', 'market_forward_excess_returns'], axis = 1)
        lt_pipeline = Pipeline([('cleaner', CleanerTransformer()), ('latent', LatentTransformer(2))])
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                category=FutureWarning,
                module="sklearn.pipeline"
            )
            lt_pipeline = lt_pipeline.fit(lt_train_set_df.values)
            lt_train_latent_ten = torch.tensor(lt_pipeline.transform(lt_train_set_df.values), dtype=torch.float32)
        lt_model, _, _, _, _, _ = train_model_wrapper(lt_train_latent_ten, lt_forward_returns_ten, lt_risk_free_rate_ten, None, None, None, 2, 15, 1, 180, False, num_epochs = 1, patience = 2, verbose = False, retrain_freq = 180)
        
        #We will then train the short-term model here.
        st_train_set_df = comp_dataset_df.iloc[st_train_set_start_index:(st_train_set_end_index+1), :]
        st_forward_returns_ten = torch.tensor(st_train_set_df['forward_returns'].values, dtype=torch.float32)
        st_risk_free_rate_ten = torch.tensor(st_train_set_df['risk_free_rate'].values, dtype=torch.float32)
        st_train_set_df = st_train_set_df.drop(['forward_returns', 'risk_free_rate', 'market_forward_excess_returns'], axis = 1)
        st_pipeline = Pipeline([('cleaner', CleanerTransformer()), ('latent', LatentTransformer(4))])
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                category=FutureWarning,
                module="sklearn.pipeline"
            )
            st_pipeline = st_pipeline.fit(st_train_set_df.values)
            st_train_latent_ten = torch.tensor(st_pipeline.transform(st_train_set_df.values), dtype=torch.float32)
        st_model, _, _, _, _, _ = train_model_wrapper(st_train_latent_ten, st_forward_returns_ten, st_risk_free_rate_ten, None, None, None, 4, 8, 1, 180, False, num_epochs = 1, patience = 2, verbose = False, retrain_freq = 20)    
        
        #We will then train the combined model here.
        train_set_df = comp_dataset_df.iloc[train_set_start_index:(train_set_end_index+1), :]
        forward_returns_ten = torch.tensor(train_set_df['forward_returns'].values, dtype=torch.float32)
        risk_free_rate_ten = torch.tensor(train_set_df['risk_free_rate'].values, dtype=torch.float32)
        train_set_df = train_set_df.drop(['forward_returns', 'risk_free_rate', 'market_forward_excess_returns'], axis = 1)
        train_ten = torch.tensor(train_set_df.values, dtype=torch.float32)
        val_set_df = comp_dataset_df.iloc[val_start_index:(val_end_index+1), :]
        forward_returns_val_ten = torch.tensor(val_set_df['forward_returns'].values, dtype=torch.float32)
        risk_free_rate_val_ten = torch.tensor(val_set_df['risk_free_rate'].values, dtype=torch.float32)
        val_set_df = val_set_df.drop(['forward_returns', 'risk_free_rate', 'market_forward_excess_returns'], axis = 1)
        val_ten = torch.tensor(val_set_df.values, dtype=torch.float32)

        _, param_count, adj_sharpe_best_epoch, best_val_adj_sharpe, time_taken, num_epochs = train_comb_model_wrapper(lt_pipeline, lt_model, st_pipeline, st_model, train_ten, forward_returns_ten, risk_free_rate_ten, val_ten, forward_returns_val_ten, risk_free_rate_val_ten, 180, False, num_epochs = None, patience = 2, verbose = verbose, retrain_freq = retrain_freq)
                                                                                     
        mean_adj_sharpe_across_fold += adj_sharpe_best_epoch
        mean_val_adj_sharpe_across_fold += best_val_adj_sharpe
        mean_time_taken_across_fold += time_taken
        mean_num_epochs += num_epochs
        
        fold_no += 1

        if verbose == True:
            print("---------------------------------------------------------------")

    mean_adj_sharpe_across_fold = round(mean_adj_sharpe_across_fold/fold_no, 3)
    mean_val_adj_sharpe_across_fold = round(mean_val_adj_sharpe_across_fold/fold_no, 3)
    mean_time_taken_across_fold = round(mean_time_taken_across_fold/fold_no, 3)
    mean_num_epochs = int(round(mean_num_epochs/fold_no, 0))

    if verbose == True:
        print(f"Number of folds = {fold_no}")
        print(f'Mean adjusted sharpe ratio (Training) across folds = {mean_adj_sharpe_across_fold}')
        print(f'Mean adjusted sharpe ratio (Validation) across folds = {mean_val_adj_sharpe_across_fold}')
        print(f'Mean time taken across folds = {mean_time_taken_across_fold}')
        print(f'Mean number of epochs across folds = {mean_num_epochs}')
    
    return param_count, mean_adj_sharpe_across_fold, mean_val_adj_sharpe_across_fold, mean_time_taken_across_fold, mean_num_epochs

In [22]:
def do_grid_search_comb_model(train_size_list, lt_train_size, st_train_size, val_size, retrain_freq_list, max_folds = None, verbose = False):

    best_mean_adj_sharpe_across_fold = None
    best_mean_val_adj_sharpe_across_fold = None
    best_train_size = None
    best_val_size = None
    
    best_mean_time = None
    best_num_epochs = None
    best_retrain_freq = None

    for train_size in train_size_list:

        for retrain_freq in retrain_freq_list:

            batch_size = val_size

            param_count, mean_adj_sharpe_across_fold, mean_val_adj_sharpe_across_fold, mean_time_taken_across_fold, num_epochs = run_comb_model_tscv(train_size, lt_train_size, st_train_size, val_size, retrain_freq, comp_dataset_df, max_folds = max_folds, batch_size=batch_size, shuffle = False, verbose = False)
    
            if verbose == True:
                print(f'Validation size = {val_size}, Train size = {train_size}, Retrain freq = {retrain_freq}, Parameter count = {param_count}')
                print(f'Training adjusted sharpe = {mean_adj_sharpe_across_fold}, Validation adjusted sharpe = {mean_val_adj_sharpe_across_fold}, Time = {mean_time_taken_across_fold}, number of epochs = {num_epochs}')
                print('----------------------------------------------------------------')
            
            if ((best_mean_adj_sharpe_across_fold is None) or (best_mean_val_adj_sharpe_across_fold is None)):
                best_mean_adj_sharpe_across_fold = mean_adj_sharpe_across_fold
                best_mean_val_adj_sharpe_across_fold = mean_val_adj_sharpe_across_fold
                best_train_size = train_size
                best_param_count = param_count
                best_val_size = val_size
                best_mean_time = mean_time_taken_across_fold
                best_num_epochs = num_epochs
                best_retrain_freq = retrain_freq
    
            elif ((best_mean_val_adj_sharpe_across_fold == mean_val_adj_sharpe_across_fold) & (best_param_count > param_count)):
                best_mean_adj_sharpe_across_fold = mean_adj_sharpe_across_fold
                best_mean_val_adj_sharpe_across_fold = mean_val_adj_sharpe_across_fold
                best_input_dim = input_dim
                best_param_count = param_count
                best_val_size = val_size
                best_mean_time = mean_time_taken_across_fold
                best_num_epochs = num_epochs
                best_retrain_freq = retrain_freq
                
            elif best_mean_val_adj_sharpe_across_fold < mean_val_adj_sharpe_across_fold:
                best_mean_adj_sharpe_across_fold = mean_adj_sharpe_across_fold
                best_mean_val_adj_sharpe_across_fold = mean_val_adj_sharpe_across_fold
                best_param_count = param_count
                best_val_size = val_size
                best_mean_time = mean_time_taken_across_fold
                best_num_epochs = num_epochs
                best_retrain_freq = retrain_freq
    
            else:
                pass
        print("--------------------------------------------------------------------------------")

    print(f'\nBest validation size = {best_val_size}, Best train size = {best_train_size}, Best retrain Freq = {retrain_freq}, Best parameter count = {best_param_count}, Training adjusted sharpe = {best_mean_adj_sharpe_across_fold}, Validation adjusted sharpe = {best_mean_val_adj_sharpe_across_fold}, Best time = {best_mean_time}, Best number of epochs = {best_num_epochs}')

    return best_val_size, best_train_size, retrain_freq, best_param_count, best_mean_adj_sharpe_across_fold, best_mean_val_adj_sharpe_across_fold, best_mean_time, best_num_epochs

In [430]:
st_best_val_size = 1
st_best_train_size = int(252/3)
st_best_hidden_dim = 8
st_best_input_dim = 4
st_best_retrain_freq = 20
st_best_num_epochs = 1

In [431]:
lt_best_val_size = 1
lt_best_train_size = 252*5
lt_best_hidden_dim = 15
lt_best_input_dim = 2
lt_best_retrain_freq = 180
lt_best_num_epochs = 1

In [438]:
C_TRAIN_SIZE_LIST = sorted([252 * 3, 252 * 2, 252, int(252/2), int(252/3), int(252/4), int(252/5)])#, 252 * 2, 252 * 1, int(252/2), int(252/3)], reverse = False)
VAL_SIZE = 180
C_RETRAIN_FREQ_LIST = [18, 36, 90, 180]

C_MAX_FOLDS = 5

c_best_val_size, c_best_train_size, c_retrain_freq, c_best_param_count, c_best_mean_adj_sharpe_across_fold, c_best_mean_val_adj_sharpe_across_fold, c_best_mean_time, c_best_num_epochs = do_grid_search_comb_model(C_TRAIN_SIZE_LIST, lt_best_train_size, st_best_train_size, VAL_SIZE, C_RETRAIN_FREQ_LIST, C_MAX_FOLDS, True)
                                                                                                                                                                                                                               #do_grid_search_comb_model(train_size_list, lt_train_size, st_train_size, val_size, retrain_freq_list, max_folds, True)

Validation size = 180, Train size = 50, Retrain freq = 18, Parameter count = 0
Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.691, Time = 0.001, number of epochs = 1
----------------------------------------------------------------
Validation size = 180, Train size = 50, Retrain freq = 36, Parameter count = 0
Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.63, Time = 0.001, number of epochs = 1
----------------------------------------------------------------
Validation size = 180, Train size = 50, Retrain freq = 90, Parameter count = 0
Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.616, Time = 0.001, number of epochs = 1
----------------------------------------------------------------
Validation size = 180, Train size = 50, Retrain freq = 180, Parameter count = 0
Training adjusted sharpe = 0.0, Validation adjusted sharpe = 0.693, Time = 0.001, number of epochs = 1
----------------------------------------------------------------
------------

# Attempting the best model on a Kaggle simulated environment

In [22]:
#Best validation size = 180, Best train size = 84, Best hidden dimension = 8, Best input dimension = 4, Best retrain Freq = 20, Best parameter count = 49, Training adjusted sharpe = 0.0, Validation adjusted sharpe = 1.092, Best time = 0.001, Best number of epochs = 1

In [30]:
torch.manual_seed(42)

MIN_INVESTMENT = 0
MAX_INVESTMENT = 2

train_file = './kaggle/input/hull-tactical-market-prediction/train.csv'
train_df = pd.read_csv(train_file, skip_blank_lines = True)
#train_df will increase in size as it is augmented with new training rows uring each invocation of predict.

test_file = './kaggle/input/hull-tactical-market-prediction/test.csv'
test_df = pd.read_csv(test_file, skip_blank_lines = True)

train_df['lagged_forward_returns'] = train_df['forward_returns'].shift(1)
train_df['lagged_risk_free_rate'] = train_df['risk_free_rate'].shift(1)
train_df['lagged_market_forward_excess_returns'] = train_df['market_forward_excess_returns'].shift(1)

best_val_size = 1
best_train_size = int(252/3)
best_hidden_dim = 8
best_input_dim = 4
best_retrain_freq = 20
best_num_epochs = 1

lt_model = None
pipeline = None
retrain_df = None
count = 0

time_list = []
pred_list = []

In [31]:
count = 0
def predict(test: pl.DataFrame) -> float: #pl.DataFrame:
    
    global count, train_df, lt_model, retrain_df, pipeline

    start_time = time.time()
    
    test_df = test.to_pandas() #To exclude some blank columns that are being read.

    #Copying lagged version of 'forward_returns', 'risk_free_rate', and 'market_forward_excess_returns' from the current test row to the 
    #non-lagged versions of the same fields in the latests training row.
    train_df.iloc[-1, [-3, -2, -1]] = test_df.iloc[0, [-3, -2, -1]]
    
    #Retrain the model.
    if count%best_retrain_freq == 0:

        #Prepare the training data. This is the latest subset of train_df.
        retrain_df = train_df.iloc[-best_train_size:]
        forward_returns_ten = torch.tensor(retrain_df[-best_train_size:]['forward_returns'].values, dtype=torch.float32)
        risk_free_rate_ten = torch.tensor(retrain_df[-best_train_size:]['risk_free_rate'].values, dtype=torch.float32)
        retrain_df = retrain_df.drop(['forward_returns', 'risk_free_rate', 'market_forward_excess_returns'], axis = 1)        
        
        #Retrain the model.
        pipeline = Pipeline([('cleaner', CleanerTransformer()),
                         ('latent', LatentTransformer(best_input_dim))])
        
        with warnings.catch_warnings():
            warnings.filterwarnings(
                            "ignore",
                            category=FutureWarning,
                            module="sklearn.pipeline"
                        )
            pipeline = pipeline.fit(retrain_df.values)
            lt_train_latent_ten = torch.tensor(pipeline.transform(retrain_df.values), dtype=torch.float32)
        
        lt_model, param_count, adj_sharpe_best_epoch, best_val_adj_sharpe, time_taken, num_epochs = train_model_wrapper(lt_train_latent_ten, forward_returns_ten, risk_free_rate_ten, None, None, None, best_input_dim, best_hidden_dim, 1, 180, False, num_epochs = best_num_epochs, patience = 2, verbose = False, retrain_freq = best_retrain_freq)        

    #Make the prediction
    test_df = test_df.drop('is_scored', axis = 1)
    with warnings.catch_warnings():
        warnings.filterwarnings(
                        "ignore",
                        category=FutureWarning,
                        module="sklearn.pipeline"
                    )
        lt_val_latent_ten = torch.tensor(pipeline.transform(test_df.values), dtype=torch.float32)
    
    lt_pred = lt_model(lt_val_latent_ten.unsqueeze(0))
    lt_pred = torch.sigmoid(lt_pred) * 2

    pred_time = round((time.time() - start_time)/60, 3)
    print(f'Count = {count}, Predictions = {lt_pred.item()}, Prediction time = {pred_time} minutes')
    
    count += 1
    pred_list.append(lt_pred.item())
    time_list.append(pred_time)
    return lt_pred.item()

In [32]:
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(('./kaggle/input/hull-tactical-market-prediction/',))

Count = 0, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 1, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 2, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 3, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 4, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 5, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 6, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 7, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 8, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 9, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 10, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 11, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 12, Predictions = 0.6663790345191956, Prediction time = 0.0 minutes
Count = 13, Prediction

In [34]:
print(f'Mean prediction time = {np.mean(time_list)}')
print(f'Minimum prediction time = {np.min(time_list)}')
print(f'Maximum prediction time = {np.max(time_list)}')
print("")
print(f'Mean prediction = {np.mean(pred_list)}')
print(f'Minimum prediction = {np.min(pred_list)}')
print(f'Maximum prediction = {np.max(pred_list)}')

Mean prediction time = 3.3333333333333335e-05
Minimum prediction time = 0.0
Maximum prediction time = 0.001

Mean prediction = 1.0032476120524936
Minimum prediction = 0.4753839373588562
Maximum prediction = 1.5706956386566162


In [49]:
submission = pd.DataFrame()

#Compute the score on the predictions generated above.
submission['prediction'] = pd.Series(pred_list)

solution = pd.DataFrame()
solution['forward_returns'] = comp_dataset_df.iloc[-180:,:]['forward_returns']
solution['risk_free_rate'] = comp_dataset_df.iloc[-180:,:]['risk_free_rate']

In [66]:
def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    """
    Calculates a custom evaluation metric (volatility-adjusted Sharpe ratio).

    This metric penalizes strategies that take on significantly more volatility
    than the underlying market.

    Returns:
        float: The calculated adjusted Sharpe ratio.
    """

    if not pd.api.types.is_numeric_dtype(submission['prediction']):
        raise ParticipantVisibleError('Predictions must be numeric')

    solution = solution
    solution['position'] = submission['prediction'].values
    print(submission['prediction'])
    print(solution['position'])

    if solution['position'].max() > MAX_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].max()} exceeds maximum of {MAX_INVESTMENT}')
    if solution['position'].min() < MIN_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].min()} below minimum of {MIN_INVESTMENT}')

    solution['strategy_returns'] = solution['risk_free_rate'] * (1 - solution['position']) + solution['position'] * solution['forward_returns']

    # Calculate strategy's Sharpe ratio
    strategy_excess_returns = solution['strategy_returns'] - solution['risk_free_rate']
    strategy_excess_cumulative = (1 + strategy_excess_returns).prod()
    strategy_mean_excess_return = (strategy_excess_cumulative) ** (1 / len(solution)) - 1
    strategy_std = solution['strategy_returns'].std()

    trading_days_per_yr = 252
    if strategy_std == 0:
        raise ParticipantVisibleError('Division by zero, strategy std is zero')
    
    sharpe = strategy_mean_excess_return / strategy_std * np.sqrt(trading_days_per_yr)
    
    strategy_volatility = float(strategy_std * np.sqrt(trading_days_per_yr) * 100)

    # Calculate market return and volatility
    market_excess_returns = solution['forward_returns'] - solution['risk_free_rate']
    market_excess_cumulative = (1 + market_excess_returns).prod()
    market_mean_excess_return = (market_excess_cumulative) ** (1 / len(solution)) - 1
    market_std = solution['forward_returns'].std()

    market_volatility = float(market_std * np.sqrt(trading_days_per_yr) * 100)

    if market_volatility == 0:
        raise ParticipantVisibleError('Division by zero, market std is zero')

    # Calculate the volatility penalty
    excess_vol = max(0, strategy_volatility / market_volatility - 1.2) if market_volatility > 0 else 0
    vol_penalty = 1 + excess_vol

    # Calculate the return penalty
    return_gap = max(
        0,
        (market_mean_excess_return - strategy_mean_excess_return) * 100 * trading_days_per_yr,
    )
    return_penalty = 1 + (return_gap**2) / 100
    
    # Adjust the Sharpe ratio by the volatility and return penalty
    adjusted_sharpe = sharpe / (vol_penalty * return_penalty)
    return min(float(adjusted_sharpe), 1_000_000)

In [67]:
round(score(solution, submission, None), 3)

0      0.666379
1      0.666379
2      0.666379
3      0.666379
4      0.666379
         ...   
175    1.291962
176    1.291962
177    1.291962
178    1.291962
179    1.291962
Name: prediction, Length: 180, dtype: float64
8841    0.666379
8842    0.666379
8843    0.666379
8844    0.666379
8845    0.666379
          ...   
9016    1.291962
9017    1.291962
9018    1.291962
9019    1.291962
9020    1.291962
Name: position, Length: 180, dtype: float64


1.318