"""
The evaluation API requires that you set up a server which will respond to inference requests.
We have already defined the server; you just need write the predict function.
When we evaluate your submission on the hidden test set the client defined in `default_gateway` will run in a different container
with direct access to the hidden test set and hand off the data timestep by timestep.

Your code will always have access to the published copies of the copmetition files.
"""

In [1]:
import os

import pandas as pd
import polars as pl
import numpy as np

import kaggle_evaluation.default_inference_server
import importlib

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

import warnings

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import time
import shutil

In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x1d323a45670>

# Read the entire train file

In [3]:
comp_dataset_df = pd.read_csv("./kaggle/input/hull-tactical-market-prediction/train complete.csv")
comp_dataset_df.head(3)

Unnamed: 0,date_id,D1,D2,D3,D4,D5,D6,D7,D8,D9,...,V3,V4,V5,V6,V7,V8,V9,forward_returns,risk_free_rate,market_forward_excess_returns
0,0,0,0,0,1,1,0,0,0,1,...,,,,,,,,-0.002421,0.000301,-0.003038
1,1,0,0,0,1,1,0,0,0,1,...,,,,,,,,-0.008495,0.000303,-0.009114
2,2,0,0,0,1,0,0,0,0,1,...,,,,,,,,-0.009624,0.000301,-0.010243


# Transformers for data transformation

## Transformers for creating lagged features

In [4]:
class LagTransformer(BaseEstimator, TransformerMixin):
    
    def __init__(self, col_ind_list):
        self.col_ind_list = col_ind_list
        
    def fit(self, X):
        self.fitted = True
        return self
        
    def transform(self, X):

        X_shifted = np.empty_like(X, dtype=float)
        
        for col_ind in self.col_ind_list:

            X_shifted[1:, col_ind] = X[:-1, col_ind]
            X_shifted[0, col_ind] = np.nan
        
        return X_shifted

## Transformers for cleaning data

In [5]:
class CleanerTransformer(BaseEstimator, TransformerMixin):
    
    def __init__(self):
        pass
        
    def fit(self, X):

        self.col_mean_dict = {}

        #For each column, determine the mean value.
        for col_ind in range(X.shape[1]):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=RuntimeWarning)
                col_mean = np.nanmean(X[:, col_ind])
                
            if np.isnan(col_mean):
                col_mean = 0.0
            self.col_mean_dict[col_ind] = col_mean
            
        return self
        
    def transform(self, X):

        X_copy = X.copy().astype(float)

        for col_ind in range(X_copy.shape[1]):
            nan_mask = np.isnan(X_copy[:, col_ind])
            X_copy[nan_mask, col_ind] = self.col_mean_dict[col_ind]

        
        return X_copy

## Transformer for creating latent features

In [6]:
class LatentTransformer(BaseEstimator, TransformerMixin):
    
    def __init__(self, num_sing_vals):
        self.num_sing_vals = num_sing_vals
        
    def fit(self, X, y=None):

        U, s, Vt = np.linalg.svd(X)
        
        self.U_ld = U[:, :self.num_sing_vals]
        self.S_ld = s[0:self.num_sing_vals].reshape((1, self.num_sing_vals)) * np.eye(self.num_sing_vals)
        self.Vt_ld = Vt[0:self.num_sing_vals, :]

        return self
        
    def transform(self, X):

        latent = np.dot(X, self.Vt_ld.T)
        
        return latent

# Functions to Train the model

In [7]:
MIN_INVESTMENT = 0
MAX_INVESTMENT = 2

class ParticipantVisibleError(Exception):
    pass

In [8]:
def smoothed_score(forward_returns_ten: torch.Tensor, risk_free_rate_ten: torch.Tensor, signal_pred_ten: torch.Tensor) -> torch.Tensor:
    """
    Calculates a smoothed version of the competition metric.

    This metric penalizes strategies that take on significantly more volatility than the underlying market.

    Returns:
        float: The calculated adjusted Sharpe ratio.
    """
    
    # Clip positions to allowed range
    signal_pred = torch.clamp(signal_pred_ten, MIN_INVESTMENT, MAX_INVESTMENT)

    #Strategy returns
    strategy_returns = risk_free_rate_ten * (1 - signal_pred) + signal_pred * forward_returns_ten

    # Calculate strategy's Sharpe ratio
    strategy_excess_returns = strategy_returns - risk_free_rate_ten
    strategy_excess_cumulative = (1 + strategy_excess_returns).prod()
    strategy_mean_excess_return = (strategy_excess_cumulative) ** (1.0 / forward_returns_ten.shape[0]) - 1
    strategy_std = torch.std(strategy_returns, unbiased = False)

    trading_days_per_yr = torch.tensor(252, dtype=torch.float32)
    if strategy_std == 0:
        raise ParticipantVisibleError('Division by zero, strategy std is zero')
    sharpe = strategy_mean_excess_return / strategy_std * torch.sqrt(trading_days_per_yr)
    strategy_volatility = strategy_std * torch.sqrt(trading_days_per_yr) * 100.0

    # Calculate market return and volatility
    market_excess_returns = forward_returns_ten - risk_free_rate_ten
    market_excess_cumulative = (1 + market_excess_returns).prod()
    market_mean_excess_return = (market_excess_cumulative) ** (1 / forward_returns_ten.shape[0]) - 1
    market_std = torch.std(forward_returns_ten, unbiased = False)
    
    market_volatility = market_std * torch.sqrt(trading_days_per_yr) * torch.tensor(100.0, dtype=torch.float32)

    if market_volatility == 0:
        raise ParticipantVisibleError('Division by zero, market std is zero')

    # Calculate the volatility penalty
    excess_vol = torch.relu(strategy_volatility / market_volatility - 1.2)
    vol_penalty = 1 + excess_vol

    # Calculate the return penalty
    return_gap = torch.relu((market_mean_excess_return - strategy_mean_excess_return) * torch.tensor(100.0, dtype=torch.float32) * trading_days_per_yr)
    return_penalty = 1 + (return_gap**2) / torch.tensor(100.0, dtype=torch.float32)

    # Adjust the Sharpe ratio by the volatility and return penalty
    adjusted_sharpe = sharpe / (vol_penalty * return_penalty)
    adjusted_sharpe = torch.clamp(adjusted_sharpe, max = 1_000_000)
    
    return adjusted_sharpe

In [9]:
def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    """
    Calculates a custom evaluation metric (volatility-adjusted Sharpe ratio).

    This metric penalizes strategies that take on significantly more volatility
    than the underlying market.

    Returns:
        float: The calculated adjusted Sharpe ratio.
    """

    if not pd.api.types.is_numeric_dtype(submission['prediction']):
        raise ParticipantVisibleError('Predictions must be numeric')

    solution = solution
    solution['position'] = submission['prediction']

    if solution['position'].max() > MAX_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].max()} exceeds maximum of {MAX_INVESTMENT}')
    if solution['position'].min() < MIN_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].min()} below minimum of {MIN_INVESTMENT}')

    solution['strategy_returns'] = solution['risk_free_rate'] * (1 - solution['position']) + solution['position'] * solution['forward_returns']

    # Calculate strategy's Sharpe ratio
    strategy_excess_returns = solution['strategy_returns'] - solution['risk_free_rate']
    strategy_excess_cumulative = (1 + strategy_excess_returns).prod()
    strategy_mean_excess_return = (strategy_excess_cumulative) ** (1 / len(solution)) - 1
    strategy_std = solution['strategy_returns'].std()

    trading_days_per_yr = 252
    if strategy_std == 0:
        raise ParticipantVisibleError('Division by zero, strategy std is zero')
    
    sharpe = strategy_mean_excess_return / strategy_std * np.sqrt(trading_days_per_yr)
    
    strategy_volatility = float(strategy_std * np.sqrt(trading_days_per_yr) * 100)

    # Calculate market return and volatility
    market_excess_returns = solution['forward_returns'] - solution['risk_free_rate']
    market_excess_cumulative = (1 + market_excess_returns).prod()
    market_mean_excess_return = (market_excess_cumulative) ** (1 / len(solution)) - 1
    market_std = solution['forward_returns'].std()

    market_volatility = float(market_std * np.sqrt(trading_days_per_yr) * 100)

    if market_volatility == 0:
        raise ParticipantVisibleError('Division by zero, market std is zero')

    # Calculate the volatility penalty
    excess_vol = max(0, strategy_volatility / market_volatility - 1.2) if market_volatility > 0 else 0
    vol_penalty = 1 + excess_vol

    # Calculate the return penalty
    return_gap = max(
        0,
        (market_mean_excess_return - strategy_mean_excess_return) * 100 * trading_days_per_yr,
    )
    return_penalty = 1 + (return_gap**2) / 100
    
    # Adjust the Sharpe ratio by the volatility and return penalty
    adjusted_sharpe = sharpe / (vol_penalty * return_penalty)
    return min(float(adjusted_sharpe), 1_000_000)

In [10]:
class SequenceDataset(Dataset):
    def __init__(self, data_ten, forward_rets_ten, risk_free_rate_ten, seq_len, stride = 1):
        self.data_ten = data_ten
        self.forward_rets_ten = forward_rets_ten
        self.risk_free_rate_ten = risk_free_rate_ten
        self.seq_len = seq_len
        self.stride = stride
        self.num_samples = (data_ten.shape[0] - seq_len)//self.stride + 1
        
    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        start = idx * self.stride
        x = self.data_ten[start : start + self.seq_len]  # shape (SEQ_LEN, INPUT_DIM)
        fr = self.forward_rets_ten[start:start + self.seq_len]
        rf = self.risk_free_rate_ten[start:start + self.seq_len]
        return x, fr, rf, start

In [11]:
class SimpleLSTM(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, output_dim):
        
        super(SimpleLSTM, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        
        # x shape: (batch, seq_len, input_dim)
        out, (h_n, c_n) = self.lstm(x)  # out: (batch, seq_len, hidden_dim)
        out = out[:, -1, :]              # take last timestep
        out = self.fc(out)               # shape: (batch, output_dim)
        out = torch.sigmoid(out) * 2     # enforce [0, 2]
        return out

In [12]:
lt_model = None

def train_model(lt_train_ten, forward_rets_ten, risk_free_rate_ten, lt_val_ten, forward_rets_val_ten, risk_free_rate_val_ten, lt_seq_len, lt_input_dim, lt_hidden_dim, output_dim, batch_size, shuffle, num_epochs = None, patience = 2, verbose = False):

    global lt_model

    start_time = time.time()

    stride = 1

    #Datset to create training sequences based on training data.
    lt_dataset = SequenceDataset(lt_train_ten, forward_rets_ten, risk_free_rate_ten, lt_seq_len, stride = stride)
    lt_dataloader = DataLoader(lt_dataset, batch_size=batch_size, shuffle=shuffle)
    
    lt_model = SimpleLSTM(input_dim=lt_input_dim, hidden_dim=lt_hidden_dim, output_dim=output_dim)
    
    total_count = 0
    for name, param in lt_model.named_parameters():
            
        count = param.numel()
        total_count += count
    
    #Datset to create validation sequences based on valiaation data.
    if lt_val_ten is not None:
        
        if lt_seq_len > 1:
            lt_val_ten = torch.cat([lt_train_ten[-(lt_seq_len-1):, :], lt_val_ten], dim = 0) #<-- CHANGE
            forward_rets_val_ten = torch.cat([forward_rets_ten[-(lt_seq_len-1):], forward_rets_val_ten], dim = 0) #<-- CHANGE
            risk_free_rate_val_ten = torch.cat([risk_free_rate_ten[-(lt_seq_len-1):], risk_free_rate_val_ten], dim = 0) #<-- CHANGE
        
        lt_val_dataset = SequenceDataset(lt_val_ten, forward_rets_val_ten, risk_free_rate_val_ten, lt_seq_len, stride = stride)
        lt_val_dataloader = DataLoader(lt_val_dataset, batch_size=batch_size, shuffle=False)
        val_batch, val_batch_fr_ten, val_batch_rf_ten, val_idx_ten = next(iter(lt_val_dataloader))

    optimizer = torch.optim.Adam(list(lt_model.parameters()), lr=0.003) 
    
    total_loss = 0
    total_adj_sharpe = 0
    total_val_adj_sharpe = 0
    
    NUM_EPOCHS = 50

    early_stop_count = 0
    best_val_adj_sharpe = -np.inf
    adj_sharpe_best_epoch = -np.inf
    best_epoch = 0
    
    for epoch in range(NUM_EPOCHS):

        if num_epochs is not None:
            if epoch >= num_epochs:
                break
        
        total_abs = 0.0
        epoch_loss = 0.0
        epoch_adj_sharpe = 0.0
    
        all_preds = []

        batch_count = 0
        for batch, batch_fr_ten, batch_rf_ten, idx_ten in lt_dataloader:
            
            optimizer.zero_grad()

            #Signal predictions using the long-term model.
            lt_preds_ten = lt_model(batch)
            final_preds_ten = torch.sigmoid(lt_preds_ten) * 2
            
            #Compute loss as negative smoothed_score (maximize Sharpe)
            batch_loss = -smoothed_score(
                forward_returns_ten=batch_fr_ten,
                risk_free_rate_ten=batch_rf_ten,
                signal_pred_ten=final_preds_ten
            )
            
            submission = pd.DataFrame()
            submission['prediction'] = final_preds_ten.detach().numpy().flatten()
            
            solution = pd.DataFrame()
            solution['forward_returns'] = batch_fr_ten.detach().numpy().flatten()
            solution['risk_free_rate'] = batch_rf_ten.detach().numpy().flatten()
            
            batch_adj_sharpe = round(score(solution, submission, None), 3)

            epoch_loss += batch_loss.item()
            epoch_adj_sharpe += batch_adj_sharpe
            
            batch_loss.backward()
            batch_count += 1

            optimizer.step()
        
        mean_loss_for_epoch = round(epoch_loss/batch_count, 3)
        mean_adj_sharpe_for_epoch = round(epoch_adj_sharpe/batch_count, 3)

        if lt_val_ten is not None:

            #Signal predictions on the validation data using the long-term model.
            lt_val_preds_ten = lt_model(val_batch)
            
            final_val_preds_ten = torch.sigmoid(lt_val_preds_ten) * 2
            
            val_submission = pd.DataFrame()
            val_submission['prediction'] = final_val_preds_ten.detach().numpy().flatten()
            val_solution = pd.DataFrame()
            val_solution['forward_returns'] = val_batch_fr_ten.detach().numpy().flatten()
            val_solution['risk_free_rate'] = val_batch_rf_ten.detach().numpy().flatten()
            
            val_adj_sharpe = round(score(val_solution, val_submission, None), 3)
    
            #Update the best validation score so far.
            if best_val_adj_sharpe < val_adj_sharpe:
                best_val_adj_sharpe = val_adj_sharpe
                adj_sharpe_best_epoch = mean_adj_sharpe_for_epoch
                early_stop_count = 0
                best_epoch = epoch
    
            else:
                if early_stop_count > patience:
                    break
    
                else:
                    early_stop_count += 1
            
            if verbose == True:
                print(f"Epoch {epoch}: Mean Training Loss = {mean_loss_for_epoch}, Training mean adj sharpe ratio = {mean_adj_sharpe_for_epoch}, Validation mean adj sharpe ratio = {val_adj_sharpe}")

    time_taken = round((time.time() - start_time)/60, 3)
    if verbose == True:
        print(f"Best epoch: {best_epoch}")
        print(f"Best mean adjusted sharpe ratio (Val): {best_val_adj_sharpe}")
        print(f"Time taken = {time_taken} minutes")

    return lt_model, total_count, adj_sharpe_best_epoch, best_val_adj_sharpe, time_taken, best_epoch

## Scale the features

In [13]:
def scale(col_ser):
    
    global col_number
    min_val = col_ser.min()
    max_val = col_ser.max()
    
    return (col_ser - min_val)/(max_val - min_val)

In [14]:
comp_ds_scaled_df = comp_dataset_df.apply(scale, axis = 0)
comp_ds_scaled_df['date_id'] = comp_dataset_df['date_id']

In [15]:
comp_ds_scaled_df.head(3)

Unnamed: 0,date_id,D1,D2,D3,D4,D5,D6,D7,D8,D9,...,V3,V4,V5,V6,V7,V8,V9,forward_returns,risk_free_rate,market_forward_excess_returns
0,0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,...,,,,,,,,0.464247,0.949355,0.462739
1,1,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,...,,,,,,,,0.38872,0.955531,0.387855
2,2,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,...,,,,,,,,0.374671,0.950587,0.373946


# Run TSCV to determine the best hyperparameters

In [16]:
def run_tscv(train_size, val_size, seq_len, step_size, input_dim, hidden_dim, comp_dataset_df, batch_size, shuffle, max_folds = None, verbose = False):

    root = './kaggle/input/hull-tactical-market-prediction'
    if os.path.exists(root):
        for item in os.listdir(root):
            item_path = os.path.join(root, item)
            if os.path.isdir(item_path):   # delete only folders
                shutil.rmtree(item_path)    
    
    #Initialising the validation indices.
    val_start_index = comp_dataset_df.shape[0] - val_size
    val_end_index = comp_dataset_df.shape[0] - 1

    #Initialise the train_set1 indices
    train_set_end_index = val_start_index - 1
    train_set_start_index = train_set_end_index - train_size + 1

    fold_no = 0
    
    mean_adj_sharpe_across_fold = 0
    mean_val_adj_sharpe_across_fold = 0
    mean_time_taken_across_fold = 0
    mean_num_epochs = 0
    
    #Create as many folds as possible.
    while train_set_start_index > 0:

        if max_folds is not None:
            if fold_no >= max_folds:
                break

        if verbose == True:
            print(f'Training test fold: {fold_no}')
        
        comp_dataset_df['lagged_forward_returns'] = comp_dataset_df['forward_returns'].shift(1)
        comp_dataset_df['lagged_risk_free_rate'] = comp_dataset_df['risk_free_rate'].shift(1)
        comp_dataset_df['lagged_market_forward_excess_returns'] = comp_dataset_df['market_forward_excess_returns'].shift(1)
    
        train_set_df = comp_dataset_df.iloc[train_set_start_index:train_set_end_index+1, :]
        val_set_df = comp_dataset_df.iloc[val_start_index:val_end_index+1, :]

        train_set_df = train_set_df.drop(['lagged_forward_returns', 'lagged_risk_free_rate', 'lagged_market_forward_excess_returns'], axis = 1)

        val_set_subset_df = val_set_df[['forward_returns', 'risk_free_rate', 'market_forward_excess_returns']].copy()
        val_set_df = val_set_df.drop(['forward_returns', 'risk_free_rate', 'market_forward_excess_returns'], axis = 1)
    
        pipeline = Pipeline([('cleaner', CleanerTransformer()),
                             ('latent', LatentTransformer(input_dim))])
        
        with warnings.catch_warnings():

            warnings.filterwarnings(
                "ignore",
                category=FutureWarning,
                module="sklearn.pipeline"
            )
            pipeline = pipeline.fit(train_set_df.values)
            lt_train_latent_ten = torch.tensor(pipeline.transform(train_set_df.values), dtype=torch.float32)
            lt_val_latent_ten = torch.tensor(pipeline.transform(val_set_df.values), dtype=torch.float32)

            #Replace infinite values.
            if torch.isinf(lt_val_latent_ten).any():
                finite_vals = lt_val_latent_ten[torch.isfinite(lt_val_latent_ten)]
                max_val = finite_vals.max()
                min_val = finite_vals.min()
                lt_val_latent_ten[lt_val_latent_ten == float('inf')] = max_val
                lt_val_latent_ten[lt_val_latent_ten == float('-inf')] = min_val
        
        #We will then train the model here.
        forward_returns_ten = torch.tensor(train_set_df['forward_returns'].values, dtype=torch.float32)
        risk_free_rate_ten = torch.tensor(train_set_df['risk_free_rate'].values, dtype=torch.float32)
        forward_rets_val_ten = torch.tensor(val_set_subset_df['forward_returns'].values, dtype=torch.float32)
        risk_free_rate_val_ten = torch.tensor(val_set_subset_df['risk_free_rate'].values, dtype=torch.float32)

        lt_model, param_count, adj_sharpe_best_epoch, best_val_adj_sharpe, time_taken, num_epochs = train_model(lt_train_latent_ten, forward_returns_ten, risk_free_rate_ten, lt_val_latent_ten, forward_rets_val_ten, risk_free_rate_val_ten, seq_len, input_dim, hidden_dim, 1, batch_size, shuffle, num_epochs = 20, patience = 2, verbose = verbose)
        
        mean_adj_sharpe_across_fold += adj_sharpe_best_epoch
        mean_val_adj_sharpe_across_fold += best_val_adj_sharpe
        mean_time_taken_across_fold += time_taken
        mean_num_epochs += num_epochs
        
        train_set_start_index = train_set_start_index - step_size
        train_set_end_index = train_set_end_index - step_size
        
        val_start_index = val_start_index - step_size
        val_end_index = val_end_index - step_size
            
        
        fold_no += 1

        if verbose == True:
            print("---------------------------------------------------------------")
    
    mean_adj_sharpe_across_fold = round(mean_adj_sharpe_across_fold/fold_no, 3)
    mean_val_adj_sharpe_across_fold = round(mean_val_adj_sharpe_across_fold/fold_no, 3)
    mean_time_taken_across_fold = round(mean_time_taken_across_fold/fold_no, 3)
    mean_num_epochs = int(round(mean_num_epochs/fold_no, 0))

    print(f"Number of folds = {fold_no}")
    print(f'Mean adjusted sharpe ratio across folds = {mean_adj_sharpe_across_fold}')
    print(f'Mean adjusted sharpe ratio (Validation) across folds = {mean_val_adj_sharpe_across_fold}')
    print(f'Mean time taken across folds = {mean_time_taken_across_fold}')
    print(f'Mean number of epochs across folds = {mean_num_epochs}')
    
    return param_count, mean_adj_sharpe_across_fold, mean_val_adj_sharpe_across_fold, mean_time_taken_across_fold, mean_num_epochs

In [17]:
def do_grid_search(train_size_list, val_size_list, seq_len_list, input_dim_list, hidden_dim_list, step_size = None, max_folds = None):

    best_mean_adj_sharpe_across_fold = None
    best_mean_val_adj_sharpe_across_fold = None
    best_train_size = None
    best_seq_len = None
    best_input_dim = None
    best_hidden_dim = None
    best_param_count = None
    best_val_size = None
    best_mean_time = None
    best_num_epochs = None
    
    for val_size in val_size_list:
    
        if step_size is None:
            step_size = val_size
        
        for train_size in train_size_list:
        
            for hidden_dim in hidden_dim_list:
                
                for input_dim in input_dim_list:
                    
                    for seq_len in seq_len_list:

                        batch_size = val_size
                
                        param_count, mean_adj_sharpe_across_fold, mean_val_adj_sharpe_across_fold, mean_time_taken_across_fold, num_epochs = run_tscv(train_size, val_size, seq_len, step_size, input_dim, hidden_dim, comp_dataset_df, max_folds = max_folds, batch_size=batch_size, shuffle = False, verbose = False)
                        
                        print(f'Validation size = {val_size}, Train size = {train_size}, Hidden dim = {hidden_dim}, Input dimension: {input_dim}, Parameter count = {param_count}, Seq length = {seq_len}\n')
                        if ((best_mean_adj_sharpe_across_fold is None) or (best_mean_val_adj_sharpe_across_fold is None)):
                            best_mean_adj_sharpe_across_fold = mean_adj_sharpe_across_fold
                            best_mean_val_adj_sharpe_across_fold = mean_val_adj_sharpe_across_fold
                            best_train_size = train_size
                            best_seq_len = seq_len
                            best_input_dim = input_dim
                            best_hidden_dim = hidden_dim
                            best_param_count = param_count
                            best_val_size = val_size
                            best_mean_time = mean_time_taken_across_fold
                            best_num_epochs = num_epochs
        
                        elif ((best_mean_val_adj_sharpe_across_fold == mean_val_adj_sharpe_across_fold) & (best_param_count > param_count)):
                            best_mean_adj_sharpe_across_fold = mean_adj_sharpe_across_fold
                            best_mean_val_adj_sharpe_across_fold = mean_val_adj_sharpe_across_fold
                            best_train_size = train_size
                            best_seq_len = seq_len
                            best_input_dim = input_dim
                            best_hidden_dim = hidden_dim
                            best_param_count = param_count
                            best_val_size = val_size
                            best_mean_time = mean_time_taken_across_fold
                            best_num_epochs = num_epochs
                            
                        elif best_mean_val_adj_sharpe_across_fold < mean_val_adj_sharpe_across_fold:
                            best_mean_adj_sharpe_across_fold = mean_adj_sharpe_across_fold
                            best_mean_val_adj_sharpe_across_fold = mean_val_adj_sharpe_across_fold
                            best_train_size = train_size
                            best_seq_len = seq_len
                            best_input_dim = input_dim
                            best_hidden_dim = hidden_dim
                            best_param_count = param_count
                            best_val_size = val_size
                            best_mean_time = mean_time_taken_across_fold
                            best_num_epochs = num_epochs
            
                        else:
                            pass
        print("--------------------------------------------------------------------------------")   

    print(f'\nBest validation size = {best_val_size}, Best train size = {best_train_size}, Best hidden dimension = {best_hidden_dim}, Best input dimension = {best_input_dim}, Best sequence length = {best_seq_len}, Best parameter count = {best_param_count}, Training adjusted sharpe = {best_mean_adj_sharpe_across_fold}, Validation adjusted sharpe = {best_mean_val_adj_sharpe_across_fold}, Best time = {best_mean_time}, Best number of epochs = {best_num_epochs}')

    return best_val_size, best_train_size, best_hidden_dim, best_input_dim, best_seq_len, best_param_count, best_mean_adj_sharpe_across_fold, best_mean_val_adj_sharpe_across_fold, best_mean_time, best_num_epochs

## Run TSCV for Short-Term Model

In [18]:
S_TRAIN_SIZE_LIST = [int(252/8), int(252/4), int(252/3), int(252/2)]
S_VAL_SIZE_LIST = [16]

S_STEP_SIZE = None
#Model hyper-parameters.
S_SEQ_LEN_LIST = [1]
S_INPUT_DIM_LIST = [1, 2, 3, 4]
S_HIDDEN_DIM_LIST = [12, 13, 14, 15, 16]

S_MAX_FOLDS = 100
print(f'Maximum training/test folds = {S_MAX_FOLDS}')

s_best_val_size, s_best_train_size, s_best_hidden_dim, s_best_input_dim, s_best_seq_len, s_best_param_count, s_best_mean_adj_sharpe_across_fold, s_best_mean_val_adj_sharpe_across_fold, s_best_mean_time, s_best_num_epochs = do_grid_search(S_TRAIN_SIZE_LIST, S_VAL_SIZE_LIST, S_SEQ_LEN_LIST, S_INPUT_DIM_LIST, S_HIDDEN_DIM_LIST, S_STEP_SIZE, S_MAX_FOLDS)

Maximum training/test folds = 100
Number of folds = 100
Mean adjusted sharpe ratio across folds = 2.019
Mean adjusted sharpe ratio (Validation) across folds = 2.047
Mean time taken across folds = 0.004
Mean number of epochs across folds = 10
Validation size = 16, Train size = 31, Hidden dim = 12, Input dimension: 1, Parameter count = 733, Seq length = 1

Number of folds = 100
Mean adjusted sharpe ratio across folds = 2.017
Mean adjusted sharpe ratio (Validation) across folds = 2.067
Mean time taken across folds = 0.004
Mean number of epochs across folds = 10
Validation size = 16, Train size = 31, Hidden dim = 12, Input dimension: 2, Parameter count = 781, Seq length = 1

Number of folds = 100
Mean adjusted sharpe ratio across folds = 2.04
Mean adjusted sharpe ratio (Validation) across folds = 2.033
Mean time taken across folds = 0.004
Mean number of epochs across folds = 10
Validation size = 16, Train size = 31, Hidden dim = 12, Input dimension: 3, Parameter count = 829, Seq length = 1

In [19]:
#Best validation size = 16, Best train size = 126, Best hidden dimension = 12, Best input dimension = 3, Best sequence length = 1, Best parameter count = 829, Training adjusted sharpe = 2.216, Validation adjusted sharpe = 2.26, Best time = 0.017, Best number of epochs = 12
#Best validation size = 16, Best train size = 126, Best hidden dimension = 16, Best input dimension = 4, Best sequence length = 1, Best parameter count = 1425, Training adjusted sharpe = 2.223, Validation adjusted sharpe = 2.335, Best time = 0.014, Best number of epochs = 12

## Run TSCV for Long-Term Model

In [20]:
#Best validation size = 180, Best train size = 1008, Best hidden dimension = 24, Best input dimension = 5, Best sequence length = 1, Best parameter count = 3001, Training adjusted sharpe = 0.76, Validation adjusted sharpe = 0.817, Best time = 0.016, Best number of epochs = 13

In [21]:
L_TRAIN_SIZE_LIST = [252*3, 252*4, 252*5]
L_VAL_SIZE_LIST = [180]
L_STEP_SIZE = 16

#Model hyper-parameters.
L_SEQ_LEN_LIST = [1]
L_INPUT_DIM_LIST = [5, 6, 7]
L_HIDDEN_DIM_LIST = [20, 22, 24, 26]

L_MAX_FOLDS = 100
print(f'Maximum training/test folds = {L_MAX_FOLDS}')

l_best_val_size, l_best_train_size, l_best_hidden_dim, l_best_input_dim, l_best_seq_len, l_best_param_count, l_best_mean_adj_sharpe_across_fold, l_best_mean_val_adj_sharpe_across_fold, l_best_mean_time, l_best_num_epochs = do_grid_search(L_TRAIN_SIZE_LIST, L_VAL_SIZE_LIST, L_SEQ_LEN_LIST, L_INPUT_DIM_LIST, L_HIDDEN_DIM_LIST, L_STEP_SIZE, L_MAX_FOLDS)

Maximum training/test folds = 100
Number of folds = 100
Mean adjusted sharpe ratio across folds = 0.944
Mean adjusted sharpe ratio (Validation) across folds = 0.84
Mean time taken across folds = 0.018
Mean number of epochs across folds = 14
Validation size = 180, Train size = 756, Hidden dim = 20, Input dimension: 5, Parameter count = 2181, Seq length = 1

Number of folds = 100
Mean adjusted sharpe ratio across folds = 0.946
Mean adjusted sharpe ratio (Validation) across folds = 0.854
Mean time taken across folds = 0.02
Mean number of epochs across folds = 14
Validation size = 180, Train size = 756, Hidden dim = 20, Input dimension: 6, Parameter count = 2261, Seq length = 1

Number of folds = 100
Mean adjusted sharpe ratio across folds = 0.947
Mean adjusted sharpe ratio (Validation) across folds = 0.84
Mean time taken across folds = 0.02
Mean number of epochs across folds = 15
Validation size = 180, Train size = 756, Hidden dim = 20, Input dimension: 7, Parameter count = 2341, Seq leng

In [22]:
#Best validation size = 16, Best train size = 1512, Best hidden dimension = 6, Best input dimension = 3, Best sequence length = 1, Best parameter count = 271, Training adjusted sharpe = 2.28, Validation adjusted sharpe = 2.465, Best time = 0.085, Best number of epochs = 4

#Best validation size = 180, Best train size = 1260, Best hidden dimension = 20, Best input dimension = 4, Best sequence length = 1, Best parameter count = 2101, Training adjusted sharpe = 0.755, Validation adjusted sharpe = 0.812, Best time = 0.019, Best number of epochs = 12
#Best validation size = 180, Best train size = 1008, Best hidden dimension = 22, Best input dimension = 6, Best sequence length = 1, Best parameter count = 2663, Training adjusted sharpe = 0.755, Validation adjusted sharpe = 0.818, Best time = 0.018, Best number of epochs = 13
#Best validation size = 180, Best train size = 1008, Best hidden dimension = 24, Best input dimension = 5, Best sequence length = 1, Best parameter count = 3001, Training adjusted sharpe = 0.76, Validation adjusted sharpe = 0.817, Best time = 0.016, Best number of epochs = 13
#Best validation size = 180, Best train size = 1260, Best hidden dimension = 24, Best input dimension = 7, Best sequence length = 1, Best parameter count = 3193, Training adjusted sharpe = 0.749, Validation adjusted sharpe = 0.818, Best time = 0.018, Best number of epochs = 11

# Attempting the best model on a Kaggle simulated environment

In [23]:
train_df = pd.read_csv('./kaggle/input/hull-tactical-market-prediction/train.csv', skip_blank_lines = True)
lt_model = None
pipeline = None
retrain_df = Nonen
best_val_size = 1

FileNotFoundError: [Errno 2] No such file or directory: './kaggle/input/hull-tactical-market-prediction/train.csv'

In [None]:
time_list = []
pred_list = []

In [None]:
count = 0
def predict(test: pl.DataFrame) -> float: #pl.DataFrame:
    
    global count, train_df, lt_model, retrain_df, pipeline
    print(test)

    start_time = time.time()
    
    test_df = test.to_pandas()#.iloc[:, 0: -4] #To exclude some blank columns that are being read.
    
    #Copying 'forward_returns', 'risk_free_rate', and 'market_forward_excess_returns' from the current test row to the latests training row.
    train_df.iloc[-1, [-3, -2, -1]] = test_df.iloc[0, [-3, -2, -1]]

    #Retrain the model.
    if count%best_val_size == 0:

        #Prepare the training data.
        retrain_df = train_df.iloc[-best_train_size:]
        pipeline = Pipeline([('cleaner', CleanerTransformer()),
                             ('latent', LatentTransformer(best_input_dim))])
        with warnings.catch_warnings():
            warnings.filterwarnings(
                            "ignore",
                            category=FutureWarning,
                            module="sklearn.pipeline"
                        )
            pipeline = pipeline.fit(retrain_df.iloc[:, :-3].values)
            lt_train_latent_ten = torch.tensor(pipeline.transform(retrain_df.iloc[:, :-3].values), dtype=torch.float32)
        
        forward_returns_ten = torch.tensor(retrain_df['forward_returns'].values, dtype=torch.float32)
        risk_free_rate_ten = torch.tensor(retrain_df['risk_free_rate'].values, dtype=torch.float32)

        #We will then train the model here.
        lt_model, param_count, adj_sharpe_best_epoch, best_val_adj_sharpe, time_taken, _ = train_model(lt_train_latent_ten, forward_returns_ten, risk_free_rate_ten, None, None, None, best_seq_len, best_input_dim, best_hidden_dim, 1, shuffle = False, num_epochs = best_num_epochs, patience = 2, verbose = False)

    #Make the prediction
    lt_val_latent_ten = torch.tensor(pipeline.transform(test_df.iloc[:, :-4].values), dtype=torch.float32)
    
    lt_pred = lt_model(lt_val_latent_ten.unsqueeze(0))
        
    #Append the current test row with blank 'forward_returns', 'risk_free_rate', and 'market_forward_excess_returns' to the training dataset.
    cols = train_df.columns[:train_df.shape[1] - 3]
    new_train_row_df = test_df.loc[:, cols]
    train_df = pd.concat([train_df, new_train_row_df], axis = 0, ignore_index = True)

    pred_time = round((time.time() - start_time)/60, 3)
    print(f'Count = {count}, Predictions = {lt_pred.item()}, Prediction time = {pred_time}')
    time_list.append(pred_time)

    """Replace this function with your inference code.
    You can return either a Pandas or Polars dataframe, though Polars is recommended for performance.
    Each batch of predictions (except the very first) must be returned within 5 minutes of the batch features being provided.
    """
    pred_df = pl.DataFrame({
        "row_id": test["date_id"],      # must match exactly what's in test data
        "target": pl.Series([lt_pred.item()] * test.height)  # dummy prediction
    })

    pred_list.append(lt_pred.item())
    
    count += 1
    
    return pred_df

In [None]:
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(('./kaggle/input/hull-tactical-market-prediction/',))

In [None]:
print(f'Mean prediction time = {np.mean(time_list)}')
print(f'Minimum prediction time = {np.min(time_list)}')
print(f'Maximum prediction time = {np.max(time_list)}')
print("")
print(f'Mean prediction = {np.mean(pred_list)}')
print(f'Minimum prediction = {np.min(pred_list)}')
print(f'Maximum prediction = {np.max(pred_list)}')