## Environment Selection

In [None]:
## TODO: select the environment installed from the environment.yml file

## Load Data

In [None]:
### Load Futures Series

import os

## TODO: add path to the folder containing the src folder
# The working directory is switched before the `src` imports below run.
os.chdir(r'')

import numpy as np
import pandas as pd
import torch
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
from src.models import Transformer
from src.features import (
    DeepMomentumFeatures,
    MACDFeatures,
    DatetimeFeatures,
    DefaultFeatureCreator,
)

## Series
# First CSV column becomes the index and is parsed as dates.
series = pd.read_csv(
    'prices_back-ratio-adj.csv',
    index_col=0,
    parse_dates=True,
)

## TODO: if you want to run the TMOM:
# 1. Uncomment the following lines up to the next "TODO".
# 2. For one asset at a time set the label (label='ISM' or 'CZN') and run all the cells.

# label = 'CZN' # labels: ISM, CZN
# all_used_assets = [label+'_back_adj']
# series = series.rename(columns=dict(zip(series.columns, series.columns+'_close')))

# TODO: if you want to run the DMN, CS-DMN, or STMOM:
# 1. Uncomment the following lines.
# 2. Run all the cells.

# label = []
# all_used_assets = series.columns
# series = series.rename(columns=dict(zip(series.columns, series.columns+'_close')))

## Prepare Features

In [5]:
# Build the per-asset feature tables and sanity-check their value range.
USE_DATETIME_FEATURES = True

# Feature classes handed to the feature creator; `features_configs` holds one
# (here empty) config dict per class, in the same order.
used_features = [DeepMomentumFeatures, MACDFeatures] # one can implement and add other features like VWAP, RSI etc
features_configs = [{}, {}]

# Columns excluded from the range check below.
cols_not_to_check = ['close', 'srs']

if USE_DATETIME_FEATURES:
    used_features.append(DatetimeFeatures)
    features_configs.append({})
    cols_not_to_check += ['date']

# NOTE: `series` and `all_used_assets` come from the data-loading cell
# (`all_used_assets` is only defined once one of its TODO sections is uncommented).
fc = DefaultFeatureCreator(series, all_used_assets, used_features, features_configs)

features = fc.create_features()

shared_cols = None

# Check if something is wrong with the data
BAD_VALUES_THRESHOLD = 10000

for key in features.keys(): # for every asset
    # features[key] = features[key].iloc[:-2,:] # TODO: FIX LATER
    # Drop the unchecked columns once and reuse the result for both bounds.
    checked = features[key].drop(cols_not_to_check, axis=1)
    largest = checked.max().max()
    smallest = checked.min().min()

    # Explicit raises instead of `assert`: asserts are removed under `python -O`.
    # The `not (a < b)` form also rejects NaN extremes, exactly as the
    # original assertions did.
    if not largest < BAD_VALUES_THRESHOLD: # no values exceeding 10k
        raise ValueError(
            f"Features of asset {key!r} contain a value of {largest}, "
            f"which is not below {BAD_VALUES_THRESHOLD}."
        )
    if not smallest > -BAD_VALUES_THRESHOLD: # no values below -10k
        raise ValueError(
            f"Features of asset {key!r} contain a value of {smallest}, "
            f"which is not above {-BAD_VALUES_THRESHOLD}."
        )

In [None]:
# Continuous feature columns that are used as model inputs.
cols_to_use = [
    'norm_daily_return',
    'norm_monthly_return',
    'norm_quarterly_return',
    'norm_biannual_return',
    'norm_annual_return',
    'macd_8_24',
    'macd_16_48',
    'macd_32_96',
]

# Calendar columns (use: capture seasonalities in the series); only active
# when the datetime features were created.
datetime_cols = ['year', 'month_of_year', 'day_of_week'] if USE_DATETIME_FEATURES else []
enable_categorical = bool(USE_DATETIME_FEATURES)

# Encode (already existing) categorical features as integer labels.
# NOTE(review): a fresh LabelEncoder is fitted for every asset/column pair, so
# the same integer code can stand for different raw values across assets when
# their value sets differ (e.g. different first years) — confirm this is intended.
for asset in features.keys():
    for calendar_col in datetime_cols:
        encoder = LabelEncoder()
        features[asset][calendar_col] = encoder.fit_transform(features[asset][calendar_col])

# Configure experiment

# Training is done via expanding window approach:
# train, val, test = min 5 years for training & validation + next 5 years for testing
# validation data constitutes the last 10% of training data

test_delta = pd.Timedelta('1825days') # 5 years of testing

# January 1st of every fifth year from 2010 up to (excluding) 2025.
split_years = range(2010, 2025, 5) # original: (2005, 2025, 5)
date_range = pd.to_datetime([datetime(split_year, 1, 1) for split_year in split_years])

date_range

## Configure Experiment

In [None]:
# Registry of available model classes, keyed by their short name.
MODEL_MAPPING = {'trf': Transformer}

## Settings
model_type = 'trf' # model type from MODEL_MAPPING
target_vol = 0.15 # measure for turnover evaluation

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

settings = dict(plot=False, print_=False, save_model=False)

## TODO: if want to regularize w.r.t transaction costs:
# 1. set 'apply_turnover_reg' to True
# 2. set 'transaction_cost' to transaction cost, in bps, that the model needs to regularize for
params_c = {
    'apply_turnover_reg': False,
    'transaction_cost': 0.0,
    'basis_points': [0.0, 1.0, 5.0, 10.0], # ignore
}

## Hyperparameters grid
# Each key lists the candidate values the random search draws from; the key
# order is kept as-is because the sampling cell iterates over it.
hyperparams = {
    'batch_size': [512, 1024], # all good
    'lr': [1e-4, 1e-3, 1e-2], # add 1e-1 -> bad performance
    'dropout': [0.1, 0.3, 0.5], # all good
    'max_norm': [1e-2, 1e-1], # all good
    'n_heads': [2, 4], # all good
    'n_layers': [1, 2, 3], # all good
    'd_model': [16, 32, 64, 128], # remove 8, add 128
    'multiplier': [2, 4, 8, 16], # remove 1, add 16
    'l1_reg_weight': [0.0],
    'l2_reg_weight': [0.0],
}

## Fixed parameters grid
# NOTE(review): 'l2_reg_weight' is present both here and in `hyperparams`.
fixed_params = {
    'n_epochs': 100,
    'patience': 10,
    'n_seeds': 5,
    'apply_l1_reg': False,
    'l2_reg_weight': 0.0,
    'scaling': None, # None, 'standard', 'minmax'
    'timesteps': 63,
}

## Sample Random Search Hyperparameters

In [None]:
# Random search: draw `n_samples` distinct hyperparameter combinations from
# the grid, replicate each one for `n_seeds` seeds, and store every
# (combination, seed) pair as a plain dict in `hyperparams_grid`.
grid                        = hyperparams
n_seeds                     = 5
n_samples                   = 50
sampled_models              = []
sampled_models_with_seed    = []
hyperparams_grid            = []

np.random.seed(42)

## Guard against an endless sampling loop
# The loop below only stops after `n_samples` *distinct* combinations were
# drawn, so it would spin forever if the grid cannot provide that many.
n_unique_combinations = 1
for candidates in grid.values():
    n_unique_combinations *= len(set(candidates))

if n_samples > n_unique_combinations:
    raise ValueError(
        f"Cannot sample {n_samples} unique models: the hyperparameter grid "
        f"only contains {n_unique_combinations} distinct combinations."
    )

## Sample unique models
while len(sampled_models) < n_samples:

    sampled_model = []

    # One draw per hyperparameter, in grid order (keeps the seeded sequence stable).
    for candidates in grid.values():
        val = np.random.choice(candidates)
        if hasattr(val, "item"):
            val = val.item() # numpy scalar -> plain Python number
        sampled_model.append(val)

    # Rejection step: already sampled combinations are discarded.
    if sampled_model not in sampled_models:
        sampled_models.append(sampled_model)

## Add seed to sampled unique models
sampled_models_with_seed = [
    sampled_model + [seed]
    for sampled_model in sampled_models
    for seed in range(n_seeds)
]

models = pd.DataFrame(data=sampled_models_with_seed, columns=list(grid.keys())+['seed'])

# Sanity check: every (combination, seed) row must be unique.
assert models.shape == models.drop_duplicates().shape

## Turn pd.DataFrame into dictionary
# Whole numbers are stored as int and everything else as float (so 0.0
# becomes 0), which undoes the float upcast applied to the rows by `iterrows`.
for _, model in models.iterrows():
    hyperparams_grid.append({
        key: (int(value) if value % 1 == 0 else float(value))
        for key, value in model.items()
    })

## Run Experiments

In [None]:
### DMN

## TODO: add path to the folder containing the src folder
# NOTE(review): the working directory is changed before `src.utils` is
# imported, presumably so that the `src` package is found relative to it — confirm.
# The empty placeholder path must be filled in; os.chdir('') raises.
os.chdir(r'')

from joblib import Parallel, delayed
from src.utils import train_model_trf
import time

def run_trial_transf(hyperparams):
    """Train one hyperparameter configuration and never raise.

    The configuration is forwarded to ``train_model_trf`` together with the
    experiment settings defined at notebook level. Any exception is caught so
    that a single failing trial cannot abort the whole parallel sweep.

    Args:
        hyperparams: Dict with the hyperparameters (and seed) of this trial.

    Returns:
        Whatever ``train_model_trf`` returns on success. On failure, a dict
        with the keys ``"error"`` (exception message), ``"hyperparams"`` (the
        trial's input) and ``"traceback"`` (formatted traceback, so that the
        failure can be diagnosed afterwards).
    """
    import traceback  # local import keeps the function self-contained

    try:
        # Restrict torch to a single thread inside this call; presumably to
        # avoid oversubscribing the CPU when several trials run in parallel.
        torch.set_num_threads(1)
        return train_model_trf(
            hyperparams=hyperparams, date_range=date_range, model_type=model_type,
            params_c=params_c, fixed_params=fixed_params, features=features,
            cols_to_use=cols_to_use, shared_cols=shared_cols, datetime_cols=datetime_cols,
            test_delta=test_delta, device=device, target_vol=target_vol, settings=settings
        )
    except Exception as e:
        # Deliberate best-effort: report the failure as a result record.
        return {
            "error": str(e),
            "hyperparams": hyperparams,
            "traceback": traceback.format_exc(),
        }

# Parameters to automatically restart parallel run on failure
max_retries = 5  # maximum number of retries
retry_count = 0

## Set folder name
# The folder name is built as "<model_type>_<model label>[_<transaction cost>]".
folder_parts = [model_type]

## TODO: name the folder where the results will be stored after the model's name
folder_parts.append('DMN_CS')

# The transaction cost is only part of the name when turnover regularization is on.
if params_c.get('apply_turnover_reg'):
    folder_parts.append(str(params_c['transaction_cost']))

folder_name = '_'.join(folder_parts)

# Create the results folder if needed (no separate existence check, so no
# check-then-create race) and make it the working directory.
os.makedirs(folder_name, exist_ok=True)
os.chdir(folder_name)

## Run experiments
# The whole sweep is restarted (up to `max_retries` attempts) when the
# Parallel call itself raises. Failures inside a single trial do not raise:
# `run_trial_transf` reports them as {"error": ..., "hyperparams": ...}
# records, so they are counted and reported after the run.
# NOTE: if every attempt fails, `results` is never assigned.
while retry_count < max_retries:
    print(f"Starting Parallel run (attempt {retry_count + 1})...")
    try:
        results = Parallel(n_jobs=8)(
            delayed(run_trial_transf)(trial_hyperparams)
            for trial_hyperparams in hyperparams_grid
        )
    except Exception as e:
        retry_count += 1
        print(f"Parallel run failed with error: {e}")
        if retry_count < max_retries:
            print("Retrying...")
            time.sleep(0.5)  # Optional: Wait before retrying
        else:
            print("Max retries reached. Exiting.")
    else:
        # Error records carry both keys written by `run_trial_transf`.
        failed_trials = [
            result for result in results
            if isinstance(result, dict) and "error" in result and "hyperparams" in result
        ]
        if failed_trials:
            print(
                f"Parallel run finished, but {len(failed_trials)} of "
                f"{len(results)} trials returned an error."
            )
        else:
            print("Parallel run completed successfully.")
        break  # Exit the loop: the Parallel call itself did not raise