In [3]:
import os 
import pandas as pd 
import numpy as np 
from functools import reduce 
from backtest import run_all_windows


# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Candidate minibatch sizes handed to the hyperparameter search.
HP_MINIBATCH_SIZE = [64, 128, 256]

# Tickers included in the experiment.
TICKERS = [
    'ADM', 'ALB', 'ALCO', 'AMZN', 'BA', 'BAC', 'BG', 'BKNG', 'C', 'CAH', 'CMCSA', 'DHI', 'DIS', 'DUK', 'FCX', 'FDP',
    'GOOGL', 'GPC', 'GS', 'GWW', 'HSIC', 'INTC', 'JPM', 'KO', 'LEN', 'LMNR', 'MCK', 'META', 'NEE', 'NEM', 'NVR',
    'PEP', 'PG', 'PHM', 'RGLD', 'SCCO', 'T', 'TOL', 'TWX', 'VZ', 'WFC', 'WMT',
]

# Default model/training parameters; run_test() copies this dict and overrides
# several entries per experiment.
# NOTE: the name is misspelled ("MODLE") but is kept because other cells
# reference it under this name.
MODLE_PARAMS = {
    "architecture": "TFT",
    "total_time_steps": 252,
    # This key used to appear twice in the literal (same value both times);
    # the later duplicate silently overwrote the earlier one, so it was removed.
    "early_stopping_patience": 25,
    "multiprocessing_workers": 32,
    "num_epochs": 300,
    "fill_blank_dates": False,
    "split_tickers_individually": True,
    "random_search_iterations": 50,
    "evaluate_diversified_val_sharpe": True,
    "train_valid_ratio": 0.90,
    "time_features": False,
    "force_output_sharpe_length": 0,
}


# When True, run_test() runs a single repeat with 1 epoch and 2 search iterations.
TEST_MODE = True
# Every ticker is mapped to the same asset class label.
ASSET_CLASS_MAPPING = dict.fromkeys(TICKERS, "STOCK")
TRAIN_VALID_RATIO = 0.9
TIME_FEATURES = False
FORCE_OUTPUT_SHARPE_LENGTH = None
EVALUATE_DIVERSIFIED_VAL_SHARPE = True
# Prefix used when building project/result names.
NAME = "stock"

TypeError: Descriptors cannot be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates

In [2]:
# Combine every CSV in the finished-datasets folder into a single file.
folder_path = "Data/Finished_Datasets/"

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the combined file reproducible across runs and machines.
csv_file_names = sorted(
    file_name for file_name in os.listdir(folder_path) if file_name.endswith(".csv")
)
if not csv_file_names:
    # pd.concat would raise a ValueError on an empty list anyway; raise the
    # same exception type with a message that names the actual problem.
    raise ValueError(f"No .csv files found in {folder_path!r}")

combined_data = [
    pd.read_csv(os.path.join(folder_path, file_name)) for file_name in csv_file_names
]

# Stack all frames and renumber the rows from 0.
combined_df = pd.concat(combined_data, ignore_index=True)

# These two columns are not written to the combined file.
combined_df = combined_df.drop(columns=["close", "srs"])

combined_df.to_csv("Data/full_data.csv", index=False)

In [2]:
def run_test(experiment: str, train_start: int, test_start: int, test_end: int, test_window_size: int, num_repeats: int):
    """Run the windowed backtest for one named experiment configuration.

    Builds a project name from the module-level settings, derives the list of
    (train_start, test_start, test_end) windows and calls ``run_all_windows``
    once per repeat.

    Args:
        experiment: One of ``"LSTM"``, ``"TFT"`` or ``"TFT-SHORT"``.
        train_start: First element of every window tuple
            (presumably a calendar year - confirm against ``run_all_windows``).
        test_start: First test-window start value (inclusive).
        test_end: Upper bound for test-window start values (exclusive).
        test_window_size: Added to each window start to obtain its end.
        num_repeats: Number of repeated runs (versions 1..num_repeats).
            Ignored when ``TEST_MODE`` is on, where only version 1 is run.

    Raises:
        ValueError: If ``experiment`` is not a recognised name.
    """
    # experiment name -> (architecture, sequence length, changepoint lookback windows)
    experiment_settings = {
        "LSTM": ("LSTM", 63, [21]),
        "TFT": ("TFT", 252, [21]),
        "TFT-SHORT": ("TFT", 63, [21]),
    }
    if experiment not in experiment_settings:
        # ValueError instead of a bare BaseException: handlers that caught the
        # old exception still catch this one, and ordinary `except Exception`
        # handlers now see it too.
        raise ValueError("Invalid experiment.")
    architecture, lstm_time_steps, changepoint_lbws = experiment_settings[experiment]

    versions = [1] if TEST_MODE else range(1, 1 + num_repeats)

    experiment_prefix = (
        NAME
        + ("_TEST" if TEST_MODE else "")
        + ("" if TRAIN_VALID_RATIO == 0.90 else f"_split{int(TRAIN_VALID_RATIO * 100)}")
    )

    # Concatenate the lookback windows into one token, e.g. [21, 63] -> "2163".
    cp_string = "".join(str(lbw) for lbw in changepoint_lbws) if changepoint_lbws else "none"
    time_string = "time" if TIME_FEATURES else "notime"
    sharpe_string = "div" if EVALUATE_DIVERSIFIED_VAL_SHARPE else "val"
    _project_name = (
        f"{experiment_prefix}_{architecture.lower()}_cp{cp_string}"
        f"_len{lstm_time_steps}_{time_string}_{sharpe_string}"
    )

    if FORCE_OUTPUT_SHARPE_LENGTH:
        _project_name += f"_outlen{FORCE_OUTPUT_SHARPE_LENGTH}"
    _project_name += "_v"

    features_file_path = "Data/full_data.csv"

    for v in versions:
        project_name = _project_name + str(v)

        # One window per start value in [test_start, test_end).
        # Rebuilt for every repeat so each call receives its own list.
        intervals = [
            (train_start, y, y + test_window_size)
            for y in range(test_start, test_end)
        ]

        # Start from the defaults and apply the per-experiment / global overrides.
        params = MODLE_PARAMS.copy()
        params["total_time_steps"] = lstm_time_steps
        params["architecture"] = architecture
        params["evaluate_diversified_val_sharpe"] = EVALUATE_DIVERSIFIED_VAL_SHARPE
        params["train_valid_ratio"] = TRAIN_VALID_RATIO
        params["time_features"] = TIME_FEATURES
        params["force_output_sharpe_length"] = FORCE_OUTPUT_SHARPE_LENGTH

        if TEST_MODE:
            # Smoke-test settings: one epoch, two search iterations.
            params["num_epochs"] = 1
            params["random_search_iterations"] = 2

        run_all_windows(
            project_name,
            features_file_path,
            intervals,
            params,
            changepoint_lbws,
            ASSET_CLASS_MAPPING,
            # The 252-step configuration uses its own minibatch candidates.
            [32, 64, 128] if lstm_time_steps == 252 else HP_MINIBATCH_SIZE,
            test_window_size,
        )



In [3]:
# Launch the LSTM experiment; arguments are spelled out by keyword so the
# meaning of each number is visible at the call site.
run_test(
    experiment="LSTM",
    train_start=2017,
    test_start=2019,
    test_end=2023,
    test_window_size=1,
    num_repeats=5,
)

Deep Momentum Network Parameters:
architecture = LSTM
total_time_steps = 63
early_stopping_patience = 25
multiprocessing_workers = 32
num_epochs = 1
fill_blank_dates = False
split_tickers_individually = True
random_search_iterations = 2
evaluate_diversified_val_sharpe = True
train_valid_ratio = 0.9
time_features = False
force_output_sharpe_length = None
input_size = 10
output_size = 1
category_counts = []
static_input_loc = []
known_regular_inputs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
known_categorical_inputs = []

Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
hidden_layer_size |20                |?                 
dropout_rate      |0.4               |?                 
max_gradient_norm |1                 |?                 
learning_rate     |0.0001            |?                 

test
tmp/checkpoint
test
better
results\stock_TEST_lstm_cp21_len63_notime_div_v1\2019-2020\hp\stock_TEST_lstm_cp21_len63_notime_div_v1\trial_08e1f63a371a73910c6940ab2c4c3c8

NotFoundError: {{function_node __wrapped__SaveV2_dtypes_21_device_/job:localhost/replica:0/task:0/device:CPU:0}} Failed to create a NewWriteableFile: results\stock_TEST_lstm_cp21_len63_notime_div_v1\2019-2020\hp\stock_TEST_lstm_cp21_len63_notime_div_v1\trial_08e1f63a371a73910c6940ab2c4c3c88\checkpoints\epoch_0\checkpoint_temp/part-00000-of-00001.data-00000-of-00001.tempstate16746230146695259019 : The system cannot find the path specified.
; No such process [Op:SaveV2]

In [None]:
# Load the combined dataset, using the first CSV column as the index, and give
# the 'date.1' column the plain name 'date'.
raw_data = (
    pd.read_csv("Data/full_data.csv", index_col=0, parse_dates=True)
    .rename(columns={'date.1': 'date'})
)
# Make sure the 'date' column is stored as datetime64[ns].
raw_data["date"] = raw_data["date"].astype("datetime64[ns]")

In [7]:
# Keep only rows without missing values whose 'year' is 2017 or later; the
# explicit copy makes `df` independent of `raw_data`.
df = raw_data.dropna().loc[lambda frame: frame["year"] >= 2017].copy()
years = df["year"]

In [6]:
import tensorflow as tf

# Query the GPU device list once and reuse it for both lines of output.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
print(gpu_devices)


Num GPUs Available:  1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
import os

# Windows install locations of the CUDA 11.2 toolkit used by this notebook.
# NOTE(review): presumably this cell has to run before TensorFlow is first
# imported in the kernel for the settings to matter - confirm.
CUDA_HOME_DIR = r"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2"
CUDA_PATH_DIRS = [
    r"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/bin",
    r"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/extras/CUPTI/libx64",
    r"C:/tools/cuda/bin",
]


def append_to_path_once(directory: str) -> None:
    """Append ``directory`` to the PATH environment variable if it is not listed yet.

    Re-running the cell therefore does not keep growing PATH with duplicate
    entries. The ';' separator is used on purpose: the directories are
    Windows paths (they contain a drive-letter colon).
    """
    current_path = os.environ.get("PATH", "")
    if directory in current_path.split(";"):
        return
    os.environ["PATH"] = f"{current_path};{directory}" if current_path else directory


os.environ['CUDA_HOME'] = CUDA_HOME_DIR
for cuda_dir in CUDA_PATH_DIRS:
    append_to_path_once(cuda_dir)

In [5]:
import tensorflow as tf

# Enable memory growth on every GPU TensorFlow can see.
physical_devices = tf.config.list_physical_devices("GPU")
if physical_devices:
    try:
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Report once, after all GPUs were configured; previously this line
        # sat inside the loop and was printed once per GPU.
        print("Memory growth set for GPUs.")
    except RuntimeError as e:
        # Report the failure instead of aborting the notebook.
        print("Error setting memory growth:", e)
else:
    print("No GPUs found.")

Memory growth set for GPUs.


In [7]:
import tensorflow as tf

# Print the installed TensorFlow version string for the record.
print(tf.__version__)

2.10.0


In [7]:
test = "results\stocks_TEST_lstm_cp21_len63_notime_div_v1\2019-2020\hp\experiment_stocks_TEST_lstm_cp21_len63_notime_div_v1\trial_411433bdbcbf8165d1cbd9e69e534202\checkpoints\epoch_0\checkpoint_temp/part-00000-of-00001.data-00000-of-00001.tempstate1022316393468577740"

In [8]:
# Show the normalised form of the path held in `test`; as a bare last
# expression the result is displayed as the cell output.
os.path.normpath(test)

'results\\stocks_TEST_lstm_cp21_len63_notime_div_v1\x819-2020\\hp\\experiment_stocks_TEST_lstm_cp21_len63_notime_div_v1\trial_411433bdbcbf8165d1cbd9e69e534202\\checkpoints\\epoch_0\\checkpoint_temp\\part-00000-of-00001.data-00000-of-00001.tempstate1022316393468577740'