In [None]:
import datetime
import io
import os

import pandas as pd
import ray
import requests
import torch
import yfinance as yf
from ray import tune, air

# Put the project's helper directories on the import path so the local
# `data` and `training` modules can be imported. The entries are relative,
# so they are resolved against the kernel's current working directory.
import sys
sys.path.extend(['./data', './training'])
import data
import training

# Last expression of the cell: shows whether PyTorch can see a CUDA device.
torch.cuda.is_available()

In [None]:
# --- Notebook-wide settings ------------------------------------------------

# Passed to training.setup_ray() when the tuning run is assembled.
BATCH_SIZE = 96
N_TRIES = 8  # Increase to try a larger search space

# Not referenced by any other cell visible in this notebook; the window size
# actually explored is sampled from the search space defined further down.
WINDOW_SIZE = 32
NUM_WORKERS = 6
N_FEATURES = 1

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [None]:
# Start the local Ray runtime with roughly three quarters of the logical CPUs.
# os.cpu_count() can return None, and truncating 0.75 * n gives 0 on a
# single-core machine, so fall back to 1 and clamp to at least one CPU.
_total_cpus = os.cpu_count() or 1
ray.init(
    num_cpus=max(1, int(_total_cpus * .75)),
    # NOTE(review): one GPU is declared unconditionally, even when
    # torch.cuda.is_available() is False. Left unchanged because the resource
    # request made inside training.setup_ray() is not visible here -- confirm.
    num_gpus=1,
    # Re-running this cell on a live kernel would otherwise raise because
    # Ray is already initialised.
    ignore_reinit_error=True,
)

In [None]:
# Collect the ticker symbols of the largest companies listed on
# stockanalysis.com. The page is fetched through `requests` with browser-like
# headers because a plain fetch is rejected with a "forbidden" error.
N_SYMBOLS = 25  # number of top-listed companies to keep
url = 'https://stockanalysis.com/list/biggest-companies/'

header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}

# The timeout keeps a stalled connection from hanging the notebook forever.
r = requests.get(url, headers=header, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
r.raise_for_status()

# Newer pandas releases deprecate passing literal HTML as a plain string, so
# hand read_html a file-like object instead.
dfs = pd.read_html(io.StringIO(r.text))

# The first table on the page carries the listing; keep the first N_SYMBOLS
# tickers as plain Python strings.
# Yahoo Finance writes share classes with a dash (e.g. BRK-B) where this
# listing uses a dot (BRK.B), so convert before handing them to yfinance.
symbols = [
    str(symbol).replace('.', '-')
    for symbol in dfs[0]['Symbol'].head(N_SYMBOLS)
]

In [None]:
# Yahoo Finance only serves 5-minute bars for about the last 60 days, and in
# practice the request is accepted only when start and end are at most
# 59 days apart.
end_date = datetime.date.today()
start_date = end_date - datetime.timedelta(days=59)

# The window is fully described by start/end, so no `period` is passed:
# combining all three is contradictory.
# auto_adjust=False is set explicitly because recent yfinance releases
# default to auto-adjusting prices, which drops the 'Adj Close' column that
# is selected below.
dataset = yf.download(
    symbols,
    interval='5m',
    start=start_date,
    end=end_date,
    auto_adjust=False,
)['Adj Close']

# Stop here with a clear message rather than feeding an empty frame into the
# train/validation split.
if dataset.empty:
    raise ValueError(
        f"yfinance returned no 5m data between {start_date} and {end_date}"
    )

# Split the price table into training and validation sets using the
# project-local helper.
train, val = data.get_train_val_dicts(dataset)

In [None]:
# Candidate values for every tunable hyper-parameter. Each entry is wrapped
# in tune.choice() below, so one value per entry is drawn for each trial.
_search_candidates = {
    'D_MODEL': [32, 64],
    'N_HEADS': [2, 8],
    'D_FF': [128, 256],
    'DROPOUT': [0., .05, .1, .15],
    'ACTIVATION': ['relu', 'gelu'],
    'NUM_LAYERS': [3, 5],
    'LR': [1e-3, 1e-4, 1e-5],
    'WINDOW_SIZE': [32, 64],
}

# 'device' is a fixed value rather than a searched one; it stays the first key.
config = {
    'device': DEVICE,
    **{
        name: tune.choice(options)
        for name, options in _search_candidates.items()
    },
}

# The search space is nested under 'train_loop_config' before being handed
# to training.setup_ray().
param_space = {'train_loop_config': config}

In [None]:
# Ask the project helper for the trainer and the matching Tune settings,
# built from the train/validation data and the search space.
trainer, tune_config = training.setup_ray(
    train,
    val,
    BATCH_SIZE,
    param_space,
    n_tries=N_TRIES,
)

# Run settings: local_dir points the run at ./tfmr_logger/.
run_config = air.RunConfig(local_dir="./tfmr_logger/")

tuner = tune.Tuner(
    trainer,
    run_config=run_config,
    tune_config=tune_config,
)

# Launch the tuning run and keep whatever fit() returns for later inspection.
result = tuner.fit()