# Transformer with Difference ID Dataset

In [17]:
from datetime import datetime
import json
import math
import os
import sys
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import (TransformerDecoder, TransformerDecoderLayer,
                      TransformerEncoder, TransformerEncoderLayer)
from torch.utils.data import DataLoader
from tqdm import tqdm

In [18]:
# Detect Google Colab by probing for its ``drive`` helper.
try:
  from google.colab import drive
except ImportError:
  IS_GOOGLE_COLAB = False
else:
  IS_GOOGLE_COLAB = True

# Resolve where the FX data lives for the detected environment.
if not IS_GOOGLE_COLAB:
  base_folder = 'L:/data'
  data_folder = os.path.join(base_folder, "fx", "OANDA-Japan MT5 Live")
else:
  mount_path = '/content/drive'
  base_folder = os.path.join(mount_path, "My Drive", "Data")
  data_folder = os.path.join(base_folder, "FX")


# Prefer the first CUDA device and fall back to the CPU otherwise.
device = "cpu"
if torch.cuda.is_available():
  device = "cuda:0"
print(f"Using device: {device}")

Using device: cpu


In [19]:
import os
import shutil
import sys
import zipfile
import requests

def download_modlue_from_gh(repository, github_account='Naradice', branch='master', folder=None, module_path='/gdrive/My Drive/modules', **kwargs):
  """Download folders of a GitHub repository archive into ``module_path``.

  The branch archive is fetched as a zip, unpacked into a temporary
  directory, and each requested folder is copied (merging with any existing
  copy) to ``module_path/<folder>``. The temporary directory is removed even
  when extraction or copying fails, and nothing is written to the current
  working directory.

  Args:
    repository: Repository name on GitHub.
    github_account: Owner of the repository.
    branch: Branch whose archive is downloaded.
    folder: Folder name, or iterable of folder names, inside the repository
      to copy. Defaults to a folder named like the repository.
    module_path: Destination directory that receives the folders.
    **kwargs: Ignored; lets callers pass dictionaries with extra keys.

  Returns:
    None. A message is printed when the download does not return HTTP 200.
  """
  import io
  import tempfile

  if folder is None:
    folder = repository
  folder_names = [folder] if isinstance(folder, str) else list(folder)

  zip_url = f"https://github.com/{github_account}/{repository}/archive/refs/heads/{branch}.zip"
  # A timeout keeps the notebook from hanging forever on a stalled connection.
  response = requests.get(zip_url, timeout=60)
  if response.status_code != 200:
    print(f"filed to download {zip_url}: {response.status_code}, {response.text}")
    return

  with tempfile.TemporaryDirectory() as work_dir:
    # Read the archive from memory; no temp.zip is left behind on errors.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
      archive.extractall(work_dir)

    for folder_name in folder_names:
      source_folder = os.path.join(work_dir, f"{repository}-{branch}", folder_name)
      destination_folder = os.path.join(module_path, folder_name)
      shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)

In [20]:
# Choose where helper modules are stored (Google Drive on Colab).
if IS_GOOGLE_COLAB:
  drive.mount(mount_path)
  module_path = f"{mount_path}/My Drive/modules"
else:
  module_path = '../../modules'

os.makedirs(module_path, exist_ok=True)

# 'refresh': True forces a re-download even when the folder already exists.
repositories = [
    {'repository': 'stocknet_study', 'branch': 'master', 'folder': ['Dataset', 'SN_Utils'], 'refresh': False},
    {'repository': 'finance_process', 'branch': 'master', 'folder': 'fprocess', 'refresh': True},
    {'repository': 'cloud_storage_handler', 'branch': 'main', 'folder': 'cloud_storage_handler', 'refresh': False},
]

# Create an empty __init__.py in the modules directory if it is missing.
destination = os.path.join(module_path, '__init__.py')
if not os.path.exists(destination):
  with open(destination, mode='w'):
    pass

for repo_kwargs in repositories:
  folders = repo_kwargs['folder']
  if isinstance(folders, str):
    folders = [folders]
  # One download fetches every folder of the repository, so decide once per
  # repository instead of downloading again for each missing folder.
  needs_download = bool(repo_kwargs['refresh'])
  for folder in folders:
    destination = os.path.join(module_path, folder)
    if not os.path.exists(destination):
      needs_download = True
  if needs_download:
    download_modlue_from_gh(**repo_kwargs, module_path=module_path)

In [21]:
# Add the modules directory to the import path so the packages imported
# below can be resolved from it.
sys.path.append(module_path)

import fprocess
import Dataset
import cloud_storage_handler

In [22]:
# Initialize cloud storage handler if needed
from cloud_storage_handler import DropboxHandler


# To enable the Dropbox handler, uncomment the next two lines:
# storage_handler = DropboxHandler("nhjrq1cjpugk5hc", "http://localhost")
# storage_handler.authenticate()
# Otherwise, specify None
storage_handler = None

## Transformer with Diff ID Dataset

In [None]:
import random
import numpy as np
import torch


class CloseDiffIDDS:
    """Dataset that encodes OHLC price differences as non-negative integer IDs.

    Each row is turned into a difference (against the previous close when
    ``with_close`` is true, otherwise against the previous row), scaled by
    ``10 ** decimal_digits`` and shifted so that the smallest ID is >= 0.
    Optional volume and spread columns are converted to integer IDs as well.

    Indexing with an ``int`` returns one sample, indexing with a ``slice``
    returns a batch whose first two axes are (sequence, batch). Every item is
    ``(inputs, outputs)`` where both are ``(ohlc, volume, spread)`` tuples of
    ``torch.int`` tensors. Volume/spread tensors are empty when the matching
    column was not supplied.
    """

    def __init__(self, df, ohlc_columns, volume_column=None, spread_column=None, observation_length=60,
                device="cuda", future_step_size=1, seed=1017, is_training = True, with_close=True, clip_range=None):
        """Build the ID tables and the shuffled train/eval index lists.

        Args:
            df: pandas DataFrame holding the price columns.
            ohlc_columns: Four column names ordered open, high, low, close.
            volume_column: Optional name of the volume column.
            spread_column: Optional name of the spread column.
            observation_length: Number of rows in each input window.
            device: Device on which returned tensors are created.
            future_step_size: Number of future rows in each output window.
            seed: Seed passed to :meth:`seed`.
            is_training: Start with the training indices when true.
            with_close: Diff every OHLC value against the previous close.
            clip_range: Optional ``(lower, upper)`` bounds applied to diffs.
        """
        self.seed(seed)
        # Copy so appending volume/spread names never mutates the caller's list.
        self.columns = list(ohlc_columns)
        # Keep the raw close prices so revert_diff does not rely on a global df.
        self.__close_prices = df[ohlc_columns[3]]
        self.ohlc_idf = self.__init_ohlc(df, ohlc_columns, with_close=with_close, clip_range=clip_range)

        # None means "column not supplied"; the getters then return [].
        self.volume_idf = None
        self.spread_idf = None
        if volume_column is not None:
            self.volume_idf = self.__init_volume(df[volume_column])
            self.columns.append(volume_column)
        if spread_column is not None:
            self.spread_idf = self.__init_spread(df[spread_column])
            self.columns.append(spread_column)

        self.observation_length = observation_length
        self.device = device
        self.future_step_size = future_step_size
        self.is_training = is_training
        self.__init_indicies(self.ohlc_idf)

    def __filtet_outfiers(self, data):
        """Placeholder; not implemented."""
        pass

    def __init_indicies(self, data, split_ratio=0.8):
        """Create shuffled start indices for the train and eval splits.

        Raises:
            ValueError: If ``data`` is shorter than one input + output window.
        """
        window = self.observation_length + self.future_step_size
        length = len(data) - window
        if length < 0:
            raise ValueError(
                f"data length {len(data)} is less than observation_length + future_step_size ({window})"
            )

        split_index = int(length * split_ratio)
        train_indices = list(range(1, split_index))
        # k=len(...) shuffles the whole list and stays valid when it is empty.
        self.train_indices = random.sample(train_indices, k=len(train_indices))

        # Note: If a unique value exists in validation data only, validation
        # loss would be greater than expected.
        # The eval split starts one full window after the train split.
        eval_indices = list(range(split_index + window, length))
        self.eval_indices = random.sample(eval_indices, k=len(eval_indices))

        if self.is_training:
            self._indices = self.train_indices
        else:
            self._indices = self.eval_indices

    def revert_diff(self, prediction, ndx, last_close_value=None):
        """Add the last observed close price back onto predicted differences.

        Args:
            prediction: Predicted difference(s).
            ndx: Dataset position (int) or iterable of positions whose last
                observed close should be used. Ignored when
                ``last_close_value`` is given.
            last_close_value: Explicit close value(s) to add instead.

        Returns:
            ``last close + prediction``.
        """
        if last_close_value is not None:
            return last_close_value + prediction

        # ID row i corresponds to original row i + 1 (row 0 was dropped by the
        # diff), so the close preceding the first target row sits at
        # start + observation_length - 1 in the original data.
        offset = self.observation_length - 1
        if isinstance(ndx, (int, np.integer)):
            last_close = self.__close_prices.iloc[self._indices[ndx] + offset]
        else:
            target_index = [self._indices[index] + offset for index in ndx]
            last_close = self.__close_prices.iloc[target_index].values.reshape(len(target_index), 1)
        return last_close + prediction

    def revert(self, diff):
        """Placeholder; not implemented."""
        pass

    def __init_ohlc(self, df, ohlc_columns, decimal_digits = 3, with_close=True, clip_range=None):
        """Convert OHLC columns to integer difference IDs.

        Sets ``self.ohlc_lower`` (offset added to scaled diffs) and
        ``self.ohlc_range_size`` (upper bound used as vocabulary size).
        """
        if with_close:
            close_column = [ohlc_columns[3]]
            ohlc_cls_diff_df = df[ohlc_columns].iloc[1:] - df[close_column].iloc[:-1].values
        else:
            ohlc_cls_diff_df = df[ohlc_columns].diff().dropna()
        if clip_range is not None:
            ohlc_cls_diff_df = ohlc_cls_diff_df.clip(lower=clip_range[0], upper=clip_range[1])
        min_value_abs = abs(ohlc_cls_diff_df.min().min())

        scale = 10 ** decimal_digits
        lower_value = math.ceil(min_value_abs) * scale
        upper_value = math.ceil(ohlc_cls_diff_df.max().max()) * scale
        self.ohlc_lower = lower_value
        # Round before casting: plain truncation would turn e.g. 2.9999999
        # (float noise of 3.0) into ID 2 instead of 3.
        id_df = (ohlc_cls_diff_df * scale + lower_value).round().astype('int64')
        # add 10 to the range to avoid 0 value in the dataset
        self.ohlc_range_size = lower_value + upper_value + 10
        return id_df

    def __get_volume(self, idx):
        """Return volume IDs for ``idx`` or ``[]`` when no volume column is set."""
        if self.volume_idf is None:
            return []
        return self.volume_idf.iloc[idx].values.tolist()

    def __get_spread(self, idx):
        """Return spread IDs for ``idx`` or ``[]`` when no spread column is set."""
        if self.spread_idf is None:
            return []
        return self.spread_idf.iloc[idx].values.tolist()

    def __init_volume(self, df):
        """Convert a volume series into integer IDs (rounded to tens, / 10)."""
        volume_df = (df.iloc[1:].round(decimals=-1) / 10).dropna()
        self.volume_range_size = math.ceil(volume_df.max()/100)*100
        return volume_df.astype(dtype='int64')

    def __init_spread(self, df):
        """Convert a spread series into integer IDs."""
        spread_df = df.iloc[1:].astype('int64')
        self.spread_range_size = spread_df.max()*2
        return spread_df

    def __get_data_set(self, idx):
        """Return ``(ohlc, volume, spread)`` ID lists for the row slice ``idx``."""
        ohlc_ids = self.ohlc_idf.iloc[idx].values.tolist()
        return ohlc_ids, self.__get_volume(idx), self.__get_spread(idx)

    def __input_window(self, index):
        """Row slice covering the observation window starting at ``index``."""
        return slice(index, index + self.observation_length)

    def __output_window(self, index):
        """Row slice from the last observed row through the future steps."""
        start = index + self.observation_length
        return slice(start - 1, start + self.future_step_size)

    def __window_tensors(self, ndx, window_of):
        """Build ``(ohlc, volume, spread)`` tensors for an int or slice index.

        Raises:
            TypeError: If ``ndx`` is neither an integer nor a slice.
        """
        if isinstance(ndx, (int, np.integer)):
            batched = False
            ohlc, volume, spread = self.__get_data_set(window_of(self._indices[ndx]))
        elif isinstance(ndx, slice):
            batched = True
            ohlc, volume, spread = [], [], []
            for index in self._indices[ndx]:
                ohlc_ids, volume_ids, spread_ids = self.__get_data_set(window_of(index))
                ohlc.append(ohlc_ids)
                volume.append(volume_ids)
                spread.append(spread_ids)
        else:
            raise TypeError(f"index must be an int or a slice, got {type(ndx).__name__}")

        ohlc = torch.tensor(ohlc, device=self.device, dtype=torch.int)
        # unsqueeze(-1) adds the trailing feature axis for both the 1-D
        # (single sample) and the 2-D (batch) case.
        volume = torch.tensor(volume, device=self.device, dtype=torch.int).unsqueeze(-1)
        spread = torch.tensor(spread, device=self.device, dtype=torch.int).unsqueeze(-1)

        if batched:
            # Batches are returned sequence-first: (sequence, batch, features).
            return ohlc.transpose(0, 1), volume.transpose(0, 1), spread.transpose(0, 1)
        return ohlc, volume, spread

    def _output_func(self, batch_size):
        """Return target tensors for an int or slice index (named ``batch_size``)."""
        return self.__window_tensors(batch_size, self.__output_window)

    def _input_func(self, batch_size):
        """Return input tensors for an int or slice index (named ``batch_size``)."""
        return self.__window_tensors(batch_size, self.__input_window)

    def __len__(self):
        """Number of samples in the currently active split."""
        return len(self._indices)

    def __getitem__(self, ndx):
        """Return ``(inputs, outputs)`` for an int or slice index."""
        return self._input_func(ndx), self._output_func(ndx)

    def seed(self, seed=None):
        """Seed torch, random and numpy.

        With ``seed=None`` the value 1192 is used; with an explicit seed the
        cuDNN benchmark mode is disabled and deterministic mode enabled.
        """
        if seed is None:
            seed = 1192
        else:
            torch.backends.cudnn.benchmark = False
            torch.backends.cudnn.deterministic = True
        torch.manual_seed(seed)
        random.seed(seed)
        np.random.seed(seed)
        self.seed_value = seed

    def eval(self):
        """Switch to the evaluation split."""
        self._indices = self.eval_indices
        self.is_training = False

    def train(self):
        """Switch to the training split."""
        self._indices = self.train_indices
        self.is_training = True

### Softmax Model

input diff with positional encoding

output 6 values with softmax

In [24]:
def calculate_joint(tensor_a, tensor_b, beam_width=10):
    """Multiply each of the first ``beam_width`` entries of ``tensor_a`` with ``tensor_b``.

    For every ``i`` in ``range(beam_width)`` the slice ``tensor_a[:, :, i:i+1]``
    is multiplied (with broadcasting) by ``tensor_b``; the products are
    concatenated along dim 2, so entry ``i * tensor_b.size(2) + j`` pairs
    candidate ``i`` of ``tensor_a`` with candidate ``j`` of ``tensor_b``.
    """
    products = [
        torch.mul(tensor_a[:, :, start:start + 1], tensor_b)
        for start in range(beam_width)
    ]
    return torch.concat(products, dim=2)

In [25]:
def next_preds_beam(scores_array, indices_array, batch_size=16, beam_width=10):
    """Combine per-feature top-k candidates into joint beam predictions.

    Args:
        scores_array: List of per-feature top-k score tensors.
        indices_array: List of the matching top-k index tensors. The list is
            not modified.
        batch_size: Batch size used to reshape the result.
        beam_width: Number of candidates kept per step.

    Returns:
        ``(ohlc, volume, spread)`` prediction tensors, each reshaped to
        ``(1, batch_size * beam_width, n_features)``; the first four entries
        form ``ohlc``, the fifth ``volume`` and the remainder ``spread``.
    """
    beam_batch = batch_size * beam_width
    joint_indices_array = []
    joint_score = scores_array[0]

    # Fold the features together one by one, keeping the best beam_width
    # joint candidates after each step.
    for logits in scores_array[1:]:
        joint_score = calculate_joint(joint_score, logits, beam_width=beam_width)
        joint_score, joint_indices = torch.topk(joint_score, k=beam_width, dim=-1)
        joint_indices_array.append(joint_indices)

    next_preds_array = []
    # Work on a reversed copy; reversing in place would silently reorder the
    # list owned by the caller.
    reversed_indices = indices_array[::-1]
    # NOTE(review): joint_indices_array is in forward order while the feature
    # indices are iterated in reverse — confirm this pairing is intended.
    for index, indices in enumerate(reversed_indices[:-1]):
        joint_indices = joint_indices_array[index]
        target_indices = torch.remainder(joint_indices, beam_width)
        next_preds = indices.gather(dim=-1, index=target_indices)
        next_preds_array.append(next_preds.mT.reshape(1, beam_batch, 1))

    # The first feature uses the quotient part of the last joint index read.
    target_indices = joint_indices // beam_width
    next_preds = reversed_indices[-1].gather(dim=-1, index=target_indices)
    next_preds_array.append(next_preds.mT.reshape(1, beam_batch, 1))
    next_preds_array.reverse()

    next_ohlc_preds = torch.concat(next_preds_array[:4], dim=-1)
    next_v_preds = torch.concat(next_preds_array[4:5], dim=-1)
    next_s_preds = torch.concat(next_preds_array[5:], dim=-1)

    return next_ohlc_preds, next_v_preds, next_s_preds

In [26]:
def joint_beam_search(model, input_data, beam_width, max_len):
    """Generate ``max_len`` future steps with a joint beam search.

    Args:
        model: Callable returning six score tensors (open, high, low, close,
            volume, spread) for the given source/target tensors.
        input_data: ``(ohlc_src, volume_src, spread_src)`` tensors whose
            second axis is the batch axis.
        beam_width: Number of candidates kept per step.
        max_len: Number of decoding steps.

    Returns:
        ``(preds_ohlc, preds_volume, preds_spread)``; each starts with the
        last source row followed by the generated steps, with the batch axis
        expanded to ``batch_size * beam_width``.
    """
    ohlc_src, volume_src, spread_src = input_data
    batch_size = ohlc_src.shape[1]
    # Build masks on the inputs' device instead of relying on a global.
    target_device = ohlc_src.device

    # The decoder is primed with the last observed row.
    preds_ohlc = ohlc_src[-1:, :, :]
    preds_volume = volume_src[-1:, :, :]
    preds_spread = spread_src[-1:, :, :]

    # first prediction
    mask_tgt = nn.Transformer.generate_square_subsequent_mask(preds_ohlc.size(0)).to(target_device)
    logits_o, logits_h, logits_l, logits_c, logits_v, logits_s = model(
        src_ohlc=ohlc_src, src_volume=volume_src, src_spread=spread_src,
        tgt_ohlc=preds_ohlc, tgt_volume=preds_volume, tgt_spread=preds_spread,
        mask_tgt=mask_tgt, mask_src=None, padding_mask_src=None, padding_mask_tgt=None,
        memory_key_padding_mask=None
    )

    scores_array = []
    indices_array = []
    for head in (logits_o, logits_h, logits_l, logits_c, logits_v, logits_s):
        scores, indices = torch.topk(head, k=beam_width, dim=-1)
        scores_array.append(scores)
        indices_array.append(indices)

    next_ohlc_preds, next_v_preds, next_s_preds = next_preds_beam(
        scores_array=scores_array, indices_array=indices_array,
        batch_size=batch_size, beam_width=beam_width
    )

    # From here on every batch entry is tracked beam_width times.
    preds_ohlc = preds_ohlc.repeat(1, beam_width, 1)
    preds_volume = preds_volume.repeat(1, beam_width, 1)
    preds_spread = preds_spread.repeat(1, beam_width, 1)
    ohlc_src = ohlc_src.repeat(1, beam_width, 1)
    volume_src = volume_src.repeat(1, beam_width, 1)
    spread_src = spread_src.repeat(1, beam_width, 1)

    for t in range(1, max_len):
        preds_ohlc = torch.concat([preds_ohlc, next_ohlc_preds], dim=0)
        preds_volume = torch.concat([preds_volume, next_v_preds], dim=0)
        preds_spread = torch.concat([preds_spread, next_s_preds], dim=0)

        mask_tgt = nn.Transformer.generate_square_subsequent_mask(preds_ohlc.size(0)).to(target_device)
        logits_o, logits_h, logits_l, logits_c, logits_v, logits_s = model(
            src_ohlc=ohlc_src, src_volume=volume_src, src_spread=spread_src,
            tgt_ohlc=preds_ohlc, tgt_volume=preds_volume, tgt_spread=preds_spread,
            mask_tgt=mask_tgt, mask_src=None, padding_mask_src=None, padding_mask_tgt=None,
            memory_key_padding_mask=None
        )

        scores_array = []
        indices_array = []
        for head in (logits_o, logits_h, logits_l, logits_c, logits_v, logits_s):
            # Take the vocabulary size from the model output instead of
            # hard-coding it, so any ID range works.
            vocab_size = head.size(-1)
            # NOTE(review): this reshape groups beam_width consecutive rows per
            # batch entry, while repeat() above lays beams out in blocks of
            # batch_size — confirm the intended ordering.
            pooled = head[-1:].reshape(1, batch_size, vocab_size * beam_width)
            scores, indices = torch.topk(pooled, k=beam_width, dim=-1)
            scores_array.append(scores)
            # Map positions in the pooled axis back to vocabulary IDs.
            indices_array.append(torch.remainder(indices, vocab_size))

        next_ohlc_preds, next_v_preds, next_s_preds = next_preds_beam(
            scores_array=scores_array, indices_array=indices_array,
            batch_size=batch_size, beam_width=beam_width
        )

    preds_ohlc = torch.concat([preds_ohlc, next_ohlc_preds], dim=0)
    preds_volume = torch.concat([preds_volume, next_v_preds], dim=0)
    preds_spread = torch.concat([preds_spread, next_s_preds], dim=0)

    return preds_ohlc, preds_volume, preds_spread

In [27]:
def revert_target(tgt_ohlc, tgt_volume, tgt_spread, indices):
    """Convert ID tensors back to price, volume and spread arrays.

    Uses the module-level dataset ``ds`` for the ID offset and for the last
    observed close prices.

    Args:
        tgt_ohlc: OHLC ID tensor whose second axis is the batch axis.
        tgt_volume: Volume ID tensor.
        tgt_spread: Spread ID tensor.
        indices: Dataset positions of the batch entries.

    Returns:
        ``(revert_ohlc, revert_volume, revert_spread)`` as numpy arrays;
        ``revert_ohlc`` has shape ``(tgt_ohlc.size(0), batch_size, 4)``.

    Raises:
        ValueError: If the batch axis of ``tgt_ohlc`` and ``indices`` differ
            in length.
    """
    if tgt_ohlc.size(1) != len(indices):
        raise ValueError("batch_size should be same as indices size")
    batch_size = len(indices)
    # Revert ID to difference: ohlc - last close.
    ohlc_diff = (tgt_ohlc - ds.ohlc_lower) * 0.1 ** 3
    ohlc_diff = ohlc_diff.cpu().detach().numpy()
    first_ohlc = ds.revert_diff(ohlc_diff[0, :, :], indices)
    last_close = first_ohlc[:, -1:]

    # Collect every step and join once; concatenating inside the loop would
    # copy the growing array on every iteration.
    steps = [first_ohlc.reshape(1, batch_size, 4)]
    # last_observation + future_step_size
    for i in range(1, tgt_ohlc.size(0)):
        next_ohlc = ohlc_diff[i, :, :] + last_close
        last_close = next_ohlc[:, -1:]
        steps.append(next_ohlc.reshape(1, batch_size, 4))
    revert_ohlc = np.concatenate(steps, axis=0)

    revert_volume = tgt_volume * 10
    revert_volume = revert_volume.cpu().detach().numpy()
    revert_spread = tgt_spread.cpu().detach().numpy()

    return revert_ohlc, revert_volume, revert_spread

### OHLC Softmax

In [28]:
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding for a source/target pair.

    ``forward`` takes the sequence lengths from ``size(1)`` and adds a
    ``(length, d_model)`` slice of the table, so it expects inputs laid out
    as ``(batch, sequence, d_model)``.
    """

    def __init__(self, d_model, max_len=1000, dropout=0.05):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        positions = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        frequencies = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0)/d_model))
        angles = positions * frequencies
        table = torch.zeros(max_len, d_model)
        # Even feature columns hold sines, odd columns hold cosines.
        table[:, 0::2] = torch.sin(angles)
        table[:, 1::2] = torch.cos(angles)
        self.register_buffer("pe", table)

    def forward(self, src, tgt):
        """Return ``(src, tgt)`` with positions added and dropout applied.

        Target positions start at the last source position, i.e. the table
        rows ``src_len - 1`` up to ``src_len + tgt_len - 2``.
        """
        src_len = src.size(1)
        tgt_end = src_len + tgt.size(1) - 1
        encoded_src = self.dropout(src + self.pe[:src_len, :])
        encoded_tgt = self.dropout(tgt + self.pe[src_len-1:tgt_end, :])
        return encoded_src, encoded_tgt

In [29]:
class Perceptron(nn.Module):
    """Fully connected network with ReLU between its linear layers.

    The stack is input -> hidden, ``num_layers - 2`` hidden -> hidden layers
    (each followed by ReLU), and a final hidden -> output linear layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()

        stack = [nn.Linear(input_dim, hidden_dim), nn.ReLU()]
        for _ in range(num_layers - 2):
            stack += [nn.Linear(hidden_dim, hidden_dim), nn.ReLU()]
        stack.append(nn.Linear(hidden_dim, output_dim))

        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to ``x``."""
        return self.layers(x)

In [30]:
class Seq2SeqTransformer(nn.Module):
    """Encoder-decoder transformer predicting OHLC difference IDs.

    Each of the four OHLC IDs is embedded with ``d_model / 4`` features and
    the four embeddings are concatenated into one ``d_model`` vector per time
    step. Four separate heads output a softmax distribution over the ID
    vocabulary for open, high, low and close.

    The encoder/decoder layers are created with PyTorch's default layout, so
    inputs are sequence-first: ``(sequence, batch, 4)``.
    """

    def __init__(
        self, num_encoder_layers: int, num_decoder_layers: int,
        d_model: int, ohlc_size: int,
        dim_feedforward:int = 512, dropout:float = 0.1, nhead:int = 8
    ):
        """Create the model.

        Args:
            num_encoder_layers: Number of encoder layers.
            num_decoder_layers: Number of decoder layers.
            d_model: Model width; rounded up to the next multiple of 4.
            ohlc_size: Size of the ID vocabulary (embedding and output size).
            dim_feedforward: Width of the feed-forward sublayers.
            dropout: Dropout used by the positional encoding.
            nhead: Number of attention heads.
        """
        super().__init__()

        # Round up to a multiple of 4 so the four embeddings tile d_model
        # exactly (d_model + d_model % 4 is wrong for remainders 1 and 3).
        d_model = d_model + ((-d_model) % 4)
        emb_d_model = d_model // 4
        self.ohlc_embedded_layer = torch.nn.Embedding(num_embeddings=ohlc_size, embedding_dim = emb_d_model)

        self.positional_encoding = PositionalEncoding(d_model, dropout=dropout)

        # NOTE(review): ``dropout`` is not forwarded to the encoder/decoder
        # layers, which therefore keep their library default — confirm intent.
        encoder_layer = TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward
        )
        self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)

        decoder_layer = TransformerDecoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward
        )
        self.transformer_decoder = TransformerDecoder(decoder_layer, num_layers=num_decoder_layers)

        self.open_output = Perceptron(d_model, ohlc_size, ohlc_size, 4)
        self.high_output = Perceptron(d_model, ohlc_size, ohlc_size, 4)
        self.low_output = Perceptron(d_model, ohlc_size, ohlc_size, 4)
        self.close_output = Perceptron(d_model, ohlc_size, ohlc_size, 4)
        self.activation = nn.Softmax(dim=2)

    def forward(
        self, src_ohlc: Tensor, tgt_ohlc: Tensor,
        mask_tgt: Tensor, mask_src: Tensor=None, padding_mask_src: Tensor=None, padding_mask_tgt: Tensor=None,
        memory_key_padding_mask: Tensor=None
    ):
        """Return softmax outputs ``(open, high, low, close)``.

        Args:
            src_ohlc: Source IDs, sequence-first.
            tgt_ohlc: Target IDs, sequence-first.
            mask_tgt: Attention mask for the target sequence.
            mask_src: Optional attention mask for the source sequence.
            padding_mask_src: Optional source key padding mask.
            padding_mask_tgt: Optional target key padding mask.
            memory_key_padding_mask: Optional memory key padding mask.

        Returns:
            Four tensors with softmax applied over dim 2.
        """
        # Embed every OHLC ID and merge the four embeddings per time step.
        src = self.ohlc_embedded_layer(src_ohlc)
        src = src.reshape(src.size(0), src.size(1), src.size(2)*src.size(3))

        tgt = self.ohlc_embedded_layer(tgt_ohlc)
        tgt = tgt.reshape(tgt.size(0), tgt.size(1), tgt.size(2)*tgt.size(3))

        # PositionalEncoding reads lengths from dim 1, while the tensors here
        # are sequence-first; swap axes around the call so positions run along
        # the sequence axis instead of the batch axis.
        src, tgt = self.positional_encoding(src.transpose(0, 1), tgt.transpose(0, 1))
        src = src.transpose(0, 1)
        tgt = tgt.transpose(0, 1)

        memory = self.transformer_encoder(src, mask_src, padding_mask_src)
        outs = self.transformer_decoder(
            tgt, memory, mask_tgt, None,
            padding_mask_tgt, memory_key_padding_mask
        )
        open_probs = self.activation(self.open_output(outs))
        high_probs = self.activation(self.high_output(outs))
        low_probs = self.activation(self.low_output(outs))
        close_probs = self.activation(self.close_output(outs))

        return open_probs, high_probs, low_probs, close_probs

In [None]:
def train(model, ds, optimizers, criterion, batch_size):
    """Run one training epoch and return the mean loss per OHLC head.

    Args:
        model: Model returning four score tensors (open, high, low, close)
            shaped ``(sequence, batch, classes)``.
        ds: Dataset supporting ``train()``, ``len()``, slice indexing and
            ``ohlc_range_size``.
        optimizers: Optimizers to zero and step on every batch.
        criterion: Loss taking ``(N, classes)`` scores and one-hot targets.
        batch_size: Number of samples per batch.

    Returns:
        List of four floats: mean batch loss for open, high, low and close.
    """
    model = model.train()
    ds.train()
    losses = [0, 0, 0, 0]

    end_index = len(ds) - batch_size

    count = 0
    for index in tqdm(range(0, end_index, batch_size)):
        count += 1
        src, tgt = ds[index:index+batch_size]
        ohlc_src, _, _ = src
        ohlc_tgt, _, _ = tgt

        # Teacher forcing: feed all but the last step, predict all but the first.
        in_ohlc_tgt = ohlc_tgt[:-1, :]
        out_ohlc_tgt = ohlc_tgt[1:, :]

        mask_tgt = nn.Transformer.generate_square_subsequent_mask(in_ohlc_tgt.size(0)).to(in_ohlc_tgt.device)
        logits_o, logits_h, logits_l, logits_c = model(
            src_ohlc=ohlc_src, tgt_ohlc=in_ohlc_tgt,
            mask_tgt=mask_tgt, mask_src=None, padding_mask_src=None, padding_mask_tgt=None,
            memory_key_padding_mask=None
        )

        for optimizer in optimizers:
            optimizer.zero_grad()

        # NOTE(review): if the model already applies softmax, a criterion such
        # as CrossEntropyLoss applies it a second time — confirm intent.
        head_losses = []
        for head, scores in enumerate((logits_o, logits_h, logits_l, logits_c)):
            target = torch.nn.functional.one_hot(
                out_ohlc_tgt[:, :, head].to(dtype=torch.long), ds.ohlc_range_size
            ).to(dtype=torch.float)
            # Flatten to (N, classes): with 3-D input the loss would treat
            # dim 1 (the batch axis) as the class axis.
            num_classes = scores.size(-1)
            head_losses.append(
                criterion(scores.reshape(-1, num_classes), target.reshape(-1, target.size(-1)))
            )
        loss = sum(head_losses) / 4

        loss.backward()
        for optimizer in optimizers:
            optimizer.step()

        for head, head_loss in enumerate(head_losses):
            losses[head] += head_loss.item()

    # Average over the number of batches actually processed.
    return [total / max(count, 1) for total in losses]

In [None]:
def evaluate(model, ds, criterion, batch_size):
    """Compute the mean validation loss per OHLC head.

    Args:
        model: Model returning four score tensors (open, high, low, close)
            shaped ``(sequence, batch, classes)``.
        ds: Dataset supporting ``eval()``, ``len()``, slice indexing and
            ``ohlc_range_size``.
        criterion: Loss taking ``(N, classes)`` scores and one-hot targets.
        batch_size: Number of samples per batch.

    Returns:
        List of four floats: mean batch loss for open, high, low and close.
    """
    model = model.eval()
    ds.eval()
    losses = [0, 0, 0, 0]
    end_index = len(ds) - batch_size
    count = 0
    # No gradients are needed for validation; this avoids building the graph.
    with torch.no_grad():
        for index in range(0, end_index, batch_size):
            count += 1
            src, tgt = ds[index:index+batch_size]
            ohlc_src, _, _ = src
            ohlc_tgt, _, _ = tgt

            # Feed all but the last step, score against all but the first.
            in_ohlc_tgt = ohlc_tgt[:-1, :]
            out_ohlc_tgt = ohlc_tgt[1:, :]

            mask_tgt = nn.Transformer.generate_square_subsequent_mask(in_ohlc_tgt.size(0)).to(in_ohlc_tgt.device)
            logits_o, logits_h, logits_l, logits_c = model(
                src_ohlc=ohlc_src, tgt_ohlc=in_ohlc_tgt,
                mask_tgt=mask_tgt, mask_src=None, padding_mask_src=None, padding_mask_tgt=None,
                memory_key_padding_mask=None
            )

            for head, scores in enumerate((logits_o, logits_h, logits_l, logits_c)):
                target = torch.nn.functional.one_hot(
                    out_ohlc_tgt[:, :, head].to(dtype=torch.long), ds.ohlc_range_size
                ).to(dtype=torch.float)
                # Flatten to (N, classes): with 3-D input the loss would treat
                # dim 1 (the batch axis) as the class axis.
                head_loss = criterion(
                    scores.reshape(-1, scores.size(-1)), target.reshape(-1, target.size(-1))
                )
                losses[head] += head_loss.item()

    # Average over the number of batches actually processed.
    return [total / max(count, 1) for total in losses]

#### Raw Data

In [33]:
# Column layout of the 30-minute USDJPY export.
ohlc_column = ['open','high','low','close']
time_column = "time"
file_name = "mt5_USDJPY_min30.csv"
file_path = os.path.abspath(os.path.join(data_folder, file_name))

# The first CSV column becomes the index; the time column is parsed as dates.
df = pd.read_csv(file_path, parse_dates=[time_column], index_col=0)

# Row-to-row differences (with_close=False), clipped to [-2, 2].
dataset_options = dict(
    observation_length=60,
    device=device,
    future_step_size=10,
    seed=1017,
    is_training=True,
    with_close=False,
    clip_range=(-2, 2),
)
ds = CloseDiffIDDS(df, ohlc_column, **dataset_options)

In [40]:
ds.ohlc_range_size

4000

In [None]:
# Model hyperparameters.
nhead = 4
d_model = 120
dim_feedforward = 10
num_encoder_layers = 4
num_decoder_layers = 4
feature_size = 5
dropout = 0

model = Seq2SeqTransformer(
    num_encoder_layers,
    num_decoder_layers,
    d_model,
    ds.ohlc_range_size,
    dim_feedforward=dim_feedforward,
    dropout=dropout,
    nhead=nhead,
)

# Xavier-initialize every parameter with more than one dimension.
for weight in (param for param in model.parameters() if param.dim() > 1):
    nn.init.xavier_uniform_(weight)

model = model.to(device)

# Optimization settings.
lr = 0.0001
batch_size = 64

criterion = nn.CrossEntropyLoss()



In [37]:
def get_parameters(model, outlayer_name, limit_outlayer=False):
    """Select trainable parameters for one output head.

    A parameter whose name contains ``"output"`` is kept only when its name
    also contains ``outlayer_name``. Every other trainable parameter is kept
    only when ``limit_outlayer`` is ``False``.

    Returns:
        List of the selected parameters, in ``named_parameters()`` order.
    """
    def is_selected(name):
        if "output" in name:
            return outlayer_name in name
        return limit_outlayer is False

    return [
        param
        for name, param in model.named_parameters()
        if param.requires_grad and is_selected(name)
    ]

In [38]:
# One AdamW optimizer per output head. get_parameters also returns every
# non-output parameter, so those are held by all four optimizers.
optimizer_o, optimizer_h, optimizer_l, optimizer_c = (
    torch.optim.AdamW(get_parameters(model, head_name), lr=lr)
    for head_name in ("open_output", "high_output", "low_output", "close_output")
)

# Each scheduler halves its optimizer's learning rate on every step() call.
scheduler_o, scheduler_h, scheduler_l, scheduler_c = (
    torch.optim.lr_scheduler.StepLR(head_optimizer, 1.0, gamma = 0.5)
    for head_optimizer in (optimizer_o, optimizer_h, optimizer_l, optimizer_c)
)

In [39]:
import copy

# Train for up to `epoch` epochs with early stopping on the summed
# validation loss.
epoch = 5
model.train()
ds.train()
best_loss = float('Inf')
best_model = None
# Best validation loss seen so far for each of the four heads.
best_losses = [float('Inf') for i in range(4)]
patience = 3
counter = 0

for loop in range(1, epoch + 1):
    
    start_time = time.time()
    
    loss_train = train(
        model=model, ds=ds, optimizers=[optimizer_o, optimizer_h, optimizer_l, optimizer_c],
        criterion=criterion, batch_size=batch_size
    )
    
    elapsed_time = time.time() - start_time
    
    loss_valid = evaluate(
        model=model, ds=ds, criterion=criterion,batch_size=batch_size
    )
    
    amount_loss_valid = sum(loss_valid)
    print(f'{loop}/{epoch} {str(int(math.floor(elapsed_time / 60))) + "m" if math.floor(elapsed_time / 60) > 0 else ""}{elapsed_time % 60}s count: {counter}, \
        {sum(loss_train)}, {amount_loss_valid}\
        {"**" if best_loss > amount_loss_valid else ""}')
    print(f'train loss: {loss_train}')
    print(f'valid loss: {loss_valid}')
    
    if best_loss > amount_loss_valid:
        best_loss = amount_loss_valid
        # Snapshot the weights; `best_model = model` would only alias the
        # live model and drift with later epochs.
        best_model = copy.deepcopy(model)
        counter = 0
    else:
        counter += 1
    for index, scheduler in enumerate([scheduler_o, scheduler_h, scheduler_l, scheduler_c]):
        if best_losses[index] < loss_valid[index]:
            # This head got worse than its best epoch: decay its learning rate.
            scheduler.step()
        else:
            # Record the new per-head best; without this the comparison
            # above can never be true.
            best_losses[index] = loss_valid[index]
        
    if counter > patience:
        break

  0%|          | 3/1258 [00:12<1:30:36,  4.33s/it]


IndexError: index out of range in self