# Cluster ID Transformer

In [46]:
import csv
from datetime import datetime
import json
import math
import os
import sys
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import (TransformerDecoder, TransformerDecoderLayer,
                      TransformerEncoder, TransformerEncoderLayer)
from torch.utils.data import DataLoader
from tqdm import tqdm

In [2]:
try:
  from google.colab import drive
  IS_GOOGLE_COLAB = True
except ImportError:
  IS_GOOGLE_COLAB = False

if IS_GOOGLE_COLAB:
  mount_path = '/content/drive'
  base_folder = os.path.join(mount_path, "My Drive", "Data")
  data_folder = os.path.join(base_folder, "FX")
else:
  base_folder = '../../../Data'
  data_folder = os.path.join(base_folder, "FX", "OANDA-Japan MT5 Live")

In [3]:
import os
import shutil
import sys
import zipfile
import requests

def download_modlue_from_gh(repository, github_account='Naradice', branch='master', folder=None, module_path='/gdrive/My Drive/modules', **kwargs):
  if folder is None:
    folder = repository

  zip_url = f"https://github.com/{github_account}/{repository}/archive/refs/heads/{branch}.zip"
  response = requests.get(zip_url)
  if response.status_code == 200:
    with open("temp.zip", "wb") as f:
      f.write(response.content)
    with zipfile.ZipFile("temp.zip", "r") as zip_ref:
      zip_ref.extractall("temp_dir")

    source_folder = f"temp_dir/{repository}-{branch}/{folder}"
    destination_folder = os.path.join(module_path, folder)
    shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)
    os.remove("temp.zip")
    shutil.rmtree("temp_dir")
  else:
    print(f"filed to download {zip_url}: {response.status_code}, {response.text}")

In [4]:
if IS_GOOGLE_COLAB:
  drive.mount(mount_path)
  module_path = f"{mount_path}/My Drive/modules"
else:
  module_path = '../../modules'

if os.path.exists(module_path) is False:
  os.makedirs(module_path)

repositories = [
    {'repository': 'stocknet_study', 'branch': 'master', 'folder': 'Dataset', 'refresh': False},
    {'repository': 'finance_process', 'branch': 'master', 'folder': 'fprocess', 'refresh': False},
    {'repository': 'cloud_storage_handler', 'branch': 'main', 'folder': 'cloud_storage_handler', 'refresh': False},
]

destination = os.path.join(module_path, '__init__.py')
if os.path.exists(destination) is False:
  with open(destination, mode='w') as fp:
    fp.close()

for repo_kwargs in repositories:
  destination = os.path.join(module_path, repo_kwargs['folder'])
  if repo_kwargs['refresh'] or os.path.exists(destination) is False:
    download_modlue_from_gh(**repo_kwargs, module_path=module_path)

In [5]:
sys.path.append(module_path)

import fprocess
import Dataset
import cloud_storage_handler

In [24]:
class Logger:

  @classmethod
  def connect_drive(cls, mount_path='/content/drive'):
    from google.colab import drive
    drive.mount(mount_path)

  def __init__(self, model_name, version, base_path=None, storage_handler='colab', max_retry=3, local_cache_period=10, client_id=None):
    """ Logging class to store training logs

    Args:
        model_name (str): It create a folder {base_path}/{model_name}/.
        verison (str): It create a file {base_path}/{model_name}/{model_name}_v{version}.csv.
        base_path (str, optional): Base path to store logs. If you use cloud storage, this is used as temporal folder. Defaults to None.
        storage_handler (str|BaseHandler, optional): It change storage service. 'colab' can be selected. Defaults to 'colab'.
        max_retry (int, optional): max count of retry when store logs via network. Defaults to 3.
        local_cache_period(int, optional): Valid for cloud storage only. period to chache logs until send it to the storage. Defaults to 10.
        client_id(str, optional): client_id to authenticate cloud service with OAuth2.0/OIDC. Defaults to None.
    """
    # define common veriables
    MOUNT_PATH = '/content/drive'
    self.__use_cloud_storage = False
    self.__init_storage = lambda : None
    self.__local_cache_period = local_cache_period
    self.model_name = model_name
    self.version = version
    self.max_retry = max_retry

    # define variables depends on env
    if storage_handler == 'colab':
      # this case we store logs on mounted path
      self.__init_colab()
      self.__init_storage = self.__init_colab
      if base_path is None:
        self.base_path = MOUNT_PATH
      else:
        base_pathes = [p for p in base_path.split('/') if len(p) > 0]
        self.base_path = os.path.join(MOUNT_PATH, 'My Drive', *base_pathes)
    elif type(storage_handler) is str:
      raise ValueError(f"{storage_handler} is not supported. Please create StorageHandler for the service.")
    elif storage_handler is not None:
      # this case we store logs on app folder of dropbox, using cloud_storage_handlder
      self.__cloud_handler = storage_handler
      if self.__cloud_handler.refresh_token is None:
        self.__cloud_handler.authenticate()
      self.__use_cloud_storage = True
      if base_path is None:
        self.base_path = './'
      else:
        self.base_path = base_path
    else:
      self.__cloud_handler = None
      if base_path is None:
        self.base_path = './'
      else:
        self.base_path = base_path
    model_log_folder = os.path.join(self.base_path, model_name)
    if not os.path.exists(model_log_folder):
        os.makedirs(model_log_folder)
    file_name = f"{model_name}_v{version}.csv"
    self.log_file_path = os.path.join(model_log_folder, file_name)
    self.__cache = []

  def __init_colab(self):
    from google.colab import drive
    drive.mount(MOUNT_PATH)

  def __store_files_to_cloud_storage(self, file_path):
    try:
      self.__cloud_handler.upload_training_results(self.model_name, [file_path])
    except Exception as e:
      print(f"failed to save logs to dropbox: {e}")

  def reset(self, model_name=None, file_name=None):
    if file_name is None:
      file_name = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
    if model_name is None:
      if file_name is None:
        raise ValueError("Either model_name or file_name should be specified")
      self.log_file_path = os.path.join(self.base_path, file_name)
    else:
      model_log_folder = os.path.join(self.base_path, model_name)
      if not os.path.exists(model_log_folder):
        os.makedirs(model_log_folder)
      self.log_file_path = os.path.join(model_log_folder, file_name)
    self.__cache = []

  def __cache_log(self, log_entry: list):
    self.__cache.append(log_entry)

  def __append_log(self, log_entry:list, retry_count=0):
      try:
          with open(self.log_file_path, 'a') as log_file:
            writer = csv.writer(log_file)
            if len(self.__cache) > 0:
              writer.writerows(self.__cache)
              self.__cache = []
            writer.writerow(log_entry)
      except Exception as e:
        if retry_count < self.max_retry:
          if retry_count == 0:
            print(e)
          self.__init_storage()
          self.__append_log(log_entry, retry_count+1)
        else:
          self.__cache.append(log_entry)

  def save_params(self, params:dict, model_name=None, model_version=None):
    data_folder = os.path.dirname(self.log_file_path)
    param_file_path = os.path.join(data_folder, f'{model_name}_v{model_version}_params.json')
    if "device" in params:
      device = params["device"]
      if not isinstance(device, str):
        params["device"] = str(device)
    with open(param_file_path, mode="w") as fp:
      json.dump(params, fp)
    if self.__use_cloud_storage:
      self.__store_files_to_cloud_storage(param_file_path)

  def save_model(self, model, model_name=None, model_version=None):
    if model is not None:
      data_folder = os.path.dirname(self.log_file_path)
      param_file_path = os.path.join(data_folder, f'{model_name}_v{model_version}.torch')
      torch.save(model.state_dict(), param_file_path)
      if self.__use_cloud_storage:
        self.__store_files_to_cloud_storage(param_file_path)

  def save_checkpoint(self, model, optimizer, scheduler, model_name, model_version, **kwargs):
    if model is not None:
      data_folder = os.path.dirname(self.log_file_path)
      model_path = os.path.join(data_folder, f'{model_name}_v{model_version}.torch')
      torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        **kwargs
      }, model_path)
      if self.__use_cloud_storage:
        self.__store_files_to_cloud_storage(model_path)

  def save_logs(self):
    if len(self.__cache) > 0:
      with open(self.log_file_path, 'a') as log_file:
        if len(self.__cache) > 0:
          writer = csv.writer(log_file)
          writer.writerows(self.__cache)
    if self.__use_cloud_storage:
        self.__store_files_to_cloud_storage(self.log_file_path)

  def add_training_log(self, training_loss, validation_loss, log_entry:list=None):
    timestamp = datetime.now().isoformat()
    basic_entry = [timestamp, training_loss, validation_loss]
    if log_entry is not None:
      if type(log_entry) is list and len(log_entry) > 0:
        basic_entry.extend(log_entry)
    if len(self.__cache) < self.__local_cache_period:
      self.__cache_log(basic_entry)
    else:
      self.__append_log(basic_entry)
      if self.__use_cloud_storage:
        self.__store_files_to_cloud_storage(self.log_file_path)

  def get_min_losses(self, train_loss_column=1, val_loss_column=2):
    logs = None
    if os.path.exists(self.log_file_path) is False:
      if self.__cloud_handler is not None:
        file_name = os.path.dirname(self.log_file_path)
        destination_path = f'/{self.model_name}/{file_name}'
        response = self.__cloud_handler.download_file(destination_path, self.log_file_path)
        if response is not None:
          logs = pd.read_csv(self.log_file_path)
    else:
      try:
        logs = pd.read_csv(self.log_file_path)
      except pd.errors.EmptyDataError:
        logs = None

    if logs is None:
      print("no log available")
      return np.inf, np.inf
    else:
      if type(train_loss_column) is int:
        train_loss = logs.iloc[:, train_loss_column]
      elif type(train_loss_column) is str:
        train_loss = logs[train_loss_column]
      min_train_loss = train_loss.min()

      if type(val_loss_column) is int:
        val_loss = logs.iloc[:, val_loss_column]
      elif type(val_loss_column) is str:
        val_loss = logs[val_loss_column]
      min_val_loss = val_loss.min()

      return min_train_loss, min_val_loss

In [7]:
# Initialize cloud storage handler if needed
from cloud_storage_handler import DropboxHandler


# storage_handler = DropboxHandler("nhjrq1cjpugk5hc", "http://localhost")
# storage_handler.authenticate()
# Otherwise, specify None
storage_handler = None

In [8]:
def load_model(model_name, model_version, device, train=True, storage_handler=None, model_folder=None, optimizer_class=None, scheduler_class=None):
  if model_folder is None:
    model_folder = base_folder
  model_folder = os.path.join(model_folder, model_name)

  params_file_name = f'{model_folder}/{model_name}_v{model_version}_params.json'
  if os.path.exists(params_file_name) is False:
    if storage_handler is None:
      print(f"exsisting model params not found on {params_file_name}.")
      return None, None, None, None
    else:
      response = storage_handler.download_file(f"/{model_name}/{model_name}_v{model_version}_params.json", params_file_name)
      if response is None:
        print("exsisting model params not found.")
        return None, None, None, None
  with open(params_file_name) as fp:
      params = json.load(fp)
  # need to create create_model function for respective model
  if "device" not in params:
    params["device"] = device
  model = create_model(**params, feature_size=len(params["features"])).to(device)
  optimizer = optimizer_class(model.parameters(), lr=lr)
  scheduler = scheduler_class(optimizer, 1.0)
  if train:
    model_path = f'{model_folder}/{model_name}_train_v{model_version}.torch'
  else:
    model_path = f'{model_folder}/{model_name}_v{model_version}.torch'
  if os.path.exists(model_path) is False:
    if storage_handler is None:
      print("exsisting model not found.")
      return None, None, None, None
    file_name = os.path.basename(model_path)
    response = storage_handler.download_file(f"/{model_name}/{file_name}", model_path)
    if response is None:
      print("exsisting model not found.")
      return None, None, None, None

  if torch.cuda.is_available():
    check_point = torch.load(model_path)
  else:
    check_point = torch.load(model_path, map_location=torch.device('cpu'))
  if "model_state_dict" in check_point:
    model.load_state_dict(check_point['model_state_dict'])
    optimizer.load_state_dict(check_point['optimizer_state_dict'])
    scheduler.load_state_dict(check_point['scheduler_state_dict'])
    return params, model, optimizer, scheduler
  else:
    if optimizer_class is not None:
      print("checkpoint is not available.")
    model.load_state_dict(check_point)
    return params, model, None, None

# Cluster Dataset

In [9]:
import random

def k_means(src_df, label_num_k, initial_centers = None, max_iter = 10000):
  np.random.seed(100)
  random.seed(100)
  
  count = 0

  labels = np.fromiter(random.choices(range(label_num_k), k=src_df.shape[0]), dtype = int)
  labels_prev = np.zeros(src_df.shape[0])
  if initial_centers is None:
    cluster_centers = np.eye(label_num_k, src_df.shape[1])
  else:
    initial_centers = np.array(initial_centers)
    if initial_centers.shape == (label_num_k, src_df.shape[1]):
      cluster_centers = initial_centers
    else:
      raise ValueError("invalid initial centeers")

  while (not (labels == labels_prev).all()):
      for i in range(label_num_k):
          clusters = src_df.iloc[labels == i]
          if len(clusters) > 0:
            cluster_centers[i, :] = clusters.mean(axis = 0)
          else:
            cluster_centers[i, :] = np.ones(src_df.shape[1])
      dist = ((src_df.values[:, :, np.newaxis] - cluster_centers.T[np.newaxis, :, :]) ** 2).sum(axis = 1)
      # dist = np.sqrt(dist)
      labels_prev = labels
      labels = dist.argmin(axis = 1)
      count += 1
      if count > max_iter:
        break
  return labels, cluster_centers

In [10]:
# Freedman–Diaconis rule. Sometimes 0 count appeare due to outfliers.
def freedamn_diaconis_bins(data):
    q75, q25 = np.percentile(data, [75 ,25])

    iqr = q75 - q25
    n = len(data)
    bin_width = (2.0 * iqr / (n**(1/3)))
    return bin_width

def prob_mass(data, bin_width=None):
    if bin_width is None:
        counts, bin_edges = np.histogram(data)
    else:
        try:
            bins=np.arange(min(data), max(data) + bin_width, bin_width)
            counts, bin_edges = np.histogram(data, bins=bins)
        except ValueError:
            counts, bin_edges = np.histogram(data)
    mass = counts / counts.sum()
    return mass, bin_edges

In [11]:
class ClusterPossibility:
    
    def __init__(self, label_df, center, **kwargs):
        self.center = center
        if label_df is not None and len(label_df) > 0:
            columns = label_df.columns
            self.pmass = {}
            self.bin_edges = {}
            self.min = label_df.min()
            self.max = label_df.max()
            self.bin_width = {}
            
            for column in columns:
                column_srs = label_df[column]
                bin_width = freedamn_diaconis_bins(column_srs).round(3)
                pmass, bin_edges = prob_mass(column_srs, bin_width)
                self.pmass[column] = pmass
                self.bin_edges[column] = bin_edges
                self.bin_width[column] = bin_width
        elif "pmass" in kwargs:
            self.pmass = kwargs["pmass"]
            self.bin_edges = kwargs["bin_edges"]
            self.min = kwargs["min"]
            self.max = kwargs["max"]
            self.bin_width = kwargs["bin_width"]
        else:
            raise Exception(f"label_df is not valid: {type(label_df)}")
            
    def __rearrange_pmass_edges(self, pmass_df, width):
        min_value = pmass_df["diff_value"].iloc[0]
        max_value = pmass_df["diff_value"].iloc[-1]
        temp_df = pmass_df.copy()

        # devide by 2 as diff_value is center value
        center_threshold = min_value + width/2
        left_edge = min_value
        right_edge = min_value + width
        
        bin_edges = [left_edge]
        joint_pmass = []
        pmass_indices = []
        while len(temp_df) > 0 and center_threshold <= max_value:
            possibilty_df = temp_df[temp_df["diff_value"] < center_threshold]
            if len(possibilty_df) > 0:
                possibilty = possibilty_df.loc[:, temp_df.columns != "diff_value"].sum()
            else:
                possibilty = 0.0
            joint_pmass.append(possibilty.values)
            pmass_indices.append(center_threshold)
            bin_edges.append(right_edge)
            
            left_edge = right_edge
            right_edge += width
            
            temp_df = temp_df[temp_df["diff_value"] >= center_threshold]
            center_threshold += width
        diff_values = pd.Series(pmass_indices, name="diff_value")
        pmass_df = pd.DataFrame(joint_pmass, columns=["probability"])
        pmass_df = pd.concat([diff_values, pmass_df], axis=1)
        return pmass_df, np.asarray(bin_edges)
            
    def __joint_probability(self, pmass, bin_edges, pmass_2, bin_edges_2):
        joint_pmass = []
        all_centers = []
        width = min(bin_edges[1:] - bin_edges[:-1])
        width_2 = min(bin_edges_2[1:] - bin_edges_2[:-1])
        
        for index_i, edge_i in enumerate(bin_edges[:-1]):
            left_edge = edge_i
            right_edge = bin_edges[index_i+1]
            
            for index_j, edge_j in enumerate(bin_edges_2[:-1]):
                joint_pmass.append(pmass[index_i] * pmass_2[index_j])
                next_left_edge = edge_j
                next_right_edge = bin_edges_2[index_j]
                
                joint_left_edge = left_edge + next_left_edge
                joint_right_edge = right_edge + next_right_edge
                
                joint_center = (joint_left_edge + joint_right_edge)/2
                all_centers.append(joint_center)

        all_edges_df = pd.Series(all_centers, name="diff_value")
        joint_pmass_df = pd.Series(joint_pmass, name="probability")
        joint_pmass_df = pd.concat([joint_pmass_df, all_edges_df], axis=1)
        joint_pmass_df = joint_pmass_df.groupby('diff_value').sum().reset_index()
        joint_pmass_df.sort_values(by="diff_value")
        joint_width = width + width_2
        joint_pmass_df, joint_bin_edges = self.__rearrange_pmass_edges(joint_pmass_df, joint_width)
        return joint_pmass_df, joint_bin_edges, joint_width
            
    def __mul__(self, other):
        if isinstance(other, ClusterPossibility):
            columns = set([*self.pmass.keys(), *other.pmass.keys()])
            joint_pmass = {}
            joint_bin_edges = {}
            joint_min = {}
            joint_max = {}
            joint_bin_width = {}
            joint_center = {}
            
            if len(columns) > 0:
                for index, column in enumerate(columns):
                    pmass_df, bin_edges, width = self.__joint_probability(self.pmass[column], self.bin_edges[column], 
                                                                          other.pmass[column], other.bin_edges[column])
                    pmass = pmass_df["probability"].values
                    joint_pmass[column] = pmass
                    joint_bin_edges[column] = bin_edges
                    joint_min[column] = pmass[0]
                    joint_max[column] = pmass[-1]
                    joint_bin_width[column] = width
                    if isinstance(self.center, (pd.DataFrame, dict)) and column in self.center:
                        center = self.center[column]
                    else:
                        center = self.center[index]
                    if isinstance(other.center, (pd.DataFrame, dict)) and column in other.center:
                        other_center = other.center[column]
                    else:
                        other_center = other.center[index]
                    joint_center[column] = center+other_center
                joint_cluster = ClusterPossibility(None, center=joint_center, pmass=joint_pmass, bin_edges=joint_bin_edges,
                                   min=joint_min, max=joint_max, bin_width=joint_bin_width)
                return joint_cluster   
            else:
                raise ValueError("clusters don't have any same columns.")
        else:
            raise TypeError("Unsupported operand type")
        
    def __getitem__(self, ndx):
        if isinstance(ndx, str):
            return self.pmass[ndx], self.bin_edges[ndx]
        else:
            partial_pmass = {}
            partial_bins = {}
            for key in self.pmass.keys():
                partial_pmass[key] = self.pmass[key][ndx]
                partial_bins[key] = self.bin_edges[key][ndx]
            return partial_pmass, partial_bins

In [97]:
from Dataset.base import TimeDataset

class ClusterDataset(TimeDataset):
    
    def __init__(
        self,
        df,
        columns: list,
        label_num_k:int = 30,
        freq=30,
        observation_length: int = 60,
        device="cuda",
        prediction_length=10,
        seed=1017,
        is_training=True,
        randomize=True,
        index_sampler=None,
        split_ratio=0.8,
        indices=None,
    ):
        diff_p = fprocess.DiffPreProcess(columns=columns)
        src_df = df[columns].dropna()
        src_df = diff_p(src_df).dropna()
        processes = [fprocess.WeeklyIDProcess(freq=freq, time_column= "index")]
        
        divisions = [i / (label_num_k-1) for i in range(label_num_k)]
        ini_centers = [
            np.quantile(src_df, p, axis=0) for p in divisions
        ]
        labels, centers = k_means(src_df, label_num_k=label_num_k, initial_centers=ini_centers)
        self.centers = centers
        clusters = []
        for label in range(label_num_k):
            label_df = src_df[labels == label]
            center = centers[label]
            clusters.append(ClusterPossibility(label_df, center))
        self.clusters = clusters
        new_columns = ["label"]
        token_df = pd.DataFrame(labels, index=src_df.index, columns=new_columns)
        super().__init__(token_df, columns=new_columns, observation_length=observation_length, processes=processes,
                         device=device, prediction_length=prediction_length, seed=seed, is_training=is_training, randomize=randomize,
                         index_sampler=index_sampler, split_ratio=split_ratio, indices=indices, dtype=torch.int)
        
    
    def to_labels(self, observations):
        if isinstance(observations, pd.DataFrame):
            observations = observations.values
        dist = ((observations[:, :, np.newaxis] - self.centers.T[np.newaxis, :, :]) ** 2).sum(axis = 1)
        labels = dist.argmin(axis = 1)
        return labels
    
    def output_indices(self, index):
        # output overrap with last input
        return slice(index + self.observation_length - 1, index + self.observation_length + self._prediction_length)

    def __getitem__(self, ndx):
        src, src_time = self._input_func(ndx)
        ans, ans_time = self._output_func(ndx)
        src = src.squeeze()
        ans = ans.squeeze()
        return (src, src_time), (ans, ans_time)

# Simple Transformer

In [13]:
class PositionalEncoding(nn.Module):
    def __init__(self, time_size, d_model, dropout=0.1, device=None):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.pe = nn.Embedding(time_size, d_model, device=device)

    def forward(self,time_ids):
        position = self.pe(time_ids)
        return self.dropout(position)

In [14]:
class Seq2SeqTransformer(nn.Module):
    
    def __init__(
        self, num_encoder_layers: int, num_decoder_layers: int,
        cluster_size: int, time_size: int, d_model: int,
        dim_feedforward:int = 512, dropout:float = 0.1, nhead:int = 8, device=None
    ):
        
        super(Seq2SeqTransformer, self).__init__()

        self.cluster_embedded_layer = torch.nn.Embedding(num_embeddings=cluster_size, embedding_dim = d_model, device=device)
        self.dropaut_layer = nn.Dropout(dropout)
        self.tgt_dropaut_layer = nn.Dropout(dropout)
        self.positional_encoding = PositionalEncoding(time_size, d_model, dropout, device=device)
                
        encoder_layer = TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout, device=device
        )
        self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        
        decoder_layer = TransformerDecoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward,dropout=dropout, device=device
        )
        self.transformer_decoder = TransformerDecoder(decoder_layer, num_layers=num_decoder_layers)
        
        self.output_layer = nn.Linear(d_model, cluster_size, device=device)
        

    def forward(
        self, src: Tensor, src_time: Tensor,
        tgt: Tensor, tgt_time: Tensor,
        mask_tgt: Tensor, mask_src: Tensor=None, padding_mask_src: Tensor=None, padding_mask_tgt: Tensor=None,
        memory_key_padding_mask: Tensor=None
    ):
        src_pos = self.positional_encoding(src_time)
        src_emb = self.cluster_embedded_layer(src)
        src_emb = torch.add(src_emb, src_pos)
        src_emb = self.dropaut_layer(src_emb)
                
        tgt_pos = self.positional_encoding(tgt_time)
        tgt_emb = self.cluster_embedded_layer(tgt)
        tgt_emb = torch.add(tgt_emb, tgt_pos)
        tgt_emb = self.tgt_dropaut_layer(tgt_emb)
                
        memory = self.transformer_encoder(src_emb, mask_src, padding_mask_src)
        outs = self.transformer_decoder(
            tgt_emb, memory, mask_tgt, None,
            padding_mask_tgt, memory_key_padding_mask
        )
        output = self.output_layer(outs)
        
        return output

In [103]:
def train(model, ds, optimizer, criterion, batch_size, cluster_size, device):
    
    model.train()
    ds.train()
    losses = 0
    
    length = 0.0
    end_index = len(ds)
    for index in tqdm(range(0, end_index - batch_size, batch_size)):
        length+=1.0
        src, tgt = ds[index:index+batch_size]
        src, src_time = src
        tgt, tgt_time = tgt
        
        in_tgt = tgt[:-1]
        in_tgt_time = tgt_time[:-1]

        mask_tgt = nn.Transformer.generate_square_subsequent_mask(in_tgt.size(0)).to(device)
        out = model(
            src=src, src_time=src_time, tgt=in_tgt, tgt_time=in_tgt_time,
            mask_tgt=mask_tgt, padding_mask_src=None, padding_mask_tgt=None,
            memory_key_padding_mask=None
        )
        
        optimizer.zero_grad()
        out_tgt = tgt[1:].to(torch.int64)
        out_tgt = torch.nn.functional.one_hot(out_tgt, num_classes=cluster_size).to(torch.float32)
        
        loss = criterion(out.permute(1, 2, 0), out_tgt.permute(1, 2, 0))
        loss.backward()
        optimizer.step()
        losses += loss.item()
        
    return losses / length

In [104]:
def evaluate(model, ds, criterion, batch_size, cluster_size, device):
    
    model.eval()
    ds.eval()
    losses = 0
    length = 0.0
    for index in range(0, len(ds) - batch_size, batch_size):
        length+=1.0
        src, tgt = ds[index:index+batch_size]
        src, src_time = src
        tgt, tgt_time = tgt
        
        in_tgt = tgt[:-1]
        in_tgt_time = tgt_time[:-1]

        mask_tgt = nn.Transformer.generate_square_subsequent_mask(in_tgt.size(0)).to(device)
        out = model(
            src=src, src_time=src_time, tgt=in_tgt, tgt_time=in_tgt_time,
            mask_tgt=mask_tgt, padding_mask_src=None, padding_mask_tgt=None,
            memory_key_padding_mask=None
        )

        out_tgt = tgt[1:].to(torch.int64)
        out_tgt = torch.nn.functional.one_hot(out_tgt, num_classes=cluster_size).to(torch.float32)

        loss = criterion(out.permute(1, 2, 0), out_tgt.permute(1, 2, 0))
        losses += loss.item()
        
    return losses / length

In [17]:
def create_model(num_encoder_layers: int, num_decoder_layers: int,
        cluster_size: int, time_size: int, d_model: int,
        dim_feedforward:int = 512, dropout:float = 0.1, nhead:int = 8, **kwargs):
    model = Seq2SeqTransformer(
        num_encoder_layers=int(num_encoder_layers),
        num_decoder_layers=int(num_decoder_layers),
        cluster_size=int(cluster_size),
        time_size=int(time_size),
        d_model=int(d_model),
        dim_feedforward=dim_feedforward,
        dropout=dropout, nhead=nhead
    )
    return model

## Training with Row Data 

In [18]:
import os
import pandas as pd

ohlc_column = ['open','high','low','close']
file_name = "mt5_USDJPY_min30.csv"

file_path = os.path.join(data_folder, file_name)
file_path = os.path.abspath(file_path)
df = pd.read_csv(file_path, parse_dates=True, index_col=0)
df

Unnamed: 0_level_0,open,high,low,close,tick_volume,spread,real_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-07-07 08:30:00,102.086,102.122,102.081,102.102,738,3,0
2014-07-07 09:00:00,102.102,102.146,102.098,102.113,1036,3,0
2014-07-07 09:30:00,102.113,102.115,102.042,102.044,865,3,0
2014-07-07 10:00:00,102.047,102.052,102.005,102.019,983,3,0
2014-07-07 10:30:00,102.017,102.025,101.918,101.941,1328,3,0
...,...,...,...,...,...,...,...
2022-08-12 21:30:00,133.461,133.506,133.439,133.484,1125,3,0
2022-08-12 22:00:00,133.484,133.530,133.437,133.475,1277,3,0
2022-08-12 22:30:00,133.475,133.486,133.433,133.483,1506,3,0
2022-08-12 23:00:00,133.484,133.536,133.465,133.521,1038,3,0


In [98]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_name = "finance_cluster_transformer"

#Dataset parameters
columns = ["open", "close"]
batch_size = 64
observation_length = 60
prediction_length = 10
feature_size = 4
cluster_size = 30
lr=0.005


ds = ClusterDataset(df, columns, label_num_k=cluster_size, freq=30,
                    observation_length=observation_length, prediction_length=prediction_length, device=device)

In [107]:
# num of encoder version, cluss size version, d_model version
model_version = "2.1.2"
model_params, model, optimizer, scheduler = load_model(model_name, model_version, device, True, storage_handler=storage_handler,
                                 optimizer_class=torch.optim.Adam,
                                 scheduler_class=torch.optim.lr_scheduler.StepLR)

if model is None:
    print("Initialize a new model.")

    # Hyper parameters
    model_params = {
        "num_encoder_layers":4,
        "num_decoder_layers":4,
        "cluster_size":cluster_size,
        "time_size":int(7*24*(60/30)), 
        "d_model":24,
        "dim_feedforward":100,
        "dropout":0.1, "nhead":4
    }

    model = create_model(
        **model_params
    ).to(device)

params_num = 0
for p in model.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)
    if p.requires_grad:
        params_num += p.numel()
print(f"params: {params_num}")

exsisting model params not found on ../../../Data\finance_cluster_transformer/finance_cluster_transformer_v2.1.2_params.json.
Initialize a new model.
params: 78686


In [109]:
criterion = nn.CrossEntropyLoss()
if optimizer is None:
    print("initialize optimizer")
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma = 0.9)

In [110]:
logger = Logger(model_name, model_version, base_folder, storage_handler=storage_handler, local_cache_period=1)

start_index, end_index = ds.get_date_range()
params = {"processes": [],
          "source": {
              "path": file_path,
              "start": start_index.isoformat(),
              "end": end_index.isoformat(),
              "length": len(ds)
          },
          "features": columns,
          "batch_size": batch_size,
          "observation_length": observation_length,
          "prediction_length": prediction_length,
          **model_params,
          "params_num": params_num,
          "version": 2
}

logger.save_params(params, model_name, model_version)

print("training log will be saved on ", logger.log_file_path)

training log will be saved on  ../../../Data\finance_cluster_transformer\finance_cluster_transformer_v2.1.2.csv


In [111]:
epoch = 500
best_train_loss, best_valid_loss = logger.get_min_losses()
best_model = None
best_train_model = None
patience = 3
counter = 0

for loop in range(1, epoch + 1):    
    start_time = time.time()
    
    loss_train = train(
        model=model, ds=ds, optimizer=optimizer,
        criterion=criterion, batch_size=batch_size, cluster_size=cluster_size, device=device
    )
    
    elapsed_time = time.time() - start_time
    
    loss_valid = evaluate(
        model=model, ds=ds, criterion=criterion,batch_size=batch_size, cluster_size=cluster_size, device=device
    )
       
    logger.add_training_log(loss_train, loss_valid, elapsed_time)
    
    if best_train_loss > loss_train:
        best_train_loss = loss_train
        best_train_model = model
        counter = 0
    else:
        counter += 1
        if counter == 1:
          logger.save_checkpoint(best_train_model, optimizer, scheduler, f'{model_name}_train', model_version)
        scheduler.step()
        
    print('[{}/{}] train loss: {:.10f}, valid loss: {:.10f}  [{}{:.0f}s] count: {}, {}'.format(
        loop, epoch,
        loss_train, loss_valid,
        str(int(math.floor(elapsed_time / 60))) + 'm' if math.floor(elapsed_time / 60) > 0 else '',
        elapsed_time % 60,
        counter,
        '**' if best_valid_loss > loss_valid else ''
    ))
    
    if best_valid_loss > loss_valid:
        best_valid_loss = loss_valid
        best_model = model
        logger.save_checkpoint(best_model, optimizer, scheduler, model_name, model_version)

    if counter > patience:
        break
    
logger.save_checkpoint(best_train_model, optimizer, scheduler, f'{model_name}_train', model_version)
logger.save_checkpoint(best_model, optimizer, scheduler, model_name, model_version)

no log available


100%|██████████| 1258/1258 [04:40<00:00,  4.48it/s]


[1/500] train loss: 1.9126627856, valid loss: 1.7780492462  [4m41s] count: 0, **


100%|██████████| 1258/1258 [04:25<00:00,  4.73it/s]


[2/500] train loss: 1.8181385371, valid loss: 1.7821034239  [4m26s] count: 0, 


100%|██████████| 1258/1258 [03:52<00:00,  5.40it/s]


[3/500] train loss: 1.8093514489, valid loss: 1.7801957123  [3m53s] count: 0, 


100%|██████████| 1258/1258 [04:11<00:00,  5.01it/s]


[4/500] train loss: 1.8046710007, valid loss: 1.7760209725  [4m11s] count: 0, **


100%|██████████| 1258/1258 [04:07<00:00,  5.08it/s]


[5/500] train loss: 1.8032649899, valid loss: 1.7808107305  [4m8s] count: 0, 


100%|██████████| 1258/1258 [04:39<00:00,  4.50it/s]


[6/500] train loss: 1.8028142894, valid loss: 1.7789260515  [4m40s] count: 0, 


100%|██████████| 1258/1258 [04:06<00:00,  5.11it/s]


[7/500] train loss: 1.8014934397, valid loss: 1.7741441571  [4m6s] count: 0, **


100%|██████████| 1258/1258 [03:55<00:00,  5.35it/s]


[8/500] train loss: 1.7993209064, valid loss: 1.7826853099  [3m55s] count: 0, 


100%|██████████| 1258/1258 [03:58<00:00,  5.27it/s]


[9/500] train loss: 1.8198659554, valid loss: 1.7857826404  [3m59s] count: 0, 


100%|██████████| 1258/1258 [04:19<00:00,  4.85it/s]


[10/500] train loss: 1.8152038790, valid loss: 1.7903617937  [4m20s] count: 1, 


100%|██████████| 1258/1258 [03:58<00:00,  5.27it/s]


[11/500] train loss: 1.8066787002, valid loss: 1.7881882396  [3m59s] count: 2, 


100%|██████████| 1258/1258 [04:16<00:00,  4.91it/s]


[12/500] train loss: 1.7989881164, valid loss: 1.7854255552  [4m16s] count: 3, 


100%|██████████| 1258/1258 [04:28<00:00,  4.69it/s]


[13/500] train loss: 1.7925414403, valid loss: 1.7860790506  [4m28s] count: 0, 


100%|██████████| 1258/1258 [04:20<00:00,  4.82it/s]


[14/500] train loss: 1.7892320836, valid loss: 1.7775110083  [4m21s] count: 0, 


100%|██████████| 1258/1258 [04:19<00:00,  4.85it/s]


[15/500] train loss: 1.7884894994, valid loss: 1.7823181331  [4m20s] count: 0, 


100%|██████████| 1258/1258 [04:32<00:00,  4.62it/s]


[16/500] train loss: 1.7860513456, valid loss: 1.7863104663  [4m32s] count: 0, 


100%|██████████| 1258/1258 [04:29<00:00,  4.67it/s]


[17/500] train loss: 1.7837795372, valid loss: 1.7818372908  [4m29s] count: 0, 


100%|██████████| 1258/1258 [04:03<00:00,  5.17it/s]


[18/500] train loss: 1.7825500405, valid loss: 1.7866842244  [4m3s] count: 0, 


100%|██████████| 1258/1258 [04:22<00:00,  4.80it/s]


[19/500] train loss: 1.7811351993, valid loss: 1.7834209893  [4m22s] count: 0, 


100%|██████████| 1258/1258 [04:27<00:00,  4.70it/s]


[20/500] train loss: 1.7798599753, valid loss: 1.7831654332  [4m28s] count: 0, 


100%|██████████| 1258/1258 [04:25<00:00,  4.75it/s]


[21/500] train loss: 1.7778182062, valid loss: 1.7902129401  [4m25s] count: 0, 


100%|██████████| 1258/1258 [04:36<00:00,  4.56it/s]


[22/500] train loss: 1.7789979767, valid loss: 1.7867191592  [4m36s] count: 0, 


100%|██████████| 1258/1258 [04:24<00:00,  4.76it/s]


[23/500] train loss: 1.7768908880, valid loss: 1.7891712646  [4m25s] count: 1, 


100%|██████████| 1258/1258 [04:19<00:00,  4.85it/s]


[24/500] train loss: 1.7758881296, valid loss: 1.7889904347  [4m19s] count: 0, 


100%|██████████| 1258/1258 [04:18<00:00,  4.87it/s]


[25/500] train loss: 1.7745908724, valid loss: 1.7973162069  [4m18s] count: 0, 


100%|██████████| 1258/1258 [03:52<00:00,  5.40it/s]


[26/500] train loss: 1.7740752446, valid loss: 1.7913367505  [3m53s] count: 0, 


100%|██████████| 1258/1258 [03:45<00:00,  5.58it/s]


[27/500] train loss: 1.7730510878, valid loss: 1.7943392002  [3m46s] count: 0, 


100%|██████████| 1258/1258 [03:48<00:00,  5.51it/s]


[28/500] train loss: 1.7710605095, valid loss: 1.7910596696  [3m48s] count: 0, 


100%|██████████| 1258/1258 [03:36<00:00,  5.81it/s]


[29/500] train loss: 1.7704308403, valid loss: 1.7984833249  [3m37s] count: 0, 


100%|██████████| 1258/1258 [03:31<00:00,  5.94it/s]


[30/500] train loss: 1.7716869162, valid loss: 1.7953789592  [3m32s] count: 0, 


100%|██████████| 1258/1258 [03:39<00:00,  5.74it/s]


[31/500] train loss: 1.7728432338, valid loss: 1.7943446065  [3m39s] count: 1, 


100%|██████████| 1258/1258 [03:44<00:00,  5.61it/s]


[32/500] train loss: 1.7714037782, valid loss: 1.7973445471  [3m44s] count: 2, 


100%|██████████| 1258/1258 [03:39<00:00,  5.73it/s]


[33/500] train loss: 1.7683132193, valid loss: 1.8004900933  [3m40s] count: 3, 


100%|██████████| 1258/1258 [03:42<00:00,  5.66it/s]


[34/500] train loss: 1.7680542688, valid loss: 1.7989035525  [3m42s] count: 0, 


100%|██████████| 1258/1258 [03:46<00:00,  5.56it/s]


[35/500] train loss: 1.7733360800, valid loss: 1.7982422388  [3m46s] count: 0, 


100%|██████████| 1258/1258 [03:32<00:00,  5.91it/s]


[36/500] train loss: 1.7685836548, valid loss: 1.8017226602  [3m33s] count: 1, 


100%|██████████| 1258/1258 [04:20<00:00,  4.82it/s]


[37/500] train loss: 1.7660983553, valid loss: 1.8036396161  [4m21s] count: 2, 


100%|██████████| 1258/1258 [04:48<00:00,  4.36it/s]


[38/500] train loss: 1.7648513748, valid loss: 1.8013411840  [4m48s] count: 0, 


100%|██████████| 1258/1258 [04:49<00:00,  4.35it/s]


[39/500] train loss: 1.7592806810, valid loss: 1.8009614708  [4m50s] count: 0, 


100%|██████████| 1258/1258 [04:27<00:00,  4.71it/s]


[40/500] train loss: 1.7579977748, valid loss: 1.8022536763  [4m27s] count: 0, 


100%|██████████| 1258/1258 [03:58<00:00,  5.26it/s]


[41/500] train loss: 1.7588919360, valid loss: 1.8038791643  [3m59s] count: 0, 


100%|██████████| 1258/1258 [03:55<00:00,  5.34it/s]


[42/500] train loss: 1.7565957106, valid loss: 1.8022576140  [3m55s] count: 1, 


100%|██████████| 1258/1258 [03:56<00:00,  5.32it/s]


[43/500] train loss: 1.7548785317, valid loss: 1.8012540500  [3m57s] count: 0, 


100%|██████████| 1258/1258 [03:57<00:00,  5.29it/s]


[44/500] train loss: 1.7530448166, valid loss: 1.8060237291  [3m58s] count: 0, 


100%|██████████| 1258/1258 [03:45<00:00,  5.57it/s]


[45/500] train loss: 1.7520994614, valid loss: 1.7975020443  [3m46s] count: 0, 


100%|██████████| 1258/1258 [03:54<00:00,  5.37it/s]


[46/500] train loss: 1.7526635734, valid loss: 1.8024578719  [3m54s] count: 0, 


100%|██████████| 1258/1258 [03:52<00:00,  5.41it/s]


[47/500] train loss: 1.7517741382, valid loss: 1.8024320503  [3m53s] count: 1, 


100%|██████████| 1258/1258 [03:59<00:00,  5.26it/s]


[48/500] train loss: 1.7512041112, valid loss: 1.8037038089  [3m59s] count: 0, 


100%|██████████| 1258/1258 [03:56<00:00,  5.32it/s]


[49/500] train loss: 1.7527715893, valid loss: 1.8060299371  [3m56s] count: 0, 


100%|██████████| 1258/1258 [03:59<00:00,  5.26it/s]


[50/500] train loss: 1.7525392916, valid loss: 1.8064453187  [3m59s] count: 1, 


100%|██████████| 1258/1258 [03:57<00:00,  5.29it/s]


[51/500] train loss: 1.7514967699, valid loss: 1.8063370945  [3m58s] count: 2, 


100%|██████████| 1258/1258 [03:50<00:00,  5.46it/s]


[52/500] train loss: 1.7498167348, valid loss: 1.8069614461  [3m51s] count: 3, 


100%|██████████| 1258/1258 [03:58<00:00,  5.28it/s]


[53/500] train loss: 1.7509781902, valid loss: 1.8086718081  [3m58s] count: 0, 


100%|██████████| 1258/1258 [03:54<00:00,  5.37it/s]


[54/500] train loss: 1.7497343166, valid loss: 1.8081587580  [3m54s] count: 1, 


100%|██████████| 1258/1258 [04:01<00:00,  5.20it/s]


[55/500] train loss: 1.7501463760, valid loss: 1.8105140612  [4m2s] count: 0, 


100%|██████████| 1258/1258 [03:51<00:00,  5.43it/s]


[56/500] train loss: 1.7486313601, valid loss: 1.8088147172  [3m52s] count: 1, 


100%|██████████| 1258/1258 [03:58<00:00,  5.28it/s]


[57/500] train loss: 1.7473721216, valid loss: 1.8078262532  [3m58s] count: 0, 


100%|██████████| 1258/1258 [03:57<00:00,  5.30it/s]


[58/500] train loss: 1.7471434067, valid loss: 1.8076434528  [3m57s] count: 0, 


100%|██████████| 1258/1258 [03:58<00:00,  5.28it/s]


[59/500] train loss: 1.7467055885, valid loss: 1.8094991655  [3m58s] count: 0, 


100%|██████████| 1258/1258 [04:11<00:00,  4.99it/s]


[60/500] train loss: 1.7460188301, valid loss: 1.8117058502  [4m12s] count: 0, 


100%|██████████| 1258/1258 [03:54<00:00,  5.37it/s]


[61/500] train loss: 1.7463660035, valid loss: 1.8093631671  [3m54s] count: 0, 


100%|██████████| 1258/1258 [04:00<00:00,  5.23it/s]


[62/500] train loss: 1.7455844364, valid loss: 1.8101450159  [4m1s] count: 1, 


100%|██████████| 1258/1258 [03:54<00:00,  5.36it/s]


[63/500] train loss: 1.7445392690, valid loss: 1.8092541481  [3m55s] count: 0, 


100%|██████████| 1258/1258 [03:57<00:00,  5.30it/s]


[64/500] train loss: 1.7430630213, valid loss: 1.8097344728  [3m57s] count: 0, 


100%|██████████| 1258/1258 [04:06<00:00,  5.11it/s]


[65/500] train loss: 1.7439905127, valid loss: 1.8110259741  [4m6s] count: 0, 


100%|██████████| 1258/1258 [03:58<00:00,  5.28it/s]


[66/500] train loss: 1.7439262771, valid loss: 1.8109901671  [3m58s] count: 1, 


100%|██████████| 1258/1258 [04:00<00:00,  5.24it/s]


[67/500] train loss: 1.7430223406, valid loss: 1.8116664803  [4m0s] count: 2, 


100%|██████████| 1258/1258 [03:53<00:00,  5.38it/s]


[68/500] train loss: 1.7434605069, valid loss: 1.8096479296  [3m54s] count: 0, 


100%|██████████| 1258/1258 [04:07<00:00,  5.09it/s]


[69/500] train loss: 1.7428059386, valid loss: 1.8107111020  [4m7s] count: 1, 


100%|██████████| 1258/1258 [04:23<00:00,  4.77it/s]


[70/500] train loss: 1.7425484020, valid loss: 1.8109703593  [4m24s] count: 0, 


100%|██████████| 1258/1258 [04:15<00:00,  4.92it/s]


[71/500] train loss: 1.7424645933, valid loss: 1.8130247448  [4m16s] count: 0, 


100%|██████████| 1258/1258 [04:07<00:00,  5.08it/s]


[72/500] train loss: 1.7437468743, valid loss: 1.8136455518  [4m8s] count: 0, 


100%|██████████| 1258/1258 [04:19<00:00,  4.84it/s]


[73/500] train loss: 1.7439426463, valid loss: 1.8124378950  [4m20s] count: 1, 


100%|██████████| 1258/1258 [04:07<00:00,  5.08it/s]


[74/500] train loss: 1.7428735362, valid loss: 1.8130318646  [4m8s] count: 2, 


100%|██████████| 1258/1258 [04:13<00:00,  4.96it/s]


[75/500] train loss: 1.7426221438, valid loss: 1.8133256561  [4m14s] count: 3, 


## Validation

In [21]:
model_name = "finance_cluster_transformer"
model_version = "2.1.2"
_, model, _, _ = load_model(model_name, model_version, device, False, storage_handler=storage_handler,optimizer_class=torch.optim.Adam,
                                 scheduler_class=torch.optim.lr_scheduler.StepLR)
model = model.to(device)

In [153]:
ds.eval()
src, tgt = ds[:batch_size]

observation, obs_time = src
ans, ans_time = tgt

In [181]:
criterion = nn.CrossEntropyLoss()
ans_tgt = torch.nn.functional.one_hot(ans[1:].to(torch.int64), num_classes=cluster_size).to(torch.float32)
ans_tgt = ans_tgt.permute(1, 2, 0)
loss = criterion(ans_tgt, ans[1:].permute(1, 0).to(torch.int64))
loss

tensor(2.4569, device='cuda:0')

In [182]:
criterion = nn.CrossEntropyLoss()
ans_tgt = torch.nn.functional.one_hot(ans[1:].to(torch.int64), num_classes=cluster_size).to(torch.float32)
ans_tgt = ans_tgt.permute(1, 2, 0)
loss = criterion(ans_tgt, ans_tgt)
loss

tensor(2.4569, device='cuda:0')

In [166]:
softmax = nn.Softmax(dim=0)
softmax(ans[0].to(torch.float32))

tensor([3.7336e-04, 2.8310e-13, 3.7336e-04, 3.1046e-10, 1.2525e-07, 1.6950e-08,
        3.7336e-04, 3.7336e-04, 1.8588e-05, 1.5062e-01, 5.5411e-02, 5.0529e-05,
        2.8310e-13, 1.0415e-13, 3.7336e-04, 1.6950e-08, 5.0529e-05, 3.1046e-10,
        3.7336e-04, 3.7336e-04, 3.7336e-04, 3.7336e-04, 1.2525e-07, 5.0529e-05,
        4.0944e-01, 3.7336e-04, 3.7336e-04, 2.8310e-13, 1.2525e-07, 5.5411e-02,
        1.1421e-10, 3.1046e-10, 1.2525e-07, 3.1046e-10, 3.7336e-04, 1.8588e-05,
        5.5411e-02, 3.7336e-04, 3.7336e-04, 5.5411e-02, 2.8310e-13, 5.0529e-05,
        2.8310e-13, 1.0415e-13, 3.1046e-10, 3.7336e-04, 2.5157e-06, 3.1046e-10,
        3.7336e-04, 1.6950e-08, 5.5411e-02, 2.7588e-03, 5.0529e-05, 2.8310e-13,
        1.5062e-01, 3.1046e-10, 4.2016e-11, 5.0529e-05, 3.1046e-10, 1.2525e-07,
        3.7336e-04, 5.0529e-05, 4.6076e-08, 2.7588e-03], device='cuda:0')

In [161]:
test = ans[1:].permute(0,1).to(torch.float32)

In [162]:
criterion(test, test)

tensor(9377.8037, device='cuda:0')

In [144]:
softmax = nn.Softmax(dim=2)
softmax(ans_tgt)[0,]

tensor([0.0315, 0.0315, 0.0315, 0.0315, 0.0315, 0.0315, 0.0315, 0.0315, 0.0315,
        0.0315, 0.0315, 0.0315, 0.0315, 0.0315, 0.0315, 0.0315, 0.0315, 0.0315,
        0.0315, 0.0315, 0.0315, 0.0315, 0.0857, 0.0315, 0.0315, 0.0315, 0.0315,
        0.0315, 0.0315, 0.0315], device='cuda:0')

In [128]:
ans_tgt.permute(1, 2, 0).shape

torch.Size([8, 30, 10])

In [127]:
ans_tgt.shape

torch.Size([10, 8, 30])

In [167]:
preds_tgt = observation[-1:]
entire_tgt_time = (obs_time[-11:] + 10) % int(7 * 24 * (60//30))
preds_tgt_time = entire_tgt_time[:1]

for i in range(0, 10):
    mask_tgt = nn.Transformer.generate_square_subsequent_mask(preds_tgt.size(0)).to(device)

    out = model(
        src=observation, src_time=obs_time, tgt=preds_tgt, tgt_time=preds_tgt_time,
        mask_tgt=mask_tgt, padding_mask_src=None, padding_mask_tgt=None,
        memory_key_padding_mask=None
    )
    
    break

In [185]:
# (seq, chunk, classes)
out.argmax(dim=2)

tensor([[22, 22, 22, 22,  1,  0,  8, 22, 13,  8,  1,  8, 22, 22, 22, 17,  8, 22,
         22, 22, 22, 22,  1, 27, 10, 22, 22, 14,  1,  1, 10, 22,  1, 22, 22, 13,
          1, 22, 22,  1, 22,  8, 22, 14, 22, 22,  1, 22, 22, 17,  1, 23,  6, 22,
         27, 22,  0,  8, 22,  1, 22,  8,  8, 17]], device='cuda:0')

In [187]:
ans[1]

tensor([22, 12, 19, 22,  1,  0, 14, 27,  1,  8,  1, 27, 14,  8, 22,  0,  8, 22,
         8, 14, 22, 22, 28, 27, 23,  8,  8, 19, 12,  1, 29, 22, 22, 22, 20, 13,
         1, 20,  8, 19, 28,  0, 22, 12, 22, 20,  1,  8,  8,  7, 25, 29,  6,  8,
        12, 28, 17, 20, 22, 28, 20, 27,  8, 17], device='cuda:0',
       dtype=torch.int32)

In [175]:
ans_labels = torch.randint(0, 30, (10, batch_size))
ans_labels.dtype

torch.int64

In [176]:
ans_tgt.shape

torch.Size([64, 1])

In [178]:
ans_tgt.dtype

torch.int32

In [177]:
out.permute(1, 2, 0).shape

torch.Size([64, 30, 1])

In [179]:
criterion = nn.CrossEntropyLoss()
ans_tgt = ans[1:2, :].permute(1, 0)
loss = criterion(out.permute(1, 2, 0), ans_tgt.to(torch.int64))
loss

tensor(1.8204, device='cuda:0', grad_fn=<NllLoss2DBackward0>)

In [41]:
out.argmax(dim=2)

tensor([[38, 53, 21, 38, 57, 59,  0, 16]], device='cuda:0')

In [52]:
softmax = nn.Softmax(dim=2)
softmax(out).mean(dim=2)

tensor([[0.0167, 0.0167, 0.0167, 0.0167, 0.0167, 0.0167, 0.0167, 0.0167]],
       device='cuda:0', grad_fn=<MeanBackward1>)

In [53]:
criterion = nn.CrossEntropyLoss()

In [55]:
out_tgt = ans[:1].to(torch.int64)
out_tgt = torch.nn.functional.one_hot(out_tgt, num_classes=cluster_size).to(torch.float32)

In [78]:
out_tgt.shape

torch.Size([1, 8, 60])

In [85]:
total_loss = 0.0 
for chunk in range(8):
    loss = criterion(out[0, chunk, :], out_tgt[0, chunk, :])
    total_loss += loss

In [86]:
total_loss/8

tensor(7.3832, device='cuda:0', grad_fn=<DivBackward0>)

In [87]:
criterion(out.permute(1, 2, 0), out_tgt.permute(1, 2, 0))

tensor(7.3832, device='cuda:0', grad_fn=<DivBackward1>)

In [73]:
ans.shape

torch.Size([10, 8])

In [72]:
ans[0, :]

tensor([25, 11, 51, 38, 43, 22, 52, 44], device='cuda:0', dtype=torch.int32)

In [92]:
def batch_prediction(observation, obs_time, tgt_length=10, freq=30):
        
    preds_tgt = observation[-1:]
    entire_tgt_time = (obs_time[-(tgt_length+1):] + (tgt_length+1)) % (int(7 * 24 * (60/freq)))
    preds_tgt_time = obs_time[-1:]
    for i in range(0, 10):
        mask_tgt = nn.Transformer.generate_square_subsequent_mask(preds_tgt.size(0)).to(device)

        out = model(
            src=observation, src_time=obs_time, tgt=preds_tgt, tgt_time=preds_tgt_time,
            mask_tgt=mask_tgt, padding_mask_src=None, padding_mask_tgt=None,
            memory_key_padding_mask=None
        )
        
        preds_tgt_time = entire_tgt_time[:i+1]


In [45]:
out.shape

torch.Size([1, 8, 60])