In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from datetime import datetime, timedelta

import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, accuracy_score

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau

sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline

In [None]:
from google.colab import drive
drive.mount('/content/drive')

SBER_PATH = '/content/drive/MyDrive/Stock_Data/sber.csv'

def fix_stockdata(ticket):
    for i in range(len(ticket['Цена'])):

        price = ticket['Цена'].iloc[i]
        price = price.replace(',', '.')
        price = price.replace('.', '', price.count('.') - 1)
        ticket['Цена'].iloc[i] = price

        open = ticket['Откр.'].iloc[i]
        open = open.replace(',', '.')
        open = open.replace('.', '', open.count('.') - 1)
        ticket['Откр.'].iloc[i] = open

        max = ticket['Макс.'].iloc[i]
        max = max.replace(',', '.')
        max = max.replace('.', '', max.count('.') - 1)
        ticket['Макс.'].iloc[i] = max

        min = ticket['Мин.'].iloc[i]
        min = min.replace(',', '.')
        min = min.replace('.', '', min.count('.') - 1)
        ticket['Мин.'].iloc[i] = min

        volume = ticket['Объём'].iloc[i]
        volume = volume.replace(',', '.')
        volume = volume.replace('.', '', volume.count('.') - 1)
        if volume[len(volume) - 1] == 'K':
            volume = int(float(volume[:len(volume) - 1]) * 10**3)
        elif volume[len(volume) - 1] == 'M':
            volume = int(float(volume[:len(volume) - 1]) * 10**6)
        elif volume[len(volume) - 1] == 'B':
            volume = int(float(volume[:len(volume) - 1]) * 10**9)
        else:
            volume = int(float(volume))
        ticket['Объём'].iloc[i] = volume

        diff = ticket['Изм. %'].iloc[i]
        diff = diff.replace(',', '.')
        diff = float(diff[:len(diff) - 1])
        ticket['Изм. %'].iloc[i] = diff

    ticket['Цена'] = ticket['Цена'].astype(float)
    ticket['Откр.'] = ticket['Откр.'].astype(float)
    ticket['Макс.'] = ticket['Макс.'].astype(float)
    ticket['Мин.'] = ticket['Мин.'].astype(float)
    ticket['Объём'] = ticket['Объём'].astype(int)
    ticket['Изм. %'] = ticket['Изм. %'].astype(float)
    ticket['Дата'] = pd.to_datetime(ticket['Дата'], dayfirst=True).dt.normalize()

    ticket = ticket.set_index('Дата')

    return ticket

DATA = fix_stockdata(pd.read_csv(SBER_PATH))
DATA = DATA.rename(columns={'Дата' : 'date', 'Цена' : 'close', 'Откр.' : 'open', 'Макс.' : 'max', 'Мин.' : 'min', 'Объём' : 'volume', 'Изм. %' : 'return'})
DATA.index.rename('date', inplace=True)
DATA = DATA.sort_values(by='date', ascending=True)

Mounted at /content/drive


In [None]:
DATA

NameError: name 'DATA' is not defined

In [None]:
from google.colab import drive
drive.mount('/content/drive')
SBER_PATH = '/content/drive/MyDrive/Stock_Data/sber_data_complete.csv'
DATA = pd.read_csv(SBER_PATH)
DATA['date'] = pd.to_datetime(DATA['date']).dt.normalize()
DATA.index.rename('date', inplace=True)
DATA = DATA.set_index('date')
DATA['sentiment_index'] = ((1 + DATA['positive'])/(1 + DATA['negative'])).apply(lambda x: math.log(x))
DATA['sdi'] = abs(1 - abs((DATA['positive'] - DATA['negative'])/(DATA['positive'] + DATA['negative'])))
DATA = DATA.sort_values(by='date', ascending=True)

Mounted at /content/drive


In [None]:
#DATAFRAME_LOG = pd.DataFrame(columns=['datestart', 'dateend', 'predictors', 'window', 'oneoutput', 'n_features', 'd_model', 'n_head', 'n_layers', 'dropout', 'RMSE', 'precision', 'recall', 'accuracy', 'balance'])
#DATAFRAME_LOG = pd.read_csv('transformer_hyperparams_data_2.csv')

In [None]:
DATA

Unnamed: 0_level_0,close,open,max,min,volume,return,comments,sentiment_sum,positive,negative,sentiment_index,sdi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-11-02,204.50,200.45,204.59,196.15,82340000,1.75,108.0,-23.567652,34,74,-0.762140,0.629630
2020-11-03,209.20,205.70,211.11,203.80,90370000,2.30,849.0,-91.212983,338,511,-0.412325,0.796231
2020-11-05,216.70,214.40,218.65,211.43,152550000,3.59,1208.0,-136.892515,508,700,-0.320060,0.841060
2020-11-06,217.50,215.80,218.29,213.00,95000000,0.37,831.0,-129.184823,321,510,-0.461818,0.772563
2020-11-09,228.05,220.00,231.30,218.70,217540000,4.85,1037.0,-111.691360,443,594,-0.292737,0.854388
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-28,318.22,317.50,320.90,315.89,30190000,0.36,131.0,-16.074570,59,72,-0.196115,0.900763
2024-05-29,320.38,318.25,320.40,315.94,22990000,0.68,110.0,-30.876960,33,77,-0.830348,0.600000
2024-05-30,316.63,320.91,321.54,316.00,27010000,-1.17,151.0,-25.633767,58,93,-0.465757,0.768212
2024-05-31,313.11,316.00,318.44,309.84,49210000,-1.11,274.0,-38.080781,115,159,-0.321584,0.839416


In [None]:
px.line(DATA[['close']])

In [None]:
 px.imshow(DATA.corr())

In [None]:
all_predictors = ['close', 'open', 'max', 'min',
                  'volume', 'return', 'MA_7',
                  'MA_14', 'EMA_3', 'EMA_7',
                  'MACD', 'signal', 'histogram', 'comments', 'sentiment_sum', 'positive', 'negative', 'sentiment_index', 'sdi']


predictors = ['close','return', 'open', 'max', 'min',
                  'volume', 'MA_7',
                  'MA_14', 'EMA_3', 'EMA_7',
                  'MACD', 'signal', 'histogram', 'comments', 'sentiment_sum', 'sentiment_index', 'sdi']

In [None]:
len(predictors)

17

In [None]:
DATESTART = '2022-02-28'
DATEEND = '2024-04-20'

In [None]:
df = DATA.copy()
df = df[DATESTART:DATEEND]
#df["tomorrow"] = df["close"].shift(-1)
ma_day = [3, 7, 14]
ema_day = [3, 7]

for ma in ma_day:
      column_name = f"MA_{ma}"
      df[column_name] = df['close'].rolling(ma).mean()
for ema in ema_day:
      column_name = f"EMA_{ema}"
      df[column_name] = df['close'].ewm(span=ema, adjust=False).mean()

macd = df['EMA_3'] - df['EMA_7']
signal = macd.ewm(span=5, adjust=False).mean()
histogram = macd - signal

df['MACD'] = macd
df['signal'] = signal
df['histogram'] = histogram

df = df.dropna()
df = df[predictors]

In [None]:
DATA = DATA.copy()
#DATA = DATA[DATESTART:DATEEND]
#DATA["tomorrow"] = DATA["close"].shift(-1)
ma_day = [3, 7, 14]
ema_day = [3, 7]

for ma in ma_day:
      column_name = f"MA_{ma}"
      DATA[column_name] = DATA['close'].rolling(ma).mean()
for ema in ema_day:
      column_name = f"EMA_{ema}"
      DATA[column_name] = DATA['close'].ewm(span=ema, adjust=False).mean()

macd = DATA['EMA_3'] - DATA['EMA_7']
signal = macd.ewm(span=5, adjust=False).mean()
histogram = macd - signal

DATA['MACD'] = macd
DATA['signal'] = signal
DATA['histogram'] = histogram

DATA = DATA.dropna()
DATA = DATA[predictors]

In [None]:
df

Unnamed: 0_level_0,close,return,open,max,min,volume,MA_7,MA_14,EMA_3,EMA_7,MACD,signal,histogram,comments,sentiment_sum,sentiment_index,sdi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-04-12,135.38,-2.39,138.84,140.10,130.35,75390000,146.712857,141.520714,138.817620,142.035185,-3.217565,-0.658946,-2.558619,217.0,-59.712625,-0.797772,0.617512
2022-04-13,135.50,0.09,136.78,138.14,132.72,39090000,142.355714,141.467857,137.158810,140.401389,-3.242579,-1.520157,-1.722422,124.0,-24.479974,-0.729079,0.645161
2022-04-14,129.05,-4.76,135.85,135.89,129.05,44370000,138.777143,141.292857,133.104405,137.563542,-4.459137,-2.499817,-1.959320,236.0,-30.954427,-0.408968,0.796610
2022-04-15,130.88,1.42,128.55,131.72,125.12,46770000,137.245714,141.712857,131.992202,135.892656,-3.900454,-2.966696,-0.933758,135.0,-27.803446,-0.491408,0.755556
2022-04-18,123.85,-5.37,131.41,132.68,123.70,58500000,133.867143,141.361429,127.921101,132.881992,-4.960891,-3.631428,-1.329463,265.0,-44.141036,-0.465137,0.769811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-15,307.99,0.29,307.47,308.39,306.64,20750000,307.018571,303.863571,307.454372,306.450269,1.004103,1.287268,-0.283165,66.0,-10.840833,-0.479573,0.757576
2024-04-16,308.29,0.10,308.00,308.60,307.00,17140000,307.331429,304.805714,307.872186,306.910202,0.961984,1.178840,-0.216856,82.0,-9.796181,-0.587787,0.707317
2024-04-17,306.59,-0.55,308.65,309.72,305.91,28230000,307.165714,305.347857,307.231093,306.830151,0.400942,0.919541,-0.518599,83.0,-8.397385,-0.260283,0.867470
2024-04-18,307.99,0.46,306.01,308.29,305.12,20390000,307.341429,306.010000,307.610547,307.120114,0.490433,0.776505,-0.286072,76.0,-2.032087,0.000000,1.000000


In [None]:
DATA[df.index[-1]:]

Unnamed: 0_level_0,close,return,open,max,min,volume,MA_7,MA_14,EMA_3,EMA_7,MACD,signal,histogram,comments,sentiment_sum,sentiment_index,sdi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2024-04-19,307.38,-0.2,308.0,308.36,306.8,14580000,307.47,306.506429,307.495273,307.185085,0.310188,0.621066,-0.310878,75.0,-7.923233,-0.23484,0.88
2024-04-22,314.99,2.48,308.0,315.0,307.38,69010000,308.618571,307.55,311.242637,309.136314,2.106323,1.116151,0.990171,279.0,-41.944102,-0.367056,0.817204
2024-04-23,307.39,-2.41,315.39,315.79,306.26,92860000,308.66,307.597857,309.316318,308.699735,0.616583,0.949629,-0.333046,483.0,-54.313362,-0.294902,0.853002
2024-04-24,307.94,0.18,307.5,309.79,306.65,26560000,308.652857,307.835714,308.628159,308.509802,0.118358,0.672538,-0.554181,196.0,-30.871346,-0.516216,0.744898
2024-04-25,308.41,0.15,307.95,309.07,307.45,15950000,308.67,308.000714,308.51908,308.484851,0.034228,0.459768,-0.42554,123.0,-17.048091,-0.438913,0.780488
2024-04-26,309.0,0.19,308.5,309.9,308.0,21150000,309.014286,308.09,308.75954,308.613638,0.145901,0.355146,-0.209245,132.0,-3.612648,-0.119545,0.939394
2024-04-27,308.98,-0.01,309.25,309.99,308.7,13110000,309.155714,308.248571,308.86977,308.705229,0.164541,0.291611,-0.12707,47.0,-0.750558,-0.122602,0.93617
2024-04-29,308.97,0.0,309.14,309.6,307.65,10030000,309.382857,308.426429,308.919885,308.771422,0.148463,0.243895,-0.095432,26.0,-2.879083,0.0,1.0
2024-04-30,308.24,-0.24,309.19,309.49,308.03,5980000,308.418571,308.518571,308.579942,308.638566,-0.058624,0.143056,-0.201679,43.0,-6.1901,-0.313658,0.837209
2024-05-02,307.37,-0.28,308.7,309.13,306.8,16260000,308.415714,308.537857,307.974971,308.321425,-0.346453,-0.020114,-0.326339,86.0,-24.809795,-0.710242,0.651163


In [None]:
DATA

Unnamed: 0_level_0,close,return,open,max,min,volume,MA_7,MA_14,EMA_3,EMA_7,MACD,signal,histogram,comments,sentiment_sum,sentiment_index,sdi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2020-11-20,239.42,-0.35,239.58,240.77,236.57,48800000,243.022857,232.672143,240.757507,239.958005,0.799503,3.350598,-2.551095,516.0,-58.052157,-0.343095,0.829457
2020-11-23,239.70,0.12,240.45,243.87,238.52,74830000,242.682857,235.186429,240.228754,239.893504,0.335250,2.345482,-2.010232,417.0,-59.901303,-0.451427,0.776978
2020-11-24,247.33,3.18,241.65,247.85,239.57,84520000,243.302857,237.910000,243.779377,241.752628,2.026749,2.239238,-0.212488,648.0,-105.930981,-0.509185,0.750000
2020-11-25,251.91,1.85,248.00,252.20,246.55,133400000,243.862857,240.425000,247.844688,244.291971,3.552718,2.677064,0.875653,905.0,-114.127359,-0.336094,0.833149
2020-11-26,250.10,-0.72,252.30,252.88,247.44,79920000,244.491429,242.753571,248.972344,245.743978,3.228366,2.860832,0.367535,872.0,-84.338924,-0.290352,0.855505
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-28,318.22,0.36,317.50,320.90,315.89,30190000,320.642857,319.138571,318.852218,319.610953,-0.758735,0.324624,-1.083359,131.0,-16.074570,-0.196115,0.900763
2024-05-29,320.38,0.68,318.25,320.40,315.94,22990000,320.542857,319.793571,319.616109,319.803215,-0.187106,0.154047,-0.341153,110.0,-30.876960,-0.830348,0.600000
2024-05-30,316.63,-1.17,320.91,321.54,316.00,27010000,319.970000,320.017857,318.123055,319.009911,-0.886856,-0.192921,-0.693936,151.0,-25.633767,-0.465757,0.768212
2024-05-31,313.11,-1.11,316.00,318.44,309.84,49210000,318.567143,319.893571,315.616527,317.534933,-1.918406,-0.768082,-1.150324,274.0,-38.080781,-0.321584,0.839416


https://github.com/jeffheaton/app_deep_learning/blob/main/t81_558_class_10_3_transformer_timeseries.ipynb

In [None]:
try:
    import google.colab
    COLAB = True
    print("Note: using Google CoLab")
except:
    print("Note: not using Google CoLab")
    COLAB = False

# Make use of a GPU or MPS (Apple) if one is available.  (see module 3.2)
import torch
has_mps = torch.backends.mps.is_built()
device = "mps" if has_mps else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Note: using Google CoLab
Using device: cuda


In [None]:
#DATAFRAME_LOG.sort_values(by='accuracy', ascending=False)

In [None]:
#DATAFRAME_LOG.loc[530]

In [None]:
#===============<HYPERPARAMETERS>===============
PREDICTORS = df.columns.tolist()
num_features = df.shape[1]
WINDOW = 30
ONE_OUTPUT = True
N_HEAD = 16
N_LAYERS = 3
D_MODEL = 2048
DROPOUT = 0.3
batch_size = 32

In [None]:
if ONE_OUTPUT:
  # Data Preprocessing
  #start_id = max(df[df['obs_num'] == 0].index.tolist()) + 1
  #df = df[start_id:].copy()
  df['close'] = df['close'].astype(float)
  df_train = df[:int(len(df)*0.85)]
  df_test = df[int(len(df)*0.85):]

  print(df.shape)
  print(df_train.shape)
  data_train = df_train.to_numpy().reshape(-1, num_features)
  data_test = df_test.to_numpy().reshape(-1, num_features)
  target_train = data_train[:,0].reshape(-1, 1)
  target_test = data_test[:,0].reshape(-1, 1)

  scaler_X = MinMaxScaler()
  scaler_Y = MinMaxScaler()
  data_train_scaled = scaler_X.fit_transform(data_train)
  data_test_scaled = scaler_X.transform(data_test)
  target_train_scaled = scaler_Y.fit_transform(target_train)
  target_test_scaled = scaler_Y.transform(target_test)

  # Sequence Data Preparation
  SEQUENCE_SIZE = WINDOW

  def to_sequences(seq_size, obs_x, obs_y):
      x = []
      y = []
      for i in range(len(obs_x) - seq_size):
          window = obs_x[i:(i + seq_size), :]
          after_window = obs_y[i + seq_size]
          x.append(window)
          y.append(after_window)
      return torch.tensor(x, dtype=torch.float32).view(-1, seq_size, num_features), torch.tensor(y, dtype=torch.float32).view(-1, 1)

  x_train, y_train = to_sequences(SEQUENCE_SIZE, data_train_scaled, target_train_scaled)
  x_test, y_test = to_sequences(SEQUENCE_SIZE, data_test_scaled, target_test_scaled)

  # Setup data loaders for batch
  train_dataset = TensorDataset(x_train, y_train)
  train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False) # ATTENTION! BATCH SIZE IS VERY IMPORTANT FOR VALIDATION OF ALL RESEARCH

  test_dataset = TensorDataset(x_test, y_test)
  test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False) # ATTENTION! BATCH SIZE


else:
    # Data Preprocessing
  #start_id = max(df[df['obs_num'] == 0].index.tolist()) + 1
  #df = df[start_id:].copy()
  df['close'] = df['close'].astype(float)
  df_train = df[:int(len(df)*0.85)]
  df_test = df[int(len(df)*0.85):]
  print(df.shape)
  print(df_train.shape)
  spots_train = df_train.to_numpy().reshape(-1, num_features)
  spots_test = df_test.to_numpy().reshape(-1, num_features)
  print(spots_train.shape)
  scaler = StandardScaler()
  spots_train = scaler.fit_transform(spots_train)
  spots_test = scaler.transform(spots_test)

  # Sequence Data Preparation
  SEQUENCE_SIZE = WINDOW

  def to_sequences(seq_size, obs):
      x = []
      y = []
      for i in range(len(obs) - seq_size):
          window = obs[i:(i + seq_size), :]
          after_window = obs[i + seq_size, :]
          x.append(window)
          y.append(after_window)
      return torch.tensor(x, dtype=torch.float32).view(-1, seq_size, num_features), torch.tensor(y, dtype=torch.float32).view(-1, num_features)

  x_train, y_train = to_sequences(SEQUENCE_SIZE, spots_train)
  x_test, y_test = to_sequences(SEQUENCE_SIZE,spots_test)

  # Setup data loaders for batch
  train_dataset = TensorDataset(x_train, y_train)
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

  test_dataset = TensorDataset(x_test, y_test)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

(513, 17)
(436, 17)


  return torch.tensor(x, dtype=torch.float32).view(-1, seq_size, num_features), torch.tensor(y, dtype=torch.float32).view(-1, 1)


In [None]:
class DirectionalLoss(nn.Module): # for batched data
    def __init__(self, penalty=25.0):
        super().__init__()
        self.penalty = penalty

    def forward(self, y_pred, y_true):
        abs_diff = torch.abs(y_pred - y_true)**2

        # вычисляем направление изменения
        y_true_diff = y_true[1:] - y_true[:-1]
        y_pred_diff = y_pred[1:] - y_pred[:-1]
        direction_diff = (y_true_diff * y_pred_diff < 0).float()

        # вычисляем функцию потерь с учетом штрафа
        loss = torch.mean(abs_diff) + self.penalty * torch.mean(direction_diff**(1.7))

        return loss

class DirectionalLossBinary(nn.Module): # for single value binary
    def __init__(self, penalty=10.0):
        super().__init__()
        self.penalty = penalty

    def forward(self, y_pred, y_true):
        se = (y_pred - y_true)**2
        direction_diff = (y_true * y_pred < 0).float()

        loss = torch.mean(se) + self.penalty * torch.mean(direction_diff)

        return loss

# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

# Model definition using Transformer
class TransformerModel(nn.Module):
    def __init__(self, input_dim=num_features, d_model=D_MODEL, nhead=N_HEAD, num_layers=N_LAYERS, dropout=DROPOUT, batch_first=True):
        super(TransformerModel, self).__init__()

        self.encoder = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, 1) if ONE_OUTPUT else nn.Linear(d_model, num_features)

    def forward(self, x):
        x = self.encoder(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = self.decoder(x[:, -1, :])
        return x

In [None]:
model = TransformerModel().to(device)


enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.self_attn.batch_first was not True(use batch_first for better inference performance)



In [None]:
#model.load_state_dict(torch.load('sber_01_01_2016_by_01_01_2019.pth', map_location=torch.device('cuda')))
model.load_state_dict(torch.load('/content/drive/MyDrive/models/h16_d2048_complete_2024-04-20.pth', map_location=torch.device('cpu')))

<All keys matched successfully>

best: (accuracy, precision, recall): 0.5925925925925926 0.6585365853658537 0.6304347826086957

In [None]:
best_ac, best_prc, best_rec = 0, 0, 0

while True:
#for _ in range(1):
  # Train the model
  criterion = nn.MSELoss()

  criterion = DirectionalLoss()

  #criterion = DirectionalLossBinary()

  optimizer = torch.optim.Adam(model.parameters(), lr=0.00001) # 0.00001 is optimal for more than 1 layer in encoder
  scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=3, verbose=True)

  epochs = 100
  early_stop_count = 0
  min_val_loss = float('inf')

  for epoch in range(epochs):
      model.train()
      for batch in train_loader:
          x_batch, y_batch = batch
          x_batch, y_batch = x_batch.to(device), y_batch.to(device)

          optimizer.zero_grad()
          outputs = model(x_batch)
          loss = criterion(outputs, y_batch)
          loss.backward()
          optimizer.step()

      # Validation
      model.eval()
      val_losses = []
      predictions = []
      with torch.no_grad():
          for batch in test_loader:
              x_batch, y_batch = batch
              x_batch, y_batch = x_batch.to(device), y_batch.to(device)
              outputs = model(x_batch)
              if len(x_batch) > 1:
                  predictions.extend(outputs.squeeze().tolist())
                  #loss = criterion(outputs, y_batch)
                  #val_losses.append(loss.item())
              else:
                  predictions.append(outputs.item())
      loss = criterion(outputs, y_batch)
      val_losses.append(loss.item())
      val_loss = np.mean(val_losses)
      scheduler.step(val_loss)

      print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {val_loss:.4f}")
      preds = scaler_Y.inverse_transform(np.array(predictions).reshape(-1, 1))
      actual = scaler_Y.inverse_transform(y_test.numpy().reshape(-1, 1))

      rmse = np.sqrt(np.mean((preds[:,0] - actual[:,0])**2))
      print(f"Score (RMSE): {rmse:.4f}")
      valid = df[['close', 'return']][len(df) - len(preds[:,0]):]
      valid['preds'] = preds[:,0]
      valid["target"] = valid["close"].shift(-1)
      valid["target"] = (valid["target"] > valid["close"]).astype(int)
      valid['tomorrow'] = valid['preds'].shift(-1)
      valid["tomorrow"] = (valid["tomorrow"] > valid["preds"]).astype(int)
      valid["naive"] = (valid["return"] < 0).astype(int)
      print("precision:",precision_score(valid["target"], valid["tomorrow"]))
      print("recall:", recall_score(valid["target"], valid["tomorrow"]))
      print("accuracy:", accuracy_score(valid["target"], valid["tomorrow"]))
      print('='*50)
      if accuracy_score(valid["target"], valid["tomorrow"]) >= 0.60 and precision_score(valid["target"], valid["tomorrow"]) >= 0.60 and recall_score(valid["target"], valid["tomorrow"]) >= 0.60 and rmse < 2:
        break

      if val_loss < min_val_loss:
          min_val_loss = val_loss
          early_stop_count = 0
      else:
          early_stop_count += 1

      if early_stop_count >= 10:
          #print("Early stopping!")
          break



  # Evaluation
  model.eval()
  predictions = []
  with torch.no_grad():
      for batch in test_loader:

          x_batch, y_batch = batch
          x_batch = x_batch.to(device)
          outputs = model(x_batch)
          if len(x_batch) > 1:
            predictions.extend(outputs.squeeze().tolist())
          else:
            predictions.append(outputs.item())

  preds = scaler_Y.inverse_transform(np.array(predictions).reshape(-1, 1)) if ONE_OUTPUT else scaler.inverse_transform(np.array(predictions).reshape(-1, num_features))
  actual = scaler_Y.inverse_transform(y_test.numpy().reshape(-1, 1)) if ONE_OUTPUT else scaler.inverse_transform(y_test.numpy().reshape(-1, num_features))

  rmse = np.sqrt(np.mean((preds[:,0] - actual[:,0])**2))
  print(f"Score (RMSE): {rmse:.4f}")
  valid = df[['close', 'return']][len(df) - len(preds[:,0]):]
  valid['preds'] = preds[:,0]
  valid["target"] = valid["close"].shift(-1)
  valid["target"] = (valid["target"] > valid["close"]).astype(int)
  valid['tomorrow'] = valid['preds'].shift(-1)
  valid["tomorrow"] = (valid["tomorrow"] > valid["preds"]).astype(int)
  valid["naive"] = (valid["return"] < 0).astype(int)



  print("precision:",precision_score(valid["target"], valid["tomorrow"]))
  print("recall:", recall_score(valid["target"], valid["tomorrow"]))
  print("accuracy:", accuracy_score(valid["target"], valid["tomorrow"]))
  '''print(valid['target'].value_counts())
  print(valid['tomorrow'].value_counts())'''
  print('='*50)
  best_ac = max(best_ac, accuracy_score(valid["target"], valid["tomorrow"]))
  best_prc = max(best_prc, precision_score(valid["target"], valid["tomorrow"]))
  best_rec = max(best_rec, recall_score(valid["target"], valid["tomorrow"]))
  print('best: (accuracy, precision, recall):', best_ac, best_prc, best_rec)
  if accuracy_score(valid["target"], valid["tomorrow"]) >= 0.60 and precision_score(valid["target"], valid["tomorrow"]) >= 0.60 and recall_score(valid["target"], valid["tomorrow"]) >= 0.60 and rmse < 2:
    break


The verbose parameter is deprecated. Please use get_last_lr() to access the learning rate.



Epoch 1/100, Validation Loss: nan
Score (RMSE): 9.7362
precision: 0.7037037037037037
recall: 0.6785714285714286
accuracy: 0.6382978723404256
Epoch 2/100, Validation Loss: nan
Score (RMSE): 46.5505
precision: 0.7037037037037037
recall: 0.6785714285714286
accuracy: 0.6382978723404256
Epoch 3/100, Validation Loss: nan
Score (RMSE): 35.5098
precision: 0.7037037037037037
recall: 0.6785714285714286
accuracy: 0.6382978723404256
Epoch 4/100, Validation Loss: nan
Score (RMSE): 31.8764
precision: 0.7037037037037037
recall: 0.6785714285714286
accuracy: 0.6382978723404256
Epoch 5/100, Validation Loss: nan
Score (RMSE): 35.8806
precision: 0.7037037037037037
recall: 0.6785714285714286
accuracy: 0.6382978723404256
Epoch 6/100, Validation Loss: nan
Score (RMSE): 32.5196
precision: 0.7037037037037037
recall: 0.6785714285714286
accuracy: 0.6382978723404256
Epoch 7/100, Validation Loss: nan
Score (RMSE): 37.5950
precision: 0.72
recall: 0.6428571428571429
accuracy: 0.6382978723404256
Epoch 8/100, Validati


The verbose parameter is deprecated. Please use get_last_lr() to access the learning rate.



Epoch 1/100, Validation Loss: nan
Score (RMSE): 57.5226
precision: 0.6363636363636364
recall: 0.75
accuracy: 0.5957446808510638
Epoch 2/100, Validation Loss: nan
Score (RMSE): 51.7948
precision: 0.5
recall: 0.5714285714285714
accuracy: 0.40425531914893614
Epoch 3/100, Validation Loss: nan
Score (RMSE): 25.8112
precision: 0.5151515151515151
recall: 0.6071428571428571
accuracy: 0.425531914893617
Epoch 4/100, Validation Loss: nan
Score (RMSE): 51.9358
precision: 0.5757575757575758
recall: 0.6785714285714286
accuracy: 0.5106382978723404
Epoch 5/100, Validation Loss: nan
Score (RMSE): 7.5431
precision: 0.65625
recall: 0.75
accuracy: 0.6170212765957447
Epoch 6/100, Validation Loss: nan
Score (RMSE): 3.6631
precision: 0.6666666666666666
recall: 0.7142857142857143
accuracy: 0.6170212765957447
Epoch 7/100, Validation Loss: nan
Score (RMSE): 23.6029
precision: 0.6896551724137931
recall: 0.7142857142857143
accuracy: 0.6382978723404256


KeyboardInterrupt: 

In [None]:
print("precision:",precision_score(valid["target"], valid["naive"]))
print("recall:", recall_score(valid["target"], valid["naive"]))
print("accuracy:", accuracy_score(valid["target"], valid["naive"]))
print(valid['target'].value_counts())
print(valid['naive'].value_counts())

precision: 0.7368421052631579
recall: 0.5
accuracy: 0.5957446808510638
target
1    28
0    19
Name: count, dtype: int64
naive
0    28
1    19
Name: count, dtype: int64


In [None]:
print("precision:",precision_score(valid["target"][:], valid["tomorrow"][:]))
print("recall:", recall_score(valid["target"][:], valid["tomorrow"][:]))
print("accuracy:", accuracy_score(valid["target"][:], valid["tomorrow"][:]))
print(valid['target'][:].value_counts())
print(valid['tomorrow'][:].value_counts())

precision: 0.6896551724137931
recall: 0.7142857142857143
accuracy: 0.6382978723404256
target
1    28
0    19
Name: count, dtype: int64
tomorrow
1    29
0    18
Name: count, dtype: int64


In [None]:
#torch.save(model.state_dict(), '/content/drive/MyDrive/models/h16_d2048_complete_2024-04-20.pth')

In [None]:
#valid['preds'] -= 37

In [None]:
model.eval()
val_losses = []
predictions = []
with torch.no_grad():
    for batch in test_loader:
        x_batch, y_batch = batch
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        outputs = model(x_batch)
        if len(x_batch) > 1:
            predictions.extend(outputs.squeeze().tolist())
            #loss = criterion(outputs, y_batch)
            #val_losses.append(loss.item())
        else:
            predictions.append(outputs.item())
loss = criterion(outputs, y_batch)
val_losses.append(loss.item())
val_loss = np.mean(val_losses)
scheduler.step(val_loss)

print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {val_loss:.4f}")
preds = scaler_Y.inverse_transform(np.array(predictions).reshape(-1, 1))
actual = scaler_Y.inverse_transform(y_test.numpy().reshape(-1, 1))

rmse = np.sqrt(np.mean((preds[:,0] - actual[:,0])**2))
print(f"Score (RMSE): {rmse:.4f}")
valid = df[['close', 'return']][len(df) - len(preds[:,0]):]
valid['preds'] = preds[:,0]
valid["target"] = valid["close"].shift(-1)
valid["target"] = (valid["target"] > valid["close"]).astype(int)
valid['tomorrow'] = valid['preds'].shift(-1)
valid["tomorrow"] = (valid["tomorrow"] > valid["preds"]).astype(int)
valid["naive"] = (valid["return"] < 0).astype(int)

Epoch 8/100, Validation Loss: nan
Score (RMSE): 38.5495


In [None]:
px.line(valid[['close','preds']])

In [None]:
valid.shape

(47, 6)

In [None]:
'''['datestart', 'dateend', 'predictors',
 'oneoutput', 'n_features', 'd_model',
 'n_head', 'n_layers', 'dropout', 'RMSE',
 'precision', 'recall', 'accuracy', 'balance']'''
'''DATAFRAME_LOG.loc[len(DATAFRAME_LOG)] = [DATESTART, DATEEND, PREDICTORS, ONE_OUTPUT,
                                         num_features, D_MODEL, N_HEAD,
                                         N_LAYERS, DROPOUT, rmse,
                                         precision_score(valid["target"], valid["tomorrow"]),
                                         recall_score(valid["target"], valid["tomorrow"]),
                                         accuracy_score(valid["target"], valid["tomorrow"]),
                                         valid['target'].value_counts().values[0]/(valid['target'].value_counts().values[0]+valid['tomorrow'].value_counts().values[1])]'''

'DATAFRAME_LOG.loc[len(DATAFRAME_LOG)] = [DATESTART, DATEEND, PREDICTORS, ONE_OUTPUT,\n                                         num_features, D_MODEL, N_HEAD,\n                                         N_LAYERS, DROPOUT, rmse,\n                                         precision_score(valid["target"], valid["tomorrow"]),\n                                         recall_score(valid["target"], valid["tomorrow"]),\n                                         accuracy_score(valid["target"], valid["tomorrow"]),\n                                         valid[\'target\'].value_counts().values[0]/(valid[\'target\'].value_counts().values[0]+valid[\'tomorrow\'].value_counts().values[1])]'

In [None]:
model.eval()
x_set = df[-(valid.shape[0] + 30):]
x_set = scaler_X.transform(x_set.to_numpy().reshape(-1, num_features))

def to_sequences(seq_size, obs):
      x = []
      for i in range(len(obs) - seq_size + 1): #add one day for predict (+1)
          window = obs[i:(i + seq_size), :]
          x.append(window)
      return torch.tensor(x, dtype=torch.float32).view(-1, seq_size, num_features)

x_set = to_sequences(30, x_set)
x_set = DataLoader(x_set, batch_size=1, shuffle=False)

In [None]:
predict = []
with torch.no_grad():
  for batch in x_set:
    batch = batch.to(device)
    outputs = model(batch)
    if len(batch) > 1:
      predict.extend(outputs.squeeze().tolist())
    else:
      predict.append(outputs.item())

predict = scaler_Y.inverse_transform(np.array(predict).reshape(-1, 1))

In [None]:
validation = df[['close']][-(valid.shape[0]):]
validation['predict'] = predict[:-1] - 30
#validation['predict'] = validation['predict'].shift(1)
validation.loc[validation.index[-1] + timedelta(days=1)] = [np.nan, predict[-1][0] - 30]
px.line(validation[['close', 'predict']])


In [None]:
validation["target"] = validation["close"].shift(-1)
validation["target"] = (validation["target"] > validation["close"]).astype(int)
validation['tomorrow'] = validation['predict'].shift(-1)
validation["tomorrow"] = (validation["tomorrow"] > validation["predict"]).astype(int)
print("precision:",precision_score(validation["target"][:], validation["tomorrow"][:]))
print("recall:", recall_score(validation["target"][:], validation["tomorrow"][:]))
print("accuracy:", accuracy_score(validation["target"][:], validation["tomorrow"][:]))
print(validation['target'][:].value_counts())
print(validation['tomorrow'][:].value_counts())

precision: 0.6896551724137931
recall: 0.7142857142857143
accuracy: 0.6458333333333334
target
1    28
0    20
Name: count, dtype: int64
tomorrow
1    29
0    19
Name: count, dtype: int64


In [None]:
model.eval()
x_set = DATA[df.index[-31]:]
x_set = scaler_X.transform(x_set.to_numpy().reshape(-1, num_features))

def to_sequences(seq_size, obs):
      x = []
      for i in range(len(obs) - seq_size): #add one day for predict (+1)
          window = obs[i:(i + seq_size), :]
          x.append(window)
      return torch.tensor(x, dtype=torch.float32).view(-1, seq_size, num_features)

x_set = to_sequences(30, x_set)
x_set = DataLoader(x_set, batch_size=1, shuffle=False)

In [None]:
predict = []
with torch.no_grad():
  for batch in x_set:
    batch = batch.to(device)
    outputs = model(batch)
    if len(batch) > 1:
      predict.extend(outputs.squeeze().tolist())
    else:
      predict.append(outputs.item())

predict = scaler_Y.inverse_transform(np.array(predict).reshape(-1, 1))

In [None]:
validation = DATA[['close']][df.index[-1]:]
validation['predict'] = predict[:] - 30
#validation['predict'] = validation['predict'].shift(1)
#validation.loc[validation.index[-1] + timedelta(days=1)] = [np.nan, predict[-1][0]]
px.line(validation[['close', 'predict']][:7])

In [None]:
validation["target"] = validation["close"].shift(-1)
validation["target"] = (validation["target"] > validation["close"]).astype(int)
validation['tomorrow'] = validation['predict'].shift(-1)
validation["tomorrow"] = (validation["tomorrow"] > validation["predict"]).astype(int)
validation["naive"] = (validation["target"].shift(1) > 0).astype(int)
print("precision:",precision_score(validation["target"][:7], validation["tomorrow"][:7]))
print("recall:", recall_score(validation["target"][:7], validation["tomorrow"][:7]))
print("accuracy:", accuracy_score(validation["target"][:7], validation["tomorrow"][:7]))
print(validation['target'][:7].value_counts())
print(validation['tomorrow'][:7].value_counts())
print("precision:",precision_score(validation["target"][:7], validation["naive"][:7]))
print("recall:", recall_score(validation["target"][:7], validation["naive"][:7]))
print("accuracy:", accuracy_score(validation["target"][:7], validation["naive"][:7]))
print(validation['target'][:7].value_counts())
print(validation['naive'][:7].value_counts())

precision: 0.6
recall: 0.75
accuracy: 0.5714285714285714
target
1    4
0    3
Name: count, dtype: int64
tomorrow
1    5
0    2
Name: count, dtype: int64
precision: 0.5
recall: 0.5
accuracy: 0.42857142857142855
target
1    4
0    3
Name: count, dtype: int64
naive
1    4
0    3
Name: count, dtype: int64


In [None]:
px.line(validation[["target", "tomorrow"]])

In [None]:
px.line(validation[["target", "naive"]])