In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import yfinance as yf
import talib
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split
import torch.optim as optim
import os
from sklearn.model_selection import TimeSeriesSplit

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, log_loss, mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as sch
import seaborn as sns

import optuna
from optuna.samplers import TPESampler
from optuna.trial import TrialState
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau 
import shap
import plotly.graph_objs as go
import plotly.offline as pyo
from tqdm.auto import tqdm
from sklearn.utils.class_weight import compute_class_weight
import torch.nn.functional as F
import math


In [2]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    print("gpu")
else:
    device = torch.device('cpu')
print(torch.__version__)
print('CUDA available:', torch.cuda.is_available())
print('CUDA version:', torch.version.cuda)
print('cuDNN version:', torch.backends.cudnn.version())

gpu
2.1.2+cu121
CUDA available: True
CUDA version: 12.1
cuDNN version: 8902


In [3]:
start_date = '2018-01-01'
end_date = '2024-01-24'

stock_data = yf.download("AAPL", start=start_date, end=end_date)[["Adj Close", "High", "Low", "Volume"]]

stock_data = stock_data.reset_index()

stock_data = stock_data[['Date', 'Adj Close', "High", "Low", "Volume"]]

stock_data = stock_data.sort_values(by="Date")
stock_data.head(45)

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0,Date,Adj Close,High,Low,Volume
0,2018-01-02,40.670982,43.075001,42.314999,102223600
1,2018-01-03,40.663895,43.637501,42.990002,118071600
2,2018-01-04,40.852772,43.3675,43.02,89738400
3,2018-01-05,41.317894,43.842499,43.262501,94640000
4,2018-01-08,41.16444,43.9025,43.482498,82271200
5,2018-01-09,41.159706,43.764999,43.352501,86336000
6,2018-01-10,41.150269,43.575001,43.25,95839600
7,2018-01-11,41.384018,43.872501,43.622501,74670800
8,2018-01-12,41.811348,44.34,43.912498,101672400
9,2018-01-16,41.598869,44.8475,44.035,118263600


In [4]:
time_step = 110

In [5]:
test_index = int((len(stock_data)-time_step)*0.8+time_step+time_step)

In [6]:
date = stock_data["Date"].iloc[time_step:].dt.strftime('%Y-%m-%d')
date_test = stock_data["Date"].iloc[test_index:].reset_index()
date_test.drop(columns=["index"], inplace=True)
date_test

Unnamed: 0,Date
0,2023-05-16
1,2023-05-17
2,2023-05-18
3,2023-05-19
4,2023-05-22
...,...
168,2024-01-17
169,2024-01-18
170,2024-01-19
171,2024-01-22


In [7]:
def add_technical_indicators(data, timeperiod=time_step):

    # MACD
    macd, macdsignal, macdhist = talib.MACD(data["Adj Close"], fastperiod=12, slowperiod=26, signalperiod=9)

    rsi = talib.RSI(data["Adj Close"], timeperiod=14)

    # CMO
    cmo = talib.CMO(data["Adj Close"], timeperiod=timeperiod)

    # MOM
    mom = talib.MOM(data["Adj Close"], timeperiod=timeperiod)

    # Bollinger Bands
    upperband, middleband, lowerband = talib.BBANDS(data["Adj Close"], timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # SMA
    sma_s = talib.SMA(data["Adj Close"], timeperiod=20)
    sma_l = talib.SMA(data["Adj Close"], timeperiod=50)

    # Calculate Exponential Moving Average (EMA)
    ema = talib.EMA(data["Adj Close"], timeperiod=timeperiod)

    # Calculate Stochastic Oscillator
    slowk, slowd = talib.STOCH(data['High'], data['Low'], data['Adj Close'], fastk_period=14, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)

    # Calculate Average True Range (ATR)
    atr = talib.ATR(data['High'], data['Low'], data['Adj Close'], timeperiod=14)

    # Calculate On-Balance Volume (OBV)
    obv = talib.OBV(data['Adj Close'], data['Volume'])

    # Combine all indicators into a DataFrame
    indicators = pd.DataFrame({
        'MACD': macd,
        'MACD_signal': macdsignal,
        'RSI': rsi,
        'CMO': cmo,
        'MOM': mom,
        'Upper_BB': upperband,
        'Middle_BB': middleband,
        'Lower_BB': lowerband,
        'SMA_SHORT': sma_s,
        'SMA_LONG': sma_l,
        'EMA': ema,
        'SLOWK': slowk,
        'SLOWD': slowd,
        'ATR': atr,
        'OBV': obv,

    })
    return indicators

In [8]:
indicators = add_technical_indicators(stock_data)
indicators.head(45)

Unnamed: 0,MACD,MACD_signal,RSI,CMO,MOM,Upper_BB,Middle_BB,Lower_BB,SMA_SHORT,SMA_LONG,EMA,SLOWK,SLOWD,ATR,OBV
0,,,,,,,,,,,,,,,102223600.0
1,,,,,,,,,,,,,,,-15848000.0
2,,,,,,,,,,,,,,,73890400.0
3,,,,,,,,,,,,,,,168530400.0
4,,,,,,,,,,,,,,,86259200.0
5,,,,,,,,,,,,,,,-76800.0
6,,,,,,,,,,,,,,,-95916400.0
7,,,,,,,,,,,,,,,-21245600.0
8,,,,,,,,,,,,,,,80426800.0
9,,,,,,,,,,,,,,,-37836800.0


In [9]:
indicators_with_price = pd.concat([indicators, stock_data["Adj Close"]], axis=1, join='inner')
indicators_with_price.head(45)

Unnamed: 0,MACD,MACD_signal,RSI,CMO,MOM,Upper_BB,Middle_BB,Lower_BB,SMA_SHORT,SMA_LONG,EMA,SLOWK,SLOWD,ATR,OBV,Adj Close
0,,,,,,,,,,,,,,,102223600.0,40.670982
1,,,,,,,,,,,,,,,-15848000.0,40.663895
2,,,,,,,,,,,,,,,73890400.0,40.852772
3,,,,,,,,,,,,,,,168530400.0,41.317894
4,,,,,,,,,,,,,,,86259200.0,41.16444
5,,,,,,,,,,,,,,,-76800.0,41.159706
6,,,,,,,,,,,,,,,-95916400.0,41.150269
7,,,,,,,,,,,,,,,-21245600.0,41.384018
8,,,,,,,,,,,,,,,80426800.0,41.811348
9,,,,,,,,,,,,,,,-37836800.0,41.598869


In [10]:
indicators_with_price = indicators_with_price.dropna()
indicators_with_price = indicators_with_price.reset_index(drop=True)
indicators_with_price

Unnamed: 0,MACD,MACD_signal,RSI,CMO,MOM,Upper_BB,Middle_BB,Lower_BB,SMA_SHORT,SMA_LONG,EMA,SLOWK,SLOWD,ATR,OBV,Adj Close
0,0.836998,0.892122,62.550460,9.446275,4.837746,46.181777,45.025276,43.868775,45.025276,42.716243,41.669632,-33.985206,-25.190096,2.597924,-1.272928e+08,45.508728
1,0.807660,0.875230,64.652604,9.889990,5.094723,46.267290,45.074420,43.881550,45.074420,42.841136,41.743307,-38.421343,-32.038679,2.601199,-5.964840e+07,45.758617
2,0.745476,0.849279,59.262090,9.078365,4.529835,46.279267,45.125109,43.970952,45.125109,42.950400,41.808880,-44.539909,-38.982153,2.591212,-1.462020e+08,45.382607
3,0.690158,0.817455,59.492236,9.121226,4.088505,46.305456,45.156285,44.007113,45.156285,43.044874,41.873700,-48.497896,-43.819716,2.585404,-5.976160e+07,45.406399
4,0.601743,0.774312,53.152417,8.112894,3.775513,46.292657,45.178297,44.063937,45.178297,43.124377,41.928948,-64.119811,-52.385872,2.553132,-3.066384e+08,44.939953
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1409,-1.915982,-1.196219,36.736031,2.838111,4.958084,198.877256,188.556395,178.235534,188.556395,189.050803,183.653694,24.718011,29.233264,3.065765,3.547580e+09,182.447189
1410,-1.553414,-1.267658,51.604616,5.839546,11.119675,197.989955,188.193858,178.397761,188.193858,189.294740,183.739026,31.568411,28.661231,3.324840,3.625586e+09,188.389618
1411,-1.018216,-1.217770,56.967984,7.263183,13.986084,197.046090,187.925201,178.804311,187.925201,189.545736,183.875546,50.256116,35.514179,3.341664,3.694327e+09,191.315872
1412,-0.401665,-1.054549,60.698091,8.374917,14.647400,196.870390,187.878260,178.886130,187.878260,189.791607,184.051534,74.641367,52.155298,3.389697,3.754460e+09,193.642899


In [11]:
# Irrelelvant
indicators_with_price['Prev_Adj_Close'] = indicators_with_price['Adj Close'].shift(1)
indicators_with_price['Return'] = ((indicators_with_price['Adj Close'] - indicators_with_price['Prev_Adj_Close'])/indicators_with_price['Prev_Adj_Close'])*100
indicators_with_price['Signal'] = np.where(indicators_with_price['Return'] > 1, 1,
                                           np.where(indicators_with_price['Return'] < -1, 2, 0))
indicators_with_price


Unnamed: 0,MACD,MACD_signal,RSI,CMO,MOM,Upper_BB,Middle_BB,Lower_BB,SMA_SHORT,SMA_LONG,EMA,SLOWK,SLOWD,ATR,OBV,Adj Close,Prev_Adj_Close,Return,Signal
0,0.836998,0.892122,62.550460,9.446275,4.837746,46.181777,45.025276,43.868775,45.025276,42.716243,41.669632,-33.985206,-25.190096,2.597924,-1.272928e+08,45.508728,,,0
1,0.807660,0.875230,64.652604,9.889990,5.094723,46.267290,45.074420,43.881550,45.074420,42.841136,41.743307,-38.421343,-32.038679,2.601199,-5.964840e+07,45.758617,45.508728,0.549102,0
2,0.745476,0.849279,59.262090,9.078365,4.529835,46.279267,45.125109,43.970952,45.125109,42.950400,41.808880,-44.539909,-38.982153,2.591212,-1.462020e+08,45.382607,45.758617,-0.821727,0
3,0.690158,0.817455,59.492236,9.121226,4.088505,46.305456,45.156285,44.007113,45.156285,43.044874,41.873700,-48.497896,-43.819716,2.585404,-5.976160e+07,45.406399,45.382607,0.052426,0
4,0.601743,0.774312,53.152417,8.112894,3.775513,46.292657,45.178297,44.063937,45.178297,43.124377,41.928948,-64.119811,-52.385872,2.553132,-3.066384e+08,44.939953,45.406399,-1.027269,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1409,-1.915982,-1.196219,36.736031,2.838111,4.958084,198.877256,188.556395,178.235534,188.556395,189.050803,183.653694,24.718011,29.233264,3.065765,3.547580e+09,182.447189,183.395981,-0.517346,0
1410,-1.553414,-1.267658,51.604616,5.839546,11.119675,197.989955,188.193858,178.397761,188.193858,189.294740,183.739026,31.568411,28.661231,3.324840,3.625586e+09,188.389618,182.447189,3.257068,1
1411,-1.018216,-1.217770,56.967984,7.263183,13.986084,197.046090,187.925201,178.804311,187.925201,189.545736,183.875546,50.256116,35.514179,3.341664,3.694327e+09,191.315872,188.389618,1.553299,1
1412,-0.401665,-1.054549,60.698091,8.374917,14.647400,196.870390,187.878260,178.886130,187.878260,189.791607,184.051534,74.641367,52.155298,3.389697,3.754460e+09,193.642899,191.315872,1.216327,1


In [12]:
# Not important
indicators_with_price["Signal"].value_counts()

Signal
0    698
1    403
2    313
Name: count, dtype: int64

In [13]:
indicators_with_price.dropna(inplace=True)
indicators_with_price

Unnamed: 0,MACD,MACD_signal,RSI,CMO,MOM,Upper_BB,Middle_BB,Lower_BB,SMA_SHORT,SMA_LONG,EMA,SLOWK,SLOWD,ATR,OBV,Adj Close,Prev_Adj_Close,Return,Signal
1,0.807660,0.875230,64.652604,9.889990,5.094723,46.267290,45.074420,43.881550,45.074420,42.841136,41.743307,-38.421343,-32.038679,2.601199,-5.964840e+07,45.758617,45.508728,0.549102,0
2,0.745476,0.849279,59.262090,9.078365,4.529835,46.279267,45.125109,43.970952,45.125109,42.950400,41.808880,-44.539909,-38.982153,2.591212,-1.462020e+08,45.382607,45.758617,-0.821727,0
3,0.690158,0.817455,59.492236,9.121226,4.088505,46.305456,45.156285,44.007113,45.156285,43.044874,41.873700,-48.497896,-43.819716,2.585404,-5.976160e+07,45.406399,45.382607,0.052426,0
4,0.601743,0.774312,53.152417,8.112894,3.775513,46.292657,45.178297,44.063937,45.178297,43.124377,41.928948,-64.119811,-52.385872,2.553132,-3.066384e+08,44.939953,45.406399,-1.027269,2
5,0.523718,0.724194,52.843135,8.061502,3.756458,46.261220,45.207211,44.153203,45.207211,43.224360,41.982772,-71.835977,-61.484562,2.539698,-3.805780e+08,44.916164,44.939953,-0.052934,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1409,-1.915982,-1.196219,36.736031,2.838111,4.958084,198.877256,188.556395,178.235534,188.556395,189.050803,183.653694,24.718011,29.233264,3.065765,3.547580e+09,182.447189,183.395981,-0.517346,0
1410,-1.553414,-1.267658,51.604616,5.839546,11.119675,197.989955,188.193858,178.397761,188.193858,189.294740,183.739026,31.568411,28.661231,3.324840,3.625586e+09,188.389618,182.447189,3.257068,1
1411,-1.018216,-1.217770,56.967984,7.263183,13.986084,197.046090,187.925201,178.804311,187.925201,189.545736,183.875546,50.256116,35.514179,3.341664,3.694327e+09,191.315872,188.389618,1.553299,1
1412,-0.401665,-1.054549,60.698091,8.374917,14.647400,196.870390,187.878260,178.886130,187.878260,189.791607,184.051534,74.641367,52.155298,3.389697,3.754460e+09,193.642899,191.315872,1.216327,1


In [14]:
indicators_with_price.columns

Index(['MACD', 'MACD_signal', 'RSI', 'CMO', 'MOM', 'Upper_BB', 'Middle_BB',
       'Lower_BB', 'SMA_SHORT', 'SMA_LONG', 'EMA', 'SLOWK', 'SLOWD', 'ATR',
       'OBV', 'Adj Close', 'Prev_Adj_Close', 'Return', 'Signal'],
      dtype='object')

In [15]:
# indicators_with_price = indicators_with_price.drop(columns=['Next_Adj_Close', 'Return'])
# indicators_with_price

In [16]:
indicators_with_price.drop(columns=['Prev_Adj_Close', "Signal"], inplace=True)
indicators_with_price.head(50)

Unnamed: 0,MACD,MACD_signal,RSI,CMO,MOM,Upper_BB,Middle_BB,Lower_BB,SMA_SHORT,SMA_LONG,EMA,SLOWK,SLOWD,ATR,OBV,Adj Close,Return
1,0.80766,0.87523,64.652604,9.88999,5.094723,46.26729,45.07442,43.88155,45.07442,42.841136,41.743307,-38.421343,-32.038679,2.601199,-59648400.0,45.758617,0.549102
2,0.745476,0.849279,59.26209,9.078365,4.529835,46.279267,45.125109,43.970952,45.125109,42.9504,41.80888,-44.539909,-38.982153,2.591212,-146202000.0,45.382607,-0.821727
3,0.690158,0.817455,59.492236,9.121226,4.088505,46.305456,45.156285,44.007113,45.156285,43.044874,41.8737,-48.497896,-43.819716,2.585404,-59761600.0,45.406399,0.052426
4,0.601743,0.774312,53.152417,8.112894,3.775513,46.292657,45.178297,44.063937,45.178297,43.124377,41.928948,-64.119811,-52.385872,2.553132,-306638400.0,44.939953,-1.027269
5,0.523718,0.724194,52.843135,8.061502,3.756458,46.26122,45.207211,44.153203,45.207211,43.22436,41.982772,-71.835977,-61.484562,2.539698,-380578000.0,44.916164,-0.052934
6,0.398717,0.659098,44.361231,6.5026,3.040058,46.303933,45.184127,44.064321,45.184127,43.301909,42.022547,-73.906103,-69.953964,2.477315,-514892000.0,44.190327,-1.615983
7,0.311614,0.589601,46.803272,6.862694,2.999069,46.316059,45.176274,44.036488,45.176274,43.368141,42.06508,-65.869173,-70.537084,2.486769,-432377200.0,44.383087,0.436205
8,0.220078,0.515697,44.125409,6.332071,2.324249,46.360936,45.141767,43.922598,45.141767,43.433263,42.102386,-60.508361,-66.761212,2.502315,-535224800.0,44.135597,-0.557622
9,0.135602,0.439678,42.75737,6.056107,2.408215,46.411451,45.103334,43.795216,45.103334,43.487755,42.136705,-62.78114,-63.052891,2.495143,-644026400.0,44.007084,-0.291178
10,0.015664,0.354875,36.543833,4.660167,1.066715,46.540582,45.027061,43.51354,45.027061,43.526361,42.158614,-62.544866,-61.944789,2.475698,-770678800.0,43.352631,-1.487154


In [17]:
y = indicators_with_price["Return"]
y_2 = indicators_with_price["SMA_SHORT"]
y_3 = indicators_with_price["EMA"]
y_4 = indicators_with_price["Upper_BB"]
y_5 = indicators_with_price["Middle_BB"]
y_6 = indicators_with_price["Lower_BB"]
X = np.array(date)

trace = go.Scatter(x=X, y=y, mode="lines", name="Adj Close")
trace_2 = go.Scatter(x=X, y=y_2, mode="lines", name="SMA")
trace_3 = go.Scatter(x=X, y=y_3, mode="lines", name="EMA")
trace_4 = go.Scatter(x=X, y=y_4, mode="lines", name="Upper_BB")
trace_5 = go.Scatter(x=X, y=y_5, mode="lines", name="Middle_BB")
trace_6 = go.Scatter(x=X, y=y_6, mode="lines", name="Lower_BB")



layout = go.Layout(
    title='Stock Price and Volume',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Adj Close', side='left', rangemode='tozero'),
    yaxis2=dict(title='SMA', side='right', overlaying='y', rangemode='tozero'),
    yaxis3=dict(title='EMA', side='right', overlaying='y', rangemode='tozero'),
    yaxis4=dict(title='Upper_BB', side='right', overlaying='y', rangemode='tozero'),
    yaxis5=dict(title='Middle_BB', side='right', overlaying='y', rangemode='tozero'),
    yaxis6=dict(title='Lower_BB', side='right', overlaying='y', rangemode='tozero'),
    height=600,
)

fig = go.Figure(data=[trace, trace_2, trace_3, trace_4, trace_5, trace_6], layout=layout)

# Show plot
pyo.iplot(fig)

In [18]:
# Custom Dataset
class RollingWindowDataset(Dataset):
    def __init__(self, X, y, window_size, device="gpu"):
        self.X = X.clone().detach().to(torch.float).to(device)
        self.y = y.clone().detach().to(torch.float).to(device)
        self.window_size = window_size

    def __len__(self):
        # Adjust the length to account for window size
        return len(self.X) - self.window_size 

    def __getitem__(self, idx):
        # Ensure idx is within the valid range
        if idx + self.window_size > len(self.X):
            raise IndexError("Index out of bounds")

        X_window = self.X[idx : idx + self.window_size]
        
        y_target = self.y[idx + self.window_size]  

        return X_window.clone().detach().to(torch.float), y_target.clone().detach().to(torch.float)


In [19]:
X = indicators_with_price.iloc[:,:-1]
y = indicators_with_price.iloc[:,-2]

signal_true = indicators_with_price.iloc[:,-1]
y

1        45.758617
2        45.382607
3        45.406399
4        44.939953
5        44.916164
           ...    
1409    182.447189
1410    188.389618
1411    191.315872
1412    193.642899
1413    194.931259
Name: Adj Close, Length: 1413, dtype: float64

In [20]:
train_signal_true = signal_true.iloc[:int(len(X)*0.8)]
test_signal_true = signal_true.iloc[int(len(X)*0.8):]
test_signal_true

1131   -2.537000
1132   -1.378483
1133    1.213275
1134   -0.343483
1135    1.638981
          ...   
1409   -0.517346
1410    3.257068
1411    1.553299
1412    1.216327
1413    0.665328
Name: Return, Length: 283, dtype: float64

In [21]:
correlation_matrix = X.corr()

# Perform hierarchical clustering to find the order of features
linked = sch.linkage(sch.distance.pdist(correlation_matrix), method='ward')
cluster_order = sch.dendrogram(linked, no_plot=True)['leaves']

# Reorder the correlation matrix
correlation_matrix_ordered = correlation_matrix.iloc[cluster_order, cluster_order]

fig = go.Figure(data=go.Heatmap(
                    z=correlation_matrix_ordered,
                    x=correlation_matrix_ordered.columns,
                    y=correlation_matrix_ordered.columns,
                    colorscale='Viridis',
                    text=correlation_matrix_ordered.round(2).values,  
                    texttemplate="%{text}",
                    textfont={"size":9}  
                    ))

# Update the layout
fig.update_layout(
    title='Ordered Correlation Matrix',
    xaxis_title="Variables",
    yaxis_title="Variables",
    xaxis=dict(side='bottom'),
    yaxis=dict(autorange='reversed'),
    width=1000,  
    height=1000,  
)

# Show the figure
pyo.iplot(fig)

In [22]:
X= X.iloc[:, cluster_order]

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

y_test.head(44)

1131    141.934525
1132    139.977982
1133    141.676300
1134    141.189667
1135    143.503738
1136    144.477051
1137    142.232498
1138    135.568283
1139    133.591873
1140    131.466476
1141    131.396942
1142    134.525436
1143    131.327408
1144    130.959961
1145    129.142441
1146    125.179672
1147    128.725311
1148    129.043106
1149    124.216293
1150    125.497498
1151    124.166641
1152    128.735229
1153    129.261627
1154    129.837646
1155    132.578827
1156    132.499390
1157    133.840164
1158    135.012100
1159    134.287094
1160    134.346664
1161    136.928925
1162    140.146835
1163    141.557129
1164    140.891693
1165    142.977371
1166    144.933914
1167    142.023941
1168    143.305099
1169    144.437332
1170    149.790558
1171    153.445404
1172    150.694321
1173    153.594391
1174    150.883041
Name: Adj Close, dtype: float64

In [24]:
X_train_df = pd.DataFrame()
X_test_df = pd.DataFrame()
scaler_dict = {}

X_train_df = X_train
X_test_df = X_test

for column in X_train.columns:

    if column not in ["Adj Close", "Return"]:
        scaler = MinMaxScaler()

        X_train_scaled = scaler.fit_transform(X_train[[column]].values)
        X_train_df[column] = X_train_scaled
            
        X_test_scaled = scaler.transform(X_test[[column]].values)
        X_test_df[column] = X_test_scaled

        scaler_dict[column] = scaler


X_train_df.head(24)

features = X_train_df.columns
X_train_df.head(32)

Unnamed: 0,ATR,OBV,SMA_LONG,EMA,Adj Close,Lower_BB,Upper_BB,Middle_BB,SMA_SHORT,SLOWK,SLOWD,CMO,MOM,RSI,MACD,MACD_signal
1,0.140883,0.28961,0.032255,0.0,45.758617,0.074601,0.053324,0.061531,0.061531,0.279572,0.23828,0.523465,0.450605,0.664144,0.527454,0.526281
2,0.138684,0.274671,0.033083,0.00054,45.382607,0.075268,0.053407,0.061899,0.061899,0.245646,0.195304,0.506049,0.445706,0.578227,0.522991,0.524204
3,0.137405,0.289591,0.033799,0.001075,45.406399,0.075538,0.053588,0.062126,0.062126,0.223699,0.165363,0.506969,0.441878,0.581895,0.519021,0.521656
4,0.130299,0.246981,0.034402,0.00153,44.939953,0.075963,0.053499,0.062286,0.062286,0.137079,0.112344,0.485332,0.439164,0.480847,0.512676,0.518203
5,0.12734,0.234219,0.03516,0.001973,44.916164,0.07663,0.053282,0.062496,0.062496,0.094294,0.056029,0.484229,0.438999,0.475918,0.507076,0.514191
6,0.113603,0.211037,0.035747,0.002301,44.190327,0.075966,0.053577,0.062328,0.062328,0.082816,0.003609,0.450778,0.432785,0.340729,0.498105,0.50898
7,0.115685,0.225279,0.036249,0.002652,44.383087,0.075758,0.053661,0.062271,0.062271,0.127379,0.0,0.458505,0.43243,0.379651,0.491854,0.503417
8,0.119108,0.207528,0.036743,0.002959,44.135597,0.074907,0.05397,0.06202,0.06202,0.157104,0.02337,0.447118,0.426577,0.33697,0.485285,0.497501
9,0.117529,0.188749,0.037156,0.003242,44.007084,0.073956,0.054318,0.061741,0.061741,0.144501,0.046322,0.441197,0.427306,0.315165,0.479222,0.491416
10,0.113247,0.166889,0.037448,0.003423,43.352631,0.071852,0.055209,0.061186,0.061186,0.145812,0.053181,0.411242,0.415671,0.216131,0.470615,0.484628


In [25]:
scaler_adj = MinMaxScaler()
scaler_adj.fit(X_train[["Adj Close"]].values)

X_train_df['Adj Close'] = scaler_adj.transform(X_train[['Adj Close']].values).flatten()
X_test_df['Adj Close'] = scaler_adj.transform(X_test[['Adj Close']].values).flatten()

y_train = scaler_adj.transform(y_train.values.reshape(-1,1)).flatten()
y_test = scaler_adj.transform(y_test.values.reshape(-1,1)).flatten()



len(y_test)

283

In [26]:
correlation_matrix = X_train_df.corr()

# Create the heatmap with text
fig = go.Figure(data=go.Heatmap(
                    z=correlation_matrix,
                    x=correlation_matrix.columns,
                    y=correlation_matrix.columns,
                    colorscale='Viridis',
                    text=correlation_matrix.round(2).values,  # Rounded values for display
                    texttemplate="%{text}",
                    textfont={"size":9}  # Adjust text size if necessary
                    ))

# Update the layout
fig.update_layout(
    title='Correlation Matrix',
    xaxis_title="Variables",
    yaxis_title="Variables",
    xaxis=dict(side='bottom'),
    yaxis=dict(autorange='reversed'),
    width=1000,  # or any width you desire
    height=1000,  # or any height you desire
)

# Show the figure
pyo.iplot(fig)

In [27]:
X_train_tensor = torch.tensor(X_train_df.to_numpy(), dtype=torch.float, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float, device=device)

X_test_tensor = torch.tensor(X_test_df.to_numpy(), dtype=torch.float, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float, device=device)

train_data = RollingWindowDataset(X_train_tensor, y_train_tensor, window_size=time_step, device=device)
test_data = RollingWindowDataset(X_test_tensor, y_test_tensor, window_size=time_step, device=device)

print(test_data.__getitem__(0)[1])
print(test_data.__getitem__(1)[0])


tensor(0.9428, device='cuda:0')
tensor([[0.5640, 0.7501, 0.8003,  ..., 0.3088, 0.4275, 0.4590],
        [0.5485, 0.7608, 0.7992,  ..., 0.3519, 0.4137, 0.4460],
        [0.5520, 0.7477, 0.7992,  ..., 0.3416, 0.4007, 0.4327],
        ...,
        [0.2575, 0.8593, 0.9317,  ..., 0.6366, 0.6923, 0.6874],
        [0.2357, 0.8529, 0.9349,  ..., 0.6130, 0.6846, 0.6892],
        [0.2155, 0.8529, 0.9377,  ..., 0.6130, 0.6761, 0.6887]],
       device='cuda:0')


In [28]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=500):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(1, max_len, d_model)
        position = torch.arange(max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, :, 0::2] = torch.sin(position * div_term)
        pe[:, :, 1::2] = torch.cos(position * div_term)
        # Originally: pe = pe.unsqueeze(0).transpose(0, 1)
        # Corrected to keep as [max_len, d_model] for easier indexing
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        x: Tensor, shape [batch_size, seq_len, embedding_dim]
        """
        pe = self.pe[:, :x.size(1), :].to(device)
        # Correctly index into pe to accommodate x's sequence length
        # Ensure x and pe have compatible sequence lengths for broadcasting
        x = x + pe

        return x


In [29]:
# class PositionalEncoding(nn.Module):
#     def __init__(self, d_model, max_seq_length=100):
#         super(PositionalEncoding, self).__init__()
        
#         pe = torch.zeros(max_seq_length, d_model)
#         position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)
#         div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        
#         pe[:, 0::2] = torch.sin(position * div_term)
#         pe[:, 1::2] = torch.cos(position * div_term)
        
#         self.register_buffer('pe', pe.unsqueeze(0))
        
#     def forward(self, x):
#         return x + self.pe[:, :x.size(1)]

In [30]:
# class Time2Vec(nn.Module):
#     def __init__(self, d_model):
#         super(Time2Vec, self).__init__()
#         self.d_model = d_model
#         self.linear = nn.Linear(1, 1)  # Linear part
#         self.periodic = nn.Linear(1, d_model - 1)  # Periodic part

#     def forward(self, x):
#         # x is expected to be of shape [batch_size, seq_len, 1] (1 for time dimension)
#         linear_part = self.linear(x)  # [batch_size, seq_len, 1]
        
#         # Apply periodic transformation
#         sin_trans = torch.sin(self.periodic(x))
#         cos_trans = torch.cos(self.periodic(x))
#         periodic_part = torch.cat((sin_trans, cos_trans), -1)[:, :, :self.d_model-1]
        
#         return torch.cat((linear_part, periodic_part), -1)  # Concatenate linear and periodic parts

In [31]:
class Time2Vec(nn.Module):
    def __init__(self, seq_len, output_size):
        super(Time2Vec, self).__init__()
        self.seq_len = seq_len
        self.output_size = output_size
        self.weights_linear = nn.Parameter(torch.randn(1, seq_len, 1))  # Adjusted shape
        self.bias_linear = nn.Parameter(torch.zeros(1, 1, 1))
        self.weights_periodic = nn.Parameter(torch.randn(1, 1, output_size - 1))  # Adjusted shape

    def forward(self, x):
        # x shape expected: [batch_size, seq_len, 1] for compatibility
        x = x.unsqueeze(-1)
        linear_part = x * self.weights_linear + self.bias_linear

        # No need to permute x for broadcasting, adjust weights shape instead
        periodic_part = torch.sin(x * self.weights_periodic)

        return torch.cat([linear_part, periodic_part], dim=-1)


In [32]:
# class TimeSeriesTransformer(nn.Module):
#     def __init__(self, input_dim, d_model, n_head, num_encoder_layers, dropout_prob, output_dim):
#         super(TimeSeriesTransformer, self).__init__()
#         self.time2vec = Time2Vec(d_model=time2vec_dim)  # Assuming time2vec_dim <= d_model
#         self.dropout = nn.Dropout(dropout_prob)
#         encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head, dropout=dropout_prob, batch_first=True)
#         self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
#         self.output_layer = nn.Linear(d_model, output_dim)

#     def generate_square_subsequent_mask(self, sz):
#         mask = torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1)
#         return mask

#     def forward(self, src):
#         time_indices = src.size(1)
#         # time_indices should be of shape [batch_size, seq_len, 1]
#         time_embeddings = self.time2vec(time_indices)  # Generate time embeddings
#         src = torch.cat((src, time_embeddings), -1)  # Concatenate src features with time embeddings
#         src = self.dropout(src)

#         mask = self.generate_square_subsequent_mask(src.size(1)).to(device)

#         output = self.transformer_encoder(src, mask=mask)  # Ensure src is [batch_size, seq_len, d_model] due to batch_first=True
#         output = self.dropout(output)
#         output = self.output_layer(output[:, -1, :])  # Taking the last time step; adjust as needed
#         return output

<img src="/home/arda/Turkcell/images/Screenshot from 2024-02-19 12-26-08.png" alt="Alt text">
<img src="/home/arda/Turkcell/images/Screenshot from 2024-02-19 12-26-54.png" alt="Alt text">
<img src="/home/arda/Turkcell/images/Screenshot from 2024-02-19 12-27-52.png" alt="Alt text">



In [33]:
# class TimeSeriesTransformer(nn.Module):
#     def __init__(self, input_dim, d_model, n_head, num_encoder_layers, dropout_prob, output_dim):
#         super(TimeSeriesTransformer, self).__init__()
#         self.input_embedding = nn.Linear(input_dim, d_model)
#         # self.dropout = nn.Dropout(dropout_prob)
#         self.pos_encoder = PositionalEncoding(d_model)
#         encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head, batch_first=True)
#         self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
#         self.output_layer = nn.Linear(d_model, output_dim)

#     def forward(self, src):
#         src = self.input_embedding(src)  # [batch_size, seq_len, input_dim] -> [batch_size, seq_len, d_model]
#         # src = self.dropout(src)
#         src = self.pos_encode1r(src)

#         # src = self.dropout(src)

#         output = self.transformer_encoder(src)  # Ensure src is [batch_size, seq_len, d_model] due to batch_first=True
#         # output = self.dropout(output)
#         output = self.output_layer(output[:, -1, :])  # Taking the last time step; adjust as needed
#         return output

In [34]:
class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_dim, d_model, n_head, num_encoder_layers, dropout_prob, output_dim):
        super(TimeSeriesTransformer, self).__init__()
        seq_len = time_step
        self.time2vec = Time2Vec(seq_len, d_model)
        self.input_embedding = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        self.output_layer = nn.Linear(d_model, output_dim)

    def forward(self, src): #src shape: (batch_size, seq_len, features)
        time_encoding = self.time2vec(torch.arange(src.size(1), device=src.device).float()).expand(src.size(0), -1, -1)  # Generate time encoding
        
        src = self.input_embedding(src) + time_encoding  # Combine input embedding with time encoding
        
        output = self.transformer_encoder(src)  # Pass through transformer
        output = self.output_layer(output[:, -1, :])  # Taking the last time step
        return output


In [35]:
class ModelActioner:
    
    def __init__(self, train_data, test_data, device):
        self.train_data = train_data
        self.test_data = test_data
        self.device = device
        self.model = None
        self.optimizer = None
        self.criterion = nn.MSELoss()

    
    def train_validate(self, config, trial):

        # batch_size = config["batch_size"]
        # epochs = config["epochs"]
        # d_model = config["d_model"]
        # num_encoder_layers = config["num_encoder_layers"]
        # n_head = config["n_head"]
        # learning_rate = config["learning_rate"]
        # dropout_prob = config["dropout_prob"]
        # weight_decay = config["weight_decay"]
        # lr_step_size = config["lr_step_size"]
        # gamma = config["gamma"]

        batch_size = config["batch_size"]
        epochs = config["epochs"]
        d_model = 512
        num_encoder_layers = config["num_encoder_layers"]
        n_head = 16
        learning_rate = config["learning_rate"]
        dropout_prob = config["dropout_prob"]
        # weight_decay = config["weight_decay"]
        lr_step_size = config["lr_step_size"]
        gamma = config["gamma"]


        n_splits = 3
        tscv = TimeSeriesSplit(n_splits=n_splits)

        val_losses = []

        for fold, (train_ids, val_ids) in enumerate(tscv.split(self.train_data)):
            print(f'Starting fold {fold+1}:')
            
            self.model = TimeSeriesTransformer(input_dim=self.train_data.__getitem__(0)[0].shape[1], d_model=d_model, n_head=n_head, num_encoder_layers=num_encoder_layers, dropout_prob=dropout_prob, output_dim=1).to(self.device)

            self.optimizer = optim.Adam(self.model.parameters(), lr = learning_rate)

            scheduler = ReduceLROnPlateau(self.optimizer, patience=lr_step_size, factor=gamma, mode="min", verbose=True) 

            train_subset = Subset(self.train_data, train_ids)
            val_subset = Subset(self.train_data, val_ids)
            
            # Creating data loader
            train_loader = DataLoader(dataset=train_subset, batch_size=batch_size, shuffle=False)
            val_loader = DataLoader(dataset=val_subset, batch_size=batch_size, shuffle=False)

            # Training Loop
            for epoch in range(epochs):
                print('epochs {}/{}'.format(epoch+1,epochs))

                running_loss = 0.0
                total_sample_train = 0

                print("hidden none")

                # self.model.hidden_none()
                self.model.train()

                for batch_idx, (data, target) in enumerate(train_loader):
                    data, target = data.to(self.device), target.to(self.device)
                    target = target.view(-1,1) 

                    self.optimizer.zero_grad()
                    preds = self.model(data)

                    loss = self.criterion(preds, target)
                    loss.backward()
                    self.optimizer.step() # Update model params

                    running_loss += loss.item() * data.size(0)
                    total_sample_train += data.size(0)

                train_loss = running_loss/total_sample_train
                #print(f"train loss: {train_loss}")
                self.model.eval()
                val_running_loss = 0.0
                total_sample_val = 0

                with torch.no_grad():

                    for batch_idx, (data, target) in enumerate(val_loader):
                        data, target = data.to(self.device), target.to(self.device)
                        target = target.view(-1,1)

                        preds = self.model(data)
                        loss = self.criterion(preds, target)

                        val_running_loss += loss.item() * data.size(0)
                        total_sample_val += data.size(0)
                
                val_loss = val_running_loss/total_sample_val
                val_losses.append(val_loss)
                scheduler.step(train_loss)
                
                unique_step = fold * epochs + epoch
                trial.report(val_loss, unique_step)

                if trial.should_prune():
                    raise optuna.TrialPruned()

                current_lr = self.optimizer.param_groups[0]['lr']

                print(f'Current Learning Rate: {current_lr}')
                print(f"train_loss: {train_loss}, val_loss: {val_loss}")
                
        mean_val_loss = np.mean(val_losses)
        print(f"Mean validation loss: {mean_val_loss}")
        return mean_val_loss
    
                    
    def train(self, config):

        # batch_size = config["batch_size"]
        # epochs = config["epochs"]
        # d_model = config["d_model"]
        # num_encoder_layers = config["num_encoder_layers"]
        # n_head = config["n_head"]
        # learning_rate = config["learning_rate"]
        # dropout_prob = config["dropout_prob"]
        # weight_decay = config["weight_decay"]
        # lr_step_size = config["lr_step_size"]
        # gamma = config["gamma"]

        batch_size = config["batch_size"]
        epochs = config["epochs"]
        d_model = 512
        num_encoder_layers = config["num_encoder_layers"]
        n_head = 16
        learning_rate = config["learning_rate"]
        dropout_prob = 0
        # weight_decay = config["weight_decay"]
        lr_step_size = config["lr_step_size"]
        gamma = config["gamma"]

        self.model = TimeSeriesTransformer(input_dim=self.train_data.__getitem__(0)[0].shape[1], d_model=d_model, n_head=n_head, num_encoder_layers=num_encoder_layers, dropout_prob=dropout_prob, output_dim=1).to(self.device)

        # Update optimizer with updated lr
        self.optimizer = optim.Adam(self.model.parameters(), lr = learning_rate)

        # Creating data loader
        train_loader = DataLoader(dataset=self.train_data, batch_size=batch_size, shuffle=False)

        scheduler = ReduceLROnPlateau(self.optimizer, patience=lr_step_size, factor=gamma, mode="min", verbose=True)  

        # Training Loop
        for epoch in range(epochs):
            print('epochs {}/{}'.format(epoch+1,epochs))

            running_loss = 0.0
            total_sample_train = 0

            self.model.train()

            for batch_idx, (data, target) in enumerate(train_loader):

                data, target = data.to(self.device), target.to(self.device)
                target = target.view(-1,1)  

                self.optimizer.zero_grad()
                preds = self.model(data)

                loss = self.criterion(preds, target)
                #loss = loss.mean()
                loss.backward()
                self.optimizer.step() # Update model params

                running_loss += loss.item() * data.size(0)
                total_sample_train += data.size(0)

            train_loss = running_loss/total_sample_train
            #print(f"train loss: {train_loss}")
            scheduler.step(train_loss)
            current_lr = self.optimizer.param_groups[0]['lr']

            print(f'Current Learning Rate: {current_lr}')
            print(f"train_loss: {train_loss}")
        
        return self.model
            
    
    def test(self, config):
        batch_size = config["batch_size"]
        all_preds = []

        test_loader = DataLoader(dataset=self.test_data, batch_size=batch_size, shuffle=False)

        running_loss = .0
        total_sample = 0

        self.model.eval()

        with torch.no_grad():

            for batch_idx, (data, target) in enumerate(test_loader):

                data, target = data.to(self.device), target.to(self.device)
                target = target.view(-1,1)
                
                preds = self.model(data)
                loss = self.criterion(preds, target)

                running_loss += loss.item() * data.size(0)
                total_sample += data.size(0)

                all_preds.extend(preds.cpu().numpy())

            test_loss = running_loss/total_sample
            print(f"test_loss: {test_loss}")

        return all_preds
    


In [36]:

def objective(trial):
    # Suggest d_model as a multiple of 64 within a range.
    d_model = trial.suggest_int("d_model", 64, 512, step=64)

    # Ensure n_head is a divisor of d_model.
    # First, calculate possible divisors of d_model within the given range.
    max_heads = d_model // 2  # Assuming you want at least 2 dimensions per head.
    possible_heads = [i for i in range(2, max_heads + 1) if d_model % i == 0]

    # If there are no possible heads (which shouldn't happen with the given range but just in case),
    # default to 2 to avoid division by zero or errors.
    if not possible_heads:
        n_head = 2
    else:
        # Suggest n_head from the list of possible divisors.
        n_head_index = trial.suggest_int("n_head_index", 0, len(possible_heads) - 1)
        n_head = possible_heads[n_head_index]

    config = {
        "batch_size": trial.suggest_int("batch_size", 32, 128, log=True),
        "epochs": trial.suggest_int("epochs", 10, 20),
        "d_model": d_model,
        "num_encoder_layers": trial.suggest_int("num_encoder_layers", 1, 7),
        "n_head": n_head,
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-1, log=True),
        "dropout_prob": 0,
        # "weight_decay": trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True),
        "lr_step_size": trial.suggest_int("lr_step_size", 10, 30), 
        "gamma": trial.suggest_float("gamma", 0.1, 0.9)
    }

    trainer = ModelActioner(train_data, test_data, device)

    val_loss = trainer.train_validate(config, trial)

    return val_loss

In [37]:
study_name = "Transformer-Tunner"
storage_url = "sqlite:///db.sqlite3"

storage = optuna.storages.RDBStorage(url=storage_url)

# Check if the study exists
study_names = [study.study_name for study in optuna.study.get_all_study_summaries(storage=storage)]
if study_name in study_names:
    # Delete the study if it exists
    print(f"Deleting study '{study_name}'")
    optuna.delete_study(study_name=study_name, storage=storage_url)
else:
    print(f"Study '{study_name}' does not exist in the storage.")
    

study = optuna.create_study(direction='minimize', 
                            storage=storage_url, 
                            sampler=TPESampler(),
                            pruner=optuna.pruners.SuccessiveHalvingPruner(
                            min_resource=3,  # Minimum amount of resource allocated to a trial
                            reduction_factor=3,  # Reduction factor for pruning
                            min_early_stopping_rate=3 # Minimum early-stopping rate
                            ),
                            study_name=study_name,
                            load_if_exists=False)

pbar = tqdm(total=10, desc='Optimizing', unit='trial')

def callback(study, trial):
    # Update the progress bar
    pbar.update(1)
    pbar.set_postfix_str(f"Best Value: {study.best_value:.4f}")


study.optimize(objective, n_trials=10, callbacks=[callback])
pbar.close()

# Best hyperparameters
print('Number of finished trials:', len(study.trials))
print('Best trial:')
trial = study.best_trial

print('Value:', trial.value)
print('Params:')
for key, value in trial.params.items():
    print(f'{key}: {value}')

[I 2024-02-27 01:33:07,216] A new study created in RDB with name: Transformer-Tunner


Deleting study 'Transformer-Tunner'


Optimizing:   0%|          | 0/10 [00:00<?, ?trial/s]

Starting fold 1:
epochs 1/14
hidden none
Current Learning Rate: 0.0014214046212321328
train_loss: 14.875341541626874, val_loss: 0.3219143608037163
epochs 2/14
hidden none
Current Learning Rate: 0.0014214046212321328
train_loss: 0.6962678820880897, val_loss: 0.34975408455904794
epochs 3/14
hidden none
Current Learning Rate: 0.0014214046212321328
train_loss: 0.22330239634303486, val_loss: 0.09863213581197403
epochs 4/14
hidden none
Current Learning Rate: 0.0014214046212321328
train_loss: 0.06545549346243634, val_loss: 0.02052413595511633
epochs 5/14
hidden none
Current Learning Rate: 0.0014214046212321328
train_loss: 0.043442883583552694, val_loss: 0.21002189552082734
epochs 6/14
hidden none
Current Learning Rate: 0.0014214046212321328
train_loss: 0.007477559368400013, val_loss: 0.04048704040948959
epochs 7/14
hidden none
Current Learning Rate: 0.0014214046212321328
train_loss: 0.019746498807388192, val_loss: 0.08925577735199648
epochs 8/14
hidden none
Current Learning Rate: 0.0014214046

[I 2024-02-27 01:34:25,064] Trial 0 finished with value: 0.13116043376969827 and parameters: {'d_model': 192, 'n_head_index': 7, 'batch_size': 60, 'epochs': 14, 'num_encoder_layers': 1, 'learning_rate': 0.0014214046212321328, 'lr_step_size': 10, 'gamma': 0.14614121384540335}. Best is trial 0 with value: 0.13116043376969827.


Current Learning Rate: 0.0014214046212321328
train_loss: 0.16881465612381114, val_loss: 0.14893193192341747
Mean validation loss: 0.13116043376969827
Starting fold 1:
epochs 1/15
hidden none
Current Learning Rate: 0.00021172904134473207
train_loss: 11.541200398697573, val_loss: 1.7454893645118266
epochs 2/15
hidden none
Current Learning Rate: 0.00021172904134473207
train_loss: 0.619028182590709, val_loss: 0.08063670027014964
epochs 3/15
hidden none
Current Learning Rate: 0.00021172904134473207
train_loss: 0.23640885282965268, val_loss: 0.6783342249253217
epochs 4/15
hidden none
Current Learning Rate: 0.00021172904134473207
train_loss: 0.08199511078350684, val_loss: 0.03967037744381848
epochs 5/15
hidden none
Current Learning Rate: 0.00021172904134473207
train_loss: 0.12509147016988958, val_loss: 0.1287591972771813
epochs 6/15
hidden none
Current Learning Rate: 0.00021172904134473207
train_loss: 0.05011408889994902, val_loss: 0.1481617559404934
epochs 7/15
hidden none
Current Learning R

[I 2024-02-27 01:42:35,110] Trial 1 finished with value: 0.1626750778534687 and parameters: {'d_model': 384, 'n_head_index': 2, 'batch_size': 48, 'epochs': 15, 'num_encoder_layers': 6, 'learning_rate': 0.00021172904134473207, 'lr_step_size': 29, 'gamma': 0.5287437519191148}. Best is trial 0 with value: 0.13116043376969827.


Current Learning Rate: 0.00021172904134473207
train_loss: 0.20631447075044407, val_loss: 0.0662430866237949
Mean validation loss: 0.1626750778534687
Starting fold 1:
epochs 1/17
hidden none
Current Learning Rate: 1.2059374586594657e-06
train_loss: 0.05336462250527214, val_loss: 0.15058854011928333
epochs 2/17
hidden none
Current Learning Rate: 1.2059374586594657e-06
train_loss: 0.03245499230482999, val_loss: 0.080983888927628
epochs 3/17
hidden none
Current Learning Rate: 1.2059374586594657e-06
train_loss: 0.03960378687171375, val_loss: 0.09543573242776535
epochs 4/17
hidden none
Current Learning Rate: 1.2059374586594657e-06
train_loss: 0.037435888630502366, val_loss: 0.11449481226942118
epochs 5/17
hidden none
Current Learning Rate: 1.2059374586594657e-06
train_loss: 0.03226388903225169, val_loss: 0.09877165450769312
epochs 6/17
hidden none
Current Learning Rate: 1.2059374586594657e-06
train_loss: 0.030320819554960028, val_loss: 0.10350580688785105
epochs 7/17
hidden none
Current Lear

[I 2024-02-27 01:50:29,289] Trial 2 finished with value: 0.0871346669654134 and parameters: {'d_model': 128, 'n_head_index': 2, 'batch_size': 42, 'epochs': 17, 'num_encoder_layers': 5, 'learning_rate': 1.2059374586594657e-06, 'lr_step_size': 14, 'gamma': 0.30530822981391426}. Best is trial 2 with value: 0.0871346669654134.


Current Learning Rate: 1.2059374586594657e-06
train_loss: 0.029378717907649628, val_loss: 0.024600772066589664
Mean validation loss: 0.0871346669654134
Starting fold 1:
epochs 1/14
hidden none
Current Learning Rate: 4.282095794501404e-06
train_loss: 0.20833400768392227, val_loss: 0.03884555492550135
epochs 2/14
hidden none
Current Learning Rate: 4.282095794501404e-06
train_loss: 0.10276830378992885, val_loss: 0.023704131699952426
epochs 3/14
hidden none
Current Learning Rate: 4.282095794501404e-06
train_loss: 0.05897510628782067, val_loss: 0.1392898349200978
epochs 4/14
hidden none
Current Learning Rate: 4.282095794501404e-06
train_loss: 0.037022742716705095, val_loss: 0.1856751371832455
epochs 5/14
hidden none
Current Learning Rate: 4.282095794501404e-06
train_loss: 0.03625248094984129, val_loss: 0.07877030311261907
epochs 6/14
hidden none
Current Learning Rate: 4.282095794501404e-06
train_loss: 0.03387284041473679, val_loss: 0.06278205757924155
epochs 7/14
hidden none
Current Learnin

[I 2024-02-27 01:54:36,069] Trial 3 finished with value: 0.07435702891357505 and parameters: {'d_model': 64, 'n_head_index': 4, 'batch_size': 40, 'epochs': 14, 'num_encoder_layers': 3, 'learning_rate': 4.282095794501404e-06, 'lr_step_size': 28, 'gamma': 0.3406400121670109}. Best is trial 3 with value: 0.07435702891357505.


Current Learning Rate: 4.282095794501404e-06
train_loss: 0.024820663034915924, val_loss: 0.039605924019626544
Mean validation loss: 0.07435702891357505
Starting fold 1:
epochs 1/14
hidden none
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.3194118164041463, val_loss: 0.05314592109883533
epochs 2/14
hidden none
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.06611935654107262, val_loss: 0.05766441967365715
epochs 3/14
hidden none
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.16233215542400584, val_loss: 0.021823910185519386
epochs 4/14
hidden none
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.05635958565946887, val_loss: 0.07929123094414964
epochs 5/14
hidden none
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.035569685566074705, val_loss: 0.19466059584827983
epochs 6/14
hidden none
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.06405947142664124, val_loss: 0.18154674382770764
epochs 7/14
hidden none
Current 

[I 2024-02-27 02:03:03,094] Trial 4 finished with value: 0.0734780808294248 and parameters: {'d_model': 128, 'n_head_index': 5, 'batch_size': 87, 'epochs': 14, 'num_encoder_layers': 7, 'learning_rate': 3.6091250545812106e-06, 'lr_step_size': 27, 'gamma': 0.42172566463666394}. Best is trial 4 with value: 0.0734780808294248.


Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.022503557253409835, val_loss: 0.020563204087974393
Mean validation loss: 0.0734780808294248
Starting fold 1:
epochs 1/19
hidden none
Current Learning Rate: 0.009849089553386833
train_loss: 52.062882998410394, val_loss: 1.7050174586913165
epochs 2/19
hidden none
Current Learning Rate: 0.009849089553386833
train_loss: 3.6980798426796406, val_loss: 4.475921588785508
epochs 3/19
hidden none
Current Learning Rate: 0.009849089553386833
train_loss: 1.448360595545348, val_loss: 0.05091856460306136
epochs 4/19
hidden none
Current Learning Rate: 0.009849089553386833
train_loss: 0.233533397842856, val_loss: 0.07973387432010735
epochs 5/19
hidden none
Current Learning Rate: 0.009849089553386833
train_loss: 0.14549878699814572, val_loss: 0.3944556174909367
epochs 6/19
hidden none
Current Learning Rate: 0.009849089553386833
train_loss: 0.049267017863252584, val_loss: 0.020932188794455107
epochs 7/19
hidden none
Current Learning Rate: 0.0098

[I 2024-02-27 02:14:28,653] Trial 5 finished with value: 0.3480754265436754 and parameters: {'d_model': 256, 'n_head_index': 6, 'batch_size': 93, 'epochs': 19, 'num_encoder_layers': 7, 'learning_rate': 0.009849089553386833, 'lr_step_size': 24, 'gamma': 0.2992934702795725}. Best is trial 4 with value: 0.0734780808294248.


Current Learning Rate: 0.009849089553386833
train_loss: 0.42775374425976886, val_loss: 0.007391060346408802
Mean validation loss: 0.3480754265436754
Starting fold 1:
epochs 1/17
hidden none
Current Learning Rate: 0.00012363323778663348
train_loss: 11.882955586559632, val_loss: 0.7316383053274715
epochs 2/17
hidden none
Current Learning Rate: 0.00012363323778663348
train_loss: 2.234398356372235, val_loss: 2.306725842344995
epochs 3/17
hidden none
Current Learning Rate: 0.00012363323778663348
train_loss: 0.6437875380483913, val_loss: 0.19471261344996152
epochs 4/17
hidden none
Current Learning Rate: 0.00012363323778663348
train_loss: 0.7025366848590328, val_loss: 0.3423422142571094
epochs 5/17
hidden none
Current Learning Rate: 0.00012363323778663348
train_loss: 0.4123132439366743, val_loss: 0.10364249870181083
epochs 6/17
hidden none
Current Learning Rate: 0.00012363323778663348
train_loss: 0.10738579950057993, val_loss: 0.7102993263917811
epochs 7/17
hidden none
Current Learning Rate: 

[I 2024-02-27 02:23:08,539] Trial 6 finished with value: 0.25942149647897894 and parameters: {'d_model': 192, 'n_head_index': 3, 'batch_size': 94, 'epochs': 17, 'num_encoder_layers': 6, 'learning_rate': 0.00012363323778663348, 'lr_step_size': 14, 'gamma': 0.3269059929946909}. Best is trial 4 with value: 0.0734780808294248.


Current Learning Rate: 0.00012363323778663348
train_loss: 0.14080458350138728, val_loss: 0.09856974003361721
Mean validation loss: 0.25942149647897894
Starting fold 1:
epochs 1/13
hidden none
Current Learning Rate: 0.00010818604815084685
train_loss: 7.172814714295023, val_loss: 2.4193464999105414
epochs 2/13
hidden none
Current Learning Rate: 0.00010818604815084685
train_loss: 1.3729884785289566, val_loss: 1.2155893905490052
epochs 3/13
hidden none
Current Learning Rate: 0.00010818604815084685
train_loss: 0.5802337454814537, val_loss: 0.1343311070665425
epochs 4/13
hidden none
Current Learning Rate: 0.00010818604815084685
train_loss: 0.11403047434766503, val_loss: 0.08809908615863499
epochs 5/13
hidden none
Current Learning Rate: 0.00010818604815084685
train_loss: 0.16224537870901473, val_loss: 0.16688933489369412
epochs 6/13
hidden none
Current Learning Rate: 0.00010818604815084685
train_loss: 0.07135108001895395, val_loss: 0.3096729320053961
epochs 7/13
hidden none
Current Learning R

[I 2024-02-27 02:30:49,236] Trial 7 finished with value: 0.22500808524191918 and parameters: {'d_model': 256, 'n_head_index': 0, 'batch_size': 61, 'epochs': 13, 'num_encoder_layers': 7, 'learning_rate': 0.00010818604815084685, 'lr_step_size': 23, 'gamma': 0.537088745159866}. Best is trial 4 with value: 0.0734780808294248.


Current Learning Rate: 0.00010818604815084685
train_loss: 0.17729017556419754, val_loss: 0.07548567556283053
Mean validation loss: 0.22500808524191918
Starting fold 1:
epochs 1/15
hidden none
Current Learning Rate: 0.0011934462110405563
train_loss: 31.23613765391649, val_loss: 0.029747356934582487
epochs 2/15
hidden none
Current Learning Rate: 0.0011934462110405563
train_loss: 1.0488197463838493, val_loss: 0.2534281824733697
epochs 3/15
hidden none
Current Learning Rate: 0.0011934462110405563
train_loss: 0.42025626338287897, val_loss: 0.23892513683029248
epochs 4/15
hidden none
Current Learning Rate: 0.0011934462110405563
train_loss: 0.08028040653934666, val_loss: 0.08816945932659448
epochs 5/15
hidden none
Current Learning Rate: 0.0011934462110405563
train_loss: 0.16594401211569124, val_loss: 0.16530217843897202
epochs 6/15
hidden none
Current Learning Rate: 0.0011934462110405563
train_loss: 0.07157091728101174, val_loss: 0.27472909911006105
epochs 7/15
hidden none
Current Learning Ra

[I 2024-02-27 02:39:40,678] Trial 8 finished with value: 0.12925736352756687 and parameters: {'d_model': 256, 'n_head_index': 3, 'batch_size': 73, 'epochs': 15, 'num_encoder_layers': 7, 'learning_rate': 0.0011934462110405563, 'lr_step_size': 17, 'gamma': 0.1783617153335648}. Best is trial 4 with value: 0.0734780808294248.


Current Learning Rate: 0.0011934462110405563
train_loss: 0.19372332575883155, val_loss: 0.08173220438115737
Mean validation loss: 0.12925736352756687
Starting fold 1:
epochs 1/14
hidden none
Current Learning Rate: 0.024604124607680532
train_loss: 33.116539442539214, val_loss: 0.921484398841858
epochs 2/14
hidden none
Current Learning Rate: 0.024604124607680532
train_loss: 0.43557685762643816, val_loss: 0.02071995735168457
epochs 3/14
hidden none
Current Learning Rate: 0.024604124607680532
train_loss: 0.17627300322055817, val_loss: 0.3889918953180313
epochs 4/14
hidden none
Current Learning Rate: 0.024604124607680532
train_loss: 0.07581988088786602, val_loss: 0.03798881489783525
epochs 5/14
hidden none
Current Learning Rate: 0.024604124607680532
train_loss: 0.10825934410095214, val_loss: 0.1806647941470146
epochs 6/14
hidden none
Current Learning Rate: 0.024604124607680532
train_loss: 0.03408559262752533, val_loss: 0.06141097322106361
epochs 7/14
hidden none
Current Learning Rate: 0.024

[I 2024-02-27 02:47:58,429] Trial 9 finished with value: 0.2905904050533926 and parameters: {'d_model': 384, 'n_head_index': 0, 'batch_size': 51, 'epochs': 14, 'num_encoder_layers': 7, 'learning_rate': 0.024604124607680532, 'lr_step_size': 17, 'gamma': 0.1843823402116315}. Best is trial 4 with value: 0.0734780808294248.


Current Learning Rate: 0.024604124607680532
train_loss: 0.14062711993853252, val_loss: 0.022990942979231477
Mean validation loss: 0.2905904050533926
Number of finished trials: 10
Best trial:
Value: 0.0734780808294248
Params:
d_model: 128
n_head_index: 5
batch_size: 87
epochs: 14
num_encoder_layers: 7
learning_rate: 3.6091250545812106e-06
lr_step_size: 27
gamma: 0.42172566463666394


In [38]:
trainer = ModelActioner(train_data, test_data, device)
model = trainer.train(trial.params)

epochs 1/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.060807398394407595
epochs 2/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.46659002203275174
epochs 3/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.03793568020029103
epochs 4/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.11351385889027049
epochs 5/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.038567652158877426
epochs 6/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.039905037824064495
epochs 7/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.028067764491938492
epochs 8/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.024724396241500097
epochs 9/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.023546854559989536
epochs 10/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.025048059170298717
epochs 11/14
Current Learning Rate: 3.6091250545812106e-06
train_loss: 0.023851761210928944


In [39]:
preds = trainer.test(trial.params)
preds = np.array(preds)

y_true = y_test[time_step:]
y_true = scaler_adj.inverse_transform(y_true.reshape(-1, 1)).flatten()
preds = scaler_adj.inverse_transform(preds.reshape(-1, 1)).flatten()


mse = mean_squared_error(y_true, preds)
print(f'Mean Squared Error: {mse:.4f}')

rmse = mean_squared_error(y_true, preds, squared=False)
print(f'Root Mean Squared Error: {rmse:.4f}')

r2 = r2_score(y_true, preds)
print(f'R² Score: {r2:.4f}')

mape = mean_absolute_percentage_error(y_true, preds)*100
print(f'mape Score: % {mape:.4f}')

test_loss: 0.011969605490371498
Mean Squared Error: 253.9194
Root Mean Squared Error: 15.9348
R² Score: -3.0231
mape Score: % 6.8879


In [40]:
len(preds)

173

In [41]:
len(y_true)

173

In [42]:
stock_price = stock_data["Adj Close"].iloc[test_index:]
stock_price=stock_price.reset_index()
stock_price=stock_price.drop(columns=["index"])
stock_price

Unnamed: 0,Adj Close
0,171.393173
1,172.010727
2,174.361450
3,174.471008
4,173.514786
...,...
168,182.447189
169,188.389618
170,191.315872
171,193.642899


In [43]:
signals = pd.DataFrame(preds, columns=['pred'])
signals["next_day"] = pd.DataFrame(y_true)
signals["today"] = stock_price
signals["Signal_Pred"] = (signals["today"] < signals["pred"]).astype(int)
signals["Signal_True"] = (signals["today"] < signals["next_day"]).astype(int)

signals

Unnamed: 0,pred,next_day,today,Signal_Pred,Signal_True
0,153.208679,171.393173,171.393173,0,0
1,153.102142,172.010727,172.010727,0,0
2,153.255234,174.361450,174.361450,0,1
3,153.222687,174.471008,174.471008,0,0
4,153.245010,173.514786,173.514786,0,0
...,...,...,...,...,...
168,179.178894,182.447189,182.447189,0,0
169,178.503036,188.389618,188.389618,0,0
170,178.243668,191.315872,191.315872,0,0
171,177.943817,193.642899,193.642899,0,0


In [44]:
signals["Signal_Pred"].value_counts()

Signal_Pred
0    136
1     37
Name: count, dtype: int64

In [45]:
signals["Date"] = date_test
signals

Unnamed: 0,pred,next_day,today,Signal_Pred,Signal_True,Date
0,153.208679,171.393173,171.393173,0,0,2023-05-16
1,153.102142,172.010727,172.010727,0,0,2023-05-17
2,153.255234,174.361450,174.361450,0,1,2023-05-18
3,153.222687,174.471008,174.471008,0,0,2023-05-19
4,153.245010,173.514786,173.514786,0,0,2023-05-22
...,...,...,...,...,...,...
168,179.178894,182.447189,182.447189,0,0,2024-01-17
169,178.503036,188.389618,188.389618,0,0,2024-01-18
170,178.243668,191.315872,191.315872,0,0,2024-01-19
171,177.943817,193.642899,193.642899,0,0,2024-01-22


In [46]:
stock_price = stock_data["Adj Close"].iloc[test_index:]
stock_price=stock_price.reset_index()
stock_price=stock_price.drop(columns=["index"])
stock_price

Unnamed: 0,Adj Close
0,171.393173
1,172.010727
2,174.361450
3,174.471008
4,173.514786
...,...
168,182.447189
169,188.389618
170,191.315872
171,193.642899


In [47]:
date_test["Date"] = date_test["Date"].dt.strftime('%Y-%m-%d')
date_test

Unnamed: 0,Date
0,2023-05-16
1,2023-05-17
2,2023-05-18
3,2023-05-19
4,2023-05-22
...,...
168,2024-01-17
169,2024-01-18
170,2024-01-19
171,2024-01-22


In [48]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.array(date_test).flatten(), y=stock_data["Adj Close"].iloc[test_index:], mode='lines', name='TSLA Stock Price'))

# Buy and sell signals
buy_signals = signals[signals['Signal_Pred'] == 1]
sell_signals = signals[signals['Signal_Pred'] == 0]

# Ensure that buy and sell prices are aligned with the signals by matching on the 'Date' column
buy_prices = stock_data[stock_data['Date'].isin(buy_signals['Date'])]["Adj Close"]
sell_prices = stock_data[stock_data['Date'].isin(sell_signals['Date'])]["Adj Close"]


# Plot buy signals
fig.add_trace(go.Scatter(
    x=buy_signals['Date'], 
    y=buy_prices, 
    mode='markers', 
    name='Buy', 
    marker=dict(color='green', size=10, symbol='triangle-up')
))


# Plot sell signals
fig.add_trace(go.Scatter(
    x=sell_signals['Date'], 
    y=sell_prices, 
    mode='markers', 
    name='Sell', 
    marker=dict(color='red', size=10, symbol='triangle-down')
))


# Update layout
fig.update_layout(
    title='Stock Price with Buy and Sell Signals',
    xaxis_title='Date',
    yaxis_title='Price',
    xaxis_rangeslider_visible=False,
    height = 700,
    width=1280
)

# Show the plot
pyo.iplot(fig)


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [49]:
trace_pred = go.Scatter(
    x=np.array(date_test).flatten(),
    y=signals['pred'],
    mode='lines+markers',
    name='Predicted'
)

trace_true = go.Scatter(
    x=np.array(date_test).flatten(),
    y=signals['next_day'],
    mode='lines+markers',
    name='Actual Next Day'
)

# Define the layout
layout = go.Layout(
    title='Predicted vs Actual Next Day Values',
    xaxis=dict(title='Index'),
    yaxis=dict(title='Value'),
    height=700
)

# Create the figure and add traces
fig = go.Figure(data=[trace_pred, trace_true], layout=layout)

# Show the figure
fig.show()