In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn

from torch.autograd import Variable 
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import numpy as np
import pandas as pd
import time
import math
import matplotlib.pyplot as plt

from src.data_loader import *
from src.index import *
from src.ticker_selection import *


from sklearn.preprocessing import MinMaxScaler

In [2]:
aapl = DataGenerator('db', 'day').data_search('AAPL')

In [3]:
aapl.shape

(7534, 6)

In [4]:
aapl['rsi'] = rsi(aapl)
aapl['macd'] = macd(aapl)['macd']
aapl[['en_center', 'en_ub', 'en_lb']] = envelope(aapl)
aapl[['bo_center', 'bo_ub', 'bo_lb']] = bollinger(aapl)
aapl[['slow_k', 'slow_d']] = stochastic(aapl)
aapl['SMA(120)'] = sma(aapl,120)
aapl['SMA(60)'] = sma(aapl,60)
aapl['SMA(20)'] = sma(aapl,20)
aapl['SMA(5)']= sma(aapl,5)

In [5]:
aapl = stock_standard(aapl).calculator(standard=9)

In [6]:
aapl.drop('Standard', axis = 1, inplace = True)

In [21]:
aapl.to_csv('aapl.csv')

In [7]:
def window_slice(data, target,time_steps, for_periods, split_size = 0.8): 
    """
    input:
     data: 날짜를 인덱스로 가지는 주식가격(Adj Close) 데이터
    output:
     X_train, y_train: 2013/1/1부터 2018-12/31까지의 데이터
     X_test : 2019년 동안의 데이터 
    time_steps: # input 데이터의 time steps
    for_periods: # output 데이터의 time steps
    """
    X = [] 
    y = [] 

    for i in range(time_steps, len(data.values) - 1):
        X.append(data.values[i-time_steps:i,:])
        y.append(data[target].values[i:i+for_periods])
    X, y = np.array(X), np.array(y)

    X_train = Variable(torch.Tensor(X[:int(split_size * len(X))] ))
    X_train = torch.reshape(X_train,(X_train.shape[0], X_train.shape[1], X_train.shape[2]))

    X_test =  Variable(torch.Tensor(X[int(split_size * len(X)):] ))
    X_test = torch.reshape(X_test,(X_test.shape[0], X_test.shape[1], X_test.shape[2]))
    
    y_train = Variable(torch.Tensor(y[:int(split_size * len(y))]))
    y_train = torch.reshape(y_train,(y_train.shape[0],y_train.shape[1], 1 ))

    y_test = Variable(torch.Tensor(y[int(split_size * len(y)):]))
    y_test = torch.reshape(y_test,(y_test.shape[0],y_train.shape[1], 1))


    return X_train, X_test, y_train, y_test 

In [8]:
X_train, X_test, y_train, y_test  = window_slice(aapl, 'Adj Close', 5, 1)

In [9]:
X_train.shape

torch.Size([5927, 5, 29])

In [10]:
class LSTM1(nn.Module):
  def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
    super(LSTM1, self).__init__()
    self.num_classes = num_classes #number of classes
    self.num_layers = num_layers #number of layers
    self.input_size = input_size #input size
    self.hidden_size = hidden_size #hidden state
    self.seq_length = seq_length #sequence length
 
    self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                      num_layers=num_layers, batch_first=True) #lstm
    self.fc_1 =  nn.Linear(hidden_size, 128) #fully connected 1
    self.fc = nn.Linear(128, num_classes) #fully connected last layer

    self.relu = nn.ReLU() 

  def forward(self,x):
    h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))#.to(device) #hidden state
    c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))#.to(device) #internal state   
    # Propagate input through LSTM

    output, (hn, cn) = self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
   
    hn = hn.view(-1, self.hidden_size) #reshaping the data for Dense layer next
    out = self.relu(hn)
    out = self.fc_1(out) #first Dense
    out = self.relu(out) #relu
    out = self.fc(out) #Final Output
   
    return out

In [11]:
aapl

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,rsi,macd,en_center,en_ub,...,SMA(5),s1,s2,s3,s4,s5,s6,s7,s8,s9
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1993-05-21,0.524554,0.527902,0.506696,0.513393,0.423102,148198400.0,64.511646,0.01,0.386439,0.405761,...,0.419055,0.0,1.0,0.0,1.0,0,0,1.0,1.0,0.0
1993-05-24,0.506696,0.524554,0.506696,0.514509,0.424022,150315200.0,64.877338,0.01,0.386642,0.405974,...,0.421815,0.0,1.0,0.0,0.0,0,0,1.0,0.0,0.0
1993-05-25,0.506696,0.513393,0.497768,0.503348,0.414824,180723200.0,58.396195,0.01,0.386550,0.405877,...,0.423102,0.0,1.0,0.0,1.0,0,0,1.0,0.0,0.0
1993-05-26,0.500000,0.515625,0.494420,0.515625,0.424942,121564800.0,62.798638,0.01,0.386734,0.406071,...,0.423838,0.0,1.0,0.0,0.0,0,0,1.0,1.0,0.0
1993-05-27,0.515625,0.522321,0.511161,0.513393,0.423102,197288000.0,61.523974,0.01,0.387083,0.406437,...,0.421998,0.0,1.0,0.0,0.0,0,0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-10-24,147.190002,150.229996,146.000000,149.449997,149.449997,75981900.0,54.436786,-1.88,153.653999,161.336699,...,145.544000,0.0,0.0,0.0,1.0,0,1,0.0,1.0,0.0
2022-10-25,150.089996,152.490005,149.360001,152.339996,152.339996,74732290.0,58.207938,-1.12,153.236999,160.898849,...,147.262000,0.0,0.0,0.0,0.0,0,1,1.0,1.0,0.0
2022-10-25,150.089996,152.490005,149.360001,152.339996,152.339996,74732300.0,58.207938,-0.51,152.823199,160.464359,...,148.957999,0.0,1.0,0.0,0.0,0,1,0.0,0.0,0.0
2022-10-26,150.960007,151.990005,148.039993,149.350006,149.350006,88194300.0,52.949432,-0.27,152.319199,159.935159,...,150.150000,0.0,1.0,0.0,0.0,0,0,0.0,0.0,0.0


In [12]:
[5927, 5, 29]

[5927, 5, 29]

In [13]:
from torch.utils.data import DataLoader, Dataset
class windowDataset(Dataset):
    def __init__(self, data, target, input_window=5, output_window=1, stride=1):
        L = data.shape[0]
        num_samples = (L - input_window - output_window) // stride + 1

        X = [] 
        y = [] 

        for i in range(input_window, len(data.values) - 1):
            X.append(data.values[i-stride:i,:])
            y.append(data[target].values[i:i+stride])
        X, y = np.array(X), np.array(y)
        X = X.reshape(X.shape[0], X.shape[1], X.shape[2])#.transpose((1,0,2))
        y = y.reshape(y.shape[0],y.shape[1], 1 )#.transpose((1,0,2))

        self.x = X
        self.y = y
        
        self.len = len(X)
    def __getitem__(self, i):
        return self.x[i], self.y[i]
    def __len__(self):
        return self.len

In [14]:
train_dataset = windowDataset(aapl, 'Adj Close')
train_loader = DataLoader(train_dataset, batch_size=64)

In [15]:
class TFModel(nn.Module):
    def __init__(self,iw, ow, d_model, nhead, nlayers, dropout=0.5):
        super(TFModel, self).__init__()
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=nlayers) 
        self.pos_encoder = PositionalEncoding(d_model, dropout)

        self.encoder = nn.Sequential(
            nn.Linear(29, d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, d_model)
        )
        
        self.linear =  nn.Sequential(
            nn.Linear(d_model, d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, 1)
        )

        self.linear2 = nn.Sequential(
            nn.Linear(iw, (iw+ow)//2),
            nn.ReLU(),
            nn.Linear((iw+ow)//2, ow)
        ) 

    def generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def forward(self, src, srcmask):
        # print(src.shape)
        src = self.encoder(src)
        # print(src.shape)
        src = self.pos_encoder(src)
        # print(src.shape)
        output = self.transformer_encoder(src.transpose(0,1), srcmask).transpose(0,1)
        # print(output.shape)
        output = self.linear(output)[:,:,0]
        # print(output.shape)
        # output = self.linear2(output)
        # print(output.shape)
        return output

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

def gen_attention_mask(x):
    mask = torch.eq(x, 0)
    return mask

In [16]:
X, y = next(iter(train_loader))

In [17]:
X.shape

torch.Size([64, 1, 29])

In [18]:
y.shape

torch.Size([64, 1, 1])

In [20]:
device = torch.device("cpu")
lr = 1e-4
model = TFModel(2, 5, 512, 8, 4, 0.1).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

epoch = 500
model.train()
progress = tqdm(range(epoch))
for i in progress:
    batchloss = 0.0
    for (inputs, outputs) in train_loader:
        optimizer.zero_grad()
        src_mask = model.generate_square_subsequent_mask(inputs.shape[1]).to(device)
        result = model(inputs.float().to(device),  src_mask)
        loss = criterion(result, outputs[:,:,0].float().to(device))
        loss.backward()
        optimizer.step()
        batchloss += loss
    progress.set_description("loss: {:0.6f}".format(batchloss.cpu().item() / len(train_loader)))

loss: 1575.137258:  13%|█▎        | 66/500 [08:00<52:37,  7.27s/it] 


KeyboardInterrupt: 