In [2]:
import FinanceDataReader as fdr
import torch
from torch import nn
import numpy as np
import time
from sklearn.preprocessing import MinMaxScaler

In [3]:
from easydict import EasyDict as edict

args = edict()
args.gpu = True
args.input_dim = 1
args.hidden_dim = 32
args.num_layers = 2
args.output_dim = 4
args.num_epochs = 100

device = 'cuda' if torch.cuda.is_available() and args.gpu else 'cpu'

In [4]:
company_name = '삼성전자'

In [5]:
df_kospi = fdr.StockListing('KOSPI')
ticker = str(df_kospi.loc[df_kospi['Name'] == company_name]['Symbol'].values[0])

In [6]:
mycompany = fdr.DataReader(ticker)
mycompany

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1997-11-08,761,809,761,810,320624,
1997-11-10,809,872,809,870,376400,0.074074
1997-11-11,857,888,823,836,314560,-0.039080
1997-11-12,831,831,791,804,304710,-0.038278
1997-11-13,822,836,798,799,273800,-0.006219
...,...,...,...,...,...,...
2021-12-06,75100,76700,74900,76300,16391250,0.009259
2021-12-07,76100,77700,75600,77400,19232453,0.014417
2021-12-08,78300,78600,77100,77400,21558340,0.000000
2021-12-09,77400,78200,77000,78200,21604528,0.010336


In [7]:
def classifyFluctuation(fluctuation):
    if fluctuation < -2.5:
        return 0
    elif fluctuation < 0:
        return 1
    elif fluctuation < 2.5:
        return 2
    else:
        return 3

In [8]:
price = mycompany[['Close']]
price['Fluctuation'] = price['Close'].div(price['Close'].shift(1)).apply(lambda x : classifyFluctuation((x - 1) * 100))

scaler = MinMaxScaler(feature_range=(-1, 1))
price['Close'] = scaler.fit_transform(price['Close'].values.reshape(-1,1))
price

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price['Fluctuation'] = price['Close'].div(price['Close'].shift(1)).apply(lambda x : classifyFluctuation((x - 1) * 100))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price['Close'] = scaler.fit_transform(price['Close'].values.reshape(-1,1))


Unnamed: 0_level_0,Close,Fluctuation
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1997-11-08,-0.995950,3
1997-11-10,-0.994622,3
1997-11-11,-0.995375,0
1997-11-12,-0.996083,0
1997-11-13,-0.996194,1
...,...,...
2021-12-06,0.674682,2
2021-12-07,0.699025,2
2021-12-08,0.699025,2
2021-12-09,0.716730,2


In [9]:
def split_data(stock, lookback):
    data_raw = stock.to_numpy() # convert to numpy array
    data = []
    
    # create all possible sequences of length seq_len
    for index in range(len(data_raw) - lookback): 
        data.append(data_raw[index: index + lookback])
    
    data = np.array(data)
    val_and_test_set_size = int(np.round(0.125 * data.shape[0]))
    train_set_size = data.shape[0] - 2 * (val_and_test_set_size)
   
    x_train = data[:train_set_size,:-1,:1]
    y_train = data[:train_set_size,-1,1:].squeeze()

    x_val = data[train_set_size:train_set_size + val_and_test_set_size,:-1,:1]
    y_val = data[train_set_size:train_set_size + val_and_test_set_size,-1,1:].squeeze()
    
    x_test = data[train_set_size + val_and_test_set_size:,:-1,:1]
    y_test = data[train_set_size + val_and_test_set_size:,-1,1:].squeeze()
    
    return [x_train, y_train, x_val, y_val, x_test, y_test]

lookback = 20 # choose sequence length
x_train, y_train, x_val, y_val, x_test, y_test = split_data(price, lookback)
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ',y_train.shape)
print('x_val.shape = ',x_val.shape)
print('y_val.shape = ',y_val.shape)
print('x_test.shape = ',x_test.shape)
print('y_test.shape = ',y_test.shape)

x_train.shape =  (4484, 19, 1)
y_train.shape =  (4484,)
x_val.shape =  (748, 19, 1)
y_val.shape =  (748,)
x_test.shape =  (748, 19, 1)
y_test.shape =  (748,)


In [10]:
x_train = torch.from_numpy(x_train).type(torch.Tensor)
x_val = torch.from_numpy(x_val).type(torch.Tensor)
x_test = torch.from_numpy(x_test).type(torch.Tensor)
y_train_lstm = torch.from_numpy(y_train).type(torch.Tensor).type(torch.LongTensor)
y_val_lstm = torch.from_numpy(y_val).type(torch.Tensor).type(torch.LongTensor)
y_test_lstm = torch.from_numpy(y_test).type(torch.Tensor).type(torch.LongTensor)

In [17]:
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :])
        return out

In [18]:
model = LSTM(input_dim=args.input_dim, hidden_dim=args.hidden_dim, output_dim=args.output_dim, num_layers=args.num_layers)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [20]:
hist = np.zeros(args.num_epochs)
start_time = time.time()
lstm = []
for t in range(args.num_epochs):
    model.train()
    optimizer.zero_grad()
    y_train_pred = model(x_train)
    loss = criterion(y_train_pred, y_train_lstm)
    hist[t] = loss.item()
    loss.backward()
    optimizer.step()
    pred = torch.argmax(y_train_pred.softmax(dim=1), dim=1)
    correct = pred.eq(y_train_lstm)
    print('[Epoch {}/{}] -> Train Loss: {:.4f}, Accuracy: {:.3f}'.format(t + 1, args.num_epochs, loss.item(), correct.sum().item() / y_train_lstm.size(0)))

    with torch.no_grad():
        model.eval()
        y_val_pred = model(x_val)
        loss = criterion(y_val_pred, y_val_lstm)
        pred = torch.argmax(y_val_pred.softmax(dim=1), dim=1)
        correct = pred.eq(y_val_lstm)
        print('[Epoch {}/{}] -> Validation Loss: {:.4f}, Accuracy: {:.3f}'.format(t + 1, args.num_epochs, loss.item(), correct.sum().item() / y_val_lstm.size(0)))
    
training_time = time.time()-start_time
print("Training time: {}".format(training_time))

[Epoch 1/100] -> Train Loss: 1.3230, Accuracy: 0.379
[Epoch 1/100] -> Validation Loss: 1.2202, Accuracy: 0.496
[Epoch 2/100] -> Train Loss: 1.2985, Accuracy: 0.379
[Epoch 2/100] -> Validation Loss: 1.1521, Accuracy: 0.496
[Epoch 3/100] -> Train Loss: 1.2778, Accuracy: 0.379
[Epoch 3/100] -> Validation Loss: 1.0888, Accuracy: 0.436
[Epoch 4/100] -> Train Loss: 1.2942, Accuracy: 0.355
[Epoch 4/100] -> Validation Loss: 1.1008, Accuracy: 0.496
[Epoch 5/100] -> Train Loss: 1.2784, Accuracy: 0.379
[Epoch 5/100] -> Validation Loss: 1.1271, Accuracy: 0.496
[Epoch 6/100] -> Train Loss: 1.2713, Accuracy: 0.379
[Epoch 6/100] -> Validation Loss: 1.1494, Accuracy: 0.496
[Epoch 7/100] -> Train Loss: 1.2739, Accuracy: 0.379
[Epoch 7/100] -> Validation Loss: 1.1578, Accuracy: 0.496
[Epoch 8/100] -> Train Loss: 1.2761, Accuracy: 0.379
[Epoch 8/100] -> Validation Loss: 1.1544, Accuracy: 0.496
[Epoch 9/100] -> Train Loss: 1.2753, Accuracy: 0.379
[Epoch 9/100] -> Validation Loss: 1.1440, Accuracy: 0.496
[

In [21]:
with torch.no_grad():
    model.eval()
    y_test_pred = model(x_test)
    pred = torch.argmax(y_test_pred.softmax(dim=1), dim=1)
    correct = pred.eq(y_test_lstm)
    print('Test Accuracy: {:.3f}'.format(correct.sum().item() / y_test_lstm.size(0)))


Test Accuracy: 0.475
