In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm 

from sklearn.preprocessing import MinMaxScaler



In [2]:
df = pd.read_csv('concat.csv', index_col=0)

In [3]:
df.drop(df.columns[-5:], axis = 1, inplace=True)

In [4]:
df.shape

(11044005, 8)

데이터 갯수 조건에 맞지 않는 Ticker 선별

1. 데이터 갯수가 4년치 미만인 Ticker는 제거

In [5]:
del_ticker_list = df['Ticker'].value_counts()[df['Ticker'].value_counts()<=252*4].index

In [6]:
del_ticker_index = df[df['Ticker'].isin(del_ticker_list)].index

In [7]:
df_new = df.drop(del_ticker_index) 

In [8]:
df_new.shape

(10221509, 8)

In [9]:
# 강사님의 윈도우 코드 복붙(이건 우리 입맛에 나중에 바꿔도 상관없음)
def my_window_data(df, target, window_size=20):
    X_list = []
    y_list = []
    for i in range(len(df) - window_size-1):
        X = df.iloc[i:i+window_size]
        y = df.iloc[i+window_size][target]
        X_list.append(np.array(X))      #[[] ,[] ,[] ,[]]
        y_list.append(np.array(y))       #[]   
    return  np.array(X_list), np.array(y_list).reshape(-1)

In [10]:
def per_ticker_tt_split(df, target,  size = 0.7, window_size=20):
    Ticker_train_dict = {}
    Ticker_test_dict = {}
    for t in tqdm(df.Ticker.unique()[:10]):
        scaler = MinMaxScaler()
        train_size = df[df['Ticker']==t].iloc[:int(len(df[df.Ticker==t])*size)].drop('Ticker', axis = 1)
        train_size['Datetime'] = pd.to_datetime(train_size['Datetime'])
        train_size.set_index('Datetime', inplace = True)
        scaler.fit(train_size)
        train_size = pd.DataFrame(columns = train_size.columns, data = scaler.transform(train_size))
        test_size = df[df['Ticker']==t].iloc[int(len(df[df.Ticker==t])*size):].drop('Ticker', axis = 1)
        test_size['Datetime'] = pd.to_datetime(test_size['Datetime'])
        test_size.set_index('Datetime', inplace = True)
        test_size = pd.DataFrame(columns = test_size.columns, data = scaler.transform(test_size))

        X_train, y_train = my_window_data(train_size, target, window_size)
        X_test , y_test  = my_window_data(test_size, target, window_size)
        Ticker_train_dict[f'{t}_X_train'], Ticker_train_dict[f'{t}_y_train'] = X_train, y_train
        Ticker_test_dict[f'{t}_X_test'], Ticker_test_dict[f'{t}_y_test'] = X_test , y_test
    return Ticker_train_dict, Ticker_test_dict

In [11]:
train_dict, test_dict = per_ticker_tt_split(df = df_new, target='Adj Close')

100%|██████████| 10/10 [00:13<00:00,  1.38s/it]


In [12]:
import torch
from torch.utils.data import Dataset

class StockDataset(Dataset):
    def __init__(self, Ticker, mode = 'train'):
        self.Ticker = Ticker
        if mode == 'train':
            self.X = torch.tensor(train_dict[f'{Ticker}_X_{mode}']).float()
            self.y = torch.tensor(train_dict[f'{Ticker}_y_{mode}']).float()
        elif mode == 'test':
            self.X = torch.tensor(test_dict[f'{Ticker}_X_{mode}']).float()
            self.y = torch.tensor(test_dict[f'{Ticker}_y_{mode}']).float()
    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, i): 
        return self.X[i], self.y[i]

In [13]:
from torch import nn
class Net(nn.Module):
    # # 기본변수, layer를 초기화해주는 생성자
    def __init__(self, input_dim, hidden_dim, seq_len, output_dim, layers):
        super(Net, self).__init__()
        self.hidden_dim = hidden_dim
        self.seq_len = seq_len
        self.output_dim = output_dim
        self.layers = layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=layers,
                            # dropout = 0.1,
                            batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim, bias = True) 
        
    # 학습 초기화를 위한 함수
    def reset_hidden_state(self): 
        self.hidden = (
                torch.zeros(self.layers, self.seq_len, self.hidden_dim),
                torch.zeros(self.layers, self.seq_len, self.hidden_dim))
    
    # 예측을 위한 함수
    def forward(self, x):
        x, _status = self.lstm(x)
        x = self.fc(x[:, -1])
        return x

In [14]:
feature_num = 6
hidden_dim = 10 
output_dim = 1 
device = 'cpu'

In [15]:
import torch
from torch.utils.data import DataLoader # 데이터로더


In [16]:
def training(epoch, model, train_loader ):
    running_loss = 0

    model.train()
    for idx, (X, y) in enumerate(train_loader):
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader. dataset)
    return epoch_loss
    
def testing(epoch, model,valid_loader):
    valid_running_loss = 0
    model.eval()
    with torch.no_grad():
        for idx, (X, y) in enumerate(valid_loader):
             X, y = X.to(device), y.to(device)
             y_pred = model(X)
             loss = loss_fn(y_pred, y)
             valid_running_loss += loss.item()
    epoch_valid_loss = valid_running_loss / len(valid_loader.dataset)
    # print(f'epoch:{epoch},\nloss:{round(epoch_loss, 3)},valid_loss:{round(epoch_valid_loss, 3)}')
    return epoch_valid_loss

In [17]:
model = Net(feature_num, hidden_dim, 20, output_dim, 1).to(device)  

In [18]:
ticker_list = list(df_new['Ticker'].unique())

In [19]:
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

In [20]:
epochs = 100
pbar = tqdm(range(epochs))
total_loss = []
total_val_loss = []
for epoch in pbar:
        loss_list = []
        val_loss_list = []
        for ticker in ticker_list[:10]:
                train_dataset = StockDataset(ticker, mode = 'train')
                valid_dataset = StockDataset(ticker, mode = 'test')
                batch = 20
                loss_fn = nn.MSELoss().to(device)
                train_loader = DataLoader(train_dataset,
                                batch_size = batch,
                                shuffle = False,  
                                drop_last = True)
                valid_loader = DataLoader(valid_dataset,
                                batch_size = batch,
                                shuffle = False,  
                                drop_last = True) 
                
                loss = training(epoch, model, train_loader)
                valid_loss = testing(epoch, model, valid_loader)
                loss_list.append(loss)
                val_loss_list.append(valid_loss)
        print(f'Epoch{str(epoch).zfill(3)}\t\t loss : {round((np.mean(loss_list)), 3)}, valid loss : {round(np.mean(val_loss_list), 3)}')
        total_loss.append(np.mean(loss_list))
        total_val_loss.append(np.mean(val_loss_list))

  return F.mse_loss(input, target, reduction=self.reduction)
  1%|          | 1/100 [00:04<07:40,  4.66s/it]

Epoch000		 loss : 0.001, valid loss : 0.09


  2%|▏         | 2/100 [00:09<07:31,  4.60s/it]

Epoch001		 loss : 0.0, valid loss : 0.069


  3%|▎         | 3/100 [00:13<07:19,  4.53s/it]

Epoch002		 loss : 0.0, valid loss : 0.064


  4%|▍         | 4/100 [00:18<07:09,  4.48s/it]

Epoch003		 loss : 0.0, valid loss : 0.062


  5%|▌         | 5/100 [00:22<07:02,  4.45s/it]

Epoch004		 loss : 0.0, valid loss : 0.061


  6%|▌         | 6/100 [00:26<06:56,  4.43s/it]

Epoch005		 loss : 0.0, valid loss : 0.06


  7%|▋         | 7/100 [00:31<06:51,  4.42s/it]

Epoch006		 loss : 0.0, valid loss : 0.06


  8%|▊         | 8/100 [00:35<06:46,  4.42s/it]

Epoch007		 loss : 0.0, valid loss : 0.059


  9%|▉         | 9/100 [00:40<06:41,  4.41s/it]

Epoch008		 loss : 0.0, valid loss : 0.059


 10%|█         | 10/100 [00:44<06:36,  4.40s/it]

Epoch009		 loss : 0.0, valid loss : 0.058


 11%|█         | 11/100 [00:48<06:31,  4.40s/it]

Epoch010		 loss : 0.0, valid loss : 0.058


 12%|█▏        | 12/100 [00:53<06:27,  4.40s/it]

Epoch011		 loss : 0.0, valid loss : 0.058


 13%|█▎        | 13/100 [00:57<06:22,  4.39s/it]

Epoch012		 loss : 0.0, valid loss : 0.057


 14%|█▍        | 14/100 [01:02<06:17,  4.39s/it]

Epoch013		 loss : 0.0, valid loss : 0.057


 15%|█▌        | 15/100 [01:06<06:13,  4.39s/it]

Epoch014		 loss : 0.0, valid loss : 0.057


 16%|█▌        | 16/100 [01:10<06:10,  4.41s/it]

Epoch015		 loss : 0.0, valid loss : 0.057


 17%|█▋        | 17/100 [01:15<06:05,  4.41s/it]

Epoch016		 loss : 0.0, valid loss : 0.056


 18%|█▊        | 18/100 [01:19<06:01,  4.40s/it]

Epoch017		 loss : 0.0, valid loss : 0.056


 19%|█▉        | 19/100 [01:24<05:56,  4.40s/it]

Epoch018		 loss : 0.0, valid loss : 0.056


 20%|██        | 20/100 [01:28<05:51,  4.40s/it]

Epoch019		 loss : 0.0, valid loss : 0.056


 21%|██        | 21/100 [01:32<05:47,  4.40s/it]

Epoch020		 loss : 0.0, valid loss : 0.055


 22%|██▏       | 22/100 [01:37<05:42,  4.39s/it]

Epoch021		 loss : 0.0, valid loss : 0.055


 23%|██▎       | 23/100 [01:41<05:37,  4.39s/it]

Epoch022		 loss : 0.0, valid loss : 0.055


 24%|██▍       | 24/100 [01:45<05:33,  4.39s/it]

Epoch023		 loss : 0.0, valid loss : 0.055


 25%|██▌       | 25/100 [01:50<05:28,  4.39s/it]

Epoch024		 loss : 0.0, valid loss : 0.055


 26%|██▌       | 26/100 [01:54<05:24,  4.39s/it]

Epoch025		 loss : 0.0, valid loss : 0.055


 27%|██▋       | 27/100 [01:59<05:20,  4.38s/it]

Epoch026		 loss : 0.0, valid loss : 0.054


 28%|██▊       | 28/100 [02:03<05:15,  4.38s/it]

Epoch027		 loss : 0.0, valid loss : 0.054


 29%|██▉       | 29/100 [02:07<05:11,  4.39s/it]

Epoch028		 loss : 0.0, valid loss : 0.054


 30%|███       | 30/100 [02:12<05:06,  4.38s/it]

Epoch029		 loss : 0.0, valid loss : 0.054


 31%|███       | 31/100 [02:16<05:02,  4.38s/it]

Epoch030		 loss : 0.0, valid loss : 0.054


 32%|███▏      | 32/100 [02:21<04:58,  4.38s/it]

Epoch031		 loss : 0.0, valid loss : 0.054


 33%|███▎      | 33/100 [02:25<04:53,  4.38s/it]

Epoch032		 loss : 0.0, valid loss : 0.054


 34%|███▍      | 34/100 [02:29<04:48,  4.38s/it]

Epoch033		 loss : 0.0, valid loss : 0.054


 35%|███▌      | 35/100 [02:34<04:44,  4.38s/it]

Epoch034		 loss : 0.0, valid loss : 0.053


 36%|███▌      | 36/100 [02:38<04:40,  4.38s/it]

Epoch035		 loss : 0.0, valid loss : 0.053


 37%|███▋      | 37/100 [02:42<04:35,  4.38s/it]

Epoch036		 loss : 0.0, valid loss : 0.053


 38%|███▊      | 38/100 [02:47<04:31,  4.38s/it]

Epoch037		 loss : 0.0, valid loss : 0.053


 39%|███▉      | 39/100 [02:51<04:27,  4.38s/it]

Epoch038		 loss : 0.0, valid loss : 0.053


 40%|████      | 40/100 [02:56<04:22,  4.38s/it]

Epoch039		 loss : 0.0, valid loss : 0.053


 41%|████      | 41/100 [03:00<04:18,  4.38s/it]

Epoch040		 loss : 0.0, valid loss : 0.052


 42%|████▏     | 42/100 [03:04<04:13,  4.38s/it]

Epoch041		 loss : 0.0, valid loss : 0.052


 43%|████▎     | 43/100 [03:09<04:09,  4.37s/it]

Epoch042		 loss : 0.0, valid loss : 0.052


 44%|████▍     | 44/100 [03:13<04:05,  4.38s/it]

Epoch043		 loss : 0.0, valid loss : 0.052


 45%|████▌     | 45/100 [03:17<04:00,  4.38s/it]

Epoch044		 loss : 0.0, valid loss : 0.052


 46%|████▌     | 46/100 [03:22<03:56,  4.38s/it]

Epoch045		 loss : 0.0, valid loss : 0.051


 47%|████▋     | 47/100 [03:26<03:52,  4.38s/it]

Epoch046		 loss : 0.0, valid loss : 0.051


 48%|████▊     | 48/100 [03:31<03:49,  4.40s/it]

Epoch047		 loss : 0.0, valid loss : 0.051


 49%|████▉     | 49/100 [03:37<04:16,  5.03s/it]

Epoch048		 loss : 0.0, valid loss : 0.051


 50%|█████     | 50/100 [03:43<04:22,  5.26s/it]

Epoch049		 loss : 0.0, valid loss : 0.051


 51%|█████     | 51/100 [09:08<1:22:41, 101.27s/it]

Epoch050		 loss : 0.0, valid loss : 0.05


 52%|█████▏    | 52/100 [09:14<58:04, 72.59s/it]   

Epoch051		 loss : 0.0, valid loss : 0.05


 53%|█████▎    | 53/100 [11:31<1:11:55, 91.81s/it]

Epoch052		 loss : 0.0, valid loss : 0.05


 54%|█████▍    | 54/100 [12:29<1:02:39, 81.73s/it]

Epoch053		 loss : 0.0, valid loss : 0.05


 55%|█████▌    | 55/100 [12:34<44:10, 58.91s/it]  

Epoch054		 loss : 0.0, valid loss : 0.05


 56%|█████▌    | 56/100 [14:20<53:29, 72.95s/it]

Epoch055		 loss : 0.0, valid loss : 0.049


 57%|█████▋    | 57/100 [15:19<49:13, 68.68s/it]

Epoch056		 loss : 0.0, valid loss : 0.049


 58%|█████▊    | 58/100 [33:03<4:17:01, 367.19s/it]

Epoch057		 loss : 0.0, valid loss : 0.049


 59%|█████▉    | 59/100 [49:09<6:13:48, 547.04s/it]

Epoch058		 loss : 0.0, valid loss : 0.049


 60%|██████    | 60/100 [49:15<4:16:23, 384.59s/it]

Epoch059		 loss : 0.0, valid loss : 0.049


 61%|██████    | 61/100 [1:04:39<5:55:07, 546.34s/it]

Epoch060		 loss : 0.0, valid loss : 0.049


 62%|██████▏   | 62/100 [1:21:56<7:19:22, 693.76s/it]

Epoch061		 loss : 0.0, valid loss : 0.049


 63%|██████▎   | 63/100 [1:39:51<8:18:11, 807.89s/it]

Epoch062		 loss : 0.0, valid loss : 0.049


 64%|██████▍   | 64/100 [1:39:56<5:40:18, 567.18s/it]

Epoch063		 loss : 0.0, valid loss : 0.048


 65%|██████▌   | 65/100 [1:40:02<3:52:33, 398.68s/it]

Epoch064		 loss : 0.0, valid loss : 0.048


 66%|██████▌   | 66/100 [1:57:13<5:33:26, 588.42s/it]

Epoch065		 loss : 0.0, valid loss : 0.048


 67%|██████▋   | 67/100 [2:14:32<6:37:57, 723.55s/it]

Epoch066		 loss : 0.0, valid loss : 0.048


 68%|██████▊   | 68/100 [2:16:40<4:50:39, 544.98s/it]

Epoch067		 loss : 0.0, valid loss : 0.048


 69%|██████▉   | 69/100 [2:16:46<3:18:00, 383.25s/it]

Epoch068		 loss : 0.0, valid loss : 0.048


 70%|███████   | 70/100 [2:16:52<2:15:00, 270.03s/it]

Epoch069		 loss : 0.0, valid loss : 0.048


 71%|███████   | 71/100 [2:16:57<1:32:10, 190.71s/it]

Epoch070		 loss : 0.0, valid loss : 0.048


 72%|███████▏  | 72/100 [2:17:03<1:03:06, 135.23s/it]

Epoch071		 loss : 0.0, valid loss : 0.048


 73%|███████▎  | 73/100 [2:17:09<43:22, 96.38s/it]   

Epoch072		 loss : 0.0, valid loss : 0.048


 74%|███████▍  | 74/100 [2:17:14<29:57, 69.15s/it]

Epoch073		 loss : 0.0, valid loss : 0.048


 75%|███████▌  | 75/100 [2:17:20<20:52, 50.08s/it]

Epoch074		 loss : 0.0, valid loss : 0.047


 76%|███████▌  | 76/100 [2:17:26<14:41, 36.74s/it]

Epoch075		 loss : 0.0, valid loss : 0.047


 77%|███████▋  | 77/100 [2:17:31<10:30, 27.40s/it]

Epoch076		 loss : 0.0, valid loss : 0.047


 78%|███████▊  | 78/100 [2:17:37<07:39, 20.87s/it]

Epoch077		 loss : 0.0, valid loss : 0.047


 79%|███████▉  | 79/100 [2:17:42<05:42, 16.29s/it]

Epoch078		 loss : 0.0, valid loss : 0.047


 80%|████████  | 80/100 [2:17:48<04:21, 13.09s/it]

Epoch079		 loss : 0.0, valid loss : 0.047


 81%|████████  | 81/100 [2:17:54<03:25, 10.84s/it]

Epoch080		 loss : 0.0, valid loss : 0.047


 82%|████████▏ | 82/100 [2:17:59<02:46,  9.27s/it]

Epoch081		 loss : 0.0, valid loss : 0.047


 83%|████████▎ | 83/100 [2:18:05<02:18,  8.17s/it]

Epoch082		 loss : 0.0, valid loss : 0.047


 84%|████████▍ | 84/100 [2:18:10<01:58,  7.40s/it]

Epoch083		 loss : 0.0, valid loss : 0.047


 85%|████████▌ | 85/100 [2:18:16<01:42,  6.86s/it]

Epoch084		 loss : 0.0, valid loss : 0.047


 86%|████████▌ | 86/100 [2:18:22<01:30,  6.48s/it]

Epoch085		 loss : 0.0, valid loss : 0.047


 87%|████████▋ | 87/100 [2:18:27<01:20,  6.22s/it]

Epoch086		 loss : 0.0, valid loss : 0.047


 88%|████████▊ | 88/100 [2:18:33<01:12,  6.04s/it]

Epoch087		 loss : 0.0, valid loss : 0.047


 89%|████████▉ | 89/100 [2:18:38<01:05,  5.91s/it]

Epoch088		 loss : 0.0, valid loss : 0.047


 90%|█████████ | 90/100 [2:18:44<00:58,  5.84s/it]

Epoch089		 loss : 0.0, valid loss : 0.047


 91%|█████████ | 91/100 [2:18:50<00:51,  5.77s/it]

Epoch090		 loss : 0.0, valid loss : 0.047


 92%|█████████▏| 92/100 [2:18:55<00:45,  5.72s/it]

Epoch091		 loss : 0.0, valid loss : 0.047


 93%|█████████▎| 93/100 [2:19:01<00:39,  5.69s/it]

Epoch092		 loss : 0.0, valid loss : 0.046


 94%|█████████▍| 94/100 [2:19:07<00:33,  5.66s/it]

Epoch093		 loss : 0.0, valid loss : 0.046


 95%|█████████▌| 95/100 [2:19:12<00:28,  5.64s/it]

Epoch094		 loss : 0.0, valid loss : 0.046


 96%|█████████▌| 96/100 [2:19:18<00:22,  5.63s/it]

Epoch095		 loss : 0.0, valid loss : 0.046


 97%|█████████▋| 97/100 [2:19:23<00:16,  5.63s/it]

Epoch096		 loss : 0.0, valid loss : 0.046


 98%|█████████▊| 98/100 [2:19:29<00:11,  5.62s/it]

Epoch097		 loss : 0.0, valid loss : 0.046


 99%|█████████▉| 99/100 [2:19:35<00:05,  5.62s/it]

Epoch098		 loss : 0.0, valid loss : 0.046


100%|██████████| 100/100 [2:19:40<00:00, 83.81s/it]

Epoch099		 loss : 0.0, valid loss : 0.046



