In [1]:
import os
import time
import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from copy import deepcopy

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from torch.autograd import Variable
from tqdm import tqdm_notebook
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [2]:
def load_files(path):
    
    names = glob.glob(path)
    file_list = []
    name_list = []
    for i, name in enumerate(names):
        assert len(name) == 22
        name_list.append([name[12:18]])
        sub = pd.read_csv(name)
        file_list.append(sub)
        
    return file_list, name_list

In [3]:
def scailing(data):
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace = True)
    
    df.loc[df['Close'] > 2 * df['Open'], 'Close'] = df.loc[df['Close'] > 2 * df['Open'], 'Close'] / 10
    
    X = data.iloc[:-1, 1:]
    y = data.iloc[1:, :1]

    global ms, ss, train_cnt
    
    ms = MinMaxScaler()
    ss = StandardScaler()

    X_ss = ss.fit_transform(X)
    y_ms = ms.fit_transform(y)

    ratios = [.8, .2]

    train_cnt = int(len(data) * ratios[0])
    test_cnt = int(len(data) * ratios[1])

    X_train = X_ss[:train_cnt, :]
    X_test = X_ss[train_cnt:, :]

    y_train = y_ms[:train_cnt, :]
    y_test = y_ms[train_cnt:, :]

    X_train_tensors = Variable(torch.Tensor(X_train))
    X_test_tensors = Variable(torch.Tensor(X_test))

    y_train_tensors = Variable(torch.Tensor(y_train))
    y_test_tensors = Variable(torch.Tensor(y_test))

    X_train_tensors_f = torch.reshape(X_train_tensors, (X_train_tensors.shape[0], 1, X_train_tensors.shape[1]))
    X_test_tensors_f = torch.reshape(X_test_tensors, (X_test_tensors.shape[0], 1, X_test_tensors.shape[1]))
    
    return X_train_tensors_f, X_test_tensors_f, y_train_tensors, y_test_tensors

In [4]:
class LSTM(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(LSTM, self).__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length
        
        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size, 
                           num_layers = num_layers, batch_first = True)
        self.fc_1 = nn.Linear(hidden_size, 128)
        self.fc = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        output, (hn, cn) = self.lstm(x, (h_0, c_0))
        hn = hn.view(-1, self.hidden_size)
        out = self.relu(hn)
        out = self.fc_1(out)
        out = self.relu(out)
        out = self.fc(out)
        return out

In [5]:
class EarlyStopping:
    def __init__(self, patience=5):
        self.loss = np.inf
        self.patience = 0
        self.patience_limit = patience
        
    def step(self, loss):
        if self.loss > loss:
            self.loss = loss
            self.patience = 0
        else:
            self.patience += 1
    
    def is_stop(self):
        return self.patience >= self.patience_limit

In [6]:
def training(X_train_tensors_f, y_train_tensors):
    num_epochs = 20000
    learning_rate = 0.001

    input_size = 5
    hidden_size = 2
    num_layers = 1

    num_classes = 1
    model = LSTM(num_classes, input_size, hidden_size, num_layers, X_train_tensors_f.shape[1])

    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

    early_stop = EarlyStopping(patience=20)

    for epoch in range(num_epochs):
        outputs = model.forward(X_train_tensors_f)
        optimizer.zero_grad()
        loss = criterion(outputs, y_train_tensors)
        early_stop.step(loss.item())

        loss.backward()

        optimizer.step()

        if early_stop.is_stop():
            break

    return model, epoch

In [7]:
def predict(df, model):
    
    df_x_ss = ss.fit_transform(df.iloc[:, 1:])
    df_y_ms = ms.fit_transform(df.iloc[1:, :1])

    df_x_ss = Variable(torch.Tensor(df_x_ss))
    df_y_ms = Variable(torch.Tensor(df_y_ms))
    df_x_ss = torch.reshape(df_x_ss, (df_x_ss.shape[0], 1, df_x_ss.shape[1]))

    train_predict = model(df_x_ss)
    predicted = train_predict.data.numpy()
    label_y = df_y_ms.data.numpy()

    predicted = ms.inverse_transform(predicted)
    label_y = ms.inverse_transform(label_y)
    
    return predicted, label_y

In [8]:
def plot_chart(predictd, label_y):
    plt.figure(figsize = (20, 10))
    plt.axvline(x = train_cnt, c = 'r', linestyle = '--')

    plt.plot(predicted, label = 'Predicted Data')
    plt.plot(label_y, label = 'Actual Data')
    plt.title('Time-Series Prediction')
    plt.legend()
    plt.show()

In [9]:
def MAE(predicted, label_y):
    return int(np.mean(np.abs(predicted[:-1] - label_y)))

In [10]:
def Change(predicted, label_y):
    return int(predicted[-1] - predicted[-2])

In [11]:
def save_values(model, predicted, epoch, mae, change):
    PATH = f"../../Model/{name_list[i][0]}_{epoch}.pth"
    torch.save(model.state_dict(), PATH)
    pd.DataFrame(predicted).to_csv(f"../../Predict/MAE/{mae}_{name_list[i][0]}.csv")
    pd.DataFrame(predicted).to_csv(f"../../Predict/Change/{change}_{name_list[i][0]}.csv")

In [37]:
file_list, name_list = load_files('../../Data2/*.csv')

i = 769

for file in file_list:
    df = file_list[i]
    X_train_tensors_f, X_test_tensors_f, y_train_tensors, y_test_tensors = scailing(df)
    model, epoch = training(X_train_tensors_f, y_train_tensors)
    predicted, label_y = predict(df, model)
    #plot_chart(predicted, label_y)
    mae = MAE(predicted, label_y)
    change = Change(predicted, label_y)
    save_values(model, predicted, epoch, mae, change)
    print(i)
    i += 1

IndexError: list index out of range