In [39]:
import numpy as np
import pandas as pd
import tensorflow as tf

## 데이터 불러오기

In [88]:
# Load the preprocessed KOSPI50 daily chart CSV from a path relative to the notebook.
# NOTE(review): the displayed frame contains an `Unnamed: 0` column that counts rows
# 0, 1, 2, ... -- presumably the index written by an earlier `to_csv`; confirm and
# consider reading with `index_col=0` so it is not treated as a feature downstream.
kospi50 = pd.read_csv("./data/preprocessed_kospi50_daily_stock_chart.csv")
kospi50

Unnamed: 0,code,name,date,open,high,low,close,z_d5,z_d10,z_d15,z_d20,z_d25,z_d30
0,U182,kospi50_index,20000302,-0.026858,0.008017,-0.049241,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,U182,kospi50_index,20000303,0.013774,0.015986,-0.018534,0.006107,-0.003035,-0.003035,-0.003035,-0.003035,-0.003035,-0.003035
2,U182,kospi50_index,20000306,0.009723,0.015594,-0.006075,0.005048,-0.005362,-0.005362,-0.005362,-0.005362,-0.005362,-0.005362
3,U182,kospi50_index,20000307,-0.002268,0.000024,-0.019956,-0.003408,-0.001470,-0.001470,-0.001470,-0.001470,-0.001470,-0.001470
4,U182,kospi50_index,20000308,-0.024744,0.003897,-0.039527,0.013068,-0.011480,-0.011480,-0.011480,-0.011480,-0.011480,-0.011480
...,...,...,...,...,...,...,...,...,...,...,...,...,...
276194,A005380,현대차,20210831,-0.011765,0.000000,-0.021176,0.011905,-0.011294,-0.019059,-0.005176,0.008471,0.015247,0.023843
276195,A005380,현대차,20210901,-0.004651,0.016279,-0.006977,0.011765,-0.017674,-0.029302,-0.017984,-0.005233,0.001581,0.010078
276196,A005380,현대차,20210902,0.011820,0.016548,-0.007092,-0.016279,0.000000,-0.010875,-0.003940,0.008038,0.016076,0.024192
276197,A005380,현대차,20210903,-0.006993,0.004662,-0.011655,0.014184,-0.008392,-0.018648,-0.019270,-0.008042,0.000466,0.007692


## PreProcessing - raw 데이터 입력형태 맞추기

In [109]:
def get_input_data(df, seq_len=10, target_col='close'):
    """Build sliding-window inputs and next-row targets for a single stock.

    Sample ``k`` consists of rows ``k .. k + seq_len - 1`` of the feature
    matrix; its target is the ``target_col`` value of row ``k + seq_len``.
    Rows are consumed in the order they appear in ``df`` (assumed to be
    chronological -- TODO confirm against the preprocessing step).

    Args:
        df: Rows of exactly one stock code. Must contain the ``code``,
            ``name`` and ``date`` columns; every other column is used as a
            feature. An ``Unnamed: 0`` column, if present, is discarded too
            (it is a plain row counter in the loaded CSV, not a feature).
        seq_len: Number of consecutive rows per input window (T).
        target_col: Name of the feature column used as the prediction target.

    Returns:
        ``(X_data, y_data)`` where ``X_data`` has shape
        ``(n_samples, 1, seq_len, n_features)`` (the singleton axis is the
        stock axis N) and ``y_data`` has shape ``(n_samples,)``, with
        ``n_samples = max(len(df) - seq_len, 0)``.

    Raises:
        ValueError: If ``seq_len`` is not positive, or if ``df`` holds more
            than one stock code (windows must never span two stocks).
        KeyError: If a required column or ``target_col`` is missing.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be a positive integer, got {seq_len!r}")

    codes = df.code.unique()
    if len(codes) > 1:
        # The previous implementation looped over codes and silently kept only
        # the last processed one; refuse ambiguous input instead.
        raise ValueError(
            f"get_input_data expects rows of a single stock code, got {len(codes)} codes"
        )

    # Non-mutating drops: `df` is usually a filtered slice of a larger frame.
    features = df.drop(columns=['code', 'name', 'date'])
    features = features.drop(columns=['Unnamed: 0'], errors='ignore')

    # Resolve the target by name so extra or reordered columns cannot shift it.
    target_idx = features.columns.get_loc(target_col)
    values = features.values
    n_samples = max(len(values) - seq_len, 0)

    # Row k of `window_idx` lists the source rows k .. k + seq_len - 1.
    window_idx = np.arange(n_samples)[:, None] + np.arange(seq_len)[None, :]
    X_data = values[window_idx][:, np.newaxis]  # (n_samples, 1, seq_len, n_features)
    y_data = values[seq_len:, target_idx].copy()  # value of the row right after each window

    return X_data, y_data
        
        
# Build one window tensor / target vector pair per code: the index (U182) and two stocks.
U182_x, U182_y = get_input_data(kospi50.loc[kospi50["code"] == "U182"])
A030200_x, A030200_y = get_input_data(kospi50.loc[kospi50["code"] == "A030200"])
A033780_x, A033780_y = get_input_data(kospi50.loc[kospi50["code"] == "A033780"])

# Show the resulting shapes; the number of samples differs from code to code.
print(U182_x.shape, U182_y.shape)
print(A030200_x.shape, A030200_y.shape)
print(A033780_x.shape, A033780_y.shape)

(5304, 1, 10, 10) (5304,)
(5596, 1, 10, 10) (5596,)
(5402, 1, 10, 10) (5402,)


In [110]:
# NOTE(review): this cell raises ValueError. Concatenating along axis=1 (the stock
# axis) requires every other axis to match, but the two arrays hold 5402 and 5596
# samples on axis 0. The per-stock windows have to be aligned to a common set of
# rows first (presumably by trading date -- confirm) before they can be stacked.
np.concatenate([A033780_x, A030200_x], axis=1)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 5402 and the array at index 1 has size 5596

In [None]:
# NOTE(review): `X_data` and `y_train_mi` are not defined at notebook scope in any
# cell visible here (`X_data` only exists as a local inside `get_input_data`), so
# this cell relies on leftover kernel state and will fail on a fresh kernel.
print('Training set shape', X_data.shape, y_train_mi.shape)


### Market index

Training set shape (5294, 20, 10) (5294,)


In [34]:

# NOTE(review): `test` is not created in any cell visible here -- it comes from
# hidden kernel state; define it explicitly so the notebook survives Run All.
test.shape

(5294, 1, 20, 10)

In [65]:
# Stack `test` with itself along axis 1, doubling that axis (1 -> 2) while the
# other axes stay unchanged -- presumably a quick check of the multi-stock layout.
test1 = np.concatenate([test, test], axis=1)
test1.shape

(5294, 2, 20, 10)

### Each stock

In [35]:
# Use the public Keras API; `tensorflow.python.*` is a private, unsupported path.
from tensorflow.keras.layers import Layer, Dense, LSTM

class AttentionLSTM(Layer):
    """Summarise each stock's feature window into a single context vector.

    For every stock the window is passed through a tanh feature transform and
    an LSTM; the hidden states are then averaged with softmax weights obtained
    from their dot product with the last hidden state.

    Args:
        units: Size of the LSTM hidden state (and of the returned context).
        ft_dim: Output size of the feature-transform Dense layer.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).

    Input shape:
        ``(B, N, T, V)`` -- batch, number of stocks, sequence length, number
        of features. ``N``, ``T`` and ``V`` must be statically known; the
        batch size may be unknown.

    Output shape:
        ``(B, N, units)``.
    """

    def __init__(self, units, ft_dim, **kwargs):
        super(AttentionLSTM, self).__init__(**kwargs)
        self.units = units
        self.ft_dim = ft_dim

    def build(self, input_shape):
        # No `input_shape` is forwarded to Dense: it is applied to the reshaped
        # 3-D tensor, not to the 4-D layer input, and builds itself on first call.
        self.feature_trans_weight = Dense(self.ft_dim,
                                          kernel_initializer='glorot_uniform',
                                          bias_initializer='glorot_uniform',
                                          activation='tanh')

        self.lstm = LSTM(self.units, return_sequences=True, return_state=False)
        super(AttentionLSTM, self).build(input_shape)

    def call(self, inputs, *args, **kwargs):
        # inputs: (B, N, T, V)
        # B: batch_size, N: number of stocks, T: sequence length, V: number of features
        n_stocks, seq_len, n_features = inputs.shape[1], inputs.shape[2], inputs.shape[3]

        # Fold the stock axis into the batch axis. The batch dimension is left
        # as -1 so the layer also works when the batch size is not known statically.
        feature = tf.reshape(inputs, shape=(-1, seq_len, n_features))  # (B*N, T, V)
        feature = self.feature_trans_weight(feature)  # (B*N, T, ft_dim)
        feature = self.lstm(feature)  # (B*N, T, units)

        # Score every time step against the last hidden state.
        query = feature[:, -1:, :]  # (B*N, 1, units)
        attn = tf.matmul(query, feature, transpose_b=True)  # (B*N, 1, T)
        attn = tf.nn.softmax(attn, axis=-1)  # (B*N, 1, T)

        # Attention-weighted sum of the hidden states over time.
        context = tf.matmul(attn, feature)  # (B*N, 1, units)
        context = tf.reshape(context, shape=(-1, n_stocks, self.units))  # (B, N, units)
        return context

    def get_config(self):
        # Expose constructor arguments so the layer can be saved and re-created.
        config = super(AttentionLSTM, self).get_config()
        config.update({'units': self.units, 'ft_dim': self.ft_dim})
        return config

class ContextNormalize(Layer):
    """Standardise context vectors per sample, then apply a learned scale and shift.

    For each sample the mean and standard deviation are taken over all stocks
    and all context elements; the result is multiplied by ``norm_weight`` and
    shifted by ``norm_bias``, both of shape ``(N, units)`` and shared across
    the batch.

    Args:
        epsilon: Small constant added to the standard deviation to avoid a
            division by zero when a sample's context is constant.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).

    Input / output shape:
        ``(B, N, units)``.
    """

    def __init__(self, epsilon=1e-6, **kwargs):
        super(ContextNormalize, self).__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        # Parameters must not depend on the batch size, so drop the batch axis:
        # one scale/shift per (stock, context element) pair.
        param_shape = input_shape[1:]
        # NOTE(review): 'uniform' starts the scale close to zero; 'ones'/'zeros'
        # is the more common choice for scale/shift -- kept as in the original.
        self.norm_weight = self.add_weight(name='norm_weight',
                                           shape=param_shape,
                                           initializer='uniform',
                                           trainable=True)
        self.norm_bias = self.add_weight(name='norm_bias',
                                         shape=param_shape,
                                         initializer='uniform',
                                         trainable=True)
        super(ContextNormalize, self).build(input_shape)

    def call(self, context, *args, **kwargs):
        # context: (B, N, units)
        # Statistics are computed per sample (axes 1 and 2) so that one sample's
        # output does not depend on which other samples share its batch.
        mean = tf.math.reduce_mean(context, axis=[1, 2], keepdims=True)  # (B, 1, 1)
        std = tf.math.reduce_std(context, axis=[1, 2], keepdims=True)  # (B, 1, 1)
        context = (context - mean) / (std + self.epsilon)  # (B, N, units)
        context = self.norm_weight * context + self.norm_bias
        return context

    def get_config(self):
        # Expose constructor arguments so the layer can be saved and re-created.
        config = super(ContextNormalize, self).get_config()
        config.update({'epsilon': self.epsilon})
        return config

In [72]:
# Smoke-test the layer: 72 LSTM units, 64-dimensional feature transform.
# NOTE(review): the recorded output has 3 on the stock axis, whereas `test` was
# displayed earlier with shape (5294, 1, 20, 10) -- the cells were evidently run
# out of order with a different `test`; re-run from a fresh kernel to confirm.
attn1 = AttentionLSTM(72,64)(test)
attn1.shape

TensorShape([5294, 3, 72])

In [111]:
# Encode each code's windows into context vectors. Every call creates its own,
# independently initialised AttentionLSTM, and the result replaces the raw
# window array bound to the same name (so this cell is not safe to run twice).
U182_x = AttentionLSTM(units=72, ft_dim=64)(U182_x)
A030200_x = AttentionLSTM(units=72, ft_dim=64)(A030200_x)
A033780_x = AttentionLSTM(units=72, ft_dim=64)(A033780_x)

# Context shapes next to the untouched target vectors.
print(U182_x.shape, U182_y.shape)
print(A030200_x.shape, A030200_y.shape)
print(A033780_x.shape, A033780_y.shape)

(5304, 1, 72) (5304,)
(5596, 1, 72) (5596,)
(5402, 1, 72) (5402,)
