In [55]:
import numpy as np
import csv
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers.wrappers import TimeDistributed
from keras.layers import Conv2D, Dense, Dropout, Activation, Flatten, MaxPooling2D, LSTM
from keras.optimizers import SGD

In [2]:
SET50 = ['ADVANC', 'AOT', 'BANPU', 'BBL', 'BCP', 'BDMS',
        'BEM', 'BH', 'BJC', 'BTS', 'CENTEL', 'CPALL', 
         'CPF', 'CPN', 'DTAC', 'EGCO', 'HMPRO', 
         'INTUCH', 'IRPC', 'KBANK', 'KCE', 'KKP', 'KTB', 'LH', 'MINT', 'PTT',
        'PTTEP', 'ROBINS', 'SCB', 'SCC', 
        'TCAP', 'TISCO', 'TMB', 'TOP', 'TRUE', 'TU']

In [82]:
lr = 0.1
epochs = 5
batch_size = 1
optimizer = SGD(lr=lr)
activator = 'tanh'
loss_method = 'mse'
metrics = ['mae', 'accuracy']

In [4]:
def load_stock_data(symbol, year_start, year_end):
    stock_data = []
    with open('../Data set/FIXED_SET50/' + symbol + '.BK.csv', 'r') as csv_file:
        file_data = csv.reader(csv_file, delimiter=',')
        file_data = list(file_data)[1:]
        temp_value = 0
        for row in file_data:
            if row[1] is '':
                continue
            elif int(row[0][0:4]) >= year_start and int(row[0][0:4]) <= year_end:
                temp = float(row[1])
                if temp_value != 0:
                    unrealize = (temp - temp_value)/ temp_value
                else:
                    unrealize = 0
                unrealize = ["{0:.2f}".format(unrealize)]
                stock_data.append(unrealize)
                temp_value = temp
                
    return stock_data

In [5]:
stock_datas = [ load_stock_data(x, 2008, 2017) for x in SET50 ]

min_count = len(stock_datas[0])

In [77]:
def create_cnn():
    model = Sequential()
    model.add(Conv2D(64, (7, 1), input_shape=(len(stock_datas), 30, 1), padding='same'))
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size = (2, 1), data_format= "channels_last"))
    model.add(Flatten())
    model.add(Dense(len(stock_datas)))
    return model

def create_model():
    cnn = create_cnn()
    model = Sequential()
    model.add(TimeDistributed(cnn, input_shape=(1, len(stock_datas), 30, 1)))
    model.add(LSTM(512, return_sequences=True))
    model.add(Activation('tanh'))
    model.add(Dropout(0.2))
    model.add(LSTM(512, return_sequences=True))
    model.add(Activation('tanh'))
    model.add(Dropout(0.2))
    model.add(LSTM(512, return_sequences=True))
    model.add(Activation('tanh'))
    model.add(Dropout(0.2))
    model.add(Dense(len(stock_datas)))
    model.summary()
    return model

In [78]:
model = create_model()
model.compile(loss=loss_method, optimizer=optimizer, metrics=metrics)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_33 (TimeDis (None, 1, 36)             1244708   
_________________________________________________________________
lstm_12 (LSTM)               (None, 1, 512)            1124352   
_________________________________________________________________
activation_22 (Activation)   (None, 1, 512)            0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 1, 512)            0         
_________________________________________________________________
lstm_13 (LSTM)               (None, 1, 512)            2099200   
_________________________________________________________________
activation_23 (Activation)   (None, 1, 512)            0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 1, 512)            0         
__________

In [79]:
rounds = min_count - 30
train_index = int(rounds * 0.75)
validate_index = train_index + int(rounds * 0.05)

In [80]:
data = []
count = 0
index = 0
while index < rounds:
    temp = [i[index : index + 30] for i in stock_datas]
    data.append([temp])
    index += 1
    
x = np.asarray(data)
x_train = x[0: train_index].astype('float32')
x_validate = x[train_index: validate_index].astype('float32')
x_test = x[validate_index: rounds].astype('float32')

print(x_train.shape, x_validate.shape, x_test.shape)

(1811, 1, 36, 30, 1) (120, 1, 36, 30, 1) (484, 1, 36, 30, 1)


In [81]:
target = []
index = 0
while index < rounds:
    temp = [stock_datas[i][index + 30][0] for i in range(len(stock_datas))]
    target.append([temp])
    index += 1
    
y = np.asarray(target)
y_train = y[0: train_index].astype('float32')
y_validate = y[train_index: validate_index].astype('float32')
y_test = y[validate_index: rounds].astype('float32')

print(y_train.shape, y_validate.shape, y_test.shape)

(1811, 1, 36) (120, 1, 36) (484, 1, 36)


In [83]:
model.fit(x_train, y_train, validation_data = (x_validate, y_validate), epochs = epochs, batch_size = batch_size, verbose= 1)

Train on 1811 samples, validate on 120 samples
Epoch 1/5
Epoch 2/5
 302/1811 [====>.........................] - ETA: 1:05 - loss: 5.9217e-04 - mean_absolute_error: 0.0126 - acc: 0.0232

KeyboardInterrupt: 