In [1]:
# RNN(Long Short-Term Memory, LSTM)

In [15]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, LSTM, TimeDistributed, RepeatVector
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.utils import np_utils
import matplotlib.pyplot as plt
import os
%matplotlib inline

In [3]:
def mergeData():
    SaveFile_Name = 'data.csv'
    file_list = os.listdir('data')
    df = pd.read_csv('data'+'\\'+file_list[0])
    df.to_csv(SaveFile_Name,encoding="utf_8_sig",index=False)
    for i in range(1,len(file_list)):
        df = pd.read_csv('data'+'\\'+file_list[i])
        df.to_csv(SaveFile_Name,encoding="utf_8_sig",index=False, header=False, mode='a+')

In [4]:
def readData():
    train = pd.read_csv("data.csv")
    return train

In [5]:
def changeYear(data):
    for i in range(0,data.shape[0]):
        Date=data["日期"][i].split('/')
        year,month,date=Date[0],Date[1],Date[2]
        year=str(int(year)+1911)
        data.loc[i,"日期"]=year+'/'+month+'/'+date
    return data

In [6]:
# Augment Features
def augFeatures(data):
  data["日期"] = pd.to_datetime(data["日期"])
  data["年"] = data["日期"].dt.year
  data["月"] = data["日期"].dt.month
  data["日"] = data["日期"].dt.day
  data["第幾日"] = data["日期"].dt.dayofweek
  return data

In [7]:
def manage(data):
    for i in range(0,data.shape[0]):
        if data["漲跌價差"][i]=='X0.00':
            data.loc[i,"漲跌價差"]=str(int(data["收盤價"][i])-int(data["收盤價"][i-1]))
    data=data.drop(["日期"], axis=1)
    data=data.drop(["成交股數"], axis=1)
    data=data.drop(["成交金額"], axis=1)
    data=data.drop(["漲跌價差"], axis=1)
    data=data.drop(["成交筆數"], axis=1)
    data=data.convert_objects(convert_numeric=True)
    return data

In [8]:
def normalize(data):
    datanormalize=data.apply(lambda x: (x - np.mean(x)) / (np.max(x) - np.min(x)))
    return datanormalize

In [52]:
def buildTrain(train, pastDay=1, futureDay=1):
    X_train, Y_train, Z_train= [], [], []
    X,Y,Z=[],[],[]
    for i in range(train.shape[0]-futureDay-pastDay):
        X_train.append(np.array(train.iloc[i:i+pastDay]))
        Y_train.append(np.array(train.iloc[i+pastDay:i+pastDay+futureDay]["開盤價"]))
        Z_train.append(np.array(train.iloc[i:i+pastDay]["開盤價"]))
    X=np.array(X_train)
    Y=np.array(Y_train)
    Z=np.array(Z_train)
    Y=(Y-Z)/2
    Y_train=[]
    for i in range(len(Y)):
        if Y[i]<-5:
            Y_train.append(np.array([0]))
        elif -5<=Y[i]<-4:
            Y_train.append(np.array([1]))
        elif -4<=Y[i]<-3:
            Y_train.append(np.array([2]))
        elif -3<=Y[i]<-2:
            Y_train.append(np.array([3]))
        elif -2<=Y[i]<-1:
            Y_train.append(np.array([4]))
        elif -1<=Y[i]<0:
            Y_train.append(np.array([5]))
        elif 0<=Y[i]<1:
            Y_train.append(np.array([6]))
        elif 1<=Y[i]<2:
            Y_train.append(np.array([7]))
        elif 2<=Y[i]<3:
            Y_train.append(np.array([8]))
        elif 3<=Y[i]<4:
            Y_train.append(np.array([9]))
        elif 4<=Y[i]<5:
            Y_train.append(np.array([10]))
        elif 5<=Y[i]:
            Y_train.append(np.array([11]))
    Y=np.array(Y_train)
    return X, Y

In [53]:
def shuffle1(X,Y):
  np.random.seed()
  randomList = np.arange(X.shape[0])
  np.random.shuffle(randomList)
  return X[randomList], Y[randomList]

In [54]:
# 將Training Data取一部份當作Validation Data
def splitData(X,Y,rate):
    X_train = X[:-int(X.shape[0]*rate)]

    Y_train = Y[:-int(Y.shape[0]*rate)]
    
    X_val = X[-int(X.shape[0]*rate):]

    Y_val = Y[-int(Y.shape[0]*rate):]
    return X_train, Y_train, X_val, Y_val

In [79]:
def buildOneToOneModel(shape):
    model = Sequential()
    model.add(LSTM(12,input_shape=(None, 8),return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(24,return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(36))
    model.add(Dense(12))
    model.compile(loss="categorical_crossentropy", optimizer="adam",metrics=['accuracy'])
    model.summary()
    return model

In [80]:
from sklearn.utils import shuffle

mergeData()
train=readData()
train=changeYear(train)
train=augFeatures(train)
train=manage(train)
#train = shuffle(train)

temp=train
train=normalize(train)

train_x1, train_y1 = buildTrain(train, 1, 1)
train_x2, train_y2 = buildTrain(temp, 1, 1)
train_x, train_y = train_x1,train_y2
#train_x, train_y = shuffle(train_x, train_y )

train_x, train_y , val_x, val_y = splitData(train_x, train_y , 0.1)

train_y=np_utils.to_categorical(train_y)
val_y=np_utils.to_categorical(val_y)

#train_x= np.reshape(train_x, (train_x.shape[0],train_x.shape[2]))
#val_x= np.reshape(val_x, (val_x.shape[0],val_x.shape[2]))


model = buildOneToOneModel(train_x.shape)



#callback = EarlyStopping(monitor="acc", patience=10, verbose=1, mode="auto")

model.fit(train_x, train_y, epochs=500, batch_size=200,verbose=2, validation_split=0.2)


For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  # Remove the CWD from sys.path while we load stuff.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_28 (LSTM)               (None, None, 12)          1008      
_________________________________________________________________
dropout_21 (Dropout)         (None, None, 12)          0         
_________________________________________________________________
lstm_29 (LSTM)               (None, None, 24)          3552      
_________________________________________________________________
dropout_22 (Dropout)         (None, None, 24)          0         
_________________________________________________________________
lstm_30 (LSTM)               (None, 36)                8784      
_________________________________________________________________
dense_9 (Dense)              (None, 12)                444       
Total params: 13,788
Trainable params: 13,788
Non-trainable params: 0
_________________________________________________________________
Train 

Epoch 84/500
 - 0s - loss: 5.0134 - acc: 0.3817 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 85/500
 - 0s - loss: 5.1508 - acc: 0.3833 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 86/500
 - 0s - loss: 5.0315 - acc: 0.3804 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 87/500
 - 0s - loss: 4.9483 - acc: 0.3784 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 88/500
 - 0s - loss: 5.0351 - acc: 0.3811 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 89/500
 - 0s - loss: 4.9845 - acc: 0.3835 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 90/500
 - 0s - loss: 4.9374 - acc: 0.3817 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 91/500
 - 0s - loss: 5.0062 - acc: 0.3804 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 92/500
 - 0s - loss: 5.0387 - acc: 0.3770 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 93/500
 - 0s - loss: 5.0677 - acc: 0.3824 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 94/500
 - 0s - loss: 5.0206 - acc: 0.3811 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 95/500
 - 0s - loss: 5.0206 - acc: 0.3813 - val_loss: 7.936

Epoch 180/500
 - 0s - loss: 4.9989 - acc: 0.3860 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 181/500
 - 0s - loss: 4.8940 - acc: 0.3844 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 182/500
 - 0s - loss: 5.0532 - acc: 0.3775 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 183/500
 - 0s - loss: 5.0532 - acc: 0.3855 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 184/500
 - 0s - loss: 4.9555 - acc: 0.3829 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 185/500
 - 0s - loss: 5.0459 - acc: 0.3824 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 186/500
 - 0s - loss: 5.0206 - acc: 0.3775 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 187/500
 - 0s - loss: 4.9808 - acc: 0.3815 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 188/500
 - 0s - loss: 5.0242 - acc: 0.3842 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 189/500
 - 0s - loss: 4.9772 - acc: 0.3806 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 190/500
 - 0s - loss: 4.9953 - acc: 0.3788 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 191/500
 - 0s - loss: 4.9519 - acc: 0.3822 - val

Epoch 276/500
 - 0s - loss: 4.9483 - acc: 0.3844 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 277/500
 - 0s - loss: 5.0677 - acc: 0.3815 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 278/500
 - 0s - loss: 4.9664 - acc: 0.3804 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 279/500
 - 0s - loss: 5.0640 - acc: 0.3804 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 280/500
 - 0s - loss: 5.0025 - acc: 0.3795 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 281/500
 - 0s - loss: 5.0604 - acc: 0.3808 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 282/500
 - 0s - loss: 5.0459 - acc: 0.3777 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 283/500
 - 0s - loss: 4.9845 - acc: 0.3829 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 284/500
 - 0s - loss: 5.0315 - acc: 0.3826 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 285/500
 - 0s - loss: 5.1074 - acc: 0.3833 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 286/500
 - 0s - loss: 5.0423 - acc: 0.3806 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 287/500
 - 0s - loss: 5.0713 - acc: 0.3833 - val

Epoch 372/500
 - 0s - loss: 4.9989 - acc: 0.3804 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 373/500
 - 0s - loss: 4.9411 - acc: 0.3824 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 374/500
 - 0s - loss: 5.1111 - acc: 0.3799 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 375/500
 - 0s - loss: 5.0387 - acc: 0.3846 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 376/500
 - 0s - loss: 5.0785 - acc: 0.3833 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 377/500
 - 0s - loss: 4.9772 - acc: 0.3820 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 378/500
 - 0s - loss: 5.0966 - acc: 0.3853 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 379/500
 - 0s - loss: 5.0713 - acc: 0.3831 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 380/500
 - 0s - loss: 5.1581 - acc: 0.3842 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 381/500
 - 0s - loss: 4.9772 - acc: 0.3851 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 382/500
 - 0s - loss: 5.0749 - acc: 0.3779 - val_loss: 7.9362 - val_acc: 0.4538
Epoch 383/500
 - 0s - loss: 4.9519 - acc: 0.3815 - val

Epoch 468/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 469/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 470/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 471/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 472/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 473/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 474/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 475/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 476/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 477/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 478/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val_loss: 3.1369 - val_acc: 0.4565
Epoch 479/500
 - 0s - loss: 3.6172 - acc: 0.4156 - val

<keras.callbacks.History at 0xae16e55ba8>

In [None]:
scores= model.evaluate(val_x, val_y,verbose=1,batch_size=150)
print(scores)



In [15]:
print(train_y )

[[0 0 0 ... 1 0 0]
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 1 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
