題目：Use LSTM & CNN model to classify customized candlestick pattern (at least 3 classes) <br/>
1. Use LSTM model to classify customized candlestick pattern <br/>
程式：candlestick_train_lstm_r09723030_吳炤誼.py

In [1]:
# 插入所需套件
from sklearn.metrics import confusion_matrix
import pickle
import keras
from keras.layers import LSTM
from keras.layers import Dense, Activation, Conv2D, MaxPool2D, Dropout, Flatten
from keras.datasets import mnist
from keras.models import Sequential
from keras.optimizers import Adam

In [2]:
def load_pkl(pkl_name):
    """Load and return the object stored in a pickle file.

    Args:
        pkl_name: Path of the pickle file holding the candlestick dataset.

    Returns:
        Whatever object was pickled into the file.

    Note:
        Unpickling can execute arbitrary code, so only open files that
        come from a trusted source.
    """
    with open(pkl_name, mode='rb') as pkl_file:
        return pickle.load(pkl_file)

# Data pre-processing for the LSTM
def lstm_preprocess(x_train, x_test, y_train, y_test, n_step, n_input, n_classes):
    """Reshape, cast and scale the features, and one-hot encode the labels.

    Args:
        x_train: Training features; reshaped to (-1, n_step, n_input).
        x_test: Test features; reshaped the same way.
        y_train: Integer class labels for the training set.
        y_test: Integer class labels for the test set.
        n_step: Number of time steps per sample.
        n_input: Number of features per time step.
        n_classes: Width of the one-hot label vectors.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)`` of processed arrays.
    """
    def _as_scaled_sequences(samples):
        # For NumPy arrays astype() returns a new array by default, so the
        # in-place division below does not modify the caller's data.
        sequences = samples.reshape(-1, n_step, n_input).astype('float32')
        # NOTE(review): 255 is the usual 8-bit image divisor; confirm it is
        # the intended scale for this dataset's feature values.
        sequences /= 255
        return sequences

    train_x = _as_scaled_sequences(x_train)
    test_x = _as_scaled_sequences(x_test)
    # One-hot encode the integer labels for categorical cross-entropy.
    train_y = keras.utils.to_categorical(y_train, n_classes)
    test_y = keras.utils.to_categorical(y_test, n_classes)
    return (train_x, test_x, train_y, test_y)

# LSTM model
def lstm_model(n_input, n_step, n_hidden, n_classes):
    """Build a single-layer LSTM classifier with a softmax output.

    Args:
        n_input: Number of features per time step.
        n_step: Number of time steps per sample.
        n_hidden: Number of LSTM units.
        n_classes: Number of output classes.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential()
    # input_shape=(n_step, n_input) describes the same input as the former
    # batch_input_shape=(None, n_step, n_input), but matches the CNN block
    # in this file and is also accepted by newer Keras releases.
    model.add(LSTM(n_hidden, input_shape=(n_step, n_input), unroll=True))
    # Output layer: one unit per class.
    model.add(Dense(n_classes))
    # Softmax turns the outputs into class probabilities.
    model.add(Activation('softmax'))
    return model

# Train the LSTM model
def train_lstm(model, x_train, y_train, x_test, y_test,
        learning_rate, training_iters, batch_size):
    """Compile ``model`` with Adam and fit it on the training data.

    Args:
        model: Uncompiled Keras model to train (compiled in place).
        x_train: Training features.
        y_train: One-hot training labels.
        x_test: Features passed to ``fit`` as validation data.
        y_test: One-hot labels passed to ``fit`` as validation data.
        learning_rate: Adam step size. Too large oscillates around the
            minimum; too small wastes training time.
        training_iters: Number of epochs.
        batch_size: Samples per gradient update.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases no longer accept.
    adam = Adam(learning_rate=learning_rate)
    model.summary()
    # Choose the optimizer, loss function and reported metric.
    model.compile(optimizer=adam,
        loss='categorical_crossentropy', metrics=['accuracy'])
    # Train the model.
    history = model.fit(x_train, y_train,
        batch_size=batch_size, epochs=training_iters,
        verbose=1, validation_data=(x_test, y_test))
    return history

def print_result(data, x_train, x_test, model):
    """Print train and test confusion matrices for a trained classifier.

    Args:
        data: Mapping holding 'train_label' and 'test_label'; column 0 of
            each is used as the true class id.
        x_train: Pre-processed training features fed to the model.
        x_test: Pre-processed test features fed to the model.
        model: Trained Keras model whose output is a per-class score vector.
    """
    # numpy is not imported in this cell, so bring it in locally.
    import numpy as np

    # Predicted class = index of the largest score. This replaces
    # Sequential.predict_classes, which newer Keras releases removed.
    train_pred = np.argmax(model.predict(x_train, verbose=0), axis=-1)
    test_pred = np.argmax(model.predict(x_test, verbose=0), axis=-1)
    # Ground-truth labels.
    train_label = data['train_label'][:, 0]
    test_label = data['test_label'][:, 0]
    # 9x9 confusion matrices: the more mass on the diagonal, the more
    # accurate the model.
    train_result_cm = confusion_matrix(train_label, train_pred, labels=range(9))
    test_result_cm = confusion_matrix(test_label, test_pred, labels=range(9))
    print(train_result_cm, '\n'*2, test_result_cm)

def mnist_lstm_main(pkl_name='C:/Users/ben82/ipython_notebook_workplace/label8_eurusd_10bar_1500_500_val200_gaf_culr.pkl'):
    """Train the LSTM classifier on the pickled dataset and print results.

    Args:
        pkl_name: Path of the dataset pickle. Defaults to the location that
            used to be hard-coded, so existing no-argument calls behave
            as before.
    """
    # Training hyper-parameters.
    # Adam's default learning rate is 0.001; the author reports that 0.005
    # with 50 epochs gave better accuracy than the original settings.
    learning_rate = 0.005
    # Number of epochs.
    training_iters = 50
    # Samples per batch.
    batch_size = 128

    # Model parameters: features per step, steps, LSTM units, classes.
    n_input = 40
    n_step = 10
    n_hidden = 256
    # NOTE(review): print_result tabulates only labels 0-8 and the CNN in
    # this file uses 9 classes, so 10 looks like a leftover -- confirm
    # before changing, since it alters the model's output layer.
    n_classes = 10

    # Load the data and pre-process it.
    data = load_pkl(pkl_name)
    x_train, x_test = data['train_gaf'], data['test_gaf']
    y_train, y_test = data['train_label'][:, 0], data['test_label'][:, 0]
    x_train, x_test, y_train, y_test = lstm_preprocess(
        x_train, x_test, y_train, y_test, n_step, n_input, n_classes)

    # Train the LSTM model and print the results.
    model = lstm_model(n_input, n_step, n_hidden, n_classes)
    train_lstm(model, x_train, y_train, x_test, y_test, learning_rate,
               training_iters, batch_size)
    scores = model.evaluate(x_test, y_test, verbose=0)
    print('LSTM test accuracy:', scores[1])
    print_result(data, x_train, x_test, model)

LSTM

In [3]:
# Run the LSTM pipeline: load the data, train, evaluate, and print the
# confusion matrices.
mnist_lstm_main()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 256)               304128    
_________________________________________________________________
dense (Dense)                (None, 10)                2570      
_________________________________________________________________
activation (Activation)      (None, 10)                0         
Total params: 306,698
Trainable params: 306,698
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch



[[1992  138  103   98  155   80  250   66  118]
 [  94 1401    0    0    0    0    5    0    0]
 [  43    0 1290    0  165    0    0    0    2]
 [  34   92    0 1285    0    0    0   89    0]
 [  11    0    1    0 1152    0    0    0  336]
 [ 145    1    0    0    0 1346    3    5    0]
 [   4    1    0    0    2    0 1473    0   20]
 [  39    3    0  218    0   25    0 1215    0]
 [   9    0    0    0   56    0   35    0 1400]] 

 [[675  39  42  29  52  25  75  24  39]
 [ 29 471   0   0   0   0   0   0   0]
 [ 10   0 438   0  52   0   0   0   0]
 [ 20  34   0 430   0   0   0  16   0]
 [  7   0   0   0 396   0   0   0  97]
 [ 66   0   0   0   0 434   0   0   0]
 [  2   0   0   0   0   0 492   0   6]
 [ 21   0   0  40   0   7   0 432   0]
 [  4   0   0   0   2   0   8   0 486]]


2. Use CNN model to classify customized candlestick pattern <br/>
程式：candlestick_train_cnn_r09723030_吳炤誼.py

In [4]:
# 插入所需套件
from sklearn.metrics import confusion_matrix
import numpy as np
import pickle

from keras import backend as K
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, Activation, MaxPool2D

In [5]:
def load_pkl(pkl_name):
    """Read a pickle file and return the object it contains.

    Args:
        pkl_name: Path of the pickle file to read.

    Returns:
        The unpickled object.

    Note:
        Only load pickles from trusted sources; unpickling can run
        arbitrary code.
    """
    with open(pkl_name, mode='rb') as handle:
        return pickle.load(handle)

# CNN model
def get_cnn_model(params):
    """Build the two-convolution CNN classifier.

    Args:
        params: Settings mapping. The optional 'classes' entry sets the
            width of the softmax output layer; when it is missing (or
            ``params`` is empty/None) the previously hard-coded 9 is used.

    Returns:
        An uncompiled ``Sequential`` model taking (10, 10, 4) inputs.
    """
    n_classes = params.get('classes', 9) if params else 9

    model = Sequential()
    # 5x5 convolution with zero padding ('same') keeps the 10x10 spatial
    # size; ReLU discards negative activations.
    model.add(Conv2D(filters=32, kernel_size=(5,5), padding='same', activation='relu', input_shape=(10, 10, 4)))
    # Second 5x5 convolution without padding ('valid') shrinks it to 6x6.
    model.add(Conv2D(filters=48, kernel_size=(5,5), padding='valid', activation='relu'))
    # Flatten the feature maps into a vector.
    model.add(Flatten())
    # Three dense layers: two ReLU layers, then a softmax that outputs
    # class probabilities.
    model.add(Dense(256, activation='relu'))
    model.add(Dense(84, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))
    return model

# Train the model
def train_model(params, data):
    """Build, compile and fit the CNN on the training split.

    Args:
        params: Settings mapping providing 'optimizer', 'batch_size' and
            'epochs'; it is also forwarded to ``get_cnn_model``.
        data: Dataset mapping providing 'train_gaf' (inputs) and
            'train_label_arr' (targets).

    Returns:
        Tuple ``(model, hist)``: the trained model and the object returned
        by ``model.fit``.
    """
    cnn = get_cnn_model(params)
    # Choose the loss function, optimizer and reported metric.
    cnn.compile(
        loss='categorical_crossentropy',
        optimizer=params['optimizer'],
        metrics=['accuracy'],
    )
    fit_options = {
        'batch_size': params['batch_size'],
        'epochs': params['epochs'],
        'verbose': 2,
    }
    history = cnn.fit(x=data['train_gaf'], y=data['train_label_arr'], **fit_options)
    return (cnn, history)

# Print the results
def print_result(data, model):
    """Print train and test confusion matrices for the trained CNN.

    Args:
        data: Dataset mapping providing 'train_gaf' and 'test_gaf' (model
            inputs) and 'train_label' and 'test_label'; column 0 of each
            label array is used as the true class id.
        model: Trained Keras model whose output is a per-class score vector.
    """
    # Predicted class = index of the largest score. This replaces
    # Sequential.predict_classes, which newer Keras releases removed.
    train_pred = np.argmax(model.predict(data['train_gaf'], verbose=0), axis=-1)
    test_pred = np.argmax(model.predict(data['test_gaf'], verbose=0), axis=-1)
    # Ground-truth labels.
    train_label = data['train_label'][:, 0]
    test_label = data['test_label'][:, 0]
    # 9x9 confusion matrices: the more mass on the diagonal, the more
    # accurate the model.
    train_result_cm = confusion_matrix(train_label, train_pred, labels=range(9))
    test_result_cm = confusion_matrix(test_label, test_pred, labels=range(9))
    print(train_result_cm, '\n'*2, test_result_cm)

CNN

In [7]:
    # Settings for the CNN run. The author reports that learning rate
    # 0.005, 20 epochs and batch size 32 improved accuracy.
    PARAMS = {
        'pkl_name': 'C:/Users/ben82/ipython_notebook_workplace/label8_eurusd_10bar_1500_500_val200_gaf_culr.pkl',
        # Number of classes.
        'classes': 9,
        # Learning rate.
        'lr': 0.005,
        # Number of epochs.
        'epochs': 20,
        # Samples per batch.
        'batch_size': 32,
    }
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases no longer accept.
    PARAMS['optimizer'] = optimizers.SGD(learning_rate=PARAMS['lr'])

    # ---------------------------------------------------------
    # Load the dataset.
    data = load_pkl(PARAMS['pkl_name'])
    # Train the CNN model.
    model, hist = train_model(PARAMS, data)
    # Report test accuracy and the train/test confusion matrices.
    scores = model.evaluate(data['test_gaf'], data['test_label_arr'], verbose=0)
    print('CNN test accuracy:', scores[1])
    print_result(data, model)

Epoch 1/20
469/469 - 4s - loss: 1.4971 - accuracy: 0.4589
Epoch 2/20
469/469 - 4s - loss: 0.7597 - accuracy: 0.7258
Epoch 3/20
469/469 - 4s - loss: 0.5951 - accuracy: 0.7861
Epoch 4/20
469/469 - 4s - loss: 0.5217 - accuracy: 0.8098
Epoch 5/20
469/469 - 4s - loss: 0.4846 - accuracy: 0.8256
Epoch 6/20
469/469 - 4s - loss: 0.4577 - accuracy: 0.8333
Epoch 7/20
469/469 - 4s - loss: 0.4398 - accuracy: 0.8395
Epoch 8/20
469/469 - 4s - loss: 0.4227 - accuracy: 0.8493
Epoch 9/20
469/469 - 4s - loss: 0.4067 - accuracy: 0.8537
Epoch 10/20
469/469 - 4s - loss: 0.3967 - accuracy: 0.8581
Epoch 11/20
469/469 - 4s - loss: 0.3836 - accuracy: 0.8613
Epoch 12/20
469/469 - 5s - loss: 0.3768 - accuracy: 0.8629
Epoch 13/20
469/469 - 4s - loss: 0.3675 - accuracy: 0.8640
Epoch 14/20
469/469 - 4s - loss: 0.3594 - accuracy: 0.8720
Epoch 15/20
469/469 - 4s - loss: 0.3519 - accuracy: 0.8722
Epoch 16/20
469/469 - 4s - loss: 0.3447 - accuracy: 0.8755
Epoch 17/20
469/469 - 4s - loss: 0.3371 - accuracy: 0.8791
Epoch 