# 目的
前のノートではシンプルなCNNを用いたFXTFデータの予測を行った。   
ここでは、データセットを増やすことで結果が良くなるかどうかを確認する。

In [None]:
from keras import regularizers
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPool2D
from keras.models import Sequential
from keras.models import model_from_json, load_model
from keras.optimizers import Adam, Adagrad
from keras.callbacks import EarlyStopping, TensorBoard
from keras.utils import plot_model

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

import copy
import datetime
import glob
import numpy as np
import os
import pandas as pd

from FX.FX.core import utils
# from FX.FX import KerasModelAdapter

## データセットの読み込み

In [None]:
# Root directory that holds the chart images, the label CSV and the saved models.
# Alternative local location used previously:
# basepath = "C:/Users/Surpris/Desktop/20170918/"
basepath = "../../images/20170918/"

# glob returns paths in a filesystem-dependent order, so sort them to make the
# positional pairing with the label rows deterministic.
# NOTE(review): assumes the image file names sort in the same order as the CSV
# rows -- confirm against the image-generation step.
filelist = np.array(sorted(glob.glob(os.path.join(basepath, "images-ohlc/*.png"))))
data = pd.read_csv(basepath + "FXTF/USDJPY-cd1_20170806_k030.csv")
# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
# The first 9 rows are dropped; presumably they have no corresponding image
# -- TODO confirm.
y = data[["label1", "label2", "label3"]].values[9:].copy()

# Hold out 30% of the (image path, label) pairs for testing.
Xpath_train, Xpath_test, y_train, y_test = train_test_split(filelist, y, test_size=0.3)

In [None]:
# Split the training paths and labels into groups via the project helper.
# The third argument (8) is presumably the number of groups -- confirm against
# utils.grouping_dataset.
grpX, grpY = utils.grouping_dataset(
    Xpath_train,
    y_train,
    8,
)
# Load only the first 6000 test images into memory.
X_test = utils.load_images_from_filelist(
    Xpath_test[:6000],
)

## モデルの読み込みと訓練

In [None]:
# Load the previously saved model from disk.
model = load_model(
    basepath + "ML/model.h5"
)
# Train on the grouped data, evaluating against the first 6000 test samples.
# The last argument (5) is presumably the number of epochs -- confirm against
# utils.train_with_groups.
hists, scores = utils.train_with_groups(
    model,
    grpX,
    grpY,
    X_test,
    y_test[:6000],
    5,
)

In [None]:
# Visualize the model's output on the loaded test images.
# NOTE(review): what exactly is plotted is defined in utils.plot_probability,
# which is not visible here.
utils.plot_probability(model, X_test)

In [None]:
# X_test only contains the first 6000 test images, so trim the labels to the
# same length; passing the full y_test would pair predictions with a label
# array of a different size.
utils.calc_accuracy_above_threshold(model, X_test, y_test[:len(X_test)], threshold=0.75, verbose=1)

## まとめ
複数のデータセットで訓練したものの、最終的なスコアは50%を切るかどうかという結果であった。   
モデルを深くするか、画像の生成方法を変えるか、転移学習を試すかなどのアプローチが考えられる。

# Batch normalizationを導入する

## モデルの構築

In [None]:
# Pick a sample image to derive the network's input size. If `img1` is already
# defined in this session it is reused; otherwise the first test image is used.
# Only NameError is caught so that unrelated failures (e.g. KeyboardInterrupt)
# are not silently swallowed as they were by the bare `except:`.
try:
    img1
except NameError:
    img1 = X_test[0]
# Height and width come from the sample image; the channel count is fixed to 1.
input_shape = (img1.shape[0], img1.shape[1], 1)

model = Sequential()
# Input layer: 10 filters with a 3x3 kernel
model.add(Conv2D(10, 3, input_shape=input_shape, activation="relu"))
# 2nd layer: another 3x3 convolution followed by 3x3 max pooling
model.add(Conv2D(10, 3, activation="relu"))
model.add(MaxPool2D(pool_size=(3, 3)))
# 3rd layer: 20 filters; batch normalization is applied to the ReLU output
model.add(Conv2D(20, 3, activation="relu"))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2, 2)))
# 4th layer: flatten, then a 1024-unit dense layer with BN and dropout
model.add(Flatten())
model.add(Dense(1024, activation="relu"))
model.add(BatchNormalization())
model.add(Dropout(0.3))
# Output layer: one softmax unit per label column of y
model.add(Dense(y.shape[1], activation="softmax"))

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=1e-4),
              metrics=['accuracy'])

# Save the freshly compiled (untrained) model to disk.
model.save(basepath + "ML/model_with_BN.h5")

## 訓練

In [None]:
# Reload the saved batch-normalization model from disk.
model = load_model(
    basepath + "ML/model_with_BN.h5"
)
# Train on the grouped data, evaluating against the first 6000 test samples.
# The last argument (80) is presumably the number of epochs -- confirm against
# utils.train_with_groups.
hists, scores = utils.train_with_groups(
    model,
    grpX,
    grpY,
    X_test,
    y_test[:6000],
    80,
)

In [None]:
# Print the layer-by-layer summary of the current model.
model.summary()

In [None]:
# Visualize the batch-normalization model's output on the loaded test images.
# NOTE(review): what exactly is plotted is defined in utils.plot_probability,
# which is not visible here.
utils.plot_probability(model, X_test)

In [None]:
# X_test only contains the first 6000 test images, so trim the labels to the
# same length; passing the full y_test would pair predictions with a label
# array of a different size.
utils.calc_accuracy_above_threshold(model, X_test, y_test[:len(X_test)], threshold=0.95, verbose=1)

# L2正則化を課す

In [None]:
# Pick a sample image to derive the network's input size. If `img1` is already
# defined in this session it is reused; otherwise the first test image is used.
# Only NameError is caught so that unrelated failures (e.g. KeyboardInterrupt)
# are not silently swallowed as they were by the bare `except:`.
try:
    img1
except NameError:
    img1 = X_test[0]
# Height and width come from the sample image; the channel count is fixed to 1.
input_shape = (img1.shape[0], img1.shape[1], 1)

model = Sequential()
# Input layer: 10 filters with a 3x3 kernel
model.add(Conv2D(10, 3, input_shape=input_shape, activation="relu"))
# 2nd layer: another 3x3 convolution followed by 3x3 max pooling
model.add(Conv2D(10, 3, activation="relu"))
model.add(MaxPool2D(pool_size=(3, 3)))
# 3rd layer: 20 filters with an L2 penalty (0.01) on the kernel, then BN
model.add(Conv2D(20, 3, activation="relu", kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2, 2)))
# 4th layer: flatten, then an L2-regularized 1024-unit dense layer with BN
# and a dropout rate of 0.5
model.add(Flatten())
model.add(Dense(1024, activation="relu", kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
# Output layer: one softmax unit per label column of y
model.add(Dense(y.shape[1], activation="softmax"))

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=1e-4),
              metrics=['accuracy'])

# Save the freshly compiled (untrained) model to disk.
model.save(basepath + "ML/model_with_BN_L2.h5")

In [None]:
# Reload the saved BN + L2 model from disk.
model = load_model(
    basepath + "ML/model_with_BN_L2.h5"
)
# Train on the grouped data, evaluating against the first 6000 test samples.
# The last argument (80) is presumably the number of epochs -- confirm against
# utils.train_with_groups.
hists, scores = utils.train_with_groups(
    model,
    grpX,
    grpY,
    X_test,
    y_test[:6000],
    80,
)

In [None]:
# Visualize the BN + L2 model's output on the loaded test images.
# NOTE(review): what exactly is plotted is defined in utils.plot_probability,
# which is not visible here.
utils.plot_probability(model, X_test)

In [None]:
# X_test only contains the first 6000 test images, so trim the labels to the
# same length; passing the full y_test would pair predictions with a label
# array of a different size.
utils.calc_accuracy_above_threshold(model, X_test, y_test[:len(X_test)], threshold=0.70, verbose=1)

## RMSPROPに変えてみる

In [None]:
# Pick a sample image to derive the network's input size. If `img1` is already
# defined in this session it is reused; otherwise the first test image is used.
# Only NameError is caught so that unrelated failures (e.g. KeyboardInterrupt)
# are not silently swallowed as they were by the bare `except:`.
try:
    img1
except NameError:
    img1 = X_test[0]
# Height and width come from the sample image; the channel count is fixed to 1.
input_shape = (img1.shape[0], img1.shape[1], 1)

model = Sequential()
# Input layer: 10 filters with a 3x3 kernel
model.add(Conv2D(10, 3, input_shape=input_shape, activation="relu"))
# 2nd layer: another 3x3 convolution followed by 3x3 max pooling
model.add(Conv2D(10, 3, activation="relu"))
model.add(MaxPool2D(pool_size=(3, 3)))
# 3rd layer: 20 filters with an L2 penalty (0.01) on the kernel, then BN
model.add(Conv2D(20, 3, activation="relu", kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2, 2)))
# 4th layer: flatten, then an L2-regularized 1024-unit dense layer with BN
# and a dropout rate of 0.5
model.add(Flatten())
model.add(Dense(1024, activation="relu", kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
# Output layer: one softmax unit per label column of y
model.add(Dense(y.shape[1], activation="softmax"))

# Same architecture as the Adam variant, but optimized with RMSprop using
# Keras' default settings for the "rmsprop" string identifier.
model.compile(loss='categorical_crossentropy',
              optimizer="rmsprop",
              metrics=['accuracy'])

# Save the freshly compiled (untrained) model to disk.
model.save(basepath + "ML/model_with_BN_L2_rmsprop.h5")

In [None]:
# Reload the saved BN + L2 + RMSprop model from disk.
model = load_model(
    basepath + "ML/model_with_BN_L2_rmsprop.h5"
)
# Train on the grouped data, evaluating against the first 6000 test samples.
# The last argument (80) is presumably the number of epochs -- confirm against
# utils.train_with_groups.
hists, scores = utils.train_with_groups(
    model,
    grpX,
    grpY,
    X_test,
    y_test[:6000],
    80,
)

In [None]:
# Visualize the RMSprop-trained model's output on the loaded test images.
# NOTE(review): what exactly is plotted is defined in utils.plot_probability,
# which is not visible here.
utils.plot_probability(model, X_test)

In [None]:
# X_test only contains the first 6000 test images, so trim the labels to the
# same length; passing the full y_test would pair predictions with a label
# array of a different size.
utils.calc_accuracy_above_threshold(model, X_test, y_test[:len(X_test)], threshold=0.90, verbose=1)

## まとめ
L2正則化を課し、さらに最適化手法をRMSpropに変えると、予測の質が下がったように見える。   