# 目的
Kerasを用いた為替の予測をもう少し進める。   
データはFXTF社のMT4からエクスポートしたものを用いる。   

前のノートでは、ある時刻 $t$ における HL + SMA + Volume などで予測できるか試したが、   
いい結果を得られなかった。   
このノートでは、ある時刻から少しさかのぼった分も特徴量に加えると正答率があがるのかどうか検討する。

In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import LSTM
from keras.callbacks import EarlyStopping
from keras.models import model_from_json
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

import copy
import datetime
import numpy as np
import os
import pandas as pd
import pickle

from PIL import Image
from PIL import ImageDraw

from FX.FX import SQLAnaforFX
from FX.FX import drawfigfunc as dff
from FX.FX import datetimefuncs as dtf
from FX.FX import analyzefuncs as af
from FX.FX import KerasModelAdapter

In [None]:
def create_model(X, y):
    """Build and compile a dense softmax classifier.

    The network has two 256-unit ReLU layers, each followed by 30% dropout,
    and a softmax output layer.

    Args:
        X: 2-D feature array; only ``X.shape[1]`` (input width) is used.
        y: 2-D label array; only ``y.shape[1]`` (number of output units) is used.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        rmsprop optimizer, accuracy metric).
    """
    n_features = X.shape[1]
    n_outputs = y.shape[1]

    layer_stack = [
        # Input layer
        Dense(256, input_shape=(n_features,)),
        Activation('relu'),
        Dropout(0.3),
        # Hidden layer
        Dense(256),
        Activation('relu'),
        Dropout(0.3),
        # Output layer
        Dense(n_outputs),
        Activation('softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    # Compile
    model.compile(
        loss='categorical_crossentropy',
        optimizer="rmsprop",
        metrics=['accuracy'],
    )
    return model

In [None]:
def ary2seqary(ary, n_seq=2):
    """Stack ``n_seq`` successively shifted copies of a 1-D array.

    Row ``k`` of the result is ``ary[k:k + m]`` with
    ``m = len(ary) - n_seq + 1``, so column ``j`` holds the window
    ``ary[j], ary[j + 1], ..., ary[j + n_seq - 1]``.

    Args:
        ary: 1-D array-like of numbers.
        n_seq: Number of consecutive samples per window (at least 2).

    Returns:
        A new float ndarray of shape ``(n_seq, len(ary) - n_seq + 1)``.

    Raises:
        ValueError: If ``ary`` is not 1-D, ``n_seq`` is below 2, or ``ary``
            is too short to hold a window.
    """
    ary = np.asarray(ary)
    if ary.ndim != 1:
        raise ValueError(
            "ary must be 1-dimensional, got shape {}".format(ary.shape))
    if n_seq < 2:
        raise ValueError("n_seq must be at least 2, got {}".format(n_seq))

    n_windows = len(ary) - n_seq + 1
    if n_windows < 0:
        raise ValueError(
            "ary of length {} is too short for n_seq={}".format(len(ary), n_seq))

    # np.zeros yields a float array, so the result is float whatever the input dtype.
    res = np.zeros((n_seq, n_windows))
    for ii in range(n_seq):
        res[ii] = ary[ii:ii + n_windows]
    return res

## データの用意

In [None]:
"""Load the data file."""
# CSV file with the "k030" suffix; index_col=False keeps pandas from using the
# first column as the index.
fpath = "../../data/FXTF/USDJPY-cd1_20170806_k030.csv"
data = pd.read_csv(fpath, index_col=False)
# Preview the first rows to check that the columns were parsed.
data.head()

In [None]:
# OHLC and volume columns as NumPy arrays.
# `.values` replaces `.as_matrix()`, which is deprecated and removed in pandas 1.0.
opens = data["open"].values
high = data["high"].values
low = data["low"].values
close = data["close"].values
volume = data["volume"].values

# Bar-to-bar change of the close; the first element stays 0.
dclose = np.zeros_like(close)
dclose[1:] = np.diff(close)

# Spread
s = 0.003

# SMA columns, individually and stacked as rows (one row per SMA column).
sma07 = data["sma07"].values
sma13 = data["sma13"].values
sma25 = data["sma25"].values
sma = data[["sma07", "sma13", "sma25"]].values.T

# Label columns, one column per label.
y_data = data[["label1", "label2", "label3"]].values

# Timestamps: parse "date time" strings, then express them as hours elapsed
# since the first row.
datetime_fmt = "%Y.%m.%d %H:%M"
datetimes = [
    datetime.datetime.strptime(date_t + " " + time_t, datetime_fmt)
    for date_t, time_t in zip(data["date"], data["time"])
]
datetimes = np.array([(t - datetimes[0]).total_seconds() / 60.**2 for t in datetimes])

In [None]:
# Recompute labels with the project helper and compare them visually with the
# labels stored in the CSV (y_data).
# NOTE(review): presumably `diff` is the close change 30 bars ahead and column 0 / 2
# of the labels mean up / down -- confirm against af.labeling.
y, diff = af.labeling(close, s*2, 30, 2)

xlim = [9, 10]
ind = (datetimes >= xlim[0]) & (datetimes <= xlim[1])

# Close shifted 30 bars towards the past; the last 30 entries stay 0.
close_forward = np.zeros_like(close)
close_forward[:-30] = close[30:]

dff.makefig(5,5)

# Upper panel: original close against the shifted close
plt.subplot(211)
plt.plot(datetimes, close, label="original")
plt.plot(datetimes, close_forward, label="forward")
plt.xlim(xlim)
plt.ylim([close[ind].min(), close[ind].max()])
plt.legend()

# Lower panel: difference with computed labels (circles) and CSV labels (stars)
plt.subplot(212)
plt.plot(datetimes, diff, label="difference")
for label_source, marker in ((y, "o"), (y_data, "*")):
    first_col = label_source[:, 0] == 1
    third_col = label_source[:, 2] == 1
    plt.plot(datetimes[first_col], diff[first_col], "r" + marker)
    plt.plot(datetimes[third_col], diff[third_col], "g" + marker)
threshold = s*2
plt.hlines([-threshold, threshold], datetimes[0], datetimes[-1], color="m")
plt.xlim(xlim)
plt.ylim(-0.1, 0.1)
plt.legend()



## 連続データ（系列）に変換

In [None]:
# Turn every series into windows of n_seq consecutive samples
# (rows = position inside the window, columns = window index).
n_seq = 3
opens_seq, high_seq, low_seq, close_seq, volume_seq = (
    ary2seqary(series, n_seq) for series in (opens, high, low, close, volume)
)

sma07_seq, sma13_seq, sma25_seq = (
    ary2seqary(series, n_seq) for series in (sma07, sma13, sma25)
)
sma_seq = np.vstack((sma07_seq, sma13_seq, sma25_seq))

# Drop the first n_seq-1 labels so that y has one row per window.
y = y_data[n_seq - 1:]

# closeのみ

In [None]:
# Centre each row on its own mean, then scale it by its largest absolute
# deviation so that every row lies within [-1, 1].
close_mean = close_seq.mean(axis=1)
close_norm = close_seq - close_mean[:, np.newaxis]
close_norm /= np.abs(close_norm).max(axis=1, keepdims=True)

# Samples along rows, features along columns
X = close_norm.T

In [None]:
# Build a fresh network whose input/output widths follow X.shape[1] / y.shape[1].
model = create_model(X, y)

In [None]:
# Hold out 30% of the samples as a test set (fixed seed).
split = train_test_split(X, y, test_size=0.3, random_state=300)
X_train, X_test, y_train, y_test = split
print(*[part.shape for part in split])

In [None]:
# データで訓練 --- (※5)
hist = model.fit(
    X_train, y_train,
    batch_size=100,
    epochs=800,
    validation_split=0.1,
    callbacks=[EarlyStopping(monitor='val_loss', patience=2)],
    verbose=0)

# テストデータを用いて評価する --- (※6)
score = model.evaluate(X_test, y_test, verbose=0)
print('loss=', score[0])
print('accuracy=', score[1])

In [None]:
# Predicted probabilities for the test set, transposed so that probs[ii] is
# the row plotted under labels[ii].
probs = model.predict_proba(X_test, verbose=0).T
labels = ["high", "lose", "low"]

# Normalised probability histogram per label.
# xbins holds the LEFT edge of each bin. np.histogram needs the closing edge
# as well; without it, probabilities above the last left edge (0.95) were
# silently dropped and the final slot of every histogram was always zero.
xbins = np.arange(0, 1.0, 0.05)
bin_edges = np.append(xbins, 1.0)
hists = np.zeros((3, len(xbins)))
for ii in range(len(probs)):
    hists[ii] = np.histogram(probs[ii], bins=bin_edges)[0]
    hists[ii] /= hists[ii].sum()

In [None]:
# Predicted probability of each label, one panel per label
dff.makefig(18, 5)
for ii, prob in enumerate(probs):
    plt.subplot(1, 3, ii + 1)
    plt.plot(prob, linewidth=1.2)
    dff.arrangefig(xlabel="Time index", ylabel="Probability", title="Probability of {}".format(labels[ii]))
    plt.ylim(0, 1)
plt.tight_layout()

# Histogram of the predicted probabilities of each label, with the
# complementary cumulative fraction drawn on a twin axis
dff.makefig(18, 5)
dxbins = np.diff(xbins)[0]
for ii, freq in enumerate(hists):
    plt.subplot(1, 3, ii + 1)
    # `hold` is not a bar-alignment option (and the keyword no longer exists in
    # Matplotlib 3); `align` is the keyword that accepts "center".
    plt.bar(xbins, freq, width=0.8*dxbins, align="center", color="g")
    dff.arrangefig(ylabel="Frequency")
    ax2 = plt.gca().twinx()
    ax2.plot(xbins, 1.0 - np.cumsum(freq), "r-", linewidth=1.5)
    dff.arrangefig(xlabel="Probability", ylabel="Accumulation", title="Hist of {}".format(labels[ii]))
    plt.ylim(0, 1)
plt.tight_layout()

とりあえず70%以上を対象にするか。

In [None]:
# Flag the test samples for which at least one label reaches the threshold.
prob_threshold = 0.7
probs = model.predict_proba(X_test, verbose=0).T
inds = probs >= prob_threshold
ind_sum = inds.any(axis=0)
print("<# of events over threshold>")
n_per_label = inds.sum(axis=1)
n_selected = ind_sum.sum()
print("[high, lose, low]:", n_per_label, ",total:", n_selected)

# Evaluate on the flagged samples only
score = model.evaluate(X_test[ind_sum], y_test[ind_sum], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('loss=', test_loss)
print('accuracy=', test_accuracy)

飛躍的に改善された。。。？   
乱数によっては予測確率が上がる部分も見受けられる。

# SMAを利用した場合

In [None]:
# Centre each row on its own mean, then scale it by its largest absolute
# deviation so that every row lies within [-1, 1].
sma_mean = sma_seq.mean(axis=1)
sma_norm = sma_seq - sma_mean[:, np.newaxis]
sma_norm /= np.abs(sma_norm).max(axis=1, keepdims=True)

# Samples along rows, features along columns; then a fresh network for them
X = sma_norm.T
model = create_model(X, y)

In [None]:
# Hold out 30% of the samples as a test set (fixed seed).
split = train_test_split(X, y, test_size=0.3, random_state=8)
X_train, X_test, y_train, y_test = split
print(*[part.shape for part in split])

In [None]:
# データで訓練 --- (※5)
hist = model.fit(
    X_train, y_train,
    batch_size=100,
    epochs=800, shuffle=False,
    validation_split=0.1,
    callbacks=[EarlyStopping(monitor='val_loss', patience=2)],
    verbose=0)

# テストデータを用いて評価する --- (※6)
score = model.evaluate(X_test, y_test, verbose=0)
print('loss=', score[0])
print('accuracy=', score[1])

## こちらでも高確率のもののみを取り出す

In [None]:
# Predicted probabilities for the test set, transposed so that probs[ii] is
# the row plotted under labels[ii].
probs = model.predict_proba(X_test, verbose=0).T
labels = ["high", "lose", "low"]

# Normalised probability histogram per label.
# xbins holds the LEFT edge of each bin. np.histogram needs the closing edge
# as well; without it, probabilities above the last left edge (0.95) were
# silently dropped and the final slot of every histogram was always zero.
xbins = np.arange(0, 1.0, 0.05)
bin_edges = np.append(xbins, 1.0)
hists = np.zeros((3, len(xbins)))
for ii in range(len(probs)):
    hists[ii] = np.histogram(probs[ii], bins=bin_edges)[0]
    hists[ii] /= hists[ii].sum()

In [None]:
# Predicted probability of each label, one panel per label
dff.makefig(18, 5)
for ii, prob in enumerate(probs):
    plt.subplot(1, 3, ii + 1)
    plt.plot(prob, linewidth=1.2)
    dff.arrangefig(xlabel="Time index", ylabel="Probability", title="Probability of {}".format(labels[ii]))
    plt.ylim(0, 1)
plt.tight_layout()

# Histogram of the predicted probabilities of each label, with the
# complementary cumulative fraction drawn on a twin axis
dff.makefig(18, 5)
dxbins = np.diff(xbins)[0]
for ii, freq in enumerate(hists):
    plt.subplot(1, 3, ii + 1)
    # `hold` is not a bar-alignment option (and the keyword no longer exists in
    # Matplotlib 3); `align` is the keyword that accepts "center".
    plt.bar(xbins, freq, width=0.8*dxbins, align="center", color="g")
    dff.arrangefig(ylabel="Frequency")
    ax2 = plt.gca().twinx()
    ax2.plot(xbins, 1.0 - np.cumsum(freq), "r-", linewidth=1.5)
    dff.arrangefig(xlabel="Probability", ylabel="Accumulation", title="Hist of {}".format(labels[ii]))
    plt.ylim(0, 1)
plt.tight_layout()

In [None]:
# Flag the test samples for which at least one label reaches the threshold.
prob_threshold = 0.60  # not much can be expected at 50%, though
probs = model.predict_proba(X_test, verbose=0).T
inds = probs >= prob_threshold
ind_sum = inds.any(axis=0)
print("<# of events over threshold>")
n_per_label = inds.sum(axis=1)
n_selected = ind_sum.sum()
selected_pct = ind_sum.sum()*100./len(X_test)
print("[high, lose, low]:", n_per_label,
      ",total:", n_selected,
      ", percentage:{0:.1f}".format(selected_pct))

# Evaluate on the flagged samples only
score = model.evaluate(X_test[ind_sum], y_test[ind_sum], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('loss=', test_loss)
print('accuracy=', test_accuracy)

closeだけの場合と比べてパフォーマンスが下がる様子。。

# HLも加える
Openとcloseは等価であり、またSMAもcloseとほぼ等価である。   
ここではSMAにHigh/Lowを加えてみる。

In [None]:
# Stack the high, low and SMA windows into one feature matrix (rows = features).
hlsma = np.vstack((high_seq, low_seq, sma_seq))

# Centre each row on its own mean, then scale it by its largest absolute
# deviation so that every row lies within [-1, 1].
hlsma_mean = hlsma.mean(axis=1)
hlsma_norm = hlsma - hlsma_mean[:, np.newaxis]
hlsma_norm /= np.abs(hlsma_norm).max(axis=1, keepdims=True)

# Samples along rows, features along columns; then a fresh network for them
X = hlsma_norm.T
model = create_model(X, y)

In [None]:
# Hold out 30% of the samples as a test set (fixed seed).
split = train_test_split(X, y, test_size=0.3, random_state=8)
X_train, X_test, y_train, y_test = split
print(*[part.shape for part in split])

In [None]:
# データで訓練 --- (※5)
hist = model.fit(
    X_train, y_train,
    batch_size=100,
    epochs=800, shuffle=False,
    validation_split=0.1,
    callbacks=[EarlyStopping(monitor='val_loss', patience=2)],
    verbose=0)

# テストデータを用いて評価する --- (※6)
score = model.evaluate(X_test, y_test, verbose=0)
print('loss=', score[0])
print('accuracy=', score[1])

## こちらでも高確率のもののみを取り出す

In [None]:
# Predicted probabilities for the test set, transposed so that probs[ii] is
# the row plotted under labels[ii].
probs = model.predict_proba(X_test, verbose=0).T
labels = ["high", "lose", "low"]

# Normalised probability histogram per label.
# xbins holds the LEFT edge of each bin. np.histogram needs the closing edge
# as well; without it, probabilities above the last left edge (0.975) were
# silently dropped and the final slot of every histogram was always zero.
xbins = np.arange(0, 1.0, 0.025)
bin_edges = np.append(xbins, 1.0)
hists = np.zeros((3, len(xbins)))
for ii in range(len(probs)):
    hists[ii] = np.histogram(probs[ii], bins=bin_edges)[0]
    hists[ii] /= hists[ii].sum()

In [None]:
# Predicted probability of each label, one panel per label
dff.makefig(18, 5)
for ii, prob in enumerate(probs):
    plt.subplot(1, 3, ii + 1)
    plt.plot(prob, linewidth=1.2)
    dff.arrangefig(xlabel="Time index", ylabel="Probability", title="Probability of {}".format(labels[ii]))
    plt.ylim(0, 1)
plt.tight_layout()

# Histogram of the predicted probabilities of each label, with the
# complementary cumulative fraction drawn on a twin axis
dff.makefig(18, 5)
dxbins = np.diff(xbins)[0]
for ii, freq in enumerate(hists):
    plt.subplot(1, 3, ii + 1)
    # `hold` is not a bar-alignment option (and the keyword no longer exists in
    # Matplotlib 3); `align` is the keyword that accepts "center".
    plt.bar(xbins, freq, width=0.8*dxbins, align="center", color="g")
    dff.arrangefig(ylabel="Frequency")
    ax2 = plt.gca().twinx()
    ax2.plot(xbins, 1.0 - np.cumsum(freq), "r-", linewidth=1.5)
    dff.arrangefig(xlabel="Probability", ylabel="Accumulation", title="Hist of {}".format(labels[ii]))
    plt.ylim(0, 1)
plt.tight_layout()

In [None]:
# Flag the test samples for which at least one label reaches the threshold.
prob_threshold = 0.55
probs = model.predict_proba(X_test, verbose=0).T
inds = probs >= prob_threshold
ind_sum = inds.any(axis=0)
print("<# of events over threshold>")
n_per_label = inds.sum(axis=1)
n_selected = ind_sum.sum()
selected_pct = ind_sum.sum()*100./len(X_test)
print("[high, lose, low]:", n_per_label,
      ",total:", n_selected,
      ", percentage:{0:.1f}".format(selected_pct))

# Evaluate on the flagged samples only
score = model.evaluate(X_test[ind_sum], y_test[ind_sum], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('loss=', test_loss)
print('accuracy=', test_accuracy)

# ここまでのまとめ
乱数によってはパフォーマンスが上がるが、安定しない。

## データの保存

In [None]:
# Wrap the current model in the project's KerasModelAdapter.
adapter = KerasModelAdapter(model)

In [None]:
# Save through the adapter to this directory.
# NOTE(review): what exactly is written (architecture, weights, ...) depends on
# KerasModelAdapter.save -- confirm there.
adapter.save("./data/model_20170601_2/")

In [None]:
# Load a stored model into a second, initially empty adapter.
# NOTE(review): this path ("model_20170601") is not the "model_20170601_2"
# directory written in this notebook -- confirm that loading an older model is intended.
adapter2 = KerasModelAdapter()
adapter2.load("./data/model_20170601/")

In [None]:
# Take the loaded model out of the adapter.
model2 = adapter2.model()

In [None]:
# Flag the samples (whole data set, loaded model) for which at least one label
# reaches the threshold.
prob_threshold = 0.60
probs = model2.predict_proba(X, verbose=0).T
inds = probs >= prob_threshold
ind_sum = inds.any(axis=0)
print("<# of events over threshold>")
n_per_label = inds.sum(axis=1)
n_selected = ind_sum.sum()
selected_pct = ind_sum.sum()*100./len(X)
print("[high, lose, low]:", n_per_label,
      ",total:", n_selected,
      ", percentage:{0:.1f}".format(selected_pct))

# Evaluate the loaded model on the flagged samples only
score = model2.evaluate(X[ind_sum], y[ind_sum], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('loss=', test_loss)
print('accuracy=', test_accuracy)

# volumeも加えてみる

In [None]:
# Stack the high, low, SMA and volume windows into one feature matrix
# (rows = features).
hlsmav = np.vstack((high_seq, low_seq, sma_seq, volume_seq))

# Centre each row on its own mean, then scale it by its largest absolute
# deviation so that every row lies within [-1, 1].
hlsmav_mean = hlsmav.mean(axis=1)
hlsmav_norm = hlsmav - hlsmav_mean[:, np.newaxis]
hlsmav_norm /= np.abs(hlsmav_norm).max(axis=1, keepdims=True)

# Samples along rows, features along columns
X = hlsmav_norm.T

In [None]:
# Build a fresh network whose input/output widths follow X.shape[1] / y.shape[1].
model = create_model(X, y)

In [None]:
# Hold out 30% of the samples as a test set (fixed seed).
split = train_test_split(X, y, test_size=0.3, random_state=300)
X_train, X_test, y_train, y_test = split
print(*[part.shape for part in split])

In [None]:
# データで訓練 --- (※5)
hist = model.fit(
    X_train, y_train,
    batch_size=100,
    epochs=800, shuffle=False,
    validation_split=0.1,
    callbacks=[EarlyStopping(monitor='val_loss', patience=2)],
    verbose=0)

# テストデータを用いて評価する --- (※6)
score = model.evaluate(X_test, y_test, verbose=0)
print('loss=', score[0])
print('accuracy=', score[1])

In [None]:
# Predicted probabilities for the test set, transposed so that probs[ii] is
# the row plotted under labels[ii].
probs = model.predict_proba(X_test, verbose=0).T
labels = ["high", "lose", "low"]

# Normalised probability histogram per label.
# xbins holds the LEFT edge of each bin. np.histogram needs the closing edge
# as well; without it, probabilities above the last left edge (0.95) were
# silently dropped and the final slot of every histogram was always zero.
xbins = np.arange(0, 1.0, 0.05)
bin_edges = np.append(xbins, 1.0)
hists = np.zeros((3, len(xbins)))
for ii in range(len(probs)):
    hists[ii] = np.histogram(probs[ii], bins=bin_edges)[0]
    hists[ii] /= hists[ii].sum()

In [None]:
# Predicted probability of each label, one panel per label
dff.makefig(18, 5)
for ii, prob in enumerate(probs):
    plt.subplot(1, 3, ii + 1)
    plt.plot(prob, linewidth=1.2)
    dff.arrangefig(xlabel="Time index", ylabel="Probability", title="Probability of {}".format(labels[ii]))
    plt.ylim(0, 1)
plt.tight_layout()

# Histogram of the predicted probabilities of each label, with the
# complementary cumulative fraction drawn on a twin axis
dff.makefig(18, 5)
dxbins = np.diff(xbins)[0]
for ii, freq in enumerate(hists):
    plt.subplot(1, 3, ii + 1)
    # `hold` is not a bar-alignment option (and the keyword no longer exists in
    # Matplotlib 3); `align` is the keyword that accepts "center".
    plt.bar(xbins, freq, width=0.8*dxbins, align="center", color="g")
    dff.arrangefig(ylabel="Frequency")
    ax2 = plt.gca().twinx()
    ax2.plot(xbins, 1.0 - np.cumsum(freq), "r-", linewidth=1.5)
    dff.arrangefig(xlabel="Probability", ylabel="Accumulation", title="Hist of {}".format(labels[ii]))
    plt.ylim(0, 1)
plt.tight_layout()

In [None]:
# Flag the test samples for which at least one label reaches the threshold.
prob_threshold = 0.55
probs = model.predict_proba(X_test, verbose=0).T
inds = probs >= prob_threshold
ind_sum = inds.any(axis=0)
print("<# of events over threshold>")
n_per_label = inds.sum(axis=1)
n_selected = ind_sum.sum()
selected_pct = ind_sum.sum()*100./len(X_test)
print("[high, lose, low]:", n_per_label,
      ",total:", n_selected,
      ", percentage:{0:.2f}".format(selected_pct))

# Evaluate on the flagged samples only
score = model.evaluate(X_test[ind_sum], y_test[ind_sum], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('loss=', test_loss)
print('accuracy=', test_accuracy)

## 所見
volumeを加えても大した差はない。さて、どうしたものか。。。

# k値を変えてみる

In [None]:
"""Load the data file."""
# CSV file with the "k060" suffix; index_col=False keeps pandas from using the
# first column as the index.
fpath = "../../data/FXTF/USDJPY-cd1_20170806_k060.csv"
data = pd.read_csv(fpath, index_col=False)
# Preview the first rows to check that the columns were parsed.
data.head()

In [None]:
# OHLC and volume columns as NumPy arrays.
# `.values` replaces `.as_matrix()`, which is deprecated and removed in pandas 1.0.
opens = data["open"].values
high = data["high"].values
low = data["low"].values
close = data["close"].values
volume = data["volume"].values

# Bar-to-bar change of the close; the first element stays 0.
dclose = np.zeros_like(close)
dclose[1:] = np.diff(close)

# Spread
s = 0.003

# SMA columns, individually and stacked as rows (one row per SMA column).
sma07 = data["sma07"].values
sma13 = data["sma13"].values
sma25 = data["sma25"].values
sma = data[["sma07", "sma13", "sma25"]].values.T

# Label columns, one column per label.
y_data = data[["label1", "label2", "label3"]].values

# Timestamps: parse "date time" strings, then express them as hours elapsed
# since the first row.
datetime_fmt = "%Y.%m.%d %H:%M"
datetimes = [
    datetime.datetime.strptime(date_t + " " + time_t, datetime_fmt)
    for date_t, time_t in zip(data["date"], data["time"])
]
datetimes = np.array([(t - datetimes[0]).total_seconds() / 60.**2 for t in datetimes])

In [None]:
# Turn every series into windows of n_seq consecutive samples
# (rows = position inside the window, columns = window index).
n_seq = 3
opens_seq, high_seq, low_seq, close_seq, volume_seq = (
    ary2seqary(series, n_seq) for series in (opens, high, low, close, volume)
)

sma07_seq, sma13_seq, sma25_seq = (
    ary2seqary(series, n_seq) for series in (sma07, sma13, sma25)
)
sma_seq = np.vstack((sma07_seq, sma13_seq, sma25_seq))

# Drop the first n_seq-1 labels so that y has one row per window.
y = y_data[n_seq - 1:]

In [None]:
# Stack the high, low, SMA and volume windows into one feature matrix
# (rows = features).
hlsmav = np.vstack((high_seq, low_seq, sma_seq, volume_seq))

# Centre each row on its own mean, then scale it by its largest absolute
# deviation so that every row lies within [-1, 1].
hlsmav_mean = hlsmav.mean(axis=1)
hlsmav_norm = hlsmav - hlsmav_mean[:, np.newaxis]
hlsmav_norm /= np.abs(hlsmav_norm).max(axis=1, keepdims=True)

# Samples along rows, features along columns
X = hlsmav_norm.T

In [None]:
# Fresh network for the k060 data set. It has to be bound to `model`: the
# training, prediction and evaluation cells that follow all operate on `model`,
# so binding it to `model2` left the new network unused and kept training the
# network fitted on the previous data set.
model = create_model(X, y)

In [None]:
# Hold out 30% of the samples as a test set (fixed seed).
split = train_test_split(X, y, test_size=0.3, random_state=8)
X_train, X_test, y_train, y_test = split
print(*[part.shape for part in split])

In [None]:
# データで訓練 --- (※5)
hist = model.fit(
    X_train, y_train,
    batch_size=100,
    epochs=800, shuffle=False,
    validation_split=0.1,
    callbacks=[EarlyStopping(monitor='val_loss', patience=2)],
    verbose=0)

# テストデータを用いて評価する --- (※6)
score = model.evaluate(X_test, y_test, verbose=0)
print('loss=', score[0])
print('accuracy=', score[1])

In [None]:
# Predicted probabilities for the test set, transposed so that probs[ii] is
# the row plotted under labels[ii].
probs = model.predict_proba(X_test, verbose=0).T
labels = ["high", "lose", "low"]

# Normalised probability histogram per label.
# xbins holds the LEFT edge of each bin. np.histogram needs the closing edge
# as well; without it, probabilities above the last left edge (0.95) were
# silently dropped and the final slot of every histogram was always zero.
xbins = np.arange(0, 1.0, 0.05)
bin_edges = np.append(xbins, 1.0)
hists = np.zeros((3, len(xbins)))
for ii in range(len(probs)):
    hists[ii] = np.histogram(probs[ii], bins=bin_edges)[0]
    hists[ii] /= hists[ii].sum()

In [None]:
# Predicted probability of each label, one panel per label
dff.makefig(18, 5)
for ii, prob in enumerate(probs):
    plt.subplot(1, 3, ii + 1)
    plt.plot(prob, linewidth=1.2)
    dff.arrangefig(xlabel="Time index", ylabel="Probability", title="Probability of {}".format(labels[ii]))
    plt.ylim(0, 1)
plt.tight_layout()

# Histogram of the predicted probabilities of each label, with the
# complementary cumulative fraction drawn on a twin axis
dff.makefig(18, 5)
dxbins = np.diff(xbins)[0]
for ii, freq in enumerate(hists):
    plt.subplot(1, 3, ii + 1)
    # `hold` is not a bar-alignment option (and the keyword no longer exists in
    # Matplotlib 3); `align` is the keyword that accepts "center".
    plt.bar(xbins, freq, width=0.8*dxbins, align="center", color="g")
    dff.arrangefig(ylabel="Frequency")
    ax2 = plt.gca().twinx()
    ax2.plot(xbins, 1.0 - np.cumsum(freq), "r-", linewidth=1.5)
    dff.arrangefig(xlabel="Probability", ylabel="Accumulation", title="Hist of {}".format(labels[ii]))
    plt.ylim(0, 1)
plt.tight_layout()

In [None]:
# Flag the test samples for which at least one label reaches the threshold.
prob_threshold = 0.6
probs = model.predict_proba(X_test, verbose=0).T
inds = probs >= prob_threshold
ind_sum = inds.any(axis=0)
print("<# of events over threshold>")
n_per_label = inds.sum(axis=1)
n_selected = ind_sum.sum()
selected_pct = ind_sum.sum()*100./len(X_test)
print("[high, lose, low]:", n_per_label,
      ",total:", n_selected,
      ", percentage:{0:.2f}".format(selected_pct))

# Evaluate on the flagged samples only
score = model.evaluate(X_test[ind_sum], y_test[ind_sum], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('loss=', test_loss)
print('accuracy=', test_accuracy)

In [None]:
# Same threshold check over the whole data set (training and test samples).
prob_threshold = 0.6
probs = model.predict_proba(X, verbose=0).T
inds = probs >= prob_threshold
ind_sum = inds.any(axis=0)
print("<# of events over threshold>")
n_per_label = inds.sum(axis=1)
n_selected = ind_sum.sum()
selected_pct = ind_sum.sum()*100./len(X)
print("[high, lose, low]:", n_per_label,
      ",total:", n_selected,
      ", percentage:{0:.2f}".format(selected_pct))

# Evaluate on the flagged samples only
score = model.evaluate(X[ind_sum], y[ind_sum], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('loss=', test_loss)
print('accuracy=', test_accuracy)

## 所見
k=5,30,60を用いて訓練した。   
どれも乱数の種に対して結果が安定しない。