In [1]:
from datetime import datetime
import gc
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sqlite3
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib
from eli5.sklearn import PermutationImportance
import time
import eli5

import mpl_finance as mpf
from matplotlib import ticker
import matplotlib.dates as mdates
from pyti.exponential_moving_average import exponential_moving_average as ema
from pyti.moving_average_convergence_divergence import moving_average_convergence_divergence as macd
from pyti.simple_moving_average import simple_moving_average as sma
import talib as ta
from tslearn.clustering import KShape, TimeSeriesKMeans
from tslearn.preprocessing import TimeSeriesScalerMeanVariance

gc.enable()
pd.options.display.max_columns = None

# SQL

In [2]:
main = ['USD_JPY']
instruments = ['EUR_JPY', 'GBP_JPY', 'GBP_USD', 'EUR_USD', 'SGD_JPY', 'JP225_USD', 'US30_USD','NAS100_USD','SPX500_USD',
               'EU50_EUR','DE30_EUR','AU200_AUD','USB05Y_USD','USB10Y_USD','XAU_EUR','XAG_GBP', 'XAU_XAG', 'WHEAT_USD']
granularities = ['M5']
where = ['2014']

### Main

In [3]:
dbname = "C:/Users/goomo/Desktop/FX_ProtoType/DB/{}.db".format(main[0])
conn = sqlite3.connect(dbname)
cur = conn.cursor()
df =  pd.read_sql('SELECT * FROM {} WHERE Date > {} '.format(granularities[0], where[0]), conn)
cur.close()
conn.close()
df = df.drop(columns='index')

### Other

In [4]:
%%time
for i, instrument in enumerate(instruments):
    dbname = "C:/Users/goomo/Desktop/FX_ProtoType/DB/{}.db".format(instrument)
    conn = sqlite3.connect(dbname)
    cur = conn.cursor()
    _df = pd.read_sql('SELECT * FROM {} WHERE Date > {} '.format(granularities[0], where[0]), conn)
    _df.columns = ["index", "Date", "{}_Open".format(instrument), 
                                    "{}_High".format(instrument), 
                                    "{}_Low".format(instrument), 
                                    "{}_Close".format(instrument),
                                    "{}_Volume".format(instrument)]
    cur.close()
    conn.close()
    
    _df = _df.drop(columns='index')
    _df['Date'] = pd.to_datetime(_df['Date'])
    
    if instrument  == 'USD_JPY':
        df_USDJPY = _df
    elif instrument == 'EUR_JPY':
        df_EURJPY = _df
    elif instrument == 'GBP_JPY':
        df_GBPJPY = _df
    elif instrument == 'GBP_USD':
        df_GBPUSD = _df
    elif instrument == 'EUR_USD':
        df_EURUSD = _df
    elif instrument == 'SGD_JPY':
        df_SGDJPY = _df
    elif instrument == 'JP225_USD':
        df_JP225  = _df
    elif instrument == 'US30_USD':
        df_US30   = _df
    elif instrument == 'NAS100_USD':
        df_NAS100 = _df
    elif instrument == 'SPX500_USD':
        df_SPX500 = _df
    elif instrument == 'EU50_EUR':
        df_EU50   = _df
    elif instrument == 'DE30_EUR':
        df_DE30   = _df
    elif instrument == 'AU200_AUD':
        df_AU200  = _df
    elif instrument == 'USB05Y_USD':
        df_USB05Y = _df
    elif instrument == 'USB10Y_USD':
        df_USB10Y = _df
    elif instrument == 'XAU_EUR':
        df_XAUEUR = _df
    elif instrument == 'XAG_GBP':
        df_XAGGBP = _df
    elif instrument == 'XAU_XAG':
        df_XAUXAG = _df
    elif instrument == 'WHEAT_USD':
        df_WHEAT = _df

del _df
gc.collect()

Wall time: 22.5 s


In [5]:
df.head(2)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2014-01-01 22:00:00,105.246,105.331,105.242,105.323,142
1,2014-01-01 22:05:00,105.324,105.372,105.321,105.365,116


# 求めるデータを作成

In [6]:
_df = df['Date'].str.split(" ", expand=True)
_df = pd.to_datetime(_df[0], format='%Y/%m/%d')
Date_key = pd.DataFrame({'DATE':_df})
df = pd.concat([df, Date_key],axis=1)

In [7]:
df['AFTER_2H'] = df['Close'].shift(-36)

In [None]:
DIFF_2H  = []
for i in range(len(df)):

    DIFF_2H.append((df['AFTER_2H'][i] - df['Close'][i]))

    
DIFF_2H = pd.DataFrame({'DIFF_AFTER_2H':DIFF_2H})

df = pd.concat([df, DIFF_2H],axis=1)

In [None]:
def Profit_2Classification(df, _DiffDay, window_len):    
    profit   = []
    for i in range(len(df) - window_len):
        if _DiffDay[i] > 0:
            profit.append('1')
        else:
            profit.append('0')
              
    return profit

In [None]:
windows = 24
profit = Profit_2Classification(df, df['DIFF_AFTER_2H'], windows)
PROFIT_2H  = pd.DataFrame({'PROFIT_2H':profit})
df = pd.concat([df, PROFIT_2H],axis=1)

In [None]:
df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,DATE,AFTER_2H,DIFF_AFTER_2H,PROFIT_2H
390135,2019-03-29 20:35:00,110.822,110.822,110.806,110.81,18,2019-03-29,,,
390136,2019-03-29 20:40:00,110.813,110.82,110.813,110.818,13,2019-03-29,,,
390137,2019-03-29 20:45:00,110.814,110.822,110.813,110.818,12,2019-03-29,,,
390138,2019-03-29 20:50:00,110.82,110.828,110.812,110.826,33,2019-03-29,,,
390139,2019-03-29 20:55:00,110.824,110.858,110.818,110.85,34,2019-03-29,,,


In [None]:
df = df[:-24]
df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,DATE,AFTER_2H,DIFF_AFTER_2H,PROFIT_2H
390111,2019-03-29 18:35:00,110.82,110.82,110.813,110.818,10,2019-03-29,,,0
390112,2019-03-29 18:40:00,110.821,110.824,110.8,110.802,35,2019-03-29,,,0
390113,2019-03-29 18:45:00,110.804,110.824,110.802,110.818,32,2019-03-29,,,0
390114,2019-03-29 18:50:00,110.815,110.858,110.815,110.838,66,2019-03-29,,,0
390115,2019-03-29 18:55:00,110.84,110.856,110.806,110.812,132,2019-03-29,,,0


# 階差・対数収益率

In [None]:
def Log_Diff(ts,window):    
    logDiff = np.log(ts) - np.log(ts.shift(window))     
    return logDiff

In [None]:
df['15m_O'] = df['Open'].diff(3)
df['30m_O'] = df['Open'].diff(6)
df['1H_O']  = df['Open'].diff(12)
df['4H_O']  = df['Open'].diff(12*4)
df['1D_O']  = df['Open'].diff(12*24)
df['1W_O']  = df['Open'].diff(12*24*5)
df['1M_O']  = df['Open'].diff(12*24*5*4)

df['15m_H'] = df['High'].diff(3)
df['30m_H'] = df['High'].diff(6)
df['1H_H']  = df['High'].diff(12)
df['4H_H']  = df['High'].diff(12*4)
df['1D_H']  = df['High'].diff(12*24)
df['1W_H']  = df['High'].diff(12*24*5)
df['1M_H']  = df['High'].diff(12*24*5*4)

df['15m_L'] = df['Low'].diff(3)
df['30m_L'] = df['Low'].diff(6)
df['1H_L']  = df['Low'].diff(12)
df['4H_L']  = df['Low'].diff(12*4)
df['1D_L']  = df['Low'].diff(12*24)
df['1W_L']  = df['Low'].diff(12*24*5)
df['1M_L']  = df['Low'].diff(12*24*5*4)

df['15m_C'] = df['Close'].diff(3)
df['30m_C'] = df['Close'].diff(6)
df['1H_C'] = df['Close'].diff(12)
df['4H_C'] = df['Close'].diff(12*4)
df['1D_C'] = df['Close'].diff(12*24)
df['1W_C'] = df['Close'].diff(12*24*5)
df['1M_C'] = df['Close'].diff(12*24*5*4)

In [None]:
df['15m_O_Log'] = Log_Diff(df['Open'],3)
df['30m_O_Log'] = Log_Diff(df['Open'],6)
df['1H_O_Log']  = Log_Diff(df['Open'],12)
df['4H_O_Log']  = Log_Diff(df['Open'],12*4)
df['1D_O_Log']  = Log_Diff(df['Open'],12*24)
df['1W_O_Log']  = Log_Diff(df['Open'],12*24*5)
df['1M_O_Log']  = Log_Diff(df['Open'],12*24*5*4)

df['30m_H_Log'] = Log_Diff(df['High'],6)
df['1H_H_Log']  = Log_Diff(df['High'],12)
df['4H_H_Log']  = Log_Diff(df['High'],12*4)
df['1D_H_Log']  = Log_Diff(df['High'],12*24)
df['1W_H_Log']  = Log_Diff(df['High'],12*24*5)
df['1M_H_Log']  = Log_Diff(df['High'],12*24*5*4)

df['15m_L_Log'] = Log_Diff(df['Low'],3)
df['30m_L_Log'] = Log_Diff(df['Low'],6)
df['1H_L_Log']  = Log_Diff(df['Low'],12)
df['4H_L_Log']  = Log_Diff(df['Low'],12*4)
df['1D_L_Log']  = Log_Diff(df['Low'],12*24)
df['1W_L_Log']  = Log_Diff(df['Low'],12*24*5)
df['1M_L_Log']  = Log_Diff(df['Low'],12*24*5*4)

df['15m_C_Log'] = Log_Diff(df['Close'],3)
df['30m_C_Log'] = Log_Diff(df['Close'],6)
df['1H_C_Log']  = Log_Diff(df['Close'],12)
df['4H_C_Log']  = Log_Diff(df['Close'],12*4)
df['1D_C_Log']  = Log_Diff(df['Close'],12*24)
df['1W_C_Log']  = Log_Diff(df['Close'],12*24*5)
df['1M_C_Log']  = Log_Diff(df['Close'],12*24*5*4)

# 日付

In [None]:
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
# 曜日(0=日曜日、1=月曜日)を数値で取得
df['Day of The Week'] = df['Date'].dt.dayofweek
df['Hour'] = df['Date'].dt.hour
#df['Minute'] = df['Date'].dt.minute
df['Week'] = df['Date'].dt.week

In [None]:
df['5m_mae'] = df['Close'].shift(1)
df['15m_mae']= df['Close'].shift(3)
df['30m_mae']= df['Close'].shift(6)
df['1H_mae'] = df['Close'].shift(12)
df['4H_mae'] = df['Close'].shift(12*4)
df['1D_mae'] = df['Close'].shift(12*24)
df['1W_mae'] = df['Close'].shift(12*24*5)
df['1M_mae'] = df['Close'].shift(12*24*5*4)

# テクニカル指標

In [None]:
class Technical_Index(object):
    """
    テクニカル指標のクラス
    """
    def __init__(self):
        pass
        
    # 単純移動平均(SMA: Simple Moving Average)
    def get_SMA(price_arr, period):
        print('SMA:',period)
        data = price_arr.values.tolist()
        SMA = sma(data, period)
        return SMA
    
    # 指数移動平均(EMA: Exponential Moving Average)
    def get_EMA(price_arr, period):
        print('EMA:',period)
        data = price_arr.values.tolist()
        EMA = ema(data, period)
        return EMA 
    
    # 指数移動平均(EMA: Exponential Moving Average)
    def get_MACD(price_arr, long_period, short_period):
        print('MACD:',long_period, short_period)
        data = price_arr.values.tolist()
        MACD = macd(data, long_period, short_period)
        return MACD 
    
    # ボリンジャーバンド(BB: Bollinger Bands)
    def get_BB(price_arr, period):
        print('BB:',period)
        mean = price_arr.rolling(period).mean()
        std  = price_arr.rolling(period).std()
        upp1  = mean + (std * 1)
        low1  = mean - (std * 1)
        upp2  = mean + (std * 2)
        low2  = mean - (std * 2)
        upp3  = mean + (std * 3)
        low3  = mean - (std * 3)
        return(mean, std, upp1, low1, upp2, low2, upp3, low3)

In [None]:
#SMAを計算
sma_200 = Technical_Index.get_SMA(df['Close'], period=200)
df['SMA_200'] = sma_200

sma_300 = Technical_Index.get_SMA(df['Close'], period=300)
df['SMA_300'] = sma_300


#EMAを計算
ema_150 = Technical_Index.get_EMA(df['Close'], period=150)
df['EMA_150'] = ema_150

ema_250 = Technical_Index.get_EMA(df['Close'], period=250)
df['EMA_250'] = ema_250

#SMAを計算
sma_25 = Technical_Index.get_SMA(df['Close'], period=25)
df['SMA_25'] = sma_25

#EMAを計算
ema_10 = Technical_Index.get_EMA(df['Close'], period=10)
df['EMA_10'] = ema_10

# MACDを計算
_macd = Technical_Index.get_MACD(df['Close'], long_period=26, short_period=12)
df['MACD'] = _macd

# BolongerBandを計算
BBand = Technical_Index.get_BB(df['Close'], period=25)
df['BBand_Mead'] = BBand[0]
df['BBand_Std']  = BBand[1]
df['BB_Upp1']    = BBand[2]
df['BB_Low1']    = BBand[3]
df['BB_Upp2']    = BBand[4]
df['BB_Low2']    = BBand[5]
df['BB_Upp3']    = BBand[6]
df['BB_Low3']    = BBand[7]

SMA: 200
SMA: 300
EMA: 150


In [None]:
#SMAを計算
sma_25 = Technical_Index.get_SMA(df['1D_mae'], period=25)
df['1D_maeSMA_25'] = sma_25

#EMAを計算
ema_10 = Technical_Index.get_EMA(df['1D_mae'], period=10)
df['1D_maeEMA_10'] = ema_10

# MACDを計算
_macd = Technical_Index.get_MACD(df['1D_mae'], long_period=26, short_period=12)
df['1D_maeMACD'] = _macd

# BolongerBandを計算
BBand = Technical_Index.get_BB(df['1D_mae'], period=25)
df['1D_maeBBand_Mead'] = BBand[0]
df['1D_maeBBand_Std']  = BBand[1]
df['1D_maeBB_Upp1']    = BBand[2]
df['1D_maeBB_Low1']    = BBand[3]
df['1D_maeBB_Upp2']    = BBand[4]
df['1D_maeBB_Low2']    = BBand[5]
df['1D_maeBB_Upp3']    = BBand[6]
df['1D_maeBB_Low3']    = BBand[7]

In [None]:
df = df.fillna(0)

# 外部要因

In [102]:
def get_other(_df, df_merge, instrument):
    
    print('start:',instrument)
    
    df_merge = pd.merge_asof(df_merge, _df, left_on='Date',
                         right_on='Date', by='Date')

    df_merge['{}_5m_mae'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].shift(1)
    df_merge['{}_15m_mae'.format(instrument)] = df_merge['{}_Close'.format(instrument)].shift(3)
    df_merge['{}_30m_mae'.format(instrument)] = df_merge['{}_Close'.format(instrument)].shift(6)
    df_merge['{}_1H_mae'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].shift(12)
    df_merge['{}_4H_mae'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].shift(12*4)
    df_merge['{}_1D_mae'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].shift(12*24)
    df_merge['{}_1W_mae'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].shift(12*24*5)
    df_merge['{}_1M_mae'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].shift(12*24*5*4)
    
    df_merge['{}_15m_C'.format(instrument)] = df_merge['{}_Close'.format(instrument)].diff(3)
    df_merge['{}_30m_C'.format(instrument)] = df_merge['{}_Close'.format(instrument)].diff(6)
    df_merge['{}_1H_C'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].diff(12)
    df_merge['{}_4H_C'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].diff(12*4)
    df_merge['{}_1D_C'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].diff(12*24)
    df_merge['{}_1W_C'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].diff(12*24*5)
    df_merge['{}_1M_C'.format(instrument)]  = df_merge['{}_Close'.format(instrument)].diff(12*24*5*4)

    df_merge['{}_15m_C_Log'.format(instrument)] = Log_Diff(df_merge['{}_Close'.format(instrument)],3)
    df_merge['{}_30m_C_Log'.format(instrument)] = Log_Diff(df_merge['{}_Close'.format(instrument)],6)
    df_merge['{}_1H_C_Log'.format(instrument)]  = Log_Diff(df_merge['{}_Close'.format(instrument)],12)
    df_merge['{}_4H_C_Log'.format(instrument)]  = Log_Diff(df_merge['{}_Close'.format(instrument)],12*4)
    df_merge['{}_1D_C_Log'.format(instrument)]  = Log_Diff(df_merge['{}_Close'.format(instrument)],12*24)
    df_merge['{}_1W_C_Log'.format(instrument)]  = Log_Diff(df_merge['{}_Close'.format(instrument)],12*24*5)
    df_merge['{}_1M_C_Log'.format(instrument)]  = Log_Diff(df_merge['{}_Close'.format(instrument)],12*24*5*4)
    
    #SMA
    #df_merge['{}_SMA_25'.format(instrument)] = Technical_Index.get_SMA(df_merge['{}_Close'.format(instrument)], period=25)
    #df_merge['{}_SMA_200'.format(instrument)] = Technical_Index.get_SMA(df_merge['{}_Close'.format(instrument)], period=200)
    #df_merge['{}_SMA_300'.format(instrument)] = Technical_Index.get_SMA(df_merge['{}_Close'.format(instrument)], period=300)
    
    #EMA
    #df_merge['{}_EMA_15'.format(instrument)] = Technical_Index.get_EMA(df_merge['{}_Close'.format(instrument)], period=15)
    df_merge['{}_EMA_150'.format(instrument)] = Technical_Index.get_EMA(df_merge['{}_Close'.format(instrument)], period=150)
    #df_merge['{}_EMA_250'.format(instrument)] = Technical_Index.get_EMA(df_merge['{}_Close'.format(instrument)], period=250)
      
    df_merge = df_merge.interpolate()
    
    return df_merge

In [103]:
print(df_EURJPY['Date'].dtype)
print(df['Date'].dtype)

datetime64[ns]
datetime64[ns]


In [104]:
%%time
df['Date'] = pd.to_datetime(df['Date'])

df = get_other(df_EURJPY, df, 'EUR_JPY')
df = get_other(df_GBPJPY, df, 'GBP_JPY')
df = get_other(df_GBPUSD, df, 'GBP_USD')
df = get_other(df_EURUSD, df, 'EUR_USD')
df = get_other(df_SGDJPY, df, 'SGD_JPY')
df = get_other(df_JP225,  df, 'JP225_USD')
df = get_other(df_US30,   df, 'US30_USD')
df = get_other(df_NAS100, df, 'NAS100_USD')
df = get_other(df_SPX500, df, 'SPX500_USD')
df = get_other(df_EU50,   df, 'EU50_EUR')
df = get_other(df_DE30,   df, 'DE30_EUR')
df = get_other(df_AU200,  df, 'AU200_AUD')
df = get_other(df_USB05Y, df, 'USB05Y_USD')
df = get_other(df_USB10Y, df, 'USB10Y_USD')
df = get_other(df_XAUEUR, df, 'XAU_EUR')
df = get_other(df_XAGGBP, df, 'XAG_GBP')
df = get_other(df_XAUXAG, df, 'XAU_XAG')
df = get_other(df_WHEAT,  df, 'WHEAT_USD')

df = df.interpolate()

Wall time: 5min 35s


In [106]:
del df_EURJPY
del df_GBPJPY
del df_GBPUSD
del df_EURUSD
del df_SGDJPY
del df_JP225
del df_US30
del df_NAS100
del df_SPX500
del df_EU50
del df_DE30
del df_AU200
del df_USB05Y
del df_USB10Y
del df_XAUEUR
del df_XAGGBP
del df_XAUXAG
del df_WHEAT
gc.collect()

139

In [None]:
df.isnull().sum()

In [None]:
print(df.shape)

In [None]:
df.tail(2)

In [None]:
df.columns.values

In [None]:
df1 = df[df['Date'] > '2016-01-04']

In [None]:
df1.head(2)

In [None]:
split_date = '2018/6/01 00:00:00'
train, test = df1[df1['Date'] < split_date], df1[df1['Date']>=split_date]

In [114]:
from sklearn.preprocessing import StandardScaler

XX = [ 'Open', 'High', 'Low', 'Close', 'Volume', '15m_O', '30m_O', '1H_O',
       '4H_O', '1D_O', '1W_O', '1M_O', '15m_H', '30m_H', '1H_H', '4H_H',
       '1D_H', '1W_H', '1M_H', '15m_L', '30m_L', '1H_L', '4H_L', '1D_L',
       '1W_L', '1M_L', '15m_C', '30m_C', '1H_C', '4H_C', '1D_C', '1W_C',
       '1M_C', '15m_O_Log', '30m_O_Log', '1H_O_Log', '4H_O_Log',
       '1D_O_Log', '1W_O_Log', '1M_O_Log', '30m_H_Log', '1H_H_Log',
       '4H_H_Log', '1D_H_Log', '1W_H_Log', '1M_H_Log', '15m_L_Log',
       '30m_L_Log', '1H_L_Log', '4H_L_Log', '1D_L_Log', '1W_L_Log',
       '1M_L_Log', '15m_C_Log', '30m_C_Log', '1H_C_Log', '4H_C_Log',
       '1D_C_Log', '1W_C_Log', '1M_C_Log', 'Year', 'Month', 'Day',
       'Day of The Week', 'Hour', 'Week', '5m_mae', '15m_mae', '30m_mae',
       '1H_mae', '4H_mae', '1D_mae', '1W_mae', '1M_mae', 'SMA_200',
       'SMA_300', 'EMA_150', 'EMA_250', 'SMA_25', 'EMA_10', 'MACD',
       'BBand_Mead', 'BBand_Std', 'BB_Upp1', 'BB_Low1', 'BB_Upp2',
       'BB_Low2', 'BB_Upp3', 'BB_Low3', '1D_maeSMA_25', '1D_maeEMA_10',
       '1D_maeMACD', '1D_maeBBand_Mead', '1D_maeBBand_Std',
       '1D_maeBB_Upp1', '1D_maeBB_Low1', '1D_maeBB_Upp2', '1D_maeBB_Low2',
       '1D_maeBB_Upp3', '1D_maeBB_Low3']

standardscaler = StandardScaler()
X_train = standardscaler.fit_transform(train.loc[:,XX])
X_test = standardscaler.fit_transform(test.loc[:,XX])

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [115]:
y_train = np.array(train.loc[:,['PROFIT_2H']],int)
y_test  = np.array(test.loc[:,['PROFIT_2H']],int)

In [116]:
yy=test.loc[:,XX]

In [117]:
print(X_train.shape)
print(X_train.dtype)
print(y_train.shape)
print(y_train.dtype)
print(X_test.shape)
print(y_test.shape)

(179830, 586)
float64
(179830, 1)
int32
(61466, 586)
(61466, 1)


In [118]:
del train
del test
gc.collect()

94

# Deeplearning

## DENSE

In [119]:
#モデルの定義
from keras import models
from keras import layers
from keras.layers import LeakyReLU, Dense, Dropout, BatchNormalization
from keras import regularizers
from keras.initializers import he_normal,lecun_normal,he_uniform

model = models.Sequential()
model.add(Dense(512, input_shape=(X_train.shape[1],), kernel_initializer=he_uniform()))
model.add(layers.LeakyReLU(alpha=0.4))
model.add(Dropout(0.3))
model.add(Dense(256,kernel_regularizer=regularizers.l2(1.)))
model.add(BatchNormalization())
model.add(layers.LeakyReLU(alpha=0.4))
model.add(Dropout(0.3))
model.add(Dense(128,kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.LeakyReLU(alpha=0.4))
model.add(Dropout(0.3))
model.add(Dense(1, activation = 'sigmoid'))
model.compile(optimizer='adam', loss ='binary_crossentropy', metrics=['acc'])

model.summary()

Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               300544    
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               131328    
_________________________________________________________________
batch_normalization_1 (Batch (None, 256)               1024      
_________________________________________________________________
leaky_re_lu_2 (LeakyR

In [120]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
weight_path="{}_weights.hdf5".format('Dense_2H_1')

checkpoint = ModelCheckpoint(weight_path, monitor='val_acc', verbose=1,
                             save_best_only=True, mode='max', save_weights_only = True)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_acc', factor=0.8, patience=5,
                                   verbose=1, mode='auto', epsilon=0.0001, cooldown=5, min_lr=0.0001)
callbacks_list = [checkpoint, reduceLROnPlat]



In [121]:
num_epochs = 100
batch_size = 256

#モデルをサイレントモードで適合
history = model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          epochs=num_epochs, batch_size=batch_size, verbose=1,
          callbacks=callbacks_list)

Instructions for updating:
Use tf.cast instead.
Train on 179830 samples, validate on 61466 samples
Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.51149, saving model to Dense_2H_1_weights.hdf5
Epoch 2/100

Epoch 00002: val_acc improved from 0.51149 to 0.53226, saving model to Dense_2H_1_weights.hdf5
Epoch 3/100

Epoch 00003: val_acc improved from 0.53226 to 0.53273, saving model to Dense_2H_1_weights.hdf5
Epoch 4/100

Epoch 00004: val_acc did not improve from 0.53273
Epoch 5/100

Epoch 00005: val_acc did not improve from 0.53273
Epoch 6/100

Epoch 00006: val_acc did not improve from 0.53273
Epoch 7/100

Epoch 00007: val_acc did not improve from 0.53273
Epoch 8/100

Epoch 00008: val_acc did not improve from 0.53273

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.000800000037997961.
Epoch 9/100

Epoch 00009: val_acc improved from 0.53273 to 0.53548, saving model to Dense_2H_1_weights.hdf5
Epoch 10/100

Epoch 00010: val_acc did not improve from 0.53548
Epoch 11/100


KeyboardInterrupt: 

In [328]:
#from keras.models import load_weights
model.load_weights('Dense_2H_1_weights.hdf5')
X_predict = model.predict(X_test)
X_evaluate = model.evaluate(X_test,y_test)
print(X_evaluate)

[0.795475702083212, 0.5366706797273161]


## LSTM

In [130]:
window_len = 36        # 入力系列数
n_in = X_train.shape[1]   # 学習データ（＝入力）の列数
len_seq = X_train.shape[0] - window_len + 1
data = []

for i in range(0, len_seq):
    data.append(X_train[i:i+window_len, :])
x = np.array(data).reshape(len(data), window_len, n_in)
print(x.shape)

(179795, 36, 586)


In [131]:
window_len = 36            # 入力系列数
n_in = X_test.shape[1]   # 学習データ（＝入力）の列数
len_seq = X_test.shape[0] - window_len + 1
target = []

for i in range(0, len_seq):
    target.append(X_test[i:i+window_len, :])
t = np.array(target).reshape(len(target), window_len, n_in)

print(t.shape)

(61431, 36, 586)


In [132]:
yy_train = y_train[window_len -1 :x.shape[0] + window_len +1]
yy_test = y_test[window_len -1 :t.shape[0] + window_len +1]

## Simple LSTM

In [133]:
from keras import models
from keras import layers
from keras.layers import LeakyReLU, Dense
from keras import regularizers

model = models.Sequential()
model.add(BatchNormalization(input_shape=(x.shape[1], x.shape[2])))
#model.add(layers.GRU(32,activation='relu',
model.add(layers.LSTM(36,input_shape=(x.shape[1], x.shape[2]),kernel_regularizer=regularizers.l2(0.01),
                     dropout = 0.3, recurrent_dropout= 0.3, return_sequences = True))
model.add(layers.LeakyReLU(alpha=0.3))
#model.add(layers.GRU(32,kernel_regularizer=regularizers.l2(0.01),dropout = 0.3, recurrent_dropout= 0.3,return_sequences = False))
#model.add(layers.LeakyReLU(alpha=0.3))
model.add(layers.LSTM(36,input_shape=(x.shape[1], x.shape[2]),kernel_regularizer=regularizers.l2(0.01),
                     dropout = 0.3, recurrent_dropout= 0.3, return_sequences = False))
model.add(layers.LeakyReLU(alpha=0.3))
model.add(layers.Dense(18,kernel_regularizer=regularizers.l2(0.01)))
model.add(layers.LeakyReLU(alpha=0.3))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(1, activation = 'sigmoid'))
model.compile(optimizer = 'adam', 
              loss = 'binary_crossentropy', 
              metrics = ['accuracy'])

model.summary() 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_2 (Batch (None, 36, 586)           2344      
_________________________________________________________________
lstm_1 (LSTM)                (None, 36, 36)            89712     
_________________________________________________________________
leaky_re_lu_4 (LeakyReLU)    (None, 36, 36)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 36)                10512     
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 36)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 18)                666       
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 18)                0         
__________

In [134]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
weight_path="{}_weights.best.hdf5".format('lstm_2h')

checkpoint = ModelCheckpoint(weight_path, monitor='val_acc', verbose=1,
                             save_best_only=True, mode='max', save_weights_only = True)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_acc', factor=0.8, patience=7,
                                   verbose=1, mode='auto', epsilon=0.0001, cooldown=5, min_lr=0.0001)
callbacks_list = [checkpoint, reduceLROnPlat]

In [135]:
num_epochs = 30
batch_size = 512

#モデルをサイレントモードで適合
history = model.fit(x, yy_train,
          validation_data=(t, yy_test),
          epochs=num_epochs, batch_size=batch_size, verbose=1,shuffle=True,
          callbacks=callbacks_list)

Train on 179795 samples, validate on 61431 samples
Epoch 1/30

Epoch 00001: val_acc improved from -inf to 0.51181, saving model to lstm_2h_weights.best.hdf5
Epoch 2/30

Epoch 00002: val_acc did not improve from 0.51181
Epoch 3/30
 30720/179795 [====>.........................] - ETA: 3:31 - loss: 0.6936 - acc: 0.5143

KeyboardInterrupt: 

In [None]:
#from keras.models import load_weights
model.load_weights('lstm_2h_weights.hdf5')
X_predict = model.predict(t)
X_evaluate = model.evaluate(t,yy_test)
print(X_evaluate)

## Bidirectional LSTM

In [136]:
from keras.models import Sequential
from keras.layers import BatchNormalization, Conv1D, LSTM, Dense, Dropout, Bidirectional, TimeDistributed, GlobalAveragePooling1D
from keras import regularizers
from keras.layers import LeakyReLU

stroke_read_model = Sequential()
stroke_read_model.add(BatchNormalization(input_shape=(x.shape[1], x.shape[2])))
stroke_read_model.add(Conv1D(128, (5,)))
stroke_read_model.add(LeakyReLU(alpha=0.3))
stroke_read_model.add(Dropout(0.2))
stroke_read_model.add(Conv1D(256, (5,)))
stroke_read_model.add(LeakyReLU(alpha=0.3))
stroke_read_model.add(Dropout(0.2))
stroke_read_model.add(Conv1D(512, (3,)))
stroke_read_model.add(LeakyReLU(alpha=0.3))
stroke_read_model.add(Dropout(0.2))
stroke_read_model.add(Bidirectional(LSTM(256, dropout = 0.3, recurrent_dropout= 0.3,  return_sequences = True)))
stroke_read_model.add(Bidirectional(LSTM(256, dropout = 0.3, recurrent_dropout= 0.3, return_sequences = True)))
stroke_read_model.add(Bidirectional(LSTM(256, dropout = 0.3, kernel_regularizer=regularizers.l2(0.001),
                                         recurrent_dropout= 0.3, return_sequences = True)))
stroke_read_model.add(TimeDistributed(Dense(256)))
stroke_read_model.add(LeakyReLU(alpha=0.3))
stroke_read_model.add(GlobalAveragePooling1D())
stroke_read_model.add(Dense(64,kernel_regularizer=regularizers.l2(0.01)))
stroke_read_model.add(LeakyReLU(alpha=0.3))
stroke_read_model.add(Dropout(0.2))
stroke_read_model.add(Dense(1, activation = 'sigmoid'))
stroke_read_model.compile(optimizer = 'adam', 
                          loss = 'binary_crossentropy', 
                          metrics = ['accuracy'])
stroke_read_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_3 (Batch (None, 36, 586)           2344      
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 32, 128)           375168    
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 32, 128)           0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 32, 128)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 28, 256)           164096    
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU)    (None, 28, 256)           0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 28, 256)           0         
__________

In [137]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
weight_path="{}_weights.best.hdf5".format('lstm_bid_time2h')

checkpoint = ModelCheckpoint(weight_path, monitor='val_acc', verbose=1, 
                             save_best_only=True, mode='max', save_weights_only = True)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_acc', factor=0.8, patience=5, 
                                   verbose=1, mode='auto', epsilon=0.0001, cooldown=5, min_lr=0.0001)
callbacks_list = [checkpoint, reduceLROnPlat]

In [138]:
num_epochs = 30
history = stroke_read_model.fit(x, yy_train,
          validation_data=(t, yy_test),
          epochs=num_epochs, batch_size=512, shuffle= True,
          callbacks=callbacks_list)

Train on 179795 samples, validate on 61431 samples
Epoch 1/30
 30208/179795 [====>.........................] - ETA: 4:32 - loss: 1.7651 - acc: 0.5061

MemoryError: 

In [368]:
stroke_read_model.load_weights('lstm_bid_time2h_weights.best.hdf5')
X_predict =stroke_read_model.predict(t)
X_evaluate = stroke_read_model.evaluate(t,yy_test)
print(X_evaluate)

[0.6941858820172835, 0.5208004298488985]
