In [79]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential, load_model, save_model

import seaborn as sns
from pylab import rcParams
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
import time

%matplotlib inline

sns.set(style='whitegrid', palette='muted', font_scale=1.5)

rcParams['figure.figsize'] = 14, 8

WINDOW = 22

In [15]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model

In [25]:
df = pd.read_csv("data/24h_candle.csv", delimiter=',', index_col=0)
df.head()

Unnamed: 0_level_0,id,date_time,open,close,low,high,volume,pair_id
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,10362847,2015-08-01 07:00:00.000000,284.0,279.845023,275.724508,284.0,301.376392,83
2,10362848,2015-08-02 07:00:00.000000,280.0,286.438075,280.0,286.438075,106.563859,83
3,10362849,2015-08-03 07:00:00.000000,285.4383,288.965644,284.286275,288.965644,75.706794,83
4,10362850,2015-08-04 07:00:00.000000,281.104486,288.0,281.104486,288.0,11.947218,83
5,10362851,2015-08-05 07:00:00.000000,288.2,287.5698,279.793314,288.965425,9.297527,83


# Learn many feature

In [45]:
def load_data(X, seq_len, train_size=0.9):
    
    amount_of_features = 4 
    X_mat = X.as_matrix() 
    
    sequence_length = seq_len + 1 
    data = []
    

    for index in range(len(X_mat) - sequence_length):
        data.append(X_mat[index: index + sequence_length])
    
    data = np.array(data)
    train_split = int(round(train_size * data.shape[0]))
    train_data = data[:train_split, :]
    
    x_train = train_data[:, :-1]
    y_train = train_data[:, -1][:,-1]
    
    x_test = data[train_split:, :-1] 
    y_test = data[train_split:, -1][:,-1]

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], amount_of_features))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], amount_of_features))  

    return x_train, y_train, x_test, y_test


def build_model(input_shape):
    d = 0.2
    model = Sequential()
    
    model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
    model.add(Dropout(d))
        
    model.add(LSTM(128, input_shape=input_shape, return_sequences=False))
    model.add(Dropout(d))
        
    model.add(Dense(32,kernel_initializer="normal",activation='relu'))        
    model.add(Dense(1,kernel_initializer="normal",activation='linear'))
    
    model.compile(loss='mse',optimizer='adam', metrics=['accuracy'])
    
    return model

def modelPredictFeature(feature, df):
    
    data = df.copy()
    featurelist = ['open', 'close', 'low', 'high']
    featurelist.remove(feature)
    
    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()

    data.drop(['id'], axis=1, inplace=True)

    x = data[featurelist].copy()
    y = data[feature].copy()

    x[featurelist] = x_scaler.fit_transform(x)
    y = y_scaler.fit_transform(y.values.reshape(-1, 1))
    
    X_mat = x.as_matrix()
    

    
    x[feature] = y
    
    X_train, y_train, X_test, y_test = load_data(x, WINDOW)
    
    model = build_model(input_shape=(WINDOW, 4))
    
    """
    check if you want visualise NN structure
    """
    # SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg')) 
    
    print('START FIT MODEL...')

    model.fit(X_train, y_train, batch_size=32, epochs=500,
              verbose=0)
    
    save_model(model, "model_btc_" + feature + ".h5")
    return X_train, y_train, X_test, y_test, model

In [46]:
X_train_Close, y_train_Close, X_test_Close, y_test_Close, model_Close = modelPredictFeature('close', df)

START FIT MODEL...


In [90]:
d = pd.DataFrame(X_train_Close)

ValueError: Must pass 2-d input

In [101]:
def saveFeatureVector(X_train, y_train, X_test, y_test, feature, directory):
    np.save(directory + "/X_train_" + feature, X_train)
    np.save(directory + "/y_train_" + feature, y_train)
    np.save(directory + "/X_test_" + feature, X_test)
    np.save(directory + "/y_test_" + feature, y_test)    


In [103]:
saveFeatureVector(X_train_Close, y_train_Close, X_test_Close, y_test_Close, 'close', './computedFeature')

In [80]:
import plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

init_notebook_mode(connected=True)
    
# make visualisation 
def makePrediction(X_train, y_train, X_test, y_test, model, data):
    
    frame_data = pd.DataFrame(df.date_time.str.split(' ',1).tolist(), columns = ['date','time'],index = df.index)

    y_scaler = MinMaxScaler()

    y = df['close'].copy()
    y = y_scaler.fit_transform(y.values.reshape(-1, 1))   

    trainPredict = model.predict(X_train)
    testPredict = model.predict(X_test)
    trainPredict = y_scaler.inverse_transform(trainPredict)
    
    trainY = y_scaler.inverse_transform([y_train])
    testPredict = y_scaler.inverse_transform(testPredict)
    testY = y_scaler.inverse_transform([y_test])
    
    trainScore = metrics.mean_squared_error(trainY[0], trainPredict[:,0]) ** .5
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = metrics.mean_squared_error(testY[0], testPredict[:,0]) ** .5
    print('Test Score: %.2f RMSE' % (testScore))
    
    all_df = data.copy()
    prices = all_df.close.values.astype('float32')
    prices = prices.reshape(len(prices), 1)
    
    trainPredictPlot = np.empty_like(prices)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[WINDOW:len(trainPredict)+WINDOW, :] = trainPredict

    testPredictPlot = np.empty_like(prices)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[(len(prices) - testPredict.shape[0])-1:len(prices)-1, :] = testPredict
    

    Actual = pd.DataFrame(prices, columns=["close"], index=all_df.index).close
    Training = pd.DataFrame(trainPredictPlot, columns=["close"], index=all_df.index).close
    Testing = pd.DataFrame(testPredictPlot, columns=["close"], index=all_df.index).close

    ActualValues = go.Scatter( x = frame_data.date, y = Actual, name = 'ActualValues')
    TrainingValues = go.Scatter( x = frame_data.date, y = Training, name = 'TrainingValues')
    TestingValues = go.Scatter( x = frame_data.date, y = Testing, name = 'PredictedValues')

    iplot([ActualValues,TrainingValues, TestingValues])

In [81]:
# Предсказываем стоимость с учетом новых поправок

makePrediction(X_train_Close, y_train_Close, X_test_Close, y_test_Close, model_Close,df)

Train Score: 21.45 RMSE
Test Score: 178.85 RMSE


# Prediction in real-time

In [None]:
def getPrediction (lastBatch, model):
    
    pred_close = model_Close.predict(lastBatch)
    pred_open = model_Open.predict(lastBatch)
    predict_high = model_High.predict(lastBatch)
    predict_low = model_Low.predict(lastBatch)
    
    return np.array([pred_open, pred_close, predict_low, predict_high])


def next_prediction(lastDataFrame,  model):
    
    new_prediction = getPrediction(lastDataFrame, model) 
    new_frame = np.vstack([lastDataFrame[1:], new_prediction])
    model.train on batch(new_frame.reshape(1,22,4))
    
    return model,new_prediction, new_frame


def makePredictionToDay(numbersofDay, lastDataFrame, model):
    
    prediction_vector = []
    
    for i in np.arange(numbersofDay):
        model, new_prediction, lastDataFrame = next_prediction(lastDataFrame, prediction_vector, model)
        prediction_vector.append(new_prediction)
    
    return prediction_vector

In [None]:
l_df = [] # последний фрейм из тренировочного набора

makePredictionToDay(5, l_df, model_close)


In [None]:
X_train_Open, y_train_Open, X_test_Open, y_test_Open, model_Open = modelPredictFeature('open', df)

In [None]:
X_train_High, y_train_High, X_test_High, y_test_High, model_High = modelPredictFeature('high', df)

In [None]:
X_train_Low, y_train_Low, X_test_Low, y_test_Low, model_Low = modelPredictFeature('low', df)