In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential, load_model, save_model
import datetime 

import seaborn as sns
from pylab import rcParams
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
import time

import plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

import sqlalchemy

# Initialise plotly's offline notebook mode so iplot() can render in this notebook.
init_notebook_mode(connected=True)

# IPython magic: draw matplotlib figures inline in the cell output.
%matplotlib inline

# Global seaborn theme for every matplotlib figure created below.
sns.set(style='whitegrid', palette='muted', font_scale=1.5)

# Default matplotlib figure size as (width, height).
rcParams['figure.figsize'] = 14, 8

# NOTE(review): RANDOM_SEED is defined here but is not referenced by any of the
# visible cells, so no random generator is actually seeded — confirm and apply
# it (e.g. to numpy / the Keras backend) if reproducible runs are expected.
RANDOM_SEED = 42

# Number of consecutive observations in one model input window; read by
# predictNumDay and nextDayPrediction.
WINDOW = 22

Using TensorFlow backend.


In [2]:
"""
Data parsing
"""
def connect(user, password, db, host: str, port: int, echo=True):
    """Create a SQLAlchemy engine (PostgreSQL via psycopg2) and bound metadata.

    Args:
        user: Database user name.
        password: Database password. May contain URL-reserved characters
            such as ``@``, ``:`` or ``/``; they are percent-encoded here.
        db: Database name.
        host: Server host name or IP address.
        port: Server TCP port.
        echo: Forwarded to ``create_engine``; when true SQLAlchemy logs
            every statement it issues.

    Returns:
        A ``(engine, metadata)`` tuple, the metadata being bound to the engine.
    """
    # Local import so the block does not depend on a new file-level import.
    from urllib.parse import quote

    # Credentials are embedded in the URL, so reserved characters must be
    # percent-encoded or the URL is split at the wrong place. safe='' also
    # encodes '/' and '+', which decodes correctly with both unquote variants.
    url = 'postgresql+psycopg2://{}:{}@{}:{}/{}'.format(
        quote(str(user), safe=''),
        quote(str(password), safe=''),
        host,
        port,
        db,
    )
    eng = sqlalchemy.create_engine(url, client_encoding='utf8', echo=echo)
    meta = sqlalchemy.MetaData(bind=eng)

    return eng, meta


def get_data_frame(pair: str = 'USDT_BTC') -> pd.DataFrame:
    """Pull candlestick rows for one currency pair from the database.

    By default every stored row for the USDT/BTC pair is fetched. The list of
    parsed pairs lives in the ``pair`` table. The database is reached over the
    network, so this works from anywhere with internet access.

    Args:
        pair: Pair name matched against ``pair."name"``.

    Returns:
        DataFrame with the columns date, time, open, close, low, high, volume
        and name.
    """
    # NOTE(review): host and credentials are hard-coded; consider moving them
    # to environment variables or a config file that is not committed.
    engine, _ = connect(user='postgres', password='password', db='btccandles', host='176.212.125.30', port=16432)

    # The pair name is passed as a bound parameter instead of being spliced
    # into the SQL text, so quotes inside `pair` cannot alter the statement.
    query = sqlalchemy.text(
        'select date, time, open, close, low, high, volume, pair."name" from candlestick, pair '
        'where candlestick.pair_id=pair.id and pair."name"=:pair;')
    try:
        df = pd.read_sql_query(query, con=engine, params={'pair': pair})
    finally:
        # A fresh engine (with its own connection pool) is created per call;
        # release it so repeated calls do not pile up open connections.
        engine.dispose()
    return df

"""
Data loading
"""
def load_data(X, seq_len, train_size=1):
    """Cut a 2-D series into overlapping windows for supervised training.

    Every window spans ``seq_len + 1`` consecutive rows of ``X``: the first
    ``seq_len`` rows are the model input and the last column of the final row
    is the target.

    Args:
        X: 2-D array-like of shape (n_rows, n_features).
        seq_len: Number of rows in each input window.
        train_size: Fraction of the windows (taken from the start) to return.

    Returns:
        ``(x_train, y_train)`` where ``x_train`` has shape
        (n_windows, seq_len, n_features) and ``y_train`` has shape (n_windows,).
        Both are empty (with those shapes) when ``X`` has fewer than
        ``seq_len + 1`` rows.
    """
    X = np.asarray(X)

    # Number of features per time step.
    amount_of_features = X.shape[1]

    sequence_length = seq_len + 1

    # A series of n rows holds n - sequence_length + 1 full windows; the "+ 1"
    # keeps the final window so the most recent row is used as a target too.
    n_windows = max(len(X) - sequence_length + 1, 0)
    if n_windows == 0:
        # Not enough rows for a single window: return well-shaped empties
        # rather than failing later on an opaque indexing error.
        return (np.empty((0, seq_len, amount_of_features), dtype=X.dtype),
                np.empty((0,), dtype=X.dtype))

    # Split the series into blocks of the window size.
    data = np.stack([X[start: start + sequence_length] for start in range(n_windows)])

    train_split = int(round(train_size * n_windows))
    train_data = data[:train_split]

    # Inputs already have the layout [sample, window, feature].
    x_train = train_data[:, :-1, :]
    y_train = train_data[:, -1, -1]
    return x_train, y_train

"""
Building the LSTM
"""

def build_model(input_shape, dropout=0.2):
    """Build and compile the stacked-LSTM regression network.

    Architecture: LSTM(128, returning sequences) -> Dropout -> LSTM(128) ->
    Dropout -> Dense(32, relu) -> Dense(1, linear), compiled with MSE loss
    and the Adam optimizer.

    Args:
        input_shape: Shape of one sample passed to the first LSTM layer;
            callers in this notebook pass ``(WINDOW, 1)``.
        dropout: Rate used by both Dropout layers (default 0.2, the value
            that was previously hard-coded).

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # First recurrent layer returns the full sequence so a second LSTM can
    # be stacked on top of it.
    model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
    model.add(Dropout(dropout))

    # Only the first layer of a Sequential model needs input_shape.
    model.add(LSTM(128, return_sequences=False))
    model.add(Dropout(dropout))

    model.add(Dense(32, kernel_initializer="normal", activation='relu'))
    model.add(Dense(1, kernel_initializer="normal", activation='linear'))

    # The output is a continuous value, so classification accuracy carries no
    # information; report mean absolute error alongside the MSE loss instead.
    model.compile(loss='mse', optimizer='adam', metrics=['mae'])

    return model

def predictNumDay(num, pathModel, data, scaler):
    """Forecast ``num`` consecutive steps by feeding predictions back in.

    The saved model is loaded from ``pathModel`` and applied to the last
    ``WINDOW`` rows of ``data``. After every step the window slides forward
    by one: the oldest value is dropped and the new prediction is appended.

    Args:
        num: Number of steps to forecast.
        pathModel: Path of the saved Keras model.
        data: Scaled single-feature series of shape (n, 1).
        scaler: Object whose ``inverse_transform`` maps model outputs back to
            the original scale.

    Returns:
        1-D array of length ``num`` with the forecasts in the original scale.

    Raises:
        ValueError: If ``data`` holds fewer than ``WINDOW`` rows.
    """
    m = load_model(pathModel)

    data = np.asarray(data)
    if len(data) < WINDOW:
        raise ValueError(
            'need at least %d observations to predict, got %d' % (WINDOW, len(data)))

    prediction = []
    lastbatch = data[-WINDOW:]
    for _ in range(num):
        # Use WINDOW for the reshape too: it used to be a literal 22, which
        # broke as soon as the WINDOW constant was changed.
        res = m.predict(lastbatch.reshape(1, WINDOW, 1))
        prediction.append(scaler.inverse_transform(res))
        lastbatch = np.concatenate([lastbatch[1:], res])
        # The model is deliberately NOT trained here. The previous
        # train_on_batch call fitted it to its own prediction, with the
        # target already present as the last element of the input, which
        # only distorted the weights for the following steps.

    return np.array(prediction).reshape(num)

In [5]:
"""
typeBlockchain:
USDT_BTC
USDT_LTC
USDT_ETH
USDT_ETC
USDT_XRP
"""

def nextDayPrediction(typeBlockchain, N = 1):
    """Forecast the close price of a currency pair for the next ``N`` days.

    Steps: load the pair's candles, scale the ``close`` column to [0, 1],
    cut it into ``WINDOW``-long samples, obtain today's model (loaded from
    disk when it exists, otherwise trained and saved), print the RMSE on the
    training samples and forecast ``N`` steps past the last stored date.

    Args:
        typeBlockchain: Pair name, e.g. ``'USDT_BTC'``.
        N: Number of days to forecast.

    Returns:
        DataFrame with one column ``predictionPrice`` indexed by the forecast
        dates (the day after the last stored date onwards).
    """
    # Local import so the block does not depend on a new file-level import.
    import os

    df = get_data_frame(typeBlockchain)

    # One scaler, fitted once, is used for both the inputs and the targets
    # (they come from the same `close` column).
    scaler = MinMaxScaler()
    x = scaler.fit_transform(df['close'].values.reshape(-1, 1))

    X_train, y_train = load_data(x, WINDOW)

    today = time.strftime("_%d_%m_%Y")
    pathModel = "../models/model_1f_" + typeBlockchain + today + ".h5"

    if os.path.exists(pathModel):
        # Today's model was already trained: reuse it.
        model = load_model(pathModel)
    else:
        # No model for today yet: train one and cache it on disk so later
        # calls (and predictNumDay, which loads by path) can find it.
        model_dir = os.path.dirname(pathModel)
        if model_dir:
            os.makedirs(model_dir, exist_ok=True)

        model = build_model(input_shape=(WINDOW, 1))

        print('START FIT MODEL...')
        start = time.time()
        model.fit(X_train, y_train, batch_size=32, epochs=500,
                  verbose=0)
        end = time.time()
        print ('Learning time: ', end-start)

        save_model(model, pathModel)

    # Training-set error in the original price scale. The scaler was fitted
    # on a single column, so the targets go in as a column vector as well.
    trainPredict = scaler.inverse_transform(model.predict(X_train))
    trainY = scaler.inverse_transform(y_train.reshape(-1, 1))

    trainScore = metrics.mean_squared_error(trainY[:, 0], trainPredict[:, 0]) ** .5
    print('Train Score: %.2f RMSE' % (trainScore))

    # First forecast date is the day after the last stored date. Timestamp
    # arithmetic rolls over month and year ends; adding 1 to the day number
    # raised ValueError on the last day of a month.
    lastDate = pd.Timestamp(df['date'][df.last_valid_index()]).normalize()
    predictionDate = pd.date_range(lastDate + pd.Timedelta(days=1), periods=N)
    predictNday = predictNumDay(N, pathModel, x, scaler)

    prediction = pd.DataFrame(predictNday, columns=["predictionPrice"], index = predictionDate.values)

    return prediction

In [6]:
# Run the next-day prediction for the USDT_BTC pair and keep the result frame.
USDT_BTC = nextDayPrediction(typeBlockchain='USDT_BTC', N=1)

2017-07-19 20:29:06,229 INFO sqlalchemy.engine.base.Engine select version()
2017-07-19 20:29:06,230 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:06,231 INFO sqlalchemy.engine.base.Engine select current_schema()
2017-07-19 20:29:06,232 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:06,234 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2017-07-19 20:29:06,234 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:06,235 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2017-07-19 20:29:06,236 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:06,237 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
2017-07-19 20:29:06,237 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:06,239 INFO sqlalchemy.engine.base.Engine select date, time, open, close, low, high, volume, pair."name" from candlestick, pair where candlestick.pair_id=pair.id and pair."name"='USDT_

In [7]:
# Run the next-day prediction for the USDT_LTC pair and keep the result frame.
USDT_LTC = nextDayPrediction(typeBlockchain='USDT_LTC', N=1)

2017-07-19 20:29:12,423 INFO sqlalchemy.engine.base.Engine select version()
2017-07-19 20:29:12,423 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:12,425 INFO sqlalchemy.engine.base.Engine select current_schema()
2017-07-19 20:29:12,425 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:12,427 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2017-07-19 20:29:12,427 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:12,428 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2017-07-19 20:29:12,429 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:12,430 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
2017-07-19 20:29:12,431 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:12,432 INFO sqlalchemy.engine.base.Engine select date, time, open, close, low, high, volume, pair."name" from candlestick, pair where candlestick.pair_id=pair.id and pair."name"='USDT_

In [8]:
# Run the next-day prediction for the USDT_ETH pair and keep the result frame.
USDT_ETH = nextDayPrediction(typeBlockchain='USDT_ETH', N=1)

2017-07-19 20:29:20,348 INFO sqlalchemy.engine.base.Engine select version()
2017-07-19 20:29:20,349 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:20,351 INFO sqlalchemy.engine.base.Engine select current_schema()
2017-07-19 20:29:20,352 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:20,354 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2017-07-19 20:29:20,354 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:20,355 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2017-07-19 20:29:20,356 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:20,357 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
2017-07-19 20:29:20,358 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:20,359 INFO sqlalchemy.engine.base.Engine select date, time, open, close, low, high, volume, pair."name" from candlestick, pair where candlestick.pair_id=pair.id and pair."name"='USDT_

In [9]:
# Run the next-day prediction for the USDT_ETC pair and keep the result frame.
USDT_ETC = nextDayPrediction(typeBlockchain='USDT_ETC', N=1)

2017-07-19 20:29:29,827 INFO sqlalchemy.engine.base.Engine select version()
2017-07-19 20:29:29,827 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:29,828 INFO sqlalchemy.engine.base.Engine select current_schema()
2017-07-19 20:29:29,829 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:29,831 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2017-07-19 20:29:29,831 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:29,832 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2017-07-19 20:29:29,833 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:29,834 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
2017-07-19 20:29:29,834 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:29:29,835 INFO sqlalchemy.engine.base.Engine select date, time, open, close, low, high, volume, pair."name" from candlestick, pair where candlestick.pair_id=pair.id and pair."name"='USDT_

In [4]:
# Run the next-day prediction for the USDT_XRP pair and keep the result frame.
# NOTE(review): this cell carries execution count In [4], lower than the cells
# above it, so it was executed out of order — re-run the notebook top to bottom.
USDT_XRP = nextDayPrediction(typeBlockchain='USDT_XRP', N=1)

2017-07-19 20:19:41,280 INFO sqlalchemy.engine.base.Engine select version()
2017-07-19 20:19:41,281 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:19:41,285 INFO sqlalchemy.engine.base.Engine select current_schema()
2017-07-19 20:19:41,286 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:19:41,288 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2017-07-19 20:19:41,289 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:19:41,290 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2017-07-19 20:19:41,291 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:19:41,292 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
2017-07-19 20:19:41,292 INFO sqlalchemy.engine.base.Engine {}
2017-07-19 20:19:41,294 INFO sqlalchemy.engine.base.Engine select date, time, open, close, low, high, volume, pair."name" from candlestick, pair where candlestick.pair_id=pair.id and pair."name"='USDT_

In [10]:
# Print every pair's prediction frame under a "<pair>:" heading.
for pair_label, pair_prediction in (
    ('USDT_BTC', USDT_BTC),
    ('USDT_LTC', USDT_LTC),
    ('USDT_ETH', USDT_ETH),
    ('USDT_ETC', USDT_ETC),
    ('USDT_XRP', USDT_XRP),
):
    print(pair_label + ':\n', pair_prediction)

USDT_BTC:
             predictionPrice
2017-07-20       2374.87915
USDT_LTC:
             predictionPrice
2017-07-20        47.055084
USDT_ETH:
             predictionPrice
2017-07-20       217.862534
USDT_ETC:
             predictionPrice
2017-07-20        16.678938
USDT_XRP:
             predictionPrice
2017-07-20          0.18293
