In [2]:
import src.get_data as get_data
import src.load_data as load
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import datetime 
from dateutil import parser

from keras.callbacks import History 
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential, load_model, save_model

import time
from sklearn import metrics

import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams

%matplotlib inline

# Notebook-wide plotting defaults: seaborn 'whitegrid' style with the 'muted' palette and
# fonts scaled by 1.5, plus a 14 x 8 default matplotlib figure size.
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = 14, 8


import plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

# Enable plotly's offline notebook mode so that iplot() can render figures in cell output.
init_notebook_mode(connected=True)

# Number of consecutive rows that form one input sequence: it is passed to load.load_data(),
# used as the first dimension of the model's input_shape, and used to slice the last batch
# for the next-day forecast in nextDayPrediction().
WINDOW = 22


In [69]:
def variability(feature, window=288):
    """Sum the absolute step-to-step changes inside consecutive fixed-size windows.

    The input is cut, by position, into chunks of ``window`` samples
    (``feature[0:window]``, ``feature[window:2*window]``, ...). For every chunk
    the absolute differences between neighbouring samples are added up.
    Differences across a chunk boundary are not counted.

    Parameters
    ----------
    feature : array-like
        One-dimensional numeric sequence (NumPy array, list, ...).
    window : int, optional
        Number of samples per chunk. The default, 288, equals the number of
        5-minute intervals in 24 hours.

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(feature) // window + 1``. The last entry
        covers the trailing partial chunk; it is ``0.0`` when the input length
        is an exact multiple of ``window`` (the trailing chunk is then empty).
        This length is kept for backward compatibility with existing callers.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    values = np.asarray(feature)
    n_windows = len(values) // window + 1
    totals = np.zeros(n_windows)

    for k in range(n_windows):
        chunk = values[k * window:(k + 1) * window]
        # np.diff of an empty or single-element chunk is empty, so the sum is 0.
        totals[k] = np.abs(np.diff(chunk)).sum()

    return totals

def getViriabilityDataframe():
    """Build a date-indexed frame of volume variability from the 5-minute dump.

    Reads ``../../data/5_minutes_dump.csv`` (no header row; column names are
    supplied here), sorts the rows by their ``date_time`` string, computes
    ``variability`` over the ``volume`` column and labels the results with the
    parsed unique dates, skipping the first one.

    Returns
    -------
    pandas.DataFrame
        Single column ``'variability'``; the index is named ``'date'`` and
        holds the datetimes parsed from the date part of ``date_time``.

    Notes
    -----
    NOTE(review): ``variability`` cuts the volume series into windows by row
    position starting at the first row, so the windows only line up with
    calendar days if the data has no gaps and starts at a day boundary.
    Confirm the intended alignment. The DataFrame constructor raises if the
    number of windows differs from the number of unique dates minus one.
    """
    # TODO: hook up the ETH data here.
    column_names = ['open', 'close', 'low', 'high',
                    'volume', 'date_time', 'ex', 'typeBlockchain']
    df = pd.read_csv('../../data/5_minutes_dump.csv', names=column_names)

    # Date part = everything before the first space of the timestamp string.
    # `n` is passed by keyword: newer pandas releases reject it positionally.
    df['date'] = df.date_time.str.split(' ', n=1).str[0]
    df.index = df.date_time

    df = df.sort_index()
    # Drop every row whose timestamp equals the second entry of the sorted index.
    # NOTE(review): the reason for removing this particular timestamp is not
    # visible here - confirm it is still wanted.
    df = df[df.index != df.index[1]]

    computed_variability = variability(df.volume.values)
    # The first unique date is skipped when labelling the windows.
    dates = [parser.parse(x) for x in df.date.unique()[1:]]
    variabilityDataframe = pd.DataFrame(computed_variability, index=dates,
                                        columns=['variability'])
    variabilityDataframe.index.names = ['date']

    return variabilityDataframe

def getSearchDataframe(feature = 'ethereum'):
    """Load one search-interest series and upsample it to daily frequency.

    Reads ``../../data/multiTimeline_2.csv`` with explicit column names,
    discards the first two rows (presumably header lines of the export -
    confirm against the file), indexes the remainder by the parsed ``date``
    column and keeps only the requested column as integers.

    Parameters
    ----------
    feature : str
        Column to keep; one of the names given to ``read_csv`` below.

    Returns
    -------
    pandas.DataFrame
        One column named ``feature``, resampled to daily ('D') frequency with
        the new rows filled by cubic interpolation.
    """
    trend_columns = ['date','blockchain', 'eth','ethereum','btc','bitcoin']
    raw_trends = pd.read_csv('../../data/multiTimeline_2.csv', names=trend_columns)

    trends = raw_trends.iloc[2:]
    trends.index = [parser.parse(stamp) for stamp in trends.date]

    selected = trends[[feature]].astype('int')
    daily = selected.resample('D')

    return daily.interpolate(method='cubic')

In [79]:
# Module-level data preparation; nextDayPrediction() repeats the same steps on its own
# local variables, so the names below are only needed by cells that read them directly.

# Load the price frame and index it by its `date` column so it can be joined on dates.
df = get_data.get_data_frame()
df.index = df.date

# Join close prices with the search-interest and volume-variability frames on the index
# and drop every row where any of the three values is missing.
all_df = df[['close']].join(getSearchDataframe().join(getViriabilityDataframe())).dropna()
features = ['ethereum', 'variability']

# The scalers are only created here; nothing in this cell fits them.
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

# Copies, so later modification of x / y does not write back into all_df.
x = all_df[features].copy()    
y = all_df['close'].copy()

In [84]:
def _nan_like(template):
    """Return an array shaped and typed like ``template`` (2-D), filled with NaN."""
    filled = np.empty_like(template)
    filled[:, :] = np.nan
    return filled


def nextDayPrediction(typeBlockchain, stock):
    """Train the model on search-interest and volume-variability features and
    forecast the close price for the day after the last known date.

    Steps: load prices via ``get_data.get_data_frame()``, join them with
    ``getSearchDataframe()`` and ``getViriabilityDataframe()``, min-max scale
    the features, build train/test windows with ``load.load_data``, fit the
    model returned by ``build_model``, save it under ``../../models/``, predict
    from the last ``WINDOW`` feature rows and (depending on the local flags)
    print RMSE scores and draw matplotlib / plotly charts.

    Parameters
    ----------
    typeBlockchain : str
        Used only to build the file name of the saved model.
    stock : any
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        One row, column ``'predictionPrice'``, indexed by the day after the
        last valid date of the price frame.

    Notes
    -----
    NOTE(review): ``build_model`` is not defined in the visible part of this
    notebook; it must exist in the kernel before this function is called.

    NOTE(review): ``load.load_data`` receives only the scaled feature frame;
    the scaled close series is never passed to it, yet predictions and targets
    are inverse-transformed with the scaler fitted on close. Confirm that
    ``load_data`` produces targets on the close-price scale.

    NOTE(review): the plot arrays are sized from the full price frame while
    the predictions come from the joined frame after ``dropna``; the overlay
    is only aligned if both have the same rows - confirm.
    """
    # Local switches controlling which diagnostics are produced.
    plot = True
    plotHictory = True
    interactiveGrapth = True
    plotForTrain = False

    df = get_data.get_data_frame()
    df.index = df.date

    all_df = df[['close']].join(getSearchDataframe().join(getViriabilityDataframe())).dropna()
    features = ['ethereum', 'variability']

    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()

    x = all_df[features].copy()
    NUM_FEATURES = x.shape[1]

    x[features] = x_scaler.fit_transform(x)
    # Only the fitted scaler is needed later (to map predictions back to prices).
    y_scaler.fit(all_df['close'].values.reshape(-1, 1))

    X_train, y_train, X_test, y_test = load.load_data(x, WINDOW, train_size= 0.96, TrainTest = True)

    model = build_model(input_shape=(WINDOW, NUM_FEATURES))

    print('START FIT MODEL...')

    start = time.time()
    # fit() returns the training history itself, so no extra History callback is required.
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                        batch_size=32, epochs=2, verbose=1)
    end = time.time()

    print ('Learning time: ', end-start)

    today = time.strftime("_%d_%m_%Y")

    pathModel = "../../models/model_SEARCH_" + typeBlockchain + today +".h5"
    save_model(model, pathModel)

    # One-day-ahead forecast from the most recent WINDOW rows of scaled features.
    # The reshape uses WINDOW (not a literal) so it stays consistent with the model input.
    lastbatch = np.array(x[-WINDOW:])
    pred = model.predict([lastbatch.reshape(1, WINDOW, NUM_FEATURES)])
    pred = np.array(y_scaler.inverse_transform(pred))  # back to price units

    # Label the forecast with the day after the last valid date of the price frame.
    # pd.to_datetime copes with date objects, timestamps and ISO strings alike.
    lastDate = pd.to_datetime(df.date[df.last_valid_index()])
    currentData = lastDate.date() + datetime.timedelta(1)
    predictionDate = pd.date_range(currentData, periods=1)
    prediction = pd.DataFrame(pred, columns=["predictionPrice"], index = predictionDate.values)

    print (prediction)

    if plotForTrain or plot:
        # Shared preparation: actual prices and the train predictions placed on a
        # NaN canvas of the same shape, offset by WINDOW rows.
        prices = df.close.values.astype('float32')
        prices = prices.reshape(len(prices), 1)

        trainPredict = y_scaler.inverse_transform(model.predict(X_train))
        trainPredictPlot = _nan_like(prices)
        trainPredictPlot[WINDOW:len(trainPredict)+WINDOW, :] = trainPredict

        Actual = pd.DataFrame(prices, columns=["close"], index=df.index).close
        Training = pd.DataFrame(trainPredictPlot, columns=["close"], index=df.index).close

    if plotForTrain:
        ActualValues = go.Scatter( x = df.date, y = Actual, name = 'ActualValues')
        TrainingValues = go.Scatter( x = df.date, y = Training, name = 'TrainingValues')
        iplot([ActualValues, TrainingValues])

    if plot:
        testPredict = y_scaler.inverse_transform(model.predict(X_test))

        # Targets are wrapped in a list to give the scaler a 2-D (1, n) input.
        trainY = y_scaler.inverse_transform([y_train])
        testY = y_scaler.inverse_transform([y_test])

        trainScore = metrics.mean_squared_error(trainY[0], trainPredict[:,0]) ** .5
        print('Train Score: %.2f RMSE' % (trainScore))

        testScore = metrics.mean_squared_error(testY[0], testPredict[:,0]) ** .5
        print('Test Score: %.2f RMSE' % (testScore))

        # Test predictions are placed at the very end of the price axis.
        testPredictPlot = _nan_like(prices)
        testPredictPlot[(len(prices) - testPredict.shape[0]):len(prices), :] = testPredict
        Testing = pd.DataFrame(testPredictPlot, columns=["close"], index=df.index).close

        plt.plot(Actual, label='Actual')
        plt.plot(Training, label='Training')
        plt.plot(Testing, label='Testing')
        plt.legend(loc='best')
        plt.show()

        if interactiveGrapth:
            ActualValues = go.Scatter( x = df.date, y = Actual, name = 'ActualValues')
            TrainingValues = go.Scatter( x = df.date, y = Training, name = 'TrainingValues')
            TestingValues = go.Scatter( x = df.date, y = Testing, name = 'PredictedValues')

            iplot([ActualValues, TrainingValues, TestingValues])

        if plotHictory:
            plt.plot(history.history['loss'], label = 'TrainLoss')
            plt.plot(history.history['val_loss'], label = 'TestLoss')
            plt.legend()
            plt.show()

    return prediction

In [None]:
# Run the whole pipeline for the USDT_ETH pair: train, save the model, plot diagnostics and
# return the one-day forecast. The second argument is accepted by nextDayPrediction but not
# used inside it.
nextDayPrediction('USDT_ETH', 'poloniex')