In [7]:
from binance.client import Client
import numpy as np
import plotly as py
import plotly.graph_objs as go
from filterpy.kalman import KalmanFilter
from copy import deepcopy
import datetime
import os.path
import collections
import glob

py.offline.init_notebook_mode(connected=True)

ModuleNotFoundError: No module named 'binance'

# Data loader

In [2]:
def isStrFloat(string):
    try:
        float(string)
        return True
    except:
        return False

class DataLoader:
    __rawDatadict = collections.OrderedDict([
        ('open'                  , 1),
        ('high'                  , 2),
        ('low'                   , 3),
        ('close'                 , 4),
        ('volume'                , 5),
        ('quoteAssetVolume'      , 7),
        ('tradeNum'              , 8),
        ('takerBaseAssetVolume'  , 9),
        ('takerQouteAssetVolume' , 10)
                                ])
    
    def __init__(self, klineHistoryDir, startDate=None, interval=1, verbose=False):
        self.__klineHistoryDir = klineHistoryDir
        self.__loadPoint = {}
        self.__validSeqList = []
        
        klineHistoryFileList = []
        for file in os.listdir(klineHistoryDir):
            if file.endswith('.npy'):
                klineHistoryFileList.append(file)
        klineHistoryFileList = sorted(
            klineHistoryFileList,
            key=lambda x: datetime.datetime.strptime(x, '%d-%b-%Y.npy')
        )
        
        # check and record valid sequence in case of corrupted data
        self.__dataLength = 0
        
        # start from a dummy datum
        thisCandleStick = ['0'] * len(self.__rawDatadict)
        seqStart = 0
        for klineHistoryFile in klineHistoryFileList:
            tmpKline = np.load(klineHistoryDir + klineHistoryFile)
            tmpKlineLength = len(tmpKline)
            if verbose:
                print(klineHistoryFile, tmpKlineLength)
            
            for i in range(tmpKlineLength - 1):
                lastCandleStick = thisCandleStick
                thisCandleStick = tmpKline[i, :]

                if self.isNewSeq(lastCandleStick, thisCandleStick, interval):
                    self.__validSeqList.append([seqStart, self.__dataLength + i - 1])
                    seqStart = self.__dataLength + i
                    
            # drop the last datum because it's the same as the first one of the next klineHistoryFile
            self.__loadPoint[self.__dataLength] = klineHistoryFile
            self.__dataLength += tmpKlineLength - 1 
                    
        self.__validSeqList = self.__validSeqList[1:]
        self.__validSeqList.append([seqStart, self.__dataLength - 1])
        
    def readData(self, batchSize, timeStep):
        self.__batchSize = batchSize
        self.__timeStep = timeStep
        self.__subseqNum = 0
        count = 0
        for [seqStart, seqEnd] in self.__validSeqList:
            if seqEnd - seqStart >= timeStep:
                self.__subseqNum += seqEnd - seqStart - timeStep + 1
                count += seqEnd - seqStart + 1
        self.__batchNum = self.__subseqNum // batchSize    
        
        self.__inputDim = len(self.__rawDatadict.keys())
        data = np.zeros([count, self.__inputDim])
        
        # [fileName, start, end]
        count = 0
        self.__lastSubseqStart = []
        for [seqStart, seqEnd] in self.__validSeqList: 
            if seqEnd - seqStart >= timeStep:
                for i in self.__readSchdule(seqStart, seqEnd):
                    tmpKline = np.load(self.__klineHistoryDir + i[0])
                    tmpKline = tmpKline[i[1] : i[2], :]
                    for candleStick in tmpKline:
                        tmpData = np.array([
                            float(candleStick[self.__rawDatadict[key]])
                            for key in self.__rawDatadict.keys()
                        ])
                        
                        data[count, :] = tmpData
                        count += 1
                
                self.__lastSubseqStart.append(count - timeStep - 1)
        
        self.__average = np.average(data, axis=0)
        data = data - self.__average
        self.__max = np.abs(data).max(axis=0)
        data /= self.__max
        self.__data = data
    
    def __getNextSubseqPtr(self, subseqPtr):
        if subseqPtr in self.__lastSubseqStart:
            subseqPtr += self.__timeStep + 1
        else:
            subseqPtr += 1
        return subseqPtr
    
    def separateTestBatch(self, ratio):
        self.__testBatchNum = int(np.ceil(self.__batchNum * ratio))
        self.__trainBatchNum = self.__batchNum - self.__testBatchNum
        print('Total {0:d} batch(es) separated into {1:d} train batch(es) and {2:d} test batch(es)'.format(
            self.__batchNum, self.__trainBatchNum, self.__testBatchNum) 
              )
        
        self.__trainSubseqPtr = 0
        subseqPtr = 0
        count = 0
        while(count < self.__trainBatchNum * self.__batchSize):
            subseqPtr = self.__getNextSubseqPtr(subseqPtr)
            count += 1
        self.__trainSubseqPtr = 0
        self.__trainSubseqPtrEnd = subseqPtr
        self.__testSubseqPtr = self.__testSubseqPtrStart = self.__getNextSubseqPtr(subseqPtr)
    
    def getNextTrainBatch(self):
        x = np.zeros((self.__batchSize, self.__timeStep, self.__inputDim))
        y = np.zeros((self.__batchSize, self.__inputDim))
        
        for i in range(self.__batchSize):
            x[i, :, :] = self.__data[self.__trainSubseqPtr : self.__trainSubseqPtr + self.__timeStep, :] 
            y[i, :] = self.__data[self.__trainSubseqPtr + self.__timeStep + 1, :]
            self.__trainSubseqPtr = self.__getNextSubseqPtr(self.__trainSubseqPtr)
        if self.__trainSubseqPtr > self.__trainSubseqPtrEnd:
            self.__trainSubseqPtr = 0
        return x, np.reshape(np.average(y[:, [0, 3]], axis=1), (self.__batchSize, 1))
    
    def resetTrainBatchPtr(self):
        self._trainBatchPtr = 0
        
    def getNextTestBatch(self):
        x = np.zeros((self.__batchSize, self.__timeStep, self.__inputDim))
        y = np.zeros((self.__batchSize, self.__inputDim))
        
        for i in range(self.__batchSize):
            x[i, :, :] = self.__data[self.__testSubseqPtr : self.__testSubseqPtr + self.__timeStep, :] 
            y[i, :] = self.__data[self.__testSubseqPtr + self.__timeStep + 1, :]
            self.__testSubseqPtr = self.__getNextSubseqPtr(self.__testSubseqPtr)
        if self.__testSubseqPtr > self.__batchNum * self.__batchSize:
            self.__testSubseqPtr = self.__testSubseqPtrStart
        return x, np.reshape(np.average(y[:, [0, 3]], axis=1), (self.__batchSize, 1))
    
    def resetTestBatchPtr(self):
        self.__testBatchPtr = self.__testSubseqPtrStart
        
    def getTestData(self):
        return np.average(self.__data[self.__trainBatchNum :, :, :, [1, 2]], axis=3)
            
    def __readSchdule(self, readStart, readEnd):
        loadPoint = sorted(self.__loadPoint.keys())
        for i in range(len(loadPoint)):
            if loadPoint[i] > readStart:
                break
        start = loadPoint[i - 1]
        startFile = self.__loadPoint[start]

        tmp = [] 
        for i in loadPoint:
            if i > readStart and i <= readEnd:
                tmp.append(i)

        readSchedule = []
        if len(tmp) > 0:
            readSchedule.append([startFile, readStart - start, tmp[0] - start])
            for i in range(len(tmp) - 1):
                readSchedule.append([self.__loadPoint[tmp[i]], 0, tmp[i + 1] - tmp[i]])
            readSchedule.append([self.__loadPoint[tmp[-1]], 0, readEnd - tmp[-1] + 1])
        else:
            readSchedule.append([startFile, readStart - start, readEnd - start + 1])
            
        return readSchedule
    
    def normalize(self, data):
        return (data - self.__average) / self.__max
    
    def invNormalize(self, data, select=[1, 2]):
        return data * self.__max[select] + self.__average[select]

    @property
    def totalBatchNum(self):
        return self.__batchNum
    
    @property
    def testSubseqPtr(self):
        return self.__testSubseqPtr
    
    @property
    def trainSubseqPtr(self):
        return self.__trainSubseqPtr
    
    @property
    def trainBatchNum(self):
        return self.__trainBatchNum
    
    @property
    def testBatchNum(self):
        return self.__testBatchNum
    
    @property
    def validSeqList(self):
        return self.__validSeqList
    
    @property
    def loadPoint(self):
        return self.__loadPoint
    
    @property
    def inputDim(self):
        return self.__inputDim
    
    @property
    def dataLength(self):
        return self.__dataLength
    
    @property
    def batchSize(self):
        return self.__batchSize
    
    @property
    def timeStep(self):
        return self.__timeStep
    
    @property
    def average(self):
        return self.__average
    
    @property
    def max(self):
        return self.__max
        
    def isNewSeq(self, lastCandleStick, thisCandleStick, interval):
        if int(thisCandleStick[0]) - int(lastCandleStick[0]) == 60000 * interval:
            return False
        else:
            return True
        
    

In [None]:
def isStrFloat(string):
    try:
        float(string)
        return True
    except:
        return False

class DataLoader:
    __rawDatadict = collections.OrderedDict([
        ('open'                  , 1),
        ('high'                  , 2),
        ('low'                   , 3),
        ('close'                 , 4),
        ('volume'                , 5),
        ('quoteAssetVolume'      , 7),
        ('tradeNum'              , 8),
        ('takerBaseAssetVolume'  , 9),
        ('takerQouteAssetVolume' , 10)
                                ])
    
    def __init__(self, klineHistoryDir, startDate=None, interval=1, verbose=False):
        self.__klineHistoryDir = klineHistoryDir
        self.__loadPoint = {}
        self.__validSeqList = []
        
        klineHistoryFileList = []
        for file in os.listdir(klineHistoryDir):
            if file.endswith('.npy'):
                klineHistoryFileList.append(file)
        klineHistoryFileList = sorted(
            klineHistoryFileList,
            key=lambda x: datetime.datetime.strptime(x, '%d-%b-%Y.npy')
        )
        
        # check and record valid sequence in case of corrupted data
        self.__dataLength = 0
        
        # start from a dummy datum
        thisCandleStick = ['0'] * len(self.__rawDatadict)
        seqStart = 0
        for klineHistoryFile in klineHistoryFileList:
            tmpKline = np.load(klineHistoryDir + klineHistoryFile)
            tmpKlineLength = len(tmpKline)
            if verbose:
                print(klineHistoryFile, tmpKlineLength)
            
            for i in range(tmpKlineLength - 1):
                lastCandleStick = thisCandleStick
                thisCandleStick = tmpKline[i, :]

                if self.isNewSeq(lastCandleStick, thisCandleStick, interval):
                    self.__validSeqList.append([seqStart, self.__dataLength + i - 1])
                    seqStart = self.__dataLength + i
                    
            # drop the last datum because it's the same as the first one of the next klineHistoryFile
            self.__loadPoint[self.__dataLength] = klineHistoryFile
            self.__dataLength += tmpKlineLength - 1 
                    
        self.__validSeqList = self.__validSeqList[1:]
        self.__validSeqList.append([seqStart, self.__dataLength - 1])
        
    def readData(self, batchSize, timeStep, shift=1):
        self.__batchSize = batchSize
        self.__timeStep = timeStep
        self.__subseqNum = 0
        self.__shift = shift
        count = 0
        for [seqStart, seqEnd] in self.__validSeqList:
            if seqEnd - seqStart + 1 >= timeStep + shift:
                self.__subseqNum += seqEnd - seqStart + 1 - (timeStep + shift) + 1
                count += seqEnd - seqStart + 1
        self.__batchNum = self.__subseqNum // batchSize    
        
        self.__inputDim = len(self.__rawDatadict.keys())
        data = np.zeros([count, self.__inputDim])
        
        # [fileName, start, end]
        count = 0
        self.__lastSubseqStart = []
        for [seqStart, seqEnd] in self.__validSeqList: 
            if seqEnd - seqStart + 1 >= timeStep + shift:
                for i in self.__readSchdule(seqStart, seqEnd):
                    tmpKline = np.load(self.__klineHistoryDir + i[0])
                    tmpKline = tmpKline[i[1] : i[2], :]
                    for candleStick in tmpKline:
                        tmpData = np.array([
                            float(candleStick[self.__rawDatadict[key]])
                            for key in self.__rawDatadict.keys()
                        ])
                        
                        data[count, :] = tmpData
                        count += 1
                
                self.__lastSubseqStart.append(count - shift - (timeStep - 1))
        
        self.__average = np.average(data, axis=0)
        data = data - self.__average
        self.__max = np.abs(data).max(axis=0)
        data /= self.__max
        self.__data = data
        
        # adjust losses basing on 2nd diff
        # self.__lossAdjust = self.lossAdjust(self.diff2nd())
        #self.__lossAdjust = np.reshape(self.__lossAdjust, (len(self.__lossAdjust), 1))
    
    def diff2nd(self):
        """
        features are map to 1d
        pad each sequence with 0 at both head and tail
        """
        seqStart = [0] 
        for i in self.__lastSubseqStart:
            seqStart.append(self.__getNextSubseqPtr(i))
        seqStart = seqStart[:-1]
        seqEnd = [i + self.__timeStep + self.__shift - 1 for i in self.__lastSubseqStart]
        seqList = zip(seqStart, seqEnd)
        
        diff2ndSeq = np.zeros(len(self.__data))
        tmpData = np.average(self.__data[:, [0, 1, 2, 3]], axis=1)
        
        for seqStart, seqEnd in seqList:
            diff2ndSeq[seqStart : seqEnd] = np.pad(
                np.diff(tmpData[seqStart : seqEnd], n=2),
                (1, 1), 
                'constant', 
                constant_values=(0., 0.)
            )
            
        return diff2ndSeq
    
    def lossAdjust(self, diff2ndSeq, 
                   binsOption=[0, 0.02, 0.0004],
                   limit=8):
        hist, bins = np.histogram(
            np.abs(diff2ndSeq), 
            bins=np.arange(binsOption[0], binsOption[1] + binsOption[2], binsOption[2])
        )
        tmp = hist / (np.sum(hist) - len(self.__validSeqList) * 2)
        binNum = (binsOption[1] - binsOption[0]) / binsOption[2] + 1
        tmp = 1. / binNum / (tmp + 1. / binNum / limit)
        bins = np.pad(bins[1:] - 0.0002, (1, 1), 'constant', constant_values=(0, 0.02)),
        hist = np.pad(tmp, (1, 0), 'edge')
        hist = np.pad(hist, (0, 1), 'constant', constant_values=(0, limit))
        fit = np.poly1d(np.polyfit(bins[0], hist, 8))
        result = fit(np.abs(diff2ndSeq))
        for i in range(len(diff2ndSeq)):
            if np.abs(diff2ndSeq[i]) > binsOption[1]:
                result[i] = limit
        return result
    
    def __getNextSubseqPtr(self, subseqPtr):
        if subseqPtr in self.__lastSubseqStart:
            subseqPtr += self.__timeStep + self.__shift
        else:
            subseqPtr += 1
            
        if subseqPtr == self.__subseqNum:
            subseqPtr = 0
        return subseqPtr
    
    def separateTestBatch(self, batchNum):
        self.__testBatchNum = int(batchNum)
        self.__trainBatchNum = self.__batchNum - self.__testBatchNum
        print('Total {0:d} batch(es) separated into {1:d} train batch(es) and {2:d} test batch(es)'.format(
            self.__batchNum, self.__trainBatchNum, self.__testBatchNum) 
              )
        
        self.__trainSubseqPtr = 0
        subseqPtr = 0
        count = 0
        while(count < self.__trainBatchNum * self.__batchSize):
            subseqPtr = self.__getNextSubseqPtr(subseqPtr)
            count += 1
        self.__trainSubseqPtrEnd = subseqPtr
        self.__testSubseqPtr = self.__testSubseqPtrStart = self.__getNextSubseqPtr(subseqPtr)
        
        self.shuffleTrainSeq()
    
    def shuffleTrainSeq(self):
        self.__trainSubseqPtr = 0
        self.shuffleIndex = []
        for i in range(self.__trainBatchNum * self.__batchSize):
            self.shuffleIndex.append(self.__trainSubseqPtr)
            self.__trainSubseqPtr = self.__getNextSubseqPtr(self.__trainSubseqPtr)
        self.shuffleIndex = np.array(self.shuffleIndex, dtype=int)
        np.random.shuffle(self.shuffleIndex)
        self.shuffleIndex = np.reshape(self.shuffleIndex, (self.__trainBatchNum, self.__batchSize))
        self.__trainBatchPtr = -1
    
    def getNextTrainBatch(self):
        x = np.zeros((self.__batchSize, self.__timeStep, self.__inputDim))
        y = np.zeros((self.__batchSize, self.__timeStep, self.__inputDim))
        lossAdjust = np.zeros((self.__batchSize, self.__timeStep, 1))
        
        self.__trainBatchPtr += 1
        if self.__trainBatchPtr == self.__batchNum:
            self.__trainBatchPtr = 0
        for i in range(self.__batchSize):
            x[i, :, :] = self.__data[self.shuffleIndex[self.__trainBatchPtr, i]                : 
                                     self.shuffleIndex[self.__trainBatchPtr, i] + self.__timeStep               , :] 
            y[i, :, :] = self.__data[self.shuffleIndex[self.__trainBatchPtr, i] + self.__shift : 
                                     self.shuffleIndex[self.__trainBatchPtr, i] + self.__timeStep + self.__shift, :] 
            # lossAdjust[i, :, :] = self.__lossAdjust[self.shuffleIndex[self.__trainBatchPtr, i] + 1 : 
            #                                         self.shuffleIndex[self.__trainBatchPtr, i] + self.__timeStep + 1, :]
#         return x, np.reshape(np.average(y[:, :, [0, 3]], axis=2), (self.__batchSize, self.__timeStep, 1))
        return x, y #, lossAdjust
    
    def resetTrainBatchPtr(self):
        self._trainBatchPtr = -1
        
    def getNextTestBatch(self):
        x = np.zeros((self.__batchSize, self.__timeStep, self.__inputDim))
        y = np.zeros((self.__batchSize, self.__timeStep, self.__inputDim))
        
        for i in range(self.__batchSize):
            x[i, :, :] = self.__data[self.__testSubseqPtr                : 
                                     self.__testSubseqPtr + self.__timeStep               , :] 
            y[i, :, :] = self.__data[self.__testSubseqPtr + self.__shift : 
                                     self.__testSubseqPtr + self.__timeStep + self.__shift, :]
            self.__testSubseqPtr = self.__getNextSubseqPtr(self.__testSubseqPtr)
        if self.__testSubseqPtr > self.__batchNum * self.__batchSize:
            self.__testSubseqPtr = self.__testSubseqPtrStart
#         return x, np.reshape(np.average(y[:, :, [0, 3]], axis=2), (self.__batchSize, self.__timeStep, 1))
        return x, y
    
    def resetTestBatchPtr(self):
        self.__testBatchPtr = self.__testSubseqPtrStart
        
    def getTestData(self):
        return np.average(self.__data[self.__trainBatchNum :, :, :, [0, 1, 2, 3]], axis=3)
            
    def __readSchdule(self, readStart, readEnd):
        loadPoint = sorted(self.__loadPoint.keys())
        for i in range(len(loadPoint)):
            if loadPoint[i] > readStart:
                break
        start = loadPoint[i - 1]
        startFile = self.__loadPoint[start]

        tmp = [] 
        for i in loadPoint:
            if i > readStart and i <= readEnd:
                tmp.append(i)

        readSchedule = []
        if len(tmp) > 0:
            readSchedule.append([startFile, readStart - start, tmp[0] - start])
            for i in range(len(tmp) - 1):
                readSchedule.append([self.__loadPoint[tmp[i]], 0, tmp[i + 1] - tmp[i]])
            readSchedule.append([self.__loadPoint[tmp[-1]], 0, readEnd - tmp[-1] + 1])
        else:
            readSchedule.append([startFile, readStart - start, readEnd - start + 1])
            
        return readSchedule
    
    def normalize(self, data):
        return (data - self.__average) / self.__max
    
    def invNormalize(self, data, select=[1, 2]):
        return data * self.__max[select] + self.__average[select]
    
    @property
    def data(self):
        return self.__data
    
    @property
    def totalBatchNum(self):
        return self.__batchNum
    
    @property
    def testSubseqPtr(self):
        return self.__testSubseqPtr
    
    @property
    def trainSubseqPtr(self):
        return self.__trainSubseqPtr
    
    @property
    def trainBatchNum(self):
        return self.__trainBatchNum
    
    @property
    def testBatchNum(self):
        return self.__testBatchNum
    
    @property
    def validSeqList(self):
        return self.__validSeqList
    
    @property
    def loadPoint(self):
        return self.__loadPoint
    
    @property
    def inputDim(self):
        return self.__inputDim
    
    @property
    def dataLength(self):
        return self.__dataLength
    
    @property
    def batchSize(self):
        return self.__batchSize
    
    @property
    def timeStep(self):
        return self.__timeStep
    
    @property
    def average(self):
        return self.__average
    
    @property
    def max(self):
        return self.__max
        
    def isNewSeq(self, lastCandleStick, thisCandleStick, interval):
        if int(thisCandleStick[0]) - int(lastCandleStick[0]) == 60000 * interval:
            return False
        else:
            return True
        
    

In [6]:
data = DataLoader('./1min/', interval=1)
data.readData(512, 32)
data.separateTestBatch(100)

Total 663 batch(es) separated into 563 train batch(es) and 100 test batch(es)


In [8]:
hist, bins = np.histogram(
    np.abs(data.finiteDiff), 
    bins=np.arange(0, 0.02 + 0.0004, 0.0004)
)
a = 5
tmp = hist / np.sum(hist)
tmp = 0.04 / (tmp + 0.04 / a)
bins1 = np.pad(bins[1:] - 0.0002, (1, 1), 'constant', constant_values=(0, 0.02)),
hist1 = np.pad(tmp, (1, 0), 'edge')
hist1 = np.pad(hist1, (0, 1), 'constant', constant_values=(0, a))
fit = np.poly1d(np.polyfit(bins1[0], hist1, 8))

line1 = go.Scatter(x=bins[1:] - 0.0002,
                   y=hist,
                   mode='lines+markers'
                  )
line2 = go.Scatter(x=bins[1:] - 0.0002,
                   y=tmp,
                   mode='markers',
                   yaxis='y2'
                  )
line3 = go.Scatter(x=bins1[0],
                   y=fit(bins1[0]),
                   mode='lines',
                   yaxis='y2'
                  )

layout = go.Layout(
    yaxis=dict(
        title='hist'
    ),
    yaxis2=dict(
        title='transform',
        overlaying='y',
        side='right'
    )
)
fig = go.Figure(data=[line1, line2, line3], layout=layout)
py.offline.iplot(fig)

In [112]:
len(np.arange(0, 0.02 + 0.0004, 0.0004))

51

# Model

In [None]:
# TensorFlow Graph visualizer code
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script src="//cdnjs.cloudflare.com/ajax/libs/polymer/0.3.3/platform.js"></script>
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))

In [None]:
import tensorflow as tf
from tensorflow.contrib import rnn
import tensorflow.contrib.layers as layers
from tensorflow.python.ops import functional_ops as fn_ops
import time

def raw2output(data, raw):
    return np.reshape(np.average(raw[:, :, [0, 1, 2, 3]], axis=2), (data.batchSize, data.timeStep, 1))

def genLossWeight(a, b, batchSize, timeStep, flip=False, pos=6):
    lossWeight = np.zeros((batchSize, timeStep, 1))
    for i in range(batchSize):
        for j in range(timeStep // pos):
            lossWeight[i, j, :] = np.power(a, (1 - j / (timeStep - 1.)) ** b)
            
        for j in range(timeStep // pos, timeStep):
            lossWeight[i, j, :] = np.power(
                np.power(a, (1 - (timeStep // pos) / (timeStep - 1.)) ** (b - 1))
                , 1 - j / (timeStep - 1.))
    if flip:
        return np.flip(lossWeight, 1) / np.average(lossWeight)
    else:
        return lossWeight / np.average(lossWeight)
 
class StatelessLSTM:
    def __init__(self,
                 inputDim = 9,
                 outputDim = 1,
                 timeStep = 16, 
                 batchSize = 1, 
                 stateSize = 128,
                 rnnLayerNum = 2,
                 learningRate = 0.0001
                ):
        
        self.__map_fn = fn_ops.map_fn
        
        # parameters
        self.__inputDim = inputDim
        self.__outputDim = outputDim
        self.__timeStep = timeStep
        self.__batchSize = batchSize
        self.__stateSize = stateSize
        self.__rnnLayerNum = rnnLayerNum
        self.__learningRate = learningRate
        
        self.__graph = tf.Graph()
        self.__initGraph()
    
    def __initGraph(self):
        tf.reset_default_graph()
        linearLayerDim = 32
        with self.__graph.as_default():
            # weight variables and placeholders
            self.__x = tf.placeholder(tf.float32, [None, self.__timeStep, self.__inputDim ], name='inputs')
            self.__y = tf.placeholder(tf.float32, [None, self.__timeStep, self.__outputDim], name='outputs')
            self.__lossWeight = tf.placeholder(tf.float32, [None, self.__timeStep, self.__outputDim], name='lossWeight')
            self.__dropoutKeepProb = tf.placeholder(tf.float32, name='dropoutKeepProb')
            
            # self.__lossWeight = tf.placeholder(tf.float32, [None, self.__timeStep, self.__outputDim], name='outputs')
            # self.weightReLU = tf.get_variable('weightReLU', 
            #                               shape=[self.__stateSize, linearLayerDim], 
            #                               initializer = tf.random_normal_initializer)
            # self.biasReLU = tf.get_variable('biasReLU', 
            #                             shape=[linearLayerDim], 
            #                             initializer = tf.random_normal_initializer)
            # self.weight = tf.get_variable('weight', 
            #                               shape=[linearLayerDim, self.__outputDim], 
            #                               initializer = tf.random_normal_initializer)
            # self.bias = tf.get_variable('bias', 
            #                             shape=[self.__outputDim], 
            #                             initializer = tf.random_normal_initializer)
            
            # multilaer lstm
            cells = []
            for i in range(self.__rnnLayerNum):
                with tf.name_scope('LayerNormBasicLSTMCell' + str(i)):
                    cells.append(
                        rnn.LayerNormBasicLSTMCell(
                            self.__stateSize, 
                            dropout_keep_prob=self.__dropoutKeepProb)
                    )
            
            lstmStacked = tf.contrib.rnn.MultiRNNCell(
                cells, 
                state_is_tuple=True
            )

            output, state = tf.nn.dynamic_rnn(
                lstmStacked, self.__x, 
                dtype=tf.float32
            )
#             state = tf.reshape(output[:, -1, :], [tf.shape(output)[0], 1, self.__stateSize])
            
            # linear layer                
#             dense1 = lambda x: layers.linear(x, num_outputs=32, activation_fn=tf.nn.relu)
#             dense2 = lambda x: layers.linear(x, num_outputs=self.__outputDim, activation_fn=None)
            dense1 = lambda x: layers.linear(x, num_outputs=32, activation_fn=tf.nn.tanh)
            dense2 = lambda x: layers.linear(x, num_outputs=self.__outputDim, activation_fn=None)
    
    
            # applyer the linear layer to the rnn outputs
            # self.__predict = tf.nn.leaky_relu(
            #     tf.matmul(
            #         tf.reshape(output, [tf.shape(output)[0] * tf.shape(output)[1], self.__stateSize]),
            #         self.weightReLU
            #     ) + self.biasReLU
            # )
            # self.__predict = tf.reshape(
            #     tf.matmul(
            #         self.__predict,
            #         self.weight
            #     ) + self.bias,
            #     [tf.shape(output)[0], tf.shape(output)[1], self.__outputDim]
            # )
            self.__predict = tf.map_fn(
                dense2,
                tf.map_fn(dense1, output)
            )
#             self.__predict = self.__map_fn(dense1, output)
#             print(self.__predict.get_shape())
#             self.__predict = self.__map_fn(dense2, self.__predict)
#             print(self.__predict.get_shape())
#             self.__predict = self.__map_fn(dense2, state)
#             self.__predict = tf.reshape(self.__predict, [tf.shape(self.__predict)[0], 1])

            # loss and optimizer
            self.__loss = tf.reduce_mean(
                tf.multiply(
                    self.__lossWeight,
                    tf.squared_difference(self.__predict, self.__y)
                )
            )
            self.__trainOp = tf.train.AdamOptimizer(learning_rate=self.__learningRate).minimize(self.__loss)
        tf.summary.FileWriter("logs", self.__graph).close()
        
    def train(self, data, epochNum, saveFile, 
              resume=None, 
              lossWeight=None, 
              raw2outputFn=raw2output, 
              targetLossDist=None,
              checkProb=0.8,
              seqLength=None):
        if lossWeight is None:
            lossWeight = genLossWeight(1/16, 1, data.batchSize, data.timeStep)
        
        with tf.Session(graph=self.__graph) as session:
            if resume is None:
                session.run(tf.global_variables_initializer())
            else:
                self.loadModel(session, resume)
                
            
            
            epochErrorList = []
            epochCount = 0
            while(epochCount < epochNum):
                startTime = time.time()
                epochError = 0
                data.shuffleTrainSeq()
                for i in range(data.trainBatchNum):
                    x, y = data.getNextTrainBatch()
                    y = raw2outputFn(data, y)
                    tmp, _ = session.run([self.__loss, self.__trainOp], {
                            self.__x: x,
                            self.__y: y,
                            self.__lossWeight: lossWeight,
                            self.__dropoutKeepProb: 0.5
                        })
                    epochError += tmp
                epochCount += 1
                epochError /= data.trainBatchNum
                epochErrorList.append(epochError)

                printingText = 'Epoch {0:d}, train error: {1:f}, time: {2:4.6f}s'
                print(
                    printingText.format(
                        epochCount, 
                        epochError, 
                        time.time() - startTime,
                    )
                )
                
            self.saveModel(session, saveFile)
            return epochErrorList
        
    def test(self, data, fileName):
        with tf.Session(graph=self.__graph) as session:
            self.loadModel(session, fileName)

            lossWeight = np.zeros((data.batchSize, data.timeStep, self.__outputDim))
            for i in range(data.batchSize):
                for j in range(data.timeStep):
                    lossWeight[i, j, :] = np.power(8., -1. + j / (data.timeStep - 1.))
            
            predict = np.zeros((data.testBatchNum * data.batchSize, self.__outputDim))
            for i in range(data.testBatchNum):
                x, y = data.getNextTestBatch()
                tmp = session.run(self.__predict, {
                            self.__x: x,
                            self.__y: y,
                            self.__dropoutKeepProb: 1.0
                        })
                predict[i, :, :, :] = tmp
                tmp = session.run(self.__loss, {
                            self.__x: x,
                            self.__y: y,
                            self.__dropoutKeepProb: 0.5
                        })
                print(tmp)
        return predict
        
    def profile(self, data, fn):
        with tf.Session(graph=self.__graph) as session:
            session.run(tf.global_variables_initializer())
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            
            lossWeight = genLossWeight(1. / 4., 1., data.batchSize, data.timeStep)
            data.shuffleTrainSeq()
            x, y = data.getNextTrainBatch()
            y = raw2output(data, y)
            tmp, _ = session.run([self.__loss, self.__trainOp], {
                self.__x: x,
                self.__y: y,
                self.__lossWeight: lossWeight
            })
            
            x, y = data.getNextTrainBatch()
            y = raw2output(data, y)
            tmp, _ = session.run([self.__loss, self.__trainOp], {
                                      self.__x: x,
                                      self.__y: y,
                                      self.__lossWeight: lossWeight
                                  },
                                  options=options, 
                                  run_metadata=run_metadata                
                                )
            
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(fn, 'w') as f:
                f.write(chrome_trace)
        
    def saveModel(self, session, fileName):
        saver = tf.train.Saver()
        tmp = saver.save(session, fileName)
        print('Model saved at ' + tmp)
        
    def loadModel(self, session, fileName):
        saver = tf.train.Saver()
        saver.restore(session, fileName)
    
    def showGraph(self):
        show_graph(self.__graph)
    
    def setLearningRate(self, learningRate):
        self.__learningRate = learningRate
    
    @property
    def graph(self):
        return self.__graph
    
    @property
    def predict(self):
        return self.__predict
    
    @property
    def x(self):
        return self.__x
    
    @property
    def y(self):
        return self.__y
    
    @property
    def dropoutKeepProb(self):
        return self.__dropoutKeepProb
    
    @property
    def loss(self):
        return self.__loss
    
    @property
    def lossWeight(self):
        return self.__lossWeight
    


In [26]:
tmp = np.array([0.023400, 
0.008828, 
0.006540, 
0.005353, 
0.004467, 
0.003283, 
0.002396,
0.001213,
0.000030])

tmp / tmp[0]

array([1.        , 0.37726496, 0.27948718, 0.22876068, 0.19089744,
       0.14029915, 0.10239316, 0.05183761, 0.00128205])

In [30]:
model.showGraph()

# Verification

In [12]:
timeStep = 32
batchSize = 512
interval = '5min'

data = DataLoader('./klineHistory/' + interval + '/', interval=5)
data.readData(batchSize, timeStep)
data.separateTestBatch(0.08)

model = StatelessLSTM(
    inputDim = data.inputDim,
    timeStep = data.timeStep,
    batchSize = data.batchSize,
    rnnLayerNum = 2
)

    
model.setLearningRate(0.08)  
lossWeight = genLossWeight(1. / 1., 1, data.batchSize, data.timeStep)
epochError = model.train(data, 100, 
                          './klineHistory/' + interval + '/test.ckpt',
                          lossWeight=lossWeight
                         )

model.setLearningRate(0.02)   
epochError = model.train(data, 100, 
                          './klineHistory/' + interval + '/test200.ckpt', 
                          resume='./klineHistory/' + interval + '/test.ckpt',
                          lossWeight=lossWeight
                         )

model.setLearningRate(0.008)
lossWeight = genLossWeight(1. / 2., 1, data.batchSize, data.timeStep)
epochError = model.train(data, 100, 
                          './klineHistory/' + interval + '/test300.ckpt', 
                          resume='./klineHistory/' + interval + '/test200.ckpt',
                          lossWeight=lossWeight,
                          reverseLossWeight=0.3
                         )

model.setLearningRate(0.002)
epochError = model.train(data, 100, 
                          './klineHistory/' + interval + '/test400.ckpt', 
                          resume='./klineHistory/' + interval + '/test300.ckpt',
                          lossWeight=lossWeight,
                          reverseLossWeight=0.2
                         )

model.setLearningRate(0.0008)
lossWeight = genLossWeight(1. / 4., 1, data.batchSize, data.timeStep)
epochError = model.train(data, 100, 
                          './klineHistory/' + interval + '/test500.ckpt', 
                          resume='./klineHistory/' + interval + '/test400.ckpt',
                          lossWeight=lossWeight,
                          reverseLossWeight=0.1
                         )

model.setLearningRate(0.0002)
lossWeight = genLossWeight(1. / 8., 1, data.batchSize, data.timeStep)
epochError = model.train(data, 100, 
                          './klineHistory/' + interval + '/test600.ckpt', 
                          resume='./klineHistory/' + interval + '/test500.ckpt',
                          lossWeight=lossWeight
                         )

Total 32 batch(es) separated into 32 train batch(es) and 0 test batch(es)


KeyboardInterrupt: 

In [54]:
timeStep = 16
batchSize = 512
interval = '1min'

data = DataLoader(interval + '/', interval=1)
data.readData(batchSize, timeStep, shift=2)
data.separateTestBatch(128)

model = StatelessLSTM(
    inputDim = data.inputDim,
    timeStep = data.timeStep,
    batchSize = data.batchSize,
    rnnLayerNum = 3
)

model.setLearningRate(0.01)
epochError = model.train(data, 64, '1min-model3.ckpt')

Total 672 batch(es) separated into 544 train batch(es) and 128 test batch(es)
Epoch 1, train error: 0.027673, time: 231.252800s
Epoch 2, train error: 0.007658, time: 229.111557s
Epoch 3, train error: 0.004444, time: 224.198827s
Epoch 4, train error: 0.003099, time: 224.434464s
Epoch 5, train error: 0.002343, time: 223.995814s
Epoch 6, train error: 0.001859, time: 224.159560s
Epoch 7, train error: 0.001527, time: 224.175323s
Epoch 8, train error: 0.001281, time: 224.260738s
Epoch 9, train error: 0.001101, time: 224.457181s
Epoch 10, train error: 0.000962, time: 224.014934s
Epoch 11, train error: 0.000856, time: 224.000647s
Epoch 12, train error: 0.000770, time: 224.308122s
Epoch 13, train error: 0.000697, time: 224.067852s
Epoch 14, train error: 0.000635, time: 224.047353s
Epoch 15, train error: 0.000577, time: 224.338018s
Epoch 16, train error: 0.000525, time: 223.875896s
Epoch 17, train error: 0.000474, time: 224.650597s
Epoch 18, train error: 0.000429, time: 224.373821s
Epoch 19, tra

In [55]:
timeStep = 16
batchSize = 512
interval = '1min'

data = DataLoader(interval + '/', interval=1)
data.readData(batchSize, timeStep, shift=2)
data.separateTestBatch(128)

model = StatelessLSTM(
    inputDim = data.inputDim,
    timeStep = data.timeStep,
    batchSize = data.batchSize,
    rnnLayerNum = 2
)

model.setLearningRate(0.01)
epochError = model.train(data, 64, '1min-model4.ckpt')

Total 672 batch(es) separated into 544 train batch(es) and 128 test batch(es)
Epoch 1, train error: 0.030595, time: 202.211849s
Epoch 2, train error: 0.006727, time: 201.664327s
Epoch 3, train error: 0.004016, time: 201.552992s
Epoch 4, train error: 0.002860, time: 200.903225s
Epoch 5, train error: 0.002209, time: 201.668910s
Epoch 6, train error: 0.001778, time: 201.318371s
Epoch 7, train error: 0.001470, time: 201.022713s
Epoch 8, train error: 0.001246, time: 201.012858s
Epoch 9, train error: 0.001071, time: 200.783087s
Epoch 10, train error: 0.000934, time: 201.125429s
Epoch 11, train error: 0.000822, time: 201.240660s
Epoch 12, train error: 0.000734, time: 201.153074s
Epoch 13, train error: 0.000655, time: 201.437453s
Epoch 14, train error: 0.000589, time: 201.126477s
Epoch 15, train error: 0.000530, time: 200.951184s
Epoch 16, train error: 0.000478, time: 200.765418s
Epoch 17, train error: 0.000429, time: 201.477406s
Epoch 18, train error: 0.000388, time: 201.358827s
Epoch 19, tra

In [135]:
timeStep = 16
batchSize = 512
interval = '1min'

data = DataLoader(interval + '/', interval=1)
data.readData(batchSize, timeStep, shift=1)
data.separateTestBatch(128)

model = StatelessLSTM(
    inputDim = data.inputDim,
    timeStep = data.timeStep,
    batchSize = data.batchSize,
    rnnLayerNum = 2
)

model.setLearningRate(0.01)
epochError = model.train(data, 32, '1min-model5.ckpt', lossWeight=lossWeight)

Total 672 batch(es) separated into 544 train batch(es) and 128 test batch(es)
Epoch 1, train error: 0.022469, time: 210.292240s
Epoch 2, train error: 0.005423, time: 209.195273s
Epoch 3, train error: 0.003260, time: 228.348344s
Epoch 4, train error: 0.002332, time: 231.152606s
Epoch 5, train error: 0.001799, time: 230.656479s
Epoch 6, train error: 0.001456, time: 255.318973s
Epoch 7, train error: 0.001217, time: 247.368022s
Epoch 8, train error: 0.001039, time: 247.530292s
Epoch 9, train error: 0.000906, time: 234.360381s
Epoch 10, train error: 0.000801, time: 227.017660s
Epoch 11, train error: 0.000719, time: 250.976976s
Epoch 12, train error: 0.000645, time: 226.855087s
Epoch 13, train error: 0.000582, time: 222.778391s
Epoch 14, train error: 0.000522, time: 258.379664s
Epoch 15, train error: 0.000469, time: 208.869830s
Epoch 16, train error: 0.000420, time: 208.537973s
Epoch 17, train error: 0.000380, time: 208.420954s
Epoch 18, train error: 0.000346, time: 208.111659s
Epoch 19, tra

In [142]:
timeStep = 16
batchSize = 512
interval = '1min'

# data = DataLoader(interval + '/', interval=1)
# data.readData(batchSize, timeStep, shift=2)
# data.separateTestBatch(128)
lossWeight = genLossWeight(1/16, 1, data.batchSize, data.timeStep)

model = StatelessLSTM(
    inputDim = data.inputDim,
    timeStep = data.timeStep,
    batchSize = data.batchSize,
    rnnLayerNum = 2
)

with tf.Session(graph=model.graph) as session:
    model.loadModel(session, '1min-model5.ckpt')

    x, y = data.getNextTestBatch()
    y1 = raw2output(data, y)
    predict = session.run(model.predict, {
                            model.x: x,
                            model.dropoutKeepProb: 1.0
                        })

#     x, y = data.getNextTestBatch()
#     x, y = data.getNextTestBatch()
#     y1 = raw2output(data, y)
    print(
        session.run(model.loss, {
                            model.x: x,
                            model.y: y1,
                            model.lossWeight: lossWeight,
                            model.dropoutKeepProb: 1.0
                        }))
    
# line1 = go.Scatter(
#     x = np.array(range(len(y))),
#     y = np.reshape(y[:, -1, :], (len(y))),
#     mode = 'lines+markers',
#     name='open&close avg'
# )

line1 = go.Candlestick(
    x = np.array(range(len(y))),
    open=y[:, -1, 0],
    high=y[:, -1, 1],
    low=y[:, -1, 2],
    close=y[:, -1, 3],
    name='EOS/BTC'
)

line2 = go.Scatter(
    x = np.array(range(len(predict))),
    y = np.reshape(predict[:, -1, :], (len(predict))),
    mode = 'lines+markers',
    name='predict'
)

fig = go.Figure(data=[line1, line2])
py.offline.iplot(fig)

INFO:tensorflow:Restoring parameters from 1min-model5.ckpt
5.469027e-05


In [17]:
def solution(A, K):
    n = len(A)
    best = 0
    count = 1
    for i in range(n - K - 1):
        if (A[i] == A[i + 1]):
            count = count + 1
            print(i, count)
        else:
            count = 0
        best = max(best, count)
    result = best + 1 + K

    return result

In [18]:
solution([1, 1, 3, 3, 3, 4, 5, 5, 5, 5], 2)

0 2
2 1
3 2
6 1


5

In [141]:
i = np.random.randint(data.batchSize)

line1 = go.Scatter(
    x = np.array(range(len(y1[i]))),
    y = np.reshape(y1[i, :, :], (len(y1[i]))),
    mode = 'lines+markers',
    name='ground truth'
)

line2 = go.Scatter(
    x = np.array(range(len(predict[i]))),
    y = np.reshape(predict[i, :, :], (len(predict[i]))),
    mode = 'lines+markers',
    name='predict'
)

fig = go.Figure(data=[line1, line2])
py.offline.iplot(fig)

In [None]:
timeStep = 32
batchSize = 512
interval = '1min'

data = DataLoader('./klineHistory/' + interval + '/', interval=1)
data.readData(batchSize, timeStep, shift=2)
data.separateTestBatch(16)

model = StatelessLSTM(
    inputDim = data.inputDim,
    timeStep = data.timeStep,
    batchSize = data.batchSize,
    rnnLayerNum = 2
)

model.setLearningRate(0.01)
epochError = model.train(data, './klineHistory/' + interval + '/test2',
                         resume='./klineHistory/1min/test2-0.02.ckpt',
                         recipe=[0.01, 0.008])

Total 79 batch(es) separated into 63 train batch(es) and 16 test batch(es)
INFO:tensorflow:Restoring parameters from ./klineHistory/1min/test2-0.02.ckpt
Epoch 1, train error: 0.000049, time: 16.750798s, progress(0/2, 9/10), target error: 0.000200
Epoch 2, train error: 0.000050, time: 15.999686s, progress(0/2, 9/10), target error: 0.000200
Epoch 3, train error: 0.016169, time: 16.047131s, progress(0/2, 0/10), target error: 0.010000
Epoch 4, train error: 0.015278, time: 15.872758s, progress(0/2, 0/10), target error: 0.010000
Epoch 5, train error: 0.000240, time: 15.949814s, progress(0/2, 9/10), target error: 0.000200
Epoch 6, train error: 0.000051, time: 15.985389s, progress(0/2, 9/10), target error: 0.000200
Epoch 7, train error: 0.005200, time: 15.912400s, progress(0/2, 1/10), target error: 0.003800
Epoch 8, train error: 0.000103, time: 16.006103s, progress(0/2, 9/10), target error: 0.000200
Epoch 9, train error: 0.000049, time: 15.986370s, progress(0/2, 9/10), target error: 0.000200
E

Epoch 86, train error: 0.000034, time: 15.917134s, progress(0/2, 9/10), target error: 0.000200
Epoch 87, train error: 0.000035, time: 16.050484s, progress(0/2, 9/10), target error: 0.000200
Epoch 88, train error: 0.000036, time: 15.954243s, progress(0/2, 6/10), target error: 0.001020
Epoch 89, train error: 0.000034, time: 15.966275s, progress(0/2, 9/10), target error: 0.000200
Epoch 90, train error: 0.000034, time: 16.026685s, progress(0/2, 9/10), target error: 0.000200
Epoch 91, train error: 0.000035, time: 15.981542s, progress(0/2, 7/10), target error: 0.000620
Epoch 92, train error: 0.000034, time: 15.926491s, progress(0/2, 9/10), target error: 0.000200
Epoch 93, train error: 0.000036, time: 15.970994s, progress(0/2, 9/10), target error: 0.000200
Epoch 94, train error: 0.000034, time: 15.988883s, progress(0/2, 8/10), target error: 0.000400
Epoch 95, train error: 0.000035, time: 16.003287s, progress(0/2, 8/10), target error: 0.000400
Epoch 96, train error: 0.000033, time: 15.961298s,

Epoch 172, train error: 0.003492, time: 15.924954s, progress(0/2, 1/10), target error: 0.003800
Epoch 173, train error: 0.000030, time: 15.948190s, progress(0/2, 9/10), target error: 0.000200
Epoch 174, train error: 0.000028, time: 15.909201s, progress(0/2, 9/10), target error: 0.000200
Epoch 175, train error: 0.000169, time: 15.976883s, progress(0/2, 2/10), target error: 0.002800
Epoch 176, train error: 0.000030, time: 15.913798s, progress(0/2, 9/10), target error: 0.000200
Epoch 177, train error: 0.000027, time: 15.911076s, progress(0/2, 9/10), target error: 0.000200
Epoch 178, train error: 0.000060, time: 15.932206s, progress(0/2, 3/10), target error: 0.002380
Epoch 179, train error: 0.000028, time: 15.976615s, progress(0/2, 9/10), target error: 0.000200
Epoch 180, train error: 0.000027, time: 15.892952s, progress(0/2, 9/10), target error: 0.000200
Epoch 181, train error: 0.000037, time: 15.867356s, progress(0/2, 4/10), target error: 0.001900
Epoch 182, train error: 0.000028, time: 

Epoch 258, train error: 0.000023, time: 15.872571s, progress(0/2, 9/10), target error: 0.000200
Epoch 259, train error: 0.000024, time: 15.889090s, progress(0/2, 8/10), target error: 0.000400
Epoch 260, train error: 0.000023, time: 15.994422s, progress(0/2, 8/10), target error: 0.000400
Epoch 261, train error: 0.000022, time: 15.990640s, progress(0/2, 9/10), target error: 0.000200
Epoch 262, train error: 0.000023, time: 15.886797s, progress(0/2, 9/10), target error: 0.000200
Epoch 263, train error: 0.000024, time: 15.984021s, progress(0/2, 9/10), target error: 0.000200
Epoch 264, train error: 0.000024, time: 15.928702s, progress(0/2, 9/10), target error: 0.000200
Epoch 265, train error: 0.000024, time: 15.987294s, progress(0/2, 9/10), target error: 0.000200
Epoch 266, train error: 0.000023, time: 15.949796s, progress(0/2, 9/10), target error: 0.000200
Epoch 267, train error: 0.010508, time: 15.949527s, progress(0/2, 0/10), target error: 0.010000
Epoch 268, train error: 0.009934, time: 

Epoch 343, train error: 0.000040, time: 15.954182s, progress(1/2, 3/10), target error: 0.001904
Epoch 344, train error: 0.000024, time: 15.849496s, progress(1/2, 9/10), target error: 0.000160
Epoch 345, train error: 0.000022, time: 15.948593s, progress(1/2, 9/10), target error: 0.000160
Epoch 346, train error: 0.000026, time: 15.837485s, progress(1/2, 4/10), target error: 0.001520
Epoch 347, train error: 0.000022, time: 15.901548s, progress(1/2, 9/10), target error: 0.000160
Epoch 348, train error: 0.000022, time: 15.874031s, progress(1/2, 9/10), target error: 0.000160
Epoch 349, train error: 0.000024, time: 15.867966s, progress(1/2, 5/10), target error: 0.001120
Epoch 350, train error: 0.000022, time: 15.897871s, progress(1/2, 9/10), target error: 0.000160
Epoch 351, train error: 0.000021, time: 16.017116s, progress(1/2, 9/10), target error: 0.000160
Epoch 352, train error: 0.000022, time: 15.936503s, progress(1/2, 6/10), target error: 0.000816
Epoch 353, train error: 0.000021, time: 

In [None]:
timeStep = 32
batchSize = 512
interval = '5min'

data = DataLoader('./klineHistory/' + interval + '/', interval=5)
data.readData(batchSize, timeStep)
data.separateTestBatch(2)

model = StatelessLSTM(
    inputDim = data.inputDim,
    timeStep = data.timeStep,
    batchSize = data.batchSize,
    rnnLayerNum = 2
)

model.setLearningRate(0.001)
cutoff = 4
lossWeight = genLossWeight(1. / 1., 1, data.batchSize, data.timeStep)
lossWeight[:, cutoff:, :] = 0
lossWeight = lossWeight / np.average(lossWeight)
epochError = model.train(data, 20, 
                         './klineHistory/' + interval + '/test2.ckpt',
                         resume='./klineHistory/' + interval + '/test1.ckpt',
                         lossWeight=lossWeight,
                         seqLength=cutoff
                        )

Total 31 batch(es) separated into 29 train batch(es) and 2 test batch(es)
INFO:tensorflow:Restoring parameters from ./klineHistory/5min/test1.ckpt
0.0
Epoch 0, train error: 0.001970, time: 8.701102s


In [11]:
[i for i in range(8, 33, 8)]

[8, 16, 24, 32]

In [45]:
line1 = go.Scatter(
    x = np.array(range(len(epochError))),
    y = epochError,
    mode = 'lines'
)

fig = go.Figure(data=[line1])
py.offline.iplot(fig)

In [55]:
data = DataLoader('./klineHistory/5min/', interval=5)
data.readData(batchSize, timeStep)
data.separateTestBatch(0.1)

model = StatelessLSTM(
    inputDim = data.inputDim,
    timeStep = data.timeStep,
    batchSize = data.batchSize,
    rnnLayerNum = 2,
    learningRate = 0.0001
)

Total 27 batch(es) separated into 24 train batch(es) and 3 test batch(es)
(?, 64, 32)
(?, 64, 1)


In [61]:
# timeStep = 32
# batchSize = 512
# interval = '1min'

# data = DataLoader('./klineHistory/' + interval + '/', interval=1)
# data.readData(batchSize, timeStep)
# data.separateTestBatch(4)

# model = StatelessLSTM(
#     inputDim = data.inputDim,
#     timeStep = data.timeStep,
#     batchSize = data.batchSize,
#     rnnLayerNum = 2
# )

lossWeight = genLossWeight(1., 1, data.batchSize, data.timeStep)
lossWeight[:, :-1, :] = 0
lossWeight = lossWeight / np.average(lossWeight)

with tf.Session(graph=model.graph) as session:
    model.loadModel(session, './klineHistory/1min/test2-0.02.ckpt')

#     x, y = data.getNextTrainBatch()
    x, y = data.getNextTestBatch()
    y1 = raw2output(data, y)
    print(
        session.run(model.loss, {
                            model.x: x,
                            model.y: y1,
                            model.lossWeight: lossWeight
                        }))

#     x, y = data.getNextTestBatch()
    predict = session.run(model.predict, {
                            model.x: x,
                            model.y: y1
                        })
    
# line1 = go.Scatter(
#     x = np.array(range(len(y))),
#     y = np.reshape(y[:, -1, :], (len(y))),
#     mode = 'lines+markers',
#     name='open&close avg'
# )

line1 = go.Candlestick(
    x = np.array(range(len(y))),
    open=y[:, -1, 0],
    high=y[:, -1, 1],
    low=y[:, -1, 2],
    close=y[:, -1, 3],
    name='ADA/BTC'
)

line2 = go.Scatter(
    x = np.array(range(len(predict))),
    y = np.reshape(predict[:, -1, :], (len(predict))),
    mode = 'lines+markers',
    name='predict'
)

fig = go.Figure(data=[line1, line2])
py.offline.iplot(fig)

INFO:tensorflow:Restoring parameters from ./klineHistory/1min/test2-0.02.ckpt
0.00023135677


In [63]:
i = np.random.randint(data.batchSize)

line1 = go.Scatter(
    x = np.array(range(len(y1[i]))),
    y = np.reshape(y1[i, :, :], (len(y1[i]))),
    mode = 'lines+markers',
    name='ground truth'
)

line2 = go.Scatter(
    x = np.array(range(len(predict[i]))),
    y = np.reshape(predict[i, :, :], (len(predict[i]))),
    mode = 'lines+markers',
    name='predict'
)

fig = go.Figure(data=[line1, line2])
py.offline.iplot(fig)

In [26]:
# timeStep = 32
# batchSize = 512

# data = DataLoader('./klineHistory/1min/', interval=1)
# data.readData(batchSize, timeStep)
# data.separateTestBatch(0.1)

_, tmp = data.getNextTestBatch()

In [44]:
_, tmp = data.getNextTestBatch()

line1 = go.Candlestick(
    x = np.array(range(len(y))),
    open=tmp[:, -1, 0],
    high=tmp[:, -1, 1],
    low=tmp[:, -1, 2],
    close=tmp[:, -1, 3],
    name='ADA/BTC'
)

line2 = go.Scatter(
    x = np.array(range(len(predict))),
    y = np.reshape(predict[:, -1, :], (len(predict))),
    mode = 'lines+markers',
    name='predict',
    opacity = 0.7
)

fig = go.Figure(data=[line1, line2])
py.offline.iplot(fig)

In [47]:
model = StatelessLSTM(
    inputDim = data.inputDim,
    timeStep = data.timeStep,
    batchSize = data.batchSize,
    rnnLayerNum = 2,
    learningRate = 0.0001
)

(?, 64, 32)
(?, 64, 1)


In [8]:
batchSize, timeStep = 16, 16

data = DataLoader('./klineHistory/1min/')
data.readData(batchSize, timeStep)
data.separateTestBatch(0.1)
# tmp = np.reshape(data.getTestData(), (data.testBatchNum * data.batchSize * data.timeStep, 2))
true = np.reshape(data.getNextTrainBatch()[1], (data.batchSize * data.timeStep, 1))
tmpTrue = true[0 : 512]
# line1 = go.Candlestick(
#     x = np.array(range(len(tmpTrue))),
#     open=tmpTrue[:, 0],
#     high=tmpTrue[:, 0],
#     low=tmpTrue[:, 1],
#     close=tmpTrue[:, 1]
# )

# tmpPredict = predict[0 : 512, :]
# line2 = go.Candlestick(
#     x = np.array(range(len(tmpPredict))),
#     open=tmpPredict[:, 0],
#     high=tmpPredict[:, 0],
#     low=tmpPredict[:, 1],
#     close=tmpPredict[:, 1],
#     increasing=dict(line=dict(color= '#17BECF')),
#     decreasing=dict(line=dict(color= '#7F7F7F')),
#     opacity=0.4,
# )

# fig = go.Figure(data=[line1, line2])
# fig = go.Figure(data=[line1])

line1 = go.Scatter(
    x = np.array(range(len(tmpTrue))),
    y = np.reshape(tmpTrue, (len(tmpTrue))),
    mode = 'lines'
)

# predict = predict[0 : 512]
# line2 = go.Scatter(
#     x = np.array(range(len(predict))),
#     y = np.reshape(predict, (len(predict))),
#     mode = 'lines'
# )

fig = go.Figure(data=[line1])
py.offline.iplot(fig)

Total 55 batch(es) separated into 49 train batch(es) and 6 test batch(es)
