In [2]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [3]:
import pandas as pd
import numpy as np
import scipy.stats as sp_stats
import requests
from datetime import timedelta
import matplotlib.pyplot as plt
import time
import math

In [4]:
POLONIEX_OHLCV_BASEURL = 'https://poloniex.com/public?command=returnChartData&currencyPair='

"""
https://poloniex.com/public?command=returnChartData&currencyPair=BTC_POT&start=1435699200&end=9999999999&period=14400

"""

def get_ohlcv_poloniex(pair='USDT_BTC', start=1435699200, end=9999999999, period=900):
    """
    returns ohlcv data for poloniex as pandas dataframe
    convert to unix timestamp using https://coderstoolbox.net/unixtimestamp/
    :param pair: str pair on poloniex
    :param start: int unix timestamp of beginning time
    :param end: int unix timestamp of ending time
    :param period: int candle width in seconds
    :return: pandas df of ohlcv data from poloniex for specified pair, times, and period
    """
    query = POLONIEX_OHLCV_BASEURL + pair + '&start=' + str(start) + '&end=' + str(end) + '&period=' + str(period)
    resp = requests.get(query,verify=False)

    if resp.status_code != 200:
        raise requests.ApiError('GET /tasks/ {}'.format(resp.status_code))

    return pd.DataFrame(resp.json())

In [5]:
class QuantileEncoder:
    """
    Performs quantile encoding for a real valued source.  The assigned value
    for each bins is chosen as to minimize the mean square error introduced
    by the quantization (conditional expectation within the bin).
    Quantizer bins and mappings are computed from the empirical distribution
    of the training data.
    """
    def __init__(self, alphabet_size=2):
        assert alphabet_size >= 2

        self._quantiles = [n / float(alphabet_size)
                           for n in range(alphabet_size + 1)]
        self._bins = None
        self._bin_mappings = None
        self._suffix = '_right'

    def fit(self, data: pd.Series):
        """
        Fit the quantizer parameters from training data.
        :param data:
        :return:
        """
        self.fit_transform(data)

    def transform(self, data: pd.Series):
        """
        Previously fit encoder performs the transformation on new data.
        :param data:
        :return:
        """
        assert self._bins is not None
        assert self._bin_mappings is not None

        ser = pd.cut(data, self._bins, labels=False)
        ser.name = 'labels'

        df = pd.concat([ser, data], axis=1)
        df_joined = df.join(self._bin_mappings, 'labels', rsuffix=self._suffix)

        return df_joined[data.name + self._suffix]

    def fit_transform(self, data: pd.Series):
        """
        Train with then return the transformation of some data.
        :param data:
        :return:
        """
        ser, bins = pd.qcut(data, self._quantiles, retbins=True, labels=False)
        ser.name = 'labels'

        df = pd.concat([ser, data], axis=1)
        bin_mappings = df.groupby('labels').mean()
        df_joined = df.join(bin_mappings, 'labels', rsuffix=self._suffix)

        self._bins = bins
        self._bin_mappings = bin_mappings

        return df_joined[data.name + self._suffix]

In [6]:
def plugIn(msg, w):
    # Compute plug-in (ML) entropy rate
    pmf = pmf1(msg, w)
    out = - sum([pmf[i] * np.log2(pmf[i]) for i in pmf]) / w
    return out, pmf

def pmf1(msg, w):
    # Compute the prob mass function for a 1D discrete RV
    # len(msg)-w occurances
    lib = {}
    if not isinstance(msg, str): msg = ''.join(map(str, msg))
    for i in range(w, len(msg)):
        msg_ = msg[i-w: i]
        if msg_ not in lib: 
            lib[msg_] = [i-w]
        else: 
            lib[msg_] = lib[msg_] + [i-w]
    pmf = float(len(msg) - w)
    pmf = {i: len(lib[i])/pmf for i in lib}
    return pmf

In [7]:
class EntropyEstimatorLz:
    """
    Kontoyiannis' LZ entropy estimate, 2013 version (centered window). Inverse
    of the avg length of the shortest non-redundant substring. If non-redundant
    substrings are short, the text is highly entropic. window==None for
    expanding window, in which case
    len(msg) % 2 == 0
    If the end of msg is more relevant, try estimate_entropy(msg[::-1])
    """
    @classmethod
    def estimate_entropy(cls, *args, **kwargs):
        return cls.konto(*args, **kwargs)['h']

    @classmethod
    def konto(cls, msg, window=None):
        """
        :param msg:
        :param window:
        :return:
        """
        out = {'num': 0, 'sum': 0, 'sub_str': []}
        if not isinstance(msg, str):
            msg = ''.join(map(str, msg))

        if window is None:
            points = range(1, len(msg) // 2 + 1)

        else:
            window = min(window, len(msg) // 2)
            points = range(window, len(msg) - window + 1)

        for i in points:
            if window is None:
                l, msg_ = cls.match_length(msg, i, i)
                out['sum'] += math.log2(i + 1) / l

            else:
                l, msg_ = cls.match_length(msg, i, window)
                out['sum'] += math.log2(window + 1) / l

            out['sub_str'].append(msg_)
            out['num'] += 1

        out['h'] = (out['sum'] / out['num']) / math.log(2)
        out['r'] = 1 - out['h'] / math.log2(len(msg))  # redundancy, 0 <= r <= 1

        return out

    @staticmethod
    def match_length(msg, i, n):
        """
        Maximum matched length + 1, with overlap.
        i >= n & len(msg) >= i + n
        :param msg:
        :param i:
        :param n:
        :return:
        """
        sub_str = ''
        for l in range(n):
            msg1 = msg[i:i + l + 1]

            for j in range(i - n, i):
                msg0 = msg[j:j + l + 1]

                if msg1 == msg0:
                    sub_str = msg1
                    break  # search for higher l.

        return len(sub_str) + 1, sub_str  # matched length + 1


if __name__ == '__main__':
    # Messages produces entropies of 0.97 and 0.84 as highlighted in
    # "Advances in Financial Machine Learning" section 18.4
    for m in ('11100001', '01100001'):
        h = EntropyEstimatorLz.estimate_entropy(m) * math.log(2)
print('message: %s, entropy: %.2f' % (m, h))

message: 01100001, entropy: 0.84


In [8]:
df = get_ohlcv_poloniex(pair='USDT_BTC', start=0, end=9999999999, period=900)
df['ret'] = df['quoteVolume']



In [34]:
df2 = df

In [36]:
df2.drop(df.index, inplace=True)

In [41]:
#df = get_ohlcv_poloniex(pair='USDT_BTC', start=0, end=9999999999, period=300)
df['sumVolume'] = 0
sumVolume = 0
volumeBars = df2
rw = 0
for i, row in df.iterrows():
    rw = rw+1
    sumVolume += df.loc[i,'quoteVolume']
    df.loc[i,'sumVolume'] = sumVolume
    if sumVolume > 100:
        #volumeBars.loc[df.index[i]] = df.iloc[i]
        volumeBars.append(df.loc[i])
        sumVolume = sumVolume - 100
        #i = i+1
        
    #df.loc[i,'sumVolume'] += df.loc[i,'quoteVolume'] + df.loc[i-1,'sumVolume']
    #if df.loc[i,'volume'] % 100 == 0:
       # volumeBars.append(df.iloc[i])

In [42]:
print(volumeBars)

Empty DataFrame
Columns: [close, date, high, low, open, quoteVolume, volume, weightedAverage, ret, sumVolume]
Index: []


In [None]:
df = df[df['ret'] != 0]
df = df[df['ret'] != np.inf]
df = df[df['ret'].notnull()]
df['binary'] = df.apply(lambda row: int(row['ret']*(row['ret']-1) > 0), axis = 1)

In [None]:
partitions = segment(df['binary'])
num_segments = len(df['binary'])//1000
print(num_segments)

In [19]:
print("Plug in entropies")
# for part in partitions:
#     print(part)
#     print(str(plugIn(part, 10)[0]))
results = pd.DataFrame()
results['plug-in'] = [plugIn(part, 10)[0] for part in partitions]

Plug in entropies


NameError: name 'partitions' is not defined

In [None]:
print("Kontoyiannis’ method entropies using window size of 100")
partitions = segment(df['binary'])
# for part in partitions:
#     print(part)
#     print("binary: " + str(EntropyEstimatorLz.estimate_entropy(part, window=100)))
results['kontoyiannis'] = [EntropyEstimatorLz.estimate_entropy(part, window=100) for part in partitions]

In [None]:
print(results['kontoyiannis'].corr(results['plug-in']))