In [1]:
import utils
import scipy.stats as ss
import numpy as np
import pandas as pd

In [2]:
def calc_probs(model, time=None, tickers='all', stats_data=None, \
               auto_update_tolerances=False, *args, **kwargs):
    """Plot and report, per ticker, the probability of reaching each tolerance.

    For every ticker the z-values of the lower and upper tolerance are
    computed for each hourly horizon (see ``_create_z_values``), converted to
    tail probabilities of the standard normal distribution and plotted
    against the horizon. A polynomial of degree 5 is fitted to each curve and
    evaluated at the number of hours between now and the last index entry of
    ``model.data``; these values are printed via ``utils._print_issue``.

    Parameters
    ----------
    model : object
        Must expose ``tickers``, ``break_values[ticker]`` and ``data`` (whose
        index entries provide ``to_datetime64()``).
    time : any, optional
        Not used by this function.
    tickers : str or sequence, optional
        ``'all'`` selects ``model.tickers``; anything else is passed through
        ``utils.check_ticker_input``.
    stats_data : optional
        Forwarded to ``_create_z_values``.
    auto_update_tolerances : bool, optional
        Forwarded to ``_create_z_values``.
    **kwargs
        ``timezone`` and ``start`` are read (default ``None``) and forwarded
        to ``_create_z_values``.

    Returns
    -------
    None
        Results are shown as matplotlib figures and printed messages.
    """
    import matplotlib.pyplot as plt

    # Guard with isinstance so array-like ticker input is not compared
    # element-wise against the string.
    if isinstance(tickers, str) and tickers == 'all':
        tickers = model.tickers
    else:
        tickers = utils.check_ticker_input(tickers_input=tickers, \
                                           tickers_avail=model.tickers)
    timezone = kwargs.get('timezone')
    start = kwargs.get('start')

    # The hour grid is the same for every ticker, so build it once.
    freq_range, _ = _create_freq()
    poly_deg = 5

    for ticker in tickers:
        utils._print_issue(None, '=' * 80)
        utils._print_issue('INFO', 'Current ticker: {}'.format(ticker))
        z_values, tols, _ = _create_z_values(model=model, ticker=ticker,
                                             stats_data=stats_data, timezone=timezone,
                                             start=start,
                                             auto_update_tolerances=auto_update_tolerances)

        # Hours from now until the last timestamp in model.data.
        # NOTE(review): both sides are reduced to timezone-less datetime64
        # values; confirm they refer to the same clock.
        delta_t = model.data.index[-1].to_datetime64() - pd.Timestamp.now().to_datetime64()
        # total_seconds() keeps whole days and the sign; the `.seconds`
        # attribute would only return the 0..86399 seconds component.
        delta_t = pd.Timedelta(delta_t).total_seconds() / 3600

        arg = np.argsort(tols)
        value_arg = np.argsort(model.break_values[ticker])

        # Tail probability in percent: cdf(z) for z <= 0, 1 - cdf(z) for z > 0.
        probs = ss.norm.cdf(z_values) * 100
        upper_tail = z_values > 0
        probs[upper_tail] = (1 - ss.norm.cdf(z_values[upper_tail])) * 100

        prob_min, prob_max = np.min(probs), np.max(probs)
        poly_probs = np.zeros(2)
        fig, axs = plt.subplots(2, 1, figsize=(16, 9), sharex=True, sharey=True)
        for n, ax in enumerate(axs):
            # Plot against the numeric hours so the curves share one numeric
            # x-axis with the deadline marker and the text label below.
            ax.plot(freq_range, probs[n], label='Probability')
            ax.vlines(delta_t, prob_min, prob_max, label='Time to deadline')
            poly_line = np.poly1d(np.polyfit(freq_range, probs[n], poly_deg))
            ax.plot(freq_range, poly_line(freq_range), 'r',
                    label='Polyfit of deg {}'.format(poly_deg))
            title = 'Ticker: {} - Break Value: {} - Tolerance: {}'.format(
                ticker, model.break_values[ticker][value_arg[n]], tols[arg[n]])
            # Fitted probability at the current time-to-deadline.
            current_prob = poly_line(delta_t)
            ax.text(x=delta_t - .25, y=(prob_max + prob_min) * .5,
                    s='{:.2f}%'.format(current_prob), fontsize='larger')
            ax.set_title(title, fontsize='large')
            ax.legend()
            ax.grid()
            ax.yaxis.get_label().set_fontsize('larger')
            ax.xaxis.get_label().set_fontsize('larger')
            poly_probs[n] = current_prob

        # The x-axis is shared, so inverting the last axes flips both panels.
        axs[-1].invert_xaxis()
        plt.setp(axs[-1], xlabel='Time to break value [h]')
        plt.setp(axs, ylabel='Probability [%]')
        prob_between = np.abs(np.diff(poly_probs))[0]
        for n, prob in enumerate(poly_probs):
            utils._print_issue('STATS-EVAL', \
                               'Probability for tol={:.5f}: {:.2f}%'.format(tols[arg][n], prob))

        utils._print_issue('STATS-EVAL', \
                           'Probability between: {:.2f}%'.format(prob_between))
        plt.show()


In [3]:
def _create_z_values(model, ticker, stats_data=None, \
                     auto_update_tolerances=False, *args, **kwargs):
    """Compute z-values of the lower and upper tolerance for every horizon.

    Parameters
    ----------
    model : object
        Must expose ``tolerances[ticker]``; with ``auto_update_tolerances``
        it must expose ``break_values[ticker]`` instead.
    ticker : str
        Ticker symbol forwarded to the statistics/download helpers.
    stats_data : optional
        Forwarded to ``_get_price_moves_and_stats``.
    auto_update_tolerances : bool, optional
        If true, tolerances are recomputed as ``break_values - current_value``
        where the current value is the last 'Close' value downloaded via
        ``utils.download_data`` starting one day ago.
    **kwargs
        ``timezone`` and ``start`` are read (default ``None``) and forwarded
        to ``_get_price_moves_and_stats``.

    Returns
    -------
    tuple
        ``(z_values, tolerances, means)`` where ``z_values`` stacks the lower
        and upper tolerance rows, ``tolerances`` is ``[lower, upper]`` and
        ``means`` are the per-horizon means from the statistics helper.
    """
    timezone = kwargs.get('timezone')
    start = kwargs.get('start')

    # The statistics helper returns additional diagnostic entries after
    # (moves, means, stds); take only the first three so unpacking cannot
    # fail with "too many values to unpack".
    stats = _get_price_moves_and_stats(ticker=ticker,
                                       stats_data=stats_data,
                                       timezone=timezone,
                                       start=start)
    _, means, stds = stats[:3]

    if auto_update_tolerances:
        utils._print_issue('STATS-INFO', 'Auto update of tolerances!')
        current_value = utils.download_data(tickers=ticker, \
                                            start=(pd.Timestamp.today() - pd.Timedelta('1 days')), \
                                            value='Close').values[-1]
        current_tols = model.break_values[ticker] - current_value
        utils._print_issue('STATS-INFO', 'Current value: {}!'.format(current_value))
        utils._print_issue('STATS-INFO', 'New tolerances: {}!'.format(current_tols))
        sorted_tols = np.sort(current_tols)
    else:
        sorted_tols = np.sort(model.tolerances[ticker])

    # Smallest value is the lower tolerance, the next one the upper tolerance.
    tol_lower, tol_upper = sorted_tols[0], sorted_tols[1]
    z_values_lower = (tol_lower - means) / stds
    z_values_upper = (tol_upper - means) / stds
    return np.array([z_values_lower, z_values_upper]), np.array([tol_lower, tol_upper]), means


In [26]:
def _get_price_moves_and_stats(ticker, stats_data=None, \
                               timezone=None, start=None):
    """Collect price differences and their mean/std for each hourly horizon.

    For every frequency from ``_create_freq`` a date range from ``start`` to
    now is built, the price series is sampled at those timestamps, and the
    differences between consecutive samples (NaNs dropped) are stored.

    Parameters
    ----------
    ticker : str
        Ticker symbol; only used when data has to be downloaded.
    stats_data : optional
        Price data indexed by timestamp. When ``None`` it is downloaded via
        ``utils.download_data`` (60-minute interval, 'Close' values).
        NOTE(review): assumed to be a pandas Series — confirm against
        ``utils.download_data``.
    timezone : str, optional
        Timezone of the sampling grid; defaults to ``'Europe/London'``.
    start : pandas.Timestamp, optional
        Start of the sampling grid; defaults to 2019-01-01 00:00.

    Returns
    -------
    tuple
        ``(price_movements, means, stds, moves_shape, prices_shape)`` where
        ``price_movements`` maps each frequency label to its array of moves,
        ``means``/``stds`` hold one value per frequency, and the two shapes
        describe the moves and the non-NaN sampled prices of the *last*
        frequency only.
    """
    if timezone is None:
        timezone = 'Europe/London'
    if start is None:
        start = pd.Timestamp(2019, 1, 1, 0)
    if stats_data is None:
        stats_data = utils.download_data(tickers=ticker, start=start, \
                                         interval='60m', value='Close')
    freq_range, frequencies = _create_freq()
    price_movements = dict.fromkeys(frequencies)
    means = np.zeros(freq_range.shape)
    stds = np.zeros(freq_range.shape)

    # Use one common end point so every frequency covers the same period.
    range_end = pd.Timestamp.today()
    for index, freq in enumerate(frequencies):
        current_rng = pd.date_range(start=start, end=range_end, \
                                    freq=freq, tz=timezone, name='Datetime')
        # reindex() yields NaN for timestamps missing from the data instead of
        # raising KeyError, which is what the NaN filtering below relies on.
        sampled_prices = stats_data.reindex(current_rng)
        current_moves = np.diff(sampled_prices)
        current_moves = current_moves[~np.isnan(current_moves)]
        absolut_prices = sampled_prices[~np.isnan(sampled_prices)]
        means[index] = np.mean(current_moves)
        stds[index] = np.std(current_moves)
        price_movements[freq] = current_moves
    return price_movements, means, stds, np.shape(current_moves), np.shape(absolut_prices)

In [27]:
# Smoke-run the statistics helper for a single ticker with all defaults;
# price data is downloaded because stats_data is None.
ticker = 'BTC-USD'

# The bare call makes the notebook display the complete returned tuple.
_get_price_moves_and_stats(ticker, stats_data=None, timezone=None, start=None)

[*********************100%***********************]  1 of 1 completed


({'10h': array([  5.59008789,  28.23999023,  55.81005859, ...,  47.87890625,
         -49.92285156,  52.87890625]),
  '11h': array([  1.16992188,  28.86010742,  71.38989258, ..., -83.90917969,
          50.22460938, -42.41503906]),
  '12h': array([ 14.84008789,  94.70996094,  13.54003906, ..., -38.16894531,
         -16.34082031,  67.64550781]),
  '13h': array([ 2.15400391e+01,  8.88798828e+01,  2.90400391e+01,  3.90900879e+01,
         -9.55900879e+01,  1.87800293e+01, -3.88300781e+01,  7.52202148e+01,
          7.81982422e+00, -2.00200195e+01,  2.27179932e+02, -8.85998535e+01,
          2.28400879e+01,  6.25200195e+01, -3.03500977e+01, -2.93200684e+01,
          3.91601562e+01, -4.23760010e+02,  2.35498047e+01,  1.31701660e+01,
         -3.74201660e+01,  1.16101074e+01, -8.00000000e+00, -1.03330078e+02,
          2.12199707e+01,  1.31470215e+02, -3.61301270e+01, -3.34399414e+01,
          1.22700195e+01, -2.77700195e+01,  4.02099609e+01,  5.20019531e-01,
         -1.20200195e+01,  1.

In [5]:
def _create_freq():
    freq_range = np.arange(1, 25, 1)
    frequencies = [freq + 'h' for freq in freq_range.astype(str)]
    return freq_range, frequencies

In [7]:
# Display the hour grid and its frequency labels as a quick sanity check.
_create_freq()

(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24]),
 ['1h',
  '2h',
  '3h',
  '4h',
  '5h',
  '6h',
  '7h',
  '8h',
  '9h',
  '10h',
  '11h',
  '12h',
  '13h',
  '14h',
  '15h',
  '16h',
  '17h',
  '18h',
  '19h',
  '20h',
  '21h',
  '22h',
  '23h',
  '24h'])

In [16]:
# NOTE(review): leftover debugging cell. `stats_data` and `current_rng` only
# exist as locals inside _get_price_moves_and_stats, so at notebook level this
# raises NameError on a fresh kernel.
stats_data[current_rng]

NameError: name 'stats_data' is not defined