In [1]:
from ctrade import *
from plot import *
from datetime import datetime
import warnings; warnings.simplefilter('ignore')
%matplotlib inline

In [2]:
# Exchange client; later cells read `p.currency_pairs` and call `p.chart(...)` on it.
p = Poloniex()

In [3]:
# Coins whose 'BTC_<coin>' pairs are picked out of `p.currency_pairs` when `pairs` is built.
major = ['ETH', 'ETC', 'BCN', 'DASH', 'DOGE', 'LTC', 'NXT', 'REP', 'ZEC', 'ARDR', 'XRP', 'STR']

In [4]:
# Keep every USDT-quoted pair plus the BTC pair of each major coin.
# Matching is exact (not substring based) so that, for instance, 'BTC_STR'
# does not also select a longer symbol such as 'BTC_STRAT'.
major_btc_pairs = {'BTC_' + coin for coin in major}
pairs = [pair for pair in p.currency_pairs if pair.startswith('USDT_')]
pairs += [pair for pair in p.currency_pairs if pair in major_btc_pairs]

In [5]:
def inf(x):
    """Return True where ``x`` is +/- infinity (thin wrapper around ``np.isinf``)."""
    return np.isinf(x)


def build_dataset(currency_pairs, days_back, period):
    """Download charts for several pairs and assemble change features.

    For every pair the chart is fetched with ``p.chart(pair, days_back, period)``
    and four columns are produced:

    * ``<pair>``            -- the close price
    * ``<pair>_volume``     -- the volume
    * ``<pair>_PC``         -- ``(close - previous close) / close``
    * ``<pair>_volume_PC``  -- the same ratio for volume, with NaN and
      +/-inf (zero-volume candles) replaced by 0

    NOTE(review): both ratios divide by the *current* value rather than the
    previous one, so they are not conventional percent changes. This is kept
    as in the original implementation -- confirm it is intended.

    Parameters
    ----------
    currency_pairs : sequence of str
        Pair names understood by ``p.chart``; must not be empty.
    days_back, period
        Forwarded unchanged to ``p.chart``.

    Returns
    -------
    (features, raw) : tuple of DataFrame
        ``features`` holds the ``*_PC``, ``*_volume_PC`` and ``*_volume``
        columns, restricted to rows where the ``_PC`` column of the LAST pair
        in ``currency_pairs`` is not null. ``raw`` holds the close and volume
        columns for every pair and is not row-filtered.

    Raises
    ------
    ValueError
        If ``currency_pairs`` is empty.
    """
    currency_pairs = list(currency_pairs)
    if not currency_pairs:
        raise ValueError('currency_pairs must contain at least one pair')

    frames = []
    for pair in currency_pairs:
        print(pair)
        chart = p.chart(pair, days_back, period).df
        close, volume = chart['close'], chart['volume']

        price_pc = (close - close.shift(1)) / close
        volume_pc = ((volume - volume.shift(1)) / volume).fillna(0)
        # A zero-volume candle yields +/-inf; neutralise it like a missing value.
        volume_pc = volume_pc.where(~np.isinf(volume_pc), 0)

        frames.append(pd.DataFrame(
            {
                pair + '_PC': price_pc,
                pair + '_volume_PC': volume_pc,
                pair: close,
                pair + '_volume': volume,
            },
            columns=[pair + '_PC', pair + '_volume_PC', pair, pair + '_volume'],
        ))

    # One column-wise concat instead of growing the frame inside the loop.
    out = pd.concat(frames, axis=1)

    df = out[currency_pairs + [pair + '_volume' for pair in currency_pairs]]

    # The original code filtered on the leaked loop variable, i.e. the last pair;
    # that choice is kept but made explicit.
    last_pair = currency_pairs[-1]
    out = out[out[last_pair + '_PC'].notnull()]
    return out[[col for col in out.columns if col not in currency_pairs]], df

In [255]:
# Fetch the BTC_LTC chart (arguments: pair, days back, period) and add
# 'value', the midpoint between each candle's open and close.
df = p.chart('BTC_LTC', 60, '15m').df
df = df.assign(value=0.5 * (df['open'] + df['close']))

This dictionary sets the parameters of the indicators I am using to build my model.
Both daily and weekly pivot levels are used.

In [256]:
# Indicator configuration consumed by Model.set_indicators.
# key   -> label under which the resulting callable is stored
# value -> (name of the indicator function, looked up via globals(),
#           keyword arguments for it, or None for no extra keyword arguments)
indicators = {
    'macd': ('macd', {'slow_window': 50, 'fast_window': 15}),
    'rsi': ('rsi', {'window': 15}),
    'fstoc': ('fstoc', {'k_smooth': 8, 'd_smooth': 3}),
    'atr': ('atr', None),
    'bbands': ('bbands', {'mode':'spread'}),
    # The same 'pivot' function is registered twice, once per mode.
    'pivot_daily': ('pivot', {'mode': 'day'}),
    'pivot_weekly': ('pivot', {'mode': 'week'}),
    'consecutive_periods': ('consecutive_periods', {'add_periods': ['1h', '4h']}),
}

In [257]:
class Model(object):
    """Turns an indicator configuration into features and regression targets.

    Parameters
    ----------
    indicators : dict
        Maps a label to ``(function_name, kwargs_or_None)``. ``function_name``
        is resolved through ``globals()`` when ``set_indicators`` runs.
    currency : str
        Passed to ``with_series(...)`` for the indicators listed in
        ``SERIES_INDICATORS``.
    """

    # Indicators that get wrapped as with_series(self.currency)(...); every
    # other indicator is used as a plain indicator_partial.
    SERIES_INDICATORS = ('macd', 'rsi', 'bbands', 'pivot', 'consecutive_periods')

    def __init__(self, indicators, currency):
        self.indicators = indicators
        self.currency = currency
        self.indicator_func = {}

    def set_indicators(self):
        """(Re)build ``self.indicator_func`` from ``self.indicators``.

        The mapping is rebuilt from scratch so that entries removed from the
        configuration do not survive a second call.
        """
        built = {}
        for label, spec in self.indicators.items():
            func_name, kwargs = spec[0], spec[1]
            partial = indicator_partial(globals()[func_name], **(kwargs or {}))
            if func_name in self.SERIES_INDICATORS:
                partial = with_series(self.currency)(partial)
            built[label] = partial
        self.indicator_func = built

    def get_data(self, df):
        """Apply every configured indicator to ``df`` and join the results column-wise.

        Raises
        ------
        ValueError
            If no indicator has been configured (``set_indicators`` not called
            or the configuration is empty).
        """
        if not self.indicator_func:
            raise ValueError('no indicators configured; call set_indicators() first')
        # A single concat replaces the pairwise reduce(), which also relied on
        # `reduce` being a builtin (it is not on Python 3).
        return pd.concat([func(df) for func in self.indicator_func.values()], axis=1)

    def get_target(self, Y, span=(2, 5, 10, 25, 50, 100)):
        """Return forward differences ``Y[t + s] - Y[t]`` for every ``s`` in ``span``.

        Parameters
        ----------
        Y : pandas.Series
        span : iterable of int
            Positive look-ahead distances, in rows.

        Returns
        -------
        pandas.DataFrame
            Indexed like ``Y`` with one column per span (named by the span).
            The last ``s`` rows of column ``s`` are NaN because no future value
            exists for them.

        Raises
        ------
        ValueError
            If a span is not strictly positive.
        """
        targets = pd.DataFrame(index=Y.index)
        for s in span:
            if s <= 0:
                raise ValueError('span values must be positive, got {!r}'.format(s))
            # .values avoids any index re-alignment on assignment.
            targets[s] = (Y.shift(-s) - Y).values
        return targets

In [258]:
# 'value' is the `currency` argument that Model hands to with_series(...)
# for the series-based indicators.
m = Model(indicators, 'value')
# Resolve the configured indicator names into callables (stored in m.indicator_func).
m.set_indicators()

This is the dataset the model is built on: every indicator is calculated at each timeframe.

In [None]:
# Join every indicator output column-wise, then drop rows containing any NaN.
X = m.get_data(df).dropna()
# Display the 20 most recent rows.
X.tail(20)

Calculating weekly pivot levels
Calculating daily pivot levels


Here I am calculating the price difference of the crypto pair at different periods to build the model. You can see here I am using 2, 5, 10, 25, 50 and 100 periods.

In [None]:
# Forward differences of 'value' for each default span; dropna() removes the
# rows at the end of the series for which at least one span has no future value.
Y = m.get_target(df['value']).dropna()
# Display the 20 most recent rows that still have every target.
Y.tail(20)

In [None]:
# Hold out the last 1000 rows of each frame; everything before is training data.
# NOTE(review): the split is positional and X and Y were dropna()'d separately,
# so the two test frames need not cover the same timestamps -- confirm.
X_train, X_test = X.iloc[:-1000], X.iloc[-1000:]
Y_train, Y_test = Y.iloc[:-1000], Y.iloc[-1000:]


In [None]:
# Keep only the training targets that have a matching feature row ...
Y = Y_train[Y_train.index.isin(X_train.index)]

# ... and only the feature rows that have a target. Feature rows without a
# target are set aside in `save_last`.
mask = X_train.index.isin(Y.index)
X, save_last = X_train[mask], X_train[~mask]

In [None]:
# Row counts of the aligned features, the aligned targets and the set-aside rows.
len(X), len(Y), len(save_last)

Here I am choosing to build the first model using the 5-period value difference (i.e. the difference between the price at each timeframe and the price 5 periods later, which is 1h15m on 15-minute data).

In [None]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

from model_utils import *
from preprocessing import *

# Use a regressor when the 5-period target takes more than two distinct values,
# otherwise a classifier. The RandomForest estimators that used to be built
# here were overwritten on the very next line, so only the gradient-boosting
# models are created.
if len(Y[5].unique())>2:
    est = GradientBoostingRegressor(n_estimators=100, min_samples_leaf=10, max_depth=4)
else:
    est = GradientBoostingClassifier(n_estimators=20, min_samples_leaf=10, max_depth=4)

# The target is scaled by 1000 before fitting. The first element of the result
# is used below as a frame with 'true' and 'pred' columns, the second as the
# estimator passed on to plot_feature_importances and StackModels.
res = do_easy_crossval(est, X, Y[5]*1000, folds=10, refit=True)
pred, est = res[0], res[1]

In [None]:
# Distribution of the (scaled) true 5-period price changes.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 6))
pred['true'].plot.hist(ax=ax)
ax.set_title('True price changes ')

In [None]:
# True versus predicted change, one point per cross-validated prediction.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
ax.scatter(pred['true'], pred['pred'])
ax.set_xlabel('true difference')
ax.set_ylabel('predicted difference')

# Pearson correlation between truth and prediction (off-diagonal matrix entry).
np.corrcoef(pred['true'], pred['pred'])[0, 1]

The correlation between the change in the cryptocurrency value and the predicted one is quite good, as you can see from the scatter plot. Note that the predicted values are always out of sample: the model was not built on the points it is predicting.

Below you can see the indicators sorted by their contribution to the overall prediction.

In [None]:
# Feature importances of the refit estimator, labelled with the feature names.
# `top=20` presumably limits the plot to the 20 largest -- confirm in plot_feature_importances.
plot_feature_importances(est, X.columns, top=20)

Ideally one would use a combination of the predicted differences at different timeframes to base the buy-sell strategy.

I am going to add to this the following:
    * the same indicators for another cryptocurrency pair, such as USDT_BTC, as I think there is signal there too
    * Wrap more than one model together and calibrate
    * Create a probabilistic model to output a probability
    

In [None]:
class StackModels(object):
    """Fits one cross-validated estimator per target column and stacks the predictions.

    Parameters
    ----------
    estimator
        Template estimator handed to ``do_easy_crossval`` for every target column.
    target_scale : number, optional
        Factor applied to every target column before fitting (default 1000,
        the value previously hard-coded). Predictions are on this scaled scale.

    Attributes
    ----------
    fitted_estimators : list
        Deep copies of the refit estimator, one per target column.
    oob_predictions : list of DataFrame
        Cross-validation output per target column, sorted by index, with
        ``_<column>`` appended to every column name.
    labels : list of str
        ``'pred_<column>'`` per target column; used as column names by ``predict``.
    """

    def __init__(self, estimator, target_scale=1000):
        self.estimator = estimator
        self.target_scale = target_scale
        self.fitted_estimators = []
        self.oob_predictions = []
        self.labels = []

    def fit(self, X, Y):
        """Cross-validate and refit the estimator once for each column of ``Y``.

        State from a previous ``fit`` is replaced rather than appended to, so
        fitting twice no longer duplicates estimators and prediction columns.
        """
        # Local import: `copy` is not imported explicitly anywhere in the notebook.
        import copy

        fitted, oob, labels = [], [], []
        for col in Y.columns:
            res = do_easy_crossval(self.estimator, X, Y[col]*self.target_scale,
                                   folds=10, refit=True, plot=False)
            # Deep copy so that later refits cannot alias this estimator.
            fitted.append(copy.deepcopy(res[1]))
            renamed = {k: '{}_{}'.format(k, col) for k in res[0].columns}
            oob.append(res[0].sort_index().rename(columns=renamed))
            labels.append('pred_{}'.format(col))

        # Publish only after every column succeeded.
        self.fitted_estimators = fitted
        self.oob_predictions = oob
        self.labels = labels

    def predict(self, X):
        """Return a frame indexed like ``X`` with one ``pred_<column>`` column per fitted estimator."""
        predictions = [
            pd.DataFrame(est.predict(X), index=X.index, columns=[label])
            for est, label in zip(self.fitted_estimators, self.labels)
        ]
        return pd.concat(predictions, axis=1)

    def stack_predictions(self):
        """Inner-join the ``pred`` columns of every cross-validation frame.

        Only columns whose name contains ``'pred'`` are kept; rows with any
        NaN are dropped from the result.
        """
        def pred_columns(frame):
            return frame[[c for c in frame.columns if 'pred' in c]]

        out = pred_columns(self.oob_predictions[0])
        for frame in self.oob_predictions[1:]:
            out = out.join(pred_columns(frame), how='inner')

        return out.dropna()

In [None]:
# Use the estimator refit on the 5-period target as the template for every span.
stack = StackModels(est)

# One cross-validated fit per column of Y.
stack.fit(X, Y)

In [None]:
# Sanity check: the stored estimators are deep copies, so this is expected to show False.
stack.fitted_estimators[0] is stack.fitted_estimators[1]

In [None]:
class Signals(object):
    """Converts per-column predictions into a single -1 / 0 / +1 trading signal.

    Every column gets a companion ``<column>_tag`` in {-1, 0, 1} depending on
    whether the value lies below, within or above that column's stored bounds.
    The tags are summed into ``main``; ``signal`` is +1 when ``main`` exceeds
    ``threshold``, -1 when it is below ``-threshold`` and 0 otherwise.

    Parameters
    ----------
    threshold : number, optional
        Required magnitude of the tag sum (default 3, the value previously
        hard-coded in both ``fit`` and ``predict``).

    Attributes
    ----------
    quantiles : dict
        Column name -> bounds ``Q`` returned by ``tag_ranges``; ``Q[0]`` and
        ``Q[1]`` are used as lower and upper bound by ``apply_tag``.
    """

    def __init__(self, threshold=3):

        self.threshold = threshold
        self.quantiles = {}

    def _add_signal(self, X):
        """Add the ``main`` and ``signal`` columns to ``X`` in place and return it."""
        tag_columns = [c for c in X.columns if 'tag' in c]
        X['main'] = X[tag_columns].sum(axis=1)
        X['signal'] = 0
        X.loc[X['main'] > self.threshold, 'signal'] = 1
        X.loc[X['main'] < -self.threshold, 'signal'] = -1
        return X

    def fit(self, X):
        """Learn the bounds of every column of ``X`` and return the tagged frame.

        Works on a copy, so the caller's frame is left untouched; use the
        return value. Bounds from a previous ``fit`` are discarded.
        """
        X = X.copy()
        self.quantiles = {}

        # Snapshot the column list: tagging adds columns while we iterate.
        for col in list(X.columns):
            # As before, the frame returned by tag_ranges is ignored and the
            # `<col>_tag` column is expected to be added to X in place --
            # TODO confirm against tag_ranges.
            _, Q = tag_ranges(X, col, quantiles=(0.3, 0.7))
            self.quantiles[col] = Q

        return self._add_signal(X)

    @staticmethod
    def apply_tag(df, column, Q):
        """Add ``<column>_tag`` to ``df`` (in place) and return ``df``.

        The tag is -1 where the value is below ``Q[0]``, +1 where it is above
        ``Q[1]`` and 0 otherwise.
        """
        tag = column + '_tag'
        df[tag] = 0
        df.loc[df[column] < Q[0], tag] = -1
        df.loc[df[column] > Q[1], tag] = 1

        return df

    def predict(self, X):
        """Tag ``X`` with the bounds learned by ``fit`` and return the tagged copy.

        Only columns seen during ``fit`` are tagged, so feeding the output of
        a previous ``predict`` back in works. The input frame is not modified.

        Raises
        ------
        ValueError
            If ``fit`` has not been called.
        KeyError
            If ``X`` lacks a column that was present during ``fit``.
        """
        if not self.quantiles:
            raise ValueError('Signals.predict() called before fit()')

        missing = [c for c in self.quantiles if c not in X.columns]
        if missing:
            raise KeyError('columns seen during fit are missing: {}'.format(missing))

        X = X.copy()
        for col in [c for c in X.columns if c in self.quantiles]:
            X = self.apply_tag(X, col, self.quantiles[col])

        return self._add_signal(X)

In [None]:
# Cross-validation predictions for every span, joined on their common index.
U = stack.stack_predictions()
signals = Signals()
# Learn per-column bounds (tag_ranges is called with quantiles=(0.3, 0.7))
# and add the tag, 'main' and 'signal' columns.
U = signals.fit(U)

In [None]:
# Look up 'value' for exactly the rows that carry a signal (inner join on the index).
price = U.join(df[['value']], how='inner')['value']
# Three results come back; the cells below use `b` (performance series) and
# `c` (trade list), `a` is not used in the visible cells.
a, b, c = simulate_buy_sell(U['signal'].tolist(), price.tolist(), U.index.tolist())

In [None]:
# Replace the first entry of `b` with 0 and add 1 to every entry, giving
# multiplicative factors -- presumably `b` holds per-step returns; confirm in
# simulate_buy_sell.
res = pd.DataFrame(np.array([0] + b[1:])+1, index=range(len(b)), columns=['perf'])
# Compound the factors into a cumulative P&L curve.
res['pnl'] = res['perf'].cumprod()
plt.plot(range(len(res)), res['pnl'])

In [None]:
# Every entry of `c` except the last becomes a (trade, open, close) row.
trades = pd.DataFrame(c[:-1], columns=['trade', 'open', 'close'])
# Split by trade code -- presumably 'S' = sell and 'B' = buy, matching the frame names.
trades_sell = trades[trades['trade'] == 'S']
trades_bought = trades[trades['trade'] == 'B']

In [None]:
# Price curve with the simulated trades shaded on top: green for 'B' trades,
# red for 'S' trades.
fig = plt.figure(figsize=(16,6))
ax = fig.add_subplot(211)
ax = plot_fts(df, 'value', ax=ax, plot_args={'color': 'black'})

# The spans are drawn on a secondary axis sharing the x axis.
ax = ax.twinx()

# NOTE(review): axvspan interprets ymin/ymax as axes fractions; this relies on
# the new twin axis reporting y-limits of (0, 1) -- confirm.
ymin, ymax = ax.get_ylim()

for side_trades, colour in ((trades_bought, 'g'), (trades_sell, 'r')):
    # Positions of the trade open/close timestamps within df's index.
    opened = [df.index.get_loc(stamp) for stamp in side_trades['open']]
    closed = [df.index.get_loc(stamp) for stamp in side_trades['close']]
    for start, stop in zip(opened, closed):
        ax.axvspan(start, stop, ymin, ymax, facecolor=colour, alpha=.3)

In [None]:
# Predict every span for the held-out feature rows.
test_pred = stack.predict(X_test)

In [None]:
# Tag the held-out predictions with the bounds learned by signals.fit and add 'signal'.
test_pred = signals.predict(test_pred)

In [None]:
# Look up 'value' for the held-out rows (inner join on the index).
price = test_pred.join(df[['value']], how='inner')['value']
# Same simulation as for the training predictions; `b` and `c` are used below.
a, b, c = simulate_buy_sell(test_pred['signal'].tolist(), price.tolist(), test_pred.index.tolist())

In [None]:
# Replace the first entry of `b` with 0 and add 1 to every entry, giving
# multiplicative factors -- presumably `b` holds per-step returns; confirm in
# simulate_buy_sell.
res = pd.DataFrame(np.array([0] + b[1:])+1, index=range(len(b)), columns=['perf'])
# Compound the factors into the held-out cumulative P&L curve.
res['pnl'] = res['perf'].cumprod()
plt.plot(range(len(res)), res['pnl'])

In [None]:
# Every entry of `c` except the last becomes a (trade, open, close) row.
trades = pd.DataFrame(c[:-1], columns=['trade', 'open', 'close'])
# Split by trade code -- presumably 'S' = sell and 'B' = buy, matching the frame names.
trades_sell = trades[trades['trade'] == 'S']
trades_bought = trades[trades['trade'] == 'B']

In [None]:
# plot_fts is given a frame, so wrap the price series in a one-column frame.
# The guard keeps the cell re-runnable: once `price` is a frame, calling
# .to_frame on it again would fail.
if isinstance(price, pd.Series):
    price = price.to_frame('value')

# Held-out price curve with the simulated trades shaded on top: green for 'B'
# trades, red for 'S' trades.
fig = plt.figure(figsize=(16,6))
ax = fig.add_subplot(211)
ax = plot_fts(price, 'value', ax=ax, plot_args={'color': 'black'})

# The spans are drawn on a secondary axis sharing the x axis.
ax = ax.twinx()

# NOTE(review): axvspan interprets ymin/ymax as axes fractions; this relies on
# the new twin axis reporting y-limits of (0, 1) -- confirm.
ymin, ymax = ax.get_ylim()

for side_trades, colour in ((trades_bought, 'g'), (trades_sell, 'r')):
    # Positions of the trade open/close timestamps within the price index.
    opened = [price.index.get_loc(stamp) for stamp in side_trades['open']]
    closed = [price.index.get_loc(stamp) for stamp in side_trades['close']]
    for start, stop in zip(opened, closed):
        ax.axvspan(start, stop, ymin, ymax, facecolor=colour, alpha=.3)