In [43]:
import numpy as np
import pandas as pd
import json

from tsflex.features import MultipleFeatureDescriptors, FeatureCollection
from tsflex.features.utils import make_robust
import scipy.stats as ss

from catboost import CatBoostClassifier
import pickle

In [None]:
# Number of order-book levels kept per side: it sizes the per-level
# bid_*/asks_* column families and the fallback zero-filled book.
N = 6

def slope(x):
    """Relative change from the first to the last element of ``x``.

    Returns ``(x[-1] - x[0]) / x[0]``. When the first element is falsy
    (e.g. zero) the result is 0, which avoids dividing by zero.
    """
    first = x[0]
    if not first:
        return 0
    return (x[-1] - first) / first
def abs_diff_mean(x):
    """Mean absolute difference between consecutive elements of ``x``.

    Returns 0 when ``x`` holds fewer than two elements.
    """
    if len(x) <= 1:
        return 0
    steps = x[1:] - x[:-1]
    return np.mean(np.abs(steps))
def diff_std(x):
    """Standard deviation of the differences between consecutive elements.

    Returns 0 when ``x`` holds fewer than two elements.
    """
    if len(x) <= 1:
        return 0
    steps = x[1:] - x[:-1]
    return np.std(steps)

# Aggregations evaluated on every rolling window. Each callable is wrapped
# with tsflex's make_robust before being handed to the feature descriptors.
funcs = [
    make_robust(fn)
    for fn in (
        np.min,
        np.max,
        np.argmin,
        np.argmax,
        np.std,
        slope,
        abs_diff_mean,
        ss.skew,
        diff_std,
    )
]

# Order-book (depth) derived series, in this order: volume-like columns,
# price-like columns, then the two book-length columns.
dpth_feats = [
    'takevol', 'bids_init_vol', 'bids_init_std_vol', 'asks_init_vol', 'asks_init_std_vol',
    *(f'bid_vol_{i}' for i in range(N)),
    *(f'asks_vol_{i}' for i in range(N)),
    'bids_init_sum', 'bids_init_std_price', 'asks_init_sum', 'asks_init_std_price',
    *(f'bid_prc_{i}' for i in range(N)),
    *(f'asks_prc_{i}' for i in range(N)),
    'bids_init_ln', 'asks_init_ln',
]

# Every series the rolling-window features are computed on: the candle-derived
# series first, followed by the depth series above.
use_feats = [
    'delt_mean', 'max_feat', 'min_feat', 'inout', 'delt_count', 'delt_vol',
    *dpth_feats,
]

In [59]:
def getFeats(dframe):
    """Compute rolling-window features for every series in ``use_feats``.

    Each function in ``funcs`` is applied to each series for windows of
    2, 5, 10 and 30 rows with stride 1, using tsflex.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Must contain every column named in ``use_feats``. Its index must be
        unnamed so that ``reset_index()`` produces an ``'index'`` column
        (assumption derived from the ``sort_values(by=['index'])`` call).

    Returns
    -------
    pandas.DataFrame
        The tsflex feature frame, preceded by ``min(wins) - 1`` padding rows,
        with every NaN replaced by 0.
    """
    wins = [2, 5, 10, 30]

    data_feats = MultipleFeatureDescriptors(
        functions=funcs,
        series_names=use_feats,
        windows=wins,
        strides=[1]
    )

    feat_collection = FeatureCollection(feature_descriptors=[data_feats])

    # Rows are handed to tsflex in descending order of the original index,
    # re-indexed from 0 so the windowing runs over a fresh positional index.
    reversed_frame = (
        dframe.reset_index()
        .sort_values(by=['index'], ascending=False)
        .reset_index()
    )

    df_feats = feat_collection.calculate(
        data=[reversed_frame],
        include_final_window=True,
        approve_sparsity=True,
        window_idx="begin",
        return_df=True,
        show_progress=False,
    )

    # Padding rows placed ahead of the computed features. ``np.nan`` is used
    # because the ``np.NaN`` alias was removed in NumPy 2.0; building the frame
    # with explicit columns also works when no padding rows are needed.
    buf_df = pd.DataFrame(
        np.nan, index=range(min(wins) - 1), columns=df_feats.columns
    )
    df_feats = pd.concat([buf_df, df_feats]).fillna(0)

    return df_feats


def _book_side(snapshot, side):
    """Return the latest ``side`` ('bids' or 'asks') levels as a float frame sorted by qty.

    Falls back to ``N`` zero-filled levels when the snapshot has no depth
    events or when the latest depth event lists no levels for that side.
    """
    depth_events = snapshot['depthEvents']
    levels = depth_events[-1][side] if len(depth_events) > 0 else []
    if len(levels) > 0:
        book = pd.DataFrame(levels)
    else:
        book = pd.DataFrame({"price": [0] * N, "qty": [0] * N})
    return book.astype(float).sort_values(by=['qty'])


def _top_levels(book, column):
    """Return exactly ``N`` values of ``column`` taken from the tail of ``book``.

    Books with fewer than ``N`` levels are left-padded with 0.0 so that every
    row has the same length and per-level columns stay aligned.
    """
    values = list(book.tail(N)[column].values)
    return [0.0] * (N - len(values)) + values


def getX(file):
    """Build the model input vector from a sequence of market snapshots.

    Parameters
    ----------
    file : sequence of dict
        Each item must provide ``'candlestickEvents'`` (non-empty list; the
        last entry is used) and ``'depthEvents'`` (list, possibly empty; the
        last entry's ``'bids'``/``'asks'`` are used). Depth levels are
        expected to be records with ``price`` and ``qty`` fields.

    Returns
    -------
    pandas.Series
        The last row of ``getFeats`` computed on the per-snapshot frame.

    Raises
    ------
    ValueError
        If ``file`` is empty.
    """
    if len(file) == 0:
        raise ValueError("getX requires at least one snapshot")

    rows = []
    for snapshot in file:
        # Always read from the argument; the previous code read a notebook
        # global (``json_data``) here, which is undefined on a fresh kernel.
        kline = snapshot['candlestickEvents'][-1]
        bids = _book_side(snapshot, 'bids')
        asks = _book_side(snapshot, 'asks')

        candle = [
            int(kline['openTime']['$numberLong']),
            int(kline['closeTime']['$numberLong']),
            float(kline['high']),
            float(kline['low']),
            float(kline['open']),
            float(kline['close']),
            float(kline['volume']),
            float(kline['quoteAssetVolume']),
            int(kline['numberOfTrades']),
            float(kline['takerBuyBaseAssetVolume']),
            float(kline['takerBuyQuoteAssetVolume']),
        ]
        book_stats = [
            len(bids),
            len(asks),
            bids.price.sum(),
            bids.qty.sum(),
            np.std(bids.price.values),
            np.std(bids.qty.values),
            asks.price.sum(),
            asks.qty.sum(),
            np.std(asks.price.values),
            np.std(asks.qty.values),
        ]
        # NOTE(review): ask prices now come from the ask book. The previous
        # code filled asks_prc_* with bid prices; if model.pkl was trained on
        # features produced that way, retrain or re-validate it.
        per_level = (
            _top_levels(bids, 'price')
            + _top_levels(bids, 'qty')
            + _top_levels(asks, 'price')
            + _top_levels(asks, 'qty')
        )
        rows.append(candle + book_stats + per_level)

    bid_prc_cols = [f'bid_prc_{i}' for i in range(N)]
    bid_vol_cols = [f'bid_vol_{i}' for i in range(N)]
    asks_prc_cols = [f'asks_prc_{i}' for i in range(N)]
    asks_vol_cols = [f'asks_vol_{i}' for i in range(N)]

    cols = [
        'time', 'close_time', 'maxPrice', 'minPrice', 'inPrice', 'outPrice',
        'vol', 'qvol', 'count', 'takevol', 'takevolq',
        'bids_init_ln', 'asks_init_ln', 'bids_init_sum', 'bids_init_vol',
        'bids_init_std_price', 'bids_init_std_vol', 'asks_init_sum',
        'asks_init_vol', 'asks_init_std_price', 'asks_init_std_vol',
    ] + bid_prc_cols + bid_vol_cols + asks_prc_cols + asks_vol_cols

    data = pd.DataFrame(rows, columns=cols)
    data['mean_price'] = data['qvol'] / data['vol']

    # Volume-like columns are expressed relative to the candle volume.
    volume_like = [
        'takevol', 'bids_init_vol', 'bids_init_std_vol',
        'asks_init_vol', 'asks_init_std_vol',
    ] + bid_vol_cols + asks_vol_cols
    for col in volume_like:
        data[col] = data[col] / data['vol']

    # Price-like columns are expressed relative to the candle mean price.
    price_like = [
        'bids_init_sum', 'bids_init_std_price',
        'asks_init_sum', 'asks_init_std_price',
    ] + bid_prc_cols + asks_prc_cols
    for col in price_like:
        data[col] = data[col] / data['mean_price']

    # Book lengths are expressed relative to the candle's trade count.
    for col in ['bids_init_ln', 'asks_init_ln']:
        data[col] = data[col] / data['count']

    # Row-over-row relative changes; the first row has no predecessor, so 0.
    data['delt_mean'] = ((data['mean_price'] - data['mean_price'].shift(1)) / data['mean_price']).fillna(0)
    data['max_feat'] = (data['maxPrice'] - data['mean_price']) / data['mean_price']
    data['min_feat'] = (data['minPrice'] - data['mean_price']) / data['mean_price']
    data['inout'] = (data['outPrice'] - data['inPrice']) / data['mean_price']
    data['delt_count'] = ((data['count'] - data['count'].shift(1)) / data['count']).fillna(0)
    data['delt_vol'] = ((data['vol'] - data['vol'].shift(1)) / data['vol']).fillna(0)

    x = getFeats(data).iloc[-1, :]

    return x

def getPred(js, model_path="model.pkl", buy_threshold=0.955, sell_threshold=0.928):
    """Return a trading signal for the given snapshots.

    Parameters
    ----------
    js : sequence of dict
        Snapshots passed unchanged to ``getX``.
    model_path : str, optional
        Pickled model file; defaults to ``"model.pkl"``.
    buy_threshold : float, optional
        ``'buy'`` is returned when the probability at index 2 exceeds this.
    sell_threshold : float, optional
        ``'sell'`` is returned when the probability at index 0 exceeds this
        (checked only if the buy condition did not fire).

    Returns
    -------
    str
        ``'buy'``, ``'sell'`` or ``'none'``.
    """
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file; only point model_path at model files from a trusted source.
    with open(model_path, "rb") as f:
        model = pickle.load(f)

    x = getX(js)

    # Presumably index 0 / index 2 are the "down" / "up" classes of the
    # classifier -- TODO confirm against the training label encoding.
    p = model.predict_proba(x)

    if p[2] > buy_threshold:
        return 'buy'
    if p[0] > sell_threshold:
        return 'sell'
    return 'none'

In [60]:
# Load the recorded snapshots and keep only the 30 most recent entries.
with open('test.json') as f:
    d = json.load(f)[-30:]

In [61]:
# Score the loaded snapshots; the result is 'buy', 'sell' or 'none'.
getPred(d)

'none'