In [39]:
import requests
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import time
import datetime
from datetime import datetime, date
from datetime import timedelta
import talib
import pickle
from scipy import stats
from datetime import datetime as dt
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import Imputer, RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score,log_loss
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
import lightgbm as lgb
from scipy.signal import argrelextrema

In [40]:

# Markets tracked by this notebook, written as 'USDT_<SYMBOL>'.
# Callers slice off the five-character prefix ([5:]) to get the bare symbol.
tickers = ['USDT_' + symbol for symbol in (
    'BTC', 'ETC', 'EOS', 'XMR', 'ETH',
    'DASH', 'BAT', 'GNT', 'XRP', 'LTC',
    'XLM', 'REP', 'ZEC', 'IOT', 'NEO',
    'BCH', 'TRX', 'OMG', 'XTZ', 'BSV',
    'MKR', 'ZRX', 'VET', 'BTG', 'ZIL',
)]

def get_data(date, coin, timeout=30):
    """Fetch one page of hourly price history for ``coin`` priced in USD.

    The request goes to the CryptoCompare ``histohour`` endpoint with
    ``limit=1000``, so the response holds hourly records ending at ``date``
    (hours, not days).

    Args:
        date: Value sent as the ``toTs`` query parameter
            (presumably a Unix timestamp in seconds; confirm against callers).
        coin: Symbol sent as the ``fsym`` query parameter, e.g. ``'BTC'``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response, unmodified.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://min-api.cryptocompare.com/data/histohour?fsym="+coin+"&tsym=USD&limit=1000&toTs={}".format(date)
    response = requests.get(url, timeout=timeout)
    return response.json()

def get_hourly(date, coin, exchange='', timeout=30):
    """Fetch one page of hourly candles for ``coin`` in USD, optionally from one exchange.

    Args:
        date: Value sent as the ``toTs`` query parameter
            (presumably a Unix timestamp in seconds; confirm against callers).
        coin: Symbol sent as the ``fsym`` query parameter.
        exchange: When non-empty, appended to the query as ``e=<exchange>``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        The whole decoded JSON payload. It is not narrowed to ``payload['Data']``
        because ``get_df`` also reads ``payload['TimeFrom']`` to page backwards.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = 'https://min-api.cryptocompare.com/data/histohour?fsym='+coin+'&tsym=USD&limit=999&toTs={}'\
            .format(date)
    if exchange:
        url += '&e={}'.format(exchange)
    page = requests.get(url, timeout=timeout)
    return page.json()


def get_df(from_date, to_date, coin):
    """Download hourly candles for ``coin`` and save them to ``live_1h_<coin>.csv``.

    Pages backwards from ``to_date`` with ``get_hourly`` (exchange fixed to
    'Bitfinex'), using each response's ``TimeFrom`` as the next cursor, until the
    cursor reaches ``from_date``.

    Args:
        from_date: Lower bound as Unix seconds; rows with ``time <= from_date``
            are discarded.
        to_date: Upper bound as Unix seconds; the first request ends here.
        coin: Bare coin symbol, also used in the output file name.

    Returns:
        DataFrame indexed by ``time`` (converted with ``unit='s'``), sorted
        ascending, with one row per timestamp.

    Raises:
        KeyError: If a response lacks the ``Data`` or ``TimeFrom`` keys.
        ValueError: If no page was requested (``to_date <= from_date``).
    """
    date = to_date
    holder = []
    while date > from_date:
        data = get_hourly(date, coin, exchange='Bitfinex')
        holder.append(pd.DataFrame(data['Data']))
        next_date = data['TimeFrom']
        if next_date >= date:
            # The cursor did not move back; asking again would repeat the same
            # request forever, so stop with what has been collected.
            break
        date = next_date

    df = pd.concat(holder, axis=0)
    # .copy() so the column assignment below acts on an independent frame.
    df = df[df['time'] > from_date].copy()
    df['time'] = pd.to_datetime(df['time'], unit='s')
    df = df.set_index('time')
    # Pages can repeat a timestamp (code elsewhere in this notebook already
    # special-cases repeated timestamps), so keep a single row per hour.
    df = df[~df.index.duplicated(keep='first')]
    df = df.sort_index(ascending=True)
    df.to_csv("live_1h_"+coin + '.csv')
    print('Processed: ' + coin)
    return df

def get_cap(cap, coin, start, end, timeout=30):
    """Fetch a CoinMetrics daily series for ``coin`` and forward-fill it to hourly.

    Args:
        cap: ``'cap'`` selects the ``marketcap(usd)`` series; any other value
            selects ``exchangevolume(usd)``.
        coin: Asset identifier placed in the URL path.
        start: Range start placed in the URL path (callers pass Unix seconds).
        end: Range end placed in the URL path (callers pass Unix seconds).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Series named ``'cap'`` on an hourly DatetimeIndex, forward-filled between
        the points the API returned. Timestamps are converted with ``unit='s'``,
        the same conversion ``get_df`` applies to the price data, so both indexes
        share one clock. (``datetime.fromtimestamp`` would give local-time labels.)

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        KeyError: If the response has no ``'result'`` key.
        ValueError: If the response is not JSON or ``'result'`` is empty.
    """
    metric = 'marketcap(usd)' if cap == 'cap' else 'exchangevolume(usd)'
    url = ('https://coinmetrics.io/api/v1/get_asset_data_for_time_range/'
           + coin + '/' + metric + '/' + str(start) + '/' + str(end))

    response = requests.get(url, timeout=timeout)
    payload = response.json()
    frame = pd.DataFrame(payload['result'], columns=['date', 'cap'])
    if frame.empty:
        # Fail loudly here instead of with an opaque resample error further down.
        raise ValueError('no {} data returned for {}'.format(metric, coin))

    # .values keeps the resulting index unnamed, as a plain list of labels would.
    frame.index = pd.to_datetime(frame['date'].values, unit='s')
    frame = frame.resample('1H').ffill()
    return frame['cap']

In [41]:
def divergence(close, RSI, lookback, window, maxx, low):
    """Find positions where price and RSI diverge.

    Local RSI extrema are located with ``argrelextrema`` (``order=5``).

    * Bearish: an RSI peak above ``maxx`` whose RSI is below the highest RSI of
      the preceding ``lookback`` rows while ``close`` is above the highest close
      of those same rows.
    * Bullish: an RSI trough below ``low`` whose close is below the lowest close
      of the preceding ``lookback`` rows while its RSI is above the RSI of the
      previous trough.

    Args:
        close: Price Series, positionally aligned with ``RSI``.
        RSI: RSI Series.
        lookback: Number of preceding rows used for the max/min comparisons.
        window: Unused; kept so existing callers keep working.
        maxx: RSI level a peak must exceed to be considered.
        low: RSI level a trough must be under to be considered.

    Returns:
        ``(bull_div, bear_div)``: two sorted integer arrays of positions. Both
        are integer-typed even when empty, so they can always be used directly
        as positional indexers.
    """
    rsi_values = RSI.values
    peaks = argrelextrema(rsi_values, np.greater, order=5)[0]
    troughs = argrelextrema(rsi_values, np.less, order=5)[0]

    bear_rsi, bear_price = set(), set()
    bull_rsi, bull_price = set(), set()

    # The built-in max/min are kept on purpose: when the window starts with a
    # NaN they return NaN, the comparison is False and the point is skipped.
    for peak in peaks:
        if RSI.iloc[peak] > maxx:
            window_start = max(0, peak - lookback)
            if RSI.iloc[peak] < max(RSI.iloc[window_start:peak]):
                bear_rsi.add(peak)
            if close.iloc[peak] > max(close.iloc[window_start:peak]):
                bear_price.add(peak)

    for trough in troughs:
        if RSI.iloc[trough] < low:
            window_start = max(0, trough - lookback)
            if close.iloc[trough] < min(close.iloc[window_start:trough]):
                bull_price.add(trough)

    # A trough counts on the RSI side when it sits above the previous trough.
    for previous, current in zip(troughs[:-1], troughs[1:]):
        if RSI.iloc[current] < low and RSI.iloc[current] > RSI.iloc[previous]:
            bull_rsi.add(current)

    # An explicit integer dtype matters for the empty case: np.array([]) would
    # be float64, which cannot index positions.
    bull_div = np.array(sorted(bull_rsi & bull_price), dtype=np.intp)
    bear_div = np.array(sorted(bear_rsi & bear_price), dtype=np.intp)
    return bull_div, bear_div

def bbp(price):
    """Return where ``price`` sits inside its 20-period, 2-deviation Bollinger Bands.

    The result is ``(price - lower) / (upper - lower)``: 0 on the lower band and
    1 on the upper band.
    """
    upper, _middle, lower = talib.BBANDS(price, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
    band_width = upper - lower
    return (price - lower) / band_width

In [42]:
def week_start(df):
    """Return the index labels of ``df`` that fall on a Sunday at hour 0.

    Labels are returned in their original order; repeated labels in ``df.index``
    are repeated in the result.
    """
    index = df.index
    # pandas numbers weekdays Monday=0 through Sunday=6
    on_sunday = index.dayofweek == 6
    at_midnight = index.hour == 0
    return index[on_sunday & at_midnight]


def weekly_momentum(df):
    """Compute week-over-week relative changes in close and volume for each coin.

    Weeks are anchored on the first label returned by ``week_start(df)``. For
    week ``i`` the rows at ``first + i weeks`` are compared with the rows at
    ``first + (i + 1) weeks``; the change is ``(future - current) / current``
    for ``close`` and for ``volumeto``.

    Args:
        df: Frame indexed by timestamp with ``coin``, ``close`` and ``volumeto``
            columns; several coins may share one timestamp.

    Returns:
        ``(pmom, vmom)``: two float DataFrames with one column per coin and one
        row per week, labelled with the *later* timestamp of each comparison.
        Cells stay 0 for coins that are absent at either end of a week.

    Raises:
        IndexError: If ``df`` has no Sunday-midnight rows.
        KeyError: If an expected weekly timestamp is missing from ``df``.
    """
    starts = week_start(df)  # scanned once instead of on every use
    n = ((starts[-1] - starts[0]) / 7).days
    coins = df['coin'].unique()
    # Float frames from the start: every cell written below is a ratio.
    pmom = pd.DataFrame(0.0, index=np.arange(n), columns=coins)
    vmom = pd.DataFrame(0.0, index=np.arange(n), columns=coins)
    first = starts[0]

    dates = []
    for i in range(n):
        cur_ts = first + timedelta(weeks=i)
        fut_ts = first + timedelta(weeks=i + 1)
        dates.append(fut_ts)
        # A list lookup always yields a DataFrame, even if only one row matches.
        cur = df.loc[[cur_ts]]
        fut = df.loc[[fut_ts]]
        for coin in cur['coin']:
            fut_rows = fut[fut['coin'] == coin]
            if len(fut_rows) == 0:
                continue  # coin has no row a week later: leave its cell at 0
            cur_rows = cur[cur['coin'] == coin]
            # .iloc[0] picks the first row by position; the labels are timestamps.
            cur_close = cur_rows['close'].iloc[0]
            cur_volume = cur_rows['volumeto'].iloc[0]
            fut_close = fut_rows['close'].iloc[0]
            fut_volume = fut_rows['volumeto'].iloc[0]
            # Write through .loc on the frame itself; assigning via
            # frame[col].iloc[i] may only modify a temporary copy.
            pmom.loc[i, coin] = (fut_close - cur_close) / cur_close
            vmom.loc[i, coin] = (fut_volume - cur_volume) / cur_volume

    pmom.index = dates
    vmom.index = dates
    return pmom, vmom


def rank(mom, df):
    """Mark each week's strongest and weakest coins with +1 and -1.

    For every row of ``mom`` the non-zero values are sorted descending and the
    top/bottom ``k`` coins are flagged, where ``k`` depends on how many coins
    have a non-zero value that week: 1 for fewer than 7, 2 for 7 to 14, 3 for
    15 or more.

    Args:
        mom: Momentum frame, one row per week and one column per coin.
        df: Unused; kept so existing callers keep working. The week count is
            taken from ``mom`` itself, which is the only row count for which the
            returned frame can share ``mom``'s index.

    Returns:
        Integer DataFrame shaped and indexed like ``mom``, holding 1, -1 or 0.
        A week with no non-zero value is left as all zeros. With exactly one
        non-zero coin, that coin receives -1 because the bottom flag is written
        after the top flag.
    """
    n = len(mom)
    ranks = pd.DataFrame(0, index=np.arange(n), columns=mom.columns)
    for i in range(n):
        row = mom.iloc[i, :]
        row = row[abs(row) > 0].sort_values(ascending=False)
        active = len(row)
        if active == 0:
            continue  # nothing to rank this week
        if active < 7:
            k = 1
        elif active < 15:
            k = 2
        else:
            k = 3
        ranks.loc[i, list(row.index[:k])] = 1
        ranks.loc[i, list(row.index[-k:])] = -1
    ranks.index = mom.index
    return ranks

def assign_mom(df, p_rnk, v_rnk):
    """Return a copy of ``df`` with ``pmom`` and ``vmom`` rank columns attached.

    For each coin, the coin's column from ``p_rnk`` / ``v_rnk`` is assigned to
    that coin's rows. pandas aligns the assigned Series on the index, so a row
    receives a rank only when its timestamp equals one of the rank frame's
    labels; every other row ends up 0.

    NOTE(review): this means only the rows exactly on a weekly boundary carry a
    non-zero rank. Confirm whether the rank was meant to apply to the whole week.

    Args:
        df: Frame indexed by timestamp with a ``coin`` column. Not modified.
        p_rnk: Price-momentum ranks, one column per coin.
        v_rnk: Volume-momentum ranks, one column per coin.

    Returns:
        New DataFrame with the two extra columns. Every NaN in the frame, in any
        column, is replaced by 0.

    Raises:
        KeyError: If a coin in ``df`` has no column in a rank frame.
    """
    df1 = df.copy()
    # Float columns from the start: aligned assignment introduces NaN for the
    # rows without a matching label, which an integer column cannot hold.
    df1['pmom'] = 0.0
    df1['vmom'] = 0.0
    for coin in df1['coin'].unique():
        coin_rows = df1['coin'] == coin
        df1.loc[coin_rows, 'pmom'] = p_rnk[coin]
        df1.loc[coin_rows, 'vmom'] = v_rnk[coin]
    return df1.fillna(0)




In [43]:
# Live data: download the last 1500 hours of hourly candles for every ticker.
to_date=datetime.now()
from_date=to_date-timedelta(hours=1500)
# time.mktime reads the naive datetimes as local time and returns Unix seconds,
# which is what get_df compares against the API's 'time' values.
start= time.mktime(from_date.timetuple())
end = time.mktime(to_date.timetuple())


for x in tickers:
    # x[5:] drops the 'USDT_' prefix, leaving the bare coin symbol.
    # Each call also writes live_1h_<symbol>.csv, which later cells read back.
    get_df(start,end,x[5:])

Processed: BTC
Processed: ETC
Processed: EOS
Processed: XMR
Processed: ETH
Processed: DASH
Processed: BAT
Processed: GNT
Processed: XRP
Processed: LTC
Processed: XLM
Processed: REP
Processed: ZEC
Processed: IOT
Processed: NEO
Processed: BCH
Processed: TRX
Processed: OMG
Processed: XTZ
Processed: BSV
Processed: MKR
Processed: ZRX
Processed: VET
Processed: BTG
Processed: ZIL


In [44]:

def retrieve_symbol_name_v2(coin,window):
    """Load the saved hourly candles for ``coin`` and build its feature columns.

    Reads ``live_1h_<coin>.csv``, keeps one row per timestamp, then adds (in
    this order, which fixes the column layout) returns, RSI-based columns,
    divergence flags, moving-average / volume / variance trend flags, ATR,
    return-shock flags, ADX, MACD columns, momentum, stochastics, Bollinger
    position, hour of day and market-cap / total-volume ratios.

    Args:
        coin: Bare coin symbol; selects the CSV and is stored in ``coin``.
        window: Period passed to the RSI, ATR, ADX and MOM indicators.

    Returns:
        DataFrame indexed by timestamp. Rows where any column is NaN (indicator
        warm-up) are dropped before the hour and market-cap columns are added.
        If the market-cap lookup fails, ``Market_Cap`` and ``Total_vol`` are 0
        and a warning is emitted.
    """
    # Local imports: these two modules are not in the notebook's import cell.
    import calendar
    import warnings

    ndf = pd.read_csv("live_1h_"+coin+'.csv', index_col='time')
    ndf = ndf.drop(["volumefrom"], axis=1)
    ndf.index = pd.to_datetime(ndf.index)

    # One row per timestamp, keeping the later of any repeats. This replaces a
    # row-by-row DataFrame.append loop that kept the second of two repeated rows.
    # sort_index(axis=1) reproduces the alphabetical column order that loop
    # produced (visible in the notebook's displayed output), so the feature
    # layout is unchanged. astype(float) mirrors the float upcast of the old
    # row-wise build; talib needs float input.
    df = ndf[~ndf.index.duplicated(keep='last')].sort_index(axis=1).astype(float)

    df['returns'] = df['close'].pct_change()
    df['coin'] = coin
    df['RSI'] = talib.RSI(df['close'], timeperiod=window)
    df['RSI_10'] = df['RSI'].rolling(10).mean()
    df['RSI_50'] = df['RSI'].rolling(50).mean()
    df['RSI_up'] = (df['RSI_10'] > df['RSI_50']).astype(float)

    # Divergence positions -> 0/1 flag columns. The flags are built as arrays
    # and assigned whole, instead of writing through df[col].iloc[...], which
    # may act on a temporary copy. Casting the positions to int also covers an
    # empty result.
    div = divergence(df['close'], df['RSI'], 24*4, window, 70, 30)
    bull_flags = np.zeros(len(df), dtype=int)
    bull_flags[np.asarray(div[0], dtype=int)] = 1
    df['bull_div'] = bull_flags
    bear_flags = np.zeros(len(df), dtype=int)
    bear_flags[np.asarray(div[1], dtype=int)] = 1
    df['bear_div'] = bear_flags

    df['10_MA'] = df['close'].rolling(10).mean()
    df['50_MA'] = df['close'].rolling(50).mean()
    df['MA_up'] = (df['10_MA'] > df['50_MA']).astype(float)
    df['volume_10'] = df['volumeto'].rolling(10).mean()
    df['volume_50'] = df['volumeto'].rolling(50).mean()
    df['vol_up'] = (df['volume_10'] > df['volume_50']).astype(float)
    df['var'] = df['returns'].rolling(50).var()
    df['50_var'] = df['var'].rolling(50).mean()
    df['200_var'] = df['var'].rolling(200).mean()
    df['1000_var'] = df['var'].rolling(1000).mean()
    df['var_up_short'] = (df['50_var'] > df['200_var']).astype(float)
    df['var_up_long'] = (df['200_var'] > df['1000_var']).astype(float)

    close = df['close']
    high = df['high']
    low = df['low']
    df['ATR'] = talib.ATR(high, low, close, window)
    df['50_Std'] = df['returns'].rolling(50).std()
    # Flags for returns beyond 2 and 3.5 rolling standard deviations.
    df['rise'] = (df['returns'] > 2*df['50_Std']).astype(float)
    df['fall'] = (df['returns'] < -2*df['50_Std']).astype(float)
    df['rise2'] = (df['returns'] > 3.5*df['50_Std']).astype(float)
    df['fall2'] = (df['returns'] < -3.5*df['50_Std']).astype(float)
    df['ADX'] = talib.ADX(high, low, close, window)

    macd, macdsignal, macdhist = talib.MACD(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
    df['MACD'] = macd
    df['MACD_Hist'] = macdhist
    # 'MACD_Signal' (the signal line) and 'MACD_signal' (a 0/1 flag, below) are
    # two different columns that differ only in letter case.
    df['MACD_Signal'] = macdsignal
    df['MACD_Hist_change'] = df['MACD_Hist'].pct_change()
    df['Hist_std'] = df['MACD_Hist_change'].rolling(50).std()
    df['MACD_signal'] = (df['MACD_Hist_change'] > df['Hist_std']).astype(float)

    df['Mom'] = talib.MOM(close, timeperiod=window)
    slowk, slowd = talib.STOCH(high, low, close, fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
    df['Stoch_k'] = slowk
    df['Stoch_d'] = slowd
    df['BB'] = bbp(close)

    df = df.dropna()
    df['hour'] = df.index.hour

    from_date = df.index[0] - timedelta(hours=24)
    to_date = df.index[-1]
    # The index holds UTC wall-clock values (the CSV was written from
    # pd.to_datetime(..., unit='s')), so convert with timegm. time.mktime would
    # read them as local time and shift the requested range by the UTC offset.
    start = calendar.timegm(from_date.timetuple())
    end = calendar.timegm(to_date.timetuple())
    try:
        cap = get_cap('cap', coin.lower(), start, end)
        cap = cap[cap.index > df.index[0]]
        vol = get_cap('vol', coin.lower(), start, end)
        vol = vol[vol.index > df.index[0]]
    except Exception as exc:
        # Best effort: keep the 0 fallback, but say so. A silent fallback makes
        # 'vol/cap' infinite with no hint of the cause.
        warnings.warn("market-cap lookup failed for {} ({!r}); using 0 for Market_Cap and Total_vol"
                      .format(coin, exc))
        cap = 0
        vol = 0
    df['Market_Cap'] = cap
    df['Total_vol'] = vol
    df['vol/cap'] = df['volumeto']/df['Market_Cap']
    df['Total_vol/cap'] = df['Total_vol']/df['Market_Cap']
    return df

def all_coins_live():
    """Build the feature table for every ticker and stack them into one frame.

    Each entry of ``tickers`` is stripped of its five-character ``'USDT_'``
    prefix and passed to ``retrieve_symbol_name_v2`` with a window of 14. The
    first ticker goes through the same path as the rest instead of a hard-coded
    ``'BTC'``; with the current list that is the same symbol.

    Returns:
        All per-coin frames concatenated and sorted by timestamp, so several
        coins can share one index label. The elapsed wall-clock time is printed.
    """
    start = time.time()
    # Collect first and concatenate once: appending frame by frame recopies the
    # accumulated rows each time, and DataFrame.append no longer exists in
    # pandas 2.
    frames = [retrieve_symbol_name_v2(ticker[5:], 14) for ticker in tickers]
    df = pd.concat(frames).sort_index()
    print ("Runtime is " +str(time.time()-start)+" seconds")
    return df

# Build the combined per-coin feature table from the saved CSV files.
live=all_coins_live()
# weekly_momentum returns (pmom, vmom): mom[0] is price momentum, mom[1] volume momentum.
mom=weekly_momentum(live)
# Rank both momentum frames and attach them as the 'pmom' and 'vmom' columns.
# `live` is rebound here, so re-running only this line would rank the already-augmented frame.
live=assign_mom(live,rank(mom[0],live),rank(mom[1],live)) 


  results &= comparator(main, plus)
  results &= comparator(main, minus)
  results &= comparator(main, plus)
  results &= comparator(main, minus)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


Runtime is 102.93405890464783 seconds




In [45]:
# Every column except the 'coin' label is passed to the model as a feature.
x=['coin']
features=[e for e in live.columns if e not in x]

X_live=live.loc[:,features]

# Load our trained model.
# NOTE(review): pickle.load executes code embedded in the file; only open model
# files that you produced yourself or otherwise trust.
filename = 'xg_boost_long'
# The with-block closes the file even if unpickling raises.
with open(filename,'rb') as infile:
    bst = pickle.load(infile)

y_pred = bst.predict(X_live)
# Keep only the rows whose prediction is above 0.5.
trades = live.assign(pred = y_pred)
trades = trades[trades.pred >0.5]

    


In [46]:
# Preview the last rows that passed the 0.5 prediction cut-off.
trades.tail()

Unnamed: 0,close,high,low,open,volumeto,returns,coin,RSI,RSI_10,RSI_50,...,Stoch_d,BB,hour,Market_Cap,Total_vol,vol/cap,Total_vol/cap,pmom,vmom,pred
2019-04-17 09:00:00,603.88,606.29,603.88,606.29,297.29,-0.003975,MKR,19.615814,34.200121,46.992068,...,5.754842,-0.197859,9,0.0,0.0,inf,0.0,0.0,0.0,0.688864
2019-04-17 10:00:00,645.0,645.0,603.88,603.88,2437.27,0.068093,MKR,57.485428,35.999759,47.066279,...,20.421649,0.91788,10,0.0,0.0,inf,0.0,0.0,0.0,0.543088
2019-04-17 13:00:00,645.0,645.0,619.0,645.0,327.06,0.0,MKR,57.485428,41.398671,47.288912,...,90.245437,0.807484,13,0.0,0.0,inf,0.0,0.0,0.0,0.539616
2019-04-17 14:00:00,615.04,645.0,615.04,645.0,65.19,-0.04645,MKR,39.328136,41.382579,46.999977,...,91.904453,0.164022,14,0.0,0.0,inf,0.0,0.0,0.0,0.655869
2019-04-17 18:00:00,628.33,628.33,615.04,615.04,280.91,0.021608,MKR,48.949101,45.581917,45.549506,...,7.944358,0.517863,18,0.0,0.0,inf,0.0,0.0,0.0,0.52644


In [47]:
# Largest prediction value among the selected rows.
trades.pred.max()

0.8104762947146409