In [1]:
import datetime as dt
import pandas as pd
import numpy as np
import dask
from dask import delayed
import requests as requests
from statsmodels.tsa.stattools import adfuller

In [2]:
def get_price(crypto):
    
    """Purpose of this function is to download cryptocurencies data in json format.
        It returns panda series with closing price on hourly basis."""
    
    URL = "https://api.kraken.com/0/public/OHLC?pair="+crypto+"EUR"+"&interval=60"
    response = requests.get(URL).json()
    data = response["result"]  
    ticker = list(data.keys())[0] #get key of dict. that contains the price vectors
    data = data[ticker]
    close = [i[4] for i in data][:720] #get only close price, without last (pending) hour. Default limit is 720 datas.
    end = dt.datetime.now()
    start = end - dt.timedelta(hours = 719)
    time = pd.date_range(start=start,end=end,freq="1h").hour #create time index
    s = pd.Series(close,index=time) #create panda series with closing prices and hourly index
    return s

def get_signal(corr_limit,data,price,window,z):
    
    """The function returns long/short signals at the same time for 2 assets. Signals 
        are based on the correlation and cointegration of these assets."""
    corr = data.corr().iloc[1,0] 
    if corr >= corr_limit:
        ratio = price.iloc[:,0]/price.iloc[:,1] #More valuable should be always numerator!
        p = adfuller(ratio)[1] 
        if p <= 0.05: #if p value of ADF test is less then 0.05, series ratio of prices is stationary, we can proceed
            mean = ratio[(window-30):].mean() 
            std = ratio[(window-30):].std() 
            z_score = (ratio.values[-1]-mean)/std #calculate mean, std and Z - score over last 10 values of ratio of prices
            if z_score >= z: 
                return [0] #if the ratio is over it´s std, we have a signal to open a short and long position (respectively to numerator and denumerator) 
            elif z_score <= 0:
                return [1] #if the ratio is equal/bellow mean, we have a signal to close the positions we previously opened (or do nothing)
            else:
                return ["x"] #if the ratio is within mean and std, we do nothing
        else:
            return ["x"] #if there is no stationarity, we do nothing
    else:
        return [1] #if the correlation is lost, we close positions (or do nothing) 

def compile_results(BTCETH,BTCLTC,BTCADA,BTCXRP,ETHLTC,ETHADA,ETHXRP,LTCADA,LTCXRP,ADAXRP):
    
    """The function serves as a compiler for results which we get from the previous function at the same 
        moment as a consequence of parallelisation. It returns a single row dataframe with long/short signals"""
    
    df = pd.DataFrame({"BTCETH":BTCETH,"BTCLTC":BTCLTC,"BTCADA":BTCADA,"BTCXRP":BTCXRP
           ,"ETHLTC":ETHLTC,"ETHADA":ETHADA,"ETHXRP":ETHXRP,"LTCADA":LTCADA
           ,"LTCXRP":LTCXRP,"ADAXRP":ADAXRP})
    return df

In [3]:
BTC = get_price("BTC") #get closing price for cryptos
ETH = get_price("ETH")
LTC = get_price("LTC")
ADA = get_price("ADA")
XRP = get_price("XRP")

In [4]:
close = pd.concat([BTC,ETH,LTC,ADA,XRP],axis=1) #create a df from pd series and rename columns
close.columns = ["BTC","ETH","LTC","ADA","XRP"]

In [5]:
close.head(15)

Unnamed: 0,BTC,ETH,LTC,ADA,XRP
18,33840.0,2450.31,107.62,1.053627,0.59499
19,33775.4,2440.95,107.47,1.054704,0.59104
20,33506.1,2417.78,106.17,1.044428,0.5876
21,33843.8,2424.47,106.82,1.050338,0.58989
22,32365.6,2303.75,100.84,1.006604,0.55827
23,31875.0,2249.93,100.42,0.996754,0.56004
0,32192.0,2266.53,101.16,0.989625,0.56275
1,32209.1,2288.87,101.38,0.994731,0.56364
2,32256.9,2305.14,101.79,1.005845,0.57136
3,32250.0,2293.33,100.77,1.011609,0.56955


In [6]:
for i in close.columns:
    close[i]=pd.to_numeric(close[i], downcast="float") #convert string data to float

In [7]:
returns = close.pct_change().dropna() #calculate returns for calculating correlation

In [8]:
returns.corr()

Unnamed: 0,BTC,ETH,LTC,ADA,XRP
BTC,1.0,0.888361,0.845743,0.786012,0.753342
ETH,0.888361,1.0,0.870243,0.80905,0.742457
LTC,0.845743,0.870243,1.0,0.808293,0.74058
ADA,0.786012,0.80905,0.808293,1.0,0.735449
XRP,0.753342,0.742457,0.74058,0.735449,1.0


In [9]:
price = close.iloc[1:,:] #drop first row in order to match the length with returns dataset

In [10]:
signals = pd.DataFrame({"BTCETH":["x"],"BTCLTC":["x"],"BTCADA":["x"],"BTCXRP":["x"],"ETHLTC":["x"],"ETHADA":["x"],"ETHXRP":["x"],"LTCADA":["x"],"LTCXRP":["x"],"ADAXRP":["x"]})
w, limit,z = 90,0.7,0.8

for i in range(len(returns.index)-w):
    BTCETH_data,BTCETH_price = returns.iloc[i:(i+w),[0,1]],price.iloc[i:(i+w),[0,1]] #looping throught the data in order to create a rolling window.
    BTCLTC_data,BTCLTC_price = returns.iloc[i:(i+w),[0,2]],price.iloc[i:(i+w),[0,2]] #The slices of data are used as inputs to functions created above. 
    BTCADA_data,BTCADA_price = returns.iloc[i:(i+w),[0,3]],price.iloc[i:(i+w),[0,3]]
    BTCXRP_data,BTCXRP_price = returns.iloc[i:(i+w),[0,4]],price.iloc[i:(i+w),[0,4]]
    ETHLTC_data,ETHLTC_price = returns.iloc[i:(i+w),[1,2]],price.iloc[i:(i+w),[1,2]]
    ETHADA_data,ETHADA_price = returns.iloc[i:(i+w),[1,3]],price.iloc[i:(i+w),[1,3]]
    ETHXRP_data,ETHXRP_price = returns.iloc[i:(i+w),[1,4]],price.iloc[i:(i+w),[1,4]]
    LTCADA_data,LTCADA_price = returns.iloc[i:(i+w),[2,3]],price.iloc[i:(i+w),[2,3]]
    LTCXRP_data,LTCXRP_price = returns.iloc[i:(i+w),[2,4]],price.iloc[i:(i+w),[2,4]]
    ADAXRP_data,ADAXRP_price = returns.iloc[i:(i+w),[3,4]],price.iloc[i:(i+w),[3,4]]
    BTCETH_signal = delayed(get_signal)(limit,BTCETH_data,BTCETH_price,w,z) #run the get_signal functions in parallel on each pair
    BTCLTC_signal = delayed(get_signal)(limit,BTCLTC_data,BTCLTC_price,w,z)
    BTCADA_signal = delayed(get_signal)(limit,BTCADA_data,BTCADA_price,w,z)
    BTCXRP_signal = delayed(get_signal)(limit,BTCETH_data,BTCXRP_price,w,z)
    ETHLTC_signal = delayed(get_signal)(limit,ETHLTC_data,ETHLTC_price,w,z)
    ETHADA_signal = delayed(get_signal)(limit,ETHADA_data,ETHADA_price,w,z)
    ETHXRP_signal = delayed(get_signal)(limit,ETHXRP_data,ETHXRP_price,w,z)
    LTCADA_signal = delayed(get_signal)(limit,LTCADA_data,LTCADA_price,w,z)
    LTCXRP_signal = delayed(get_signal)(limit,LTCXRP_data,LTCXRP_price,w,z)
    ADAXRP_signal = delayed(get_signal)(limit,ADAXRP_data,ADAXRP_price,w,z)
    result = delayed(compile_results)(BTCETH_signal,BTCLTC_signal,BTCADA_signal,BTCXRP_signal,ETHLTC_signal,ETHADA_signal,ETHXRP_signal,LTCADA_signal,LTCXRP_signal,ADAXRP_signal)
    df = result.compute()
    signals = pd.concat([signals,df],axis=0)

In [11]:
signals.head(25)

Unnamed: 0,BTCETH,BTCLTC,BTCADA,BTCXRP,ETHLTC,ETHADA,ETHXRP,LTCADA,LTCXRP,ADAXRP
0,x,x,x,x,x,x,x,x,x,x
0,x,x,x,x,x,x,x,x,x,x
0,x,x,x,x,x,x,x,x,x,x
0,x,x,x,x,x,x,x,x,x,x
0,x,x,x,x,x,x,x,x,x,x
0,x,x,x,x,x,x,x,x,x,x
0,x,x,x,x,x,x,x,x,x,x
0,x,x,x,x,0,x,x,x,x,x
0,x,x,x,x,x,x,x,x,x,x
0,x,x,x,x,x,x,x,x,x,x


In [12]:
len(signals.index)

630

In [13]:
signals.index = np.arange(0,len(returns.index)-w+1)

In [14]:
#for index,j in enumerate(signals["BTCETH"]):
        #if j==0:
            #ix=0
            #signals["BTCETH"][index] ="open"
            #for k in signals["BTCETH"][(index+1):]:
                #ix += 1
                #if k==1:
                    #signals["BTCETH"][(index+ix)]="close"
                    #print(index+ix,signals["BTCETH"][(index+ix)])
                    #break
                #else:
                    #signals["BTCETH"][(index+ix)]="pending"
        #elif j==1:
            #signals["BTCETH"][index] ="x"
        #else:
            #pass

In [15]:
#signals["BTCETH"][20]

In [16]:
for i in signals.columns: #loop for each column in signals dataset 
    for index,j in enumerate(signals[i]): #loop for each value and index in the selected column
        if j==0: #when the value is 0, we open positions (short one crypto, long another one) and replace the value 0 by open
            ix=0
            signals[i][index] ="open"
            for k in signals[i][(index+1):]: #next loop starts - it beggins from the  where the zero was found and continue until end of the column
                ix += 1 
                if k==1: #if the 1 is found, the value is replaced by close string. The loops finish and the main loop can continue from the point where the 0 was found
                    signals[i][(index+ix)]="close"
                    break
                else:
                    signals[i][(index+ix)]="pending" #0s or Xs are replaced by pending string. It ensures there will not be opened more than 1 pair of positions on the same pair of cryptos
        elif j==1:
            signals[i][index] ="x" #1s are replaced by X. 
        else:
            pass

In [17]:
signals.iloc[100:150,:]

Unnamed: 0,BTCETH,BTCLTC,BTCADA,BTCXRP,ETHLTC,ETHADA,ETHXRP,LTCADA,LTCXRP,ADAXRP
100,x,x,x,pending,pending,x,pending,x,x,pending
101,x,open,x,pending,pending,x,pending,x,x,close
102,x,close,x,pending,pending,x,pending,x,x,x
103,x,x,x,pending,pending,x,pending,x,x,x
104,x,x,x,pending,pending,x,pending,open,x,x
105,x,x,x,pending,pending,x,pending,pending,x,x
106,x,open,x,pending,pending,x,pending,pending,x,x
107,x,close,x,pending,pending,x,pending,pending,x,x
108,x,x,x,pending,pending,x,pending,pending,x,x
109,x,x,x,pending,pending,x,pending,pending,x,x


In [18]:
signals = signals.iloc[1:,:] #drop first row, artificially created 

In [19]:
returns_bs = returns.iloc[w:,:] #drop observations which were neccesary to get first signal

In [20]:
returns_bs.head()

Unnamed: 0,BTC,ETH,LTC,ADA,XRP
13,0.00291,0.005718,-0.001253,-0.001857,0.007213
14,-0.001063,-0.004504,-0.005437,0.001821,-0.005878
15,-0.006399,-0.015492,-0.009461,-0.014398,-0.00683
16,0.007616,0.011371,0.009976,0.010488,0.006048
17,0.007229,0.005577,0.009772,0.011581,0.006086


In [21]:
returns_bs["BTCETH1"] = -1*returns_bs["BTC"] #get returns for computing the result of long positons, and negative returns for computing short ones
returns_bs["BTCETH2"] =  returns_bs["ETH"]
returns_bs["BTCLTC1"] = -1*returns_bs["BTC"]
returns_bs["BTCLTC2"] =  returns_bs["LTC"]
returns_bs["BTCADA1"] =  -1*returns_bs["BTC"]
returns_bs["BTCADA2"] =   returns_bs["ADA"]
returns_bs["BTCXRP1"] =   -1*returns_bs["BTC"]
returns_bs["BTCXRP2"] =   returns_bs["XRP"]
returns_bs["ETHLTC1"] =   -1*returns_bs["ETH"]
returns_bs["ETHLTC2"] =   returns_bs["LTC"]
returns_bs["ETHADA1"] =   -1*returns_bs["ETH"]
returns_bs["ETHADA2"] =   returns_bs["ADA"]
returns_bs["ETHXRP1"] =   -1*returns_bs["ETH"]
returns_bs["ETHXRP2"] =   returns_bs["XRP"]
returns_bs["LTCADA1"] =   -1*returns_bs["LTC"]
returns_bs["LTCADA2"] =   returns_bs["ADA"]
returns_bs["LTCXRP1"] =   -1*returns_bs["LTC"]
returns_bs["LTCXRP2"] =   returns_bs["XRP"]
returns_bs["ADAXRP1"] =   -1*returns_bs["ADA"]
returns_bs["ADAXRP2"] =   returns_bs["XRP"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  returns_bs["BTCETH1"] = -1*returns_bs["BTC"] #get returns for computing the result of long positons, and negative returns for computing short ones
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  returns_bs["BTCETH2"] =  returns_bs["ETH"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  returns_bs["BTC

In [22]:
returns_bs = returns_bs.iloc[:,5:] +1 #get indexed returns

In [23]:
len(returns_bs.index)

629

In [24]:
signals.index = np.arange(0,len(signals.index))
returns_bs.index = signals.index

In [25]:
for i in signals.columns: #loop for each pair and if there are not opened positions, then replace the returns by 1
    short = i+str(1)
    long = i+str(2)
    for ix,j in enumerate(signals[i]):
        if j=="x" or j=="open":
            returns_bs[short][ix]=1
            returns_bs[long][ix]=1
        else:
            pass

In [26]:
returns_bs.head(100)

Unnamed: 0,BTCETH1,BTCETH2,BTCLTC1,BTCLTC2,BTCADA1,BTCADA2,BTCXRP1,BTCXRP2,ETHLTC1,ETHLTC2,ETHADA1,ETHADA2,ETHXRP1,ETHXRP2,LTCADA1,LTCADA2,LTCXRP1,LTCXRP2,ADAXRP1,ADAXRP2
0,1.0,1.0,1.0,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.0,1.0,1.000000,1.000000,1.0,1.0,1.0,1.0,1.000000,1.000000
1,1.0,1.0,1.0,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.0,1.0,1.000000,1.000000,1.0,1.0,1.0,1.0,1.000000,1.000000
2,1.0,1.0,1.0,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.0,1.0,1.000000,1.000000,1.0,1.0,1.0,1.0,1.000000,1.000000
3,1.0,1.0,1.0,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.0,1.0,1.000000,1.000000,1.0,1.0,1.0,1.0,1.000000,1.000000
4,1.0,1.0,1.0,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.0,1.0,1.000000,1.000000,1.0,1.0,1.0,1.0,1.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1.0,1.0,1.0,1.0,1.0,1.0,1.001280,0.998936,0.999356,0.996861,1.0,1.0,0.999356,0.998936,1.0,1.0,1.0,1.0,1.003541,0.998936
96,1.0,1.0,1.0,1.0,1.0,1.0,0.997207,1.003248,0.993388,1.006297,1.0,1.0,0.993388,1.003248,1.0,1.0,1.0,1.0,0.994419,1.003248
97,1.0,1.0,1.0,1.0,1.0,1.0,0.992385,1.004802,0.987782,1.011910,1.0,1.0,0.987782,1.004802,1.0,1.0,1.0,1.0,0.990973,1.004802
98,1.0,1.0,1.0,1.0,1.0,1.0,1.012954,0.986754,1.023205,0.980451,1.0,1.0,1.023205,0.986754,1.0,1.0,1.0,1.0,1.020354,0.986754


In [27]:
signals.head(20)

Unnamed: 0,BTCETH,BTCLTC,BTCADA,BTCXRP,ETHLTC,ETHADA,ETHXRP,LTCADA,LTCXRP,ADAXRP
0,x,x,x,x,x,x,x,x,x,x
1,x,x,x,x,x,x,x,x,x,x
2,x,x,x,x,x,x,x,x,x,x
3,x,x,x,x,x,x,x,x,x,x
4,x,x,x,x,x,x,x,x,x,x
5,x,x,x,x,x,x,x,x,x,x
6,x,x,x,x,open,x,x,x,x,x
7,x,x,x,x,pending,x,x,x,x,x
8,x,x,x,x,pending,x,x,x,x,x
9,x,x,x,x,pending,x,x,x,x,x


In [28]:
results = returns_bs.cumprod() #get cumulative return indexes

In [29]:
results.tail(10)

Unnamed: 0,BTCETH1,BTCETH2,BTCLTC1,BTCLTC2,BTCADA1,BTCADA2,BTCXRP1,BTCXRP2,ETHLTC1,ETHLTC2,ETHADA1,ETHADA2,ETHXRP1,ETHXRP2,LTCADA1,LTCADA2,LTCXRP1,LTCXRP2,ADAXRP1,ADAXRP2
619,1.021293,0.952866,0.926986,1.105785,0.91353,1.022245,0.810012,1.454991,0.7906,1.247889,1.066582,0.888674,0.920307,1.024163,0.916841,1.070615,0.94757,1.088184,0.994435,1.042299
620,1.038583,0.93489,0.94268,1.09023,0.91353,1.022245,0.810012,1.454991,0.7906,1.247889,1.086703,0.870174,0.920307,1.024163,0.916841,1.070615,0.94757,1.088184,0.994435,1.042299
621,1.037978,0.94106,0.942131,1.095453,0.91353,1.022245,0.810012,1.454991,0.7906,1.247889,1.079532,0.878444,0.920307,1.024163,0.916841,1.070615,0.94757,1.088184,0.994435,1.042299
622,1.036702,0.938225,0.940973,1.098632,0.91353,1.022245,0.810012,1.454991,0.7906,1.247889,1.082784,0.877077,0.920307,1.024163,0.916841,1.070615,0.94757,1.088184,0.994435,1.042299
623,1.033005,0.948185,0.937618,1.107829,0.91353,1.022245,0.810012,1.454991,0.7906,1.247889,1.071289,0.883279,0.920307,1.024163,0.916841,1.070615,0.94757,1.088184,0.994435,1.042299
624,1.035726,0.942292,0.940087,1.105331,0.91353,1.022245,0.810012,1.454991,0.7906,1.247889,1.077947,0.872012,0.920307,1.024163,0.916841,1.070615,0.94757,1.088184,0.994435,1.042299
625,1.035275,0.944179,0.940087,1.105331,0.91353,1.022245,0.810012,1.454991,0.7906,1.247889,1.075788,0.874968,0.920307,1.024163,0.916841,1.070615,0.94757,1.088184,0.994435,1.042299
626,1.036781,0.941987,0.940087,1.105331,0.91353,1.022245,0.810012,1.454991,0.7906,1.247889,1.078286,0.872091,0.920307,1.024163,0.916841,1.070615,0.94757,1.088184,0.994435,1.042299
627,1.032524,0.95263,0.940087,1.105331,0.91353,1.022245,0.810012,1.454991,0.7906,1.247889,1.066103,0.882366,0.920307,1.024163,0.916841,1.070615,0.94757,1.088184,0.994435,1.042299
628,1.033369,0.946404,0.940087,1.105331,0.91353,1.022245,0.810012,1.454991,0.7906,1.247889,1.073071,0.877227,0.920307,1.024163,0.916841,1.070615,0.94757,1.088184,0.994435,1.042299


In [30]:
final = sum(results.iloc[-1,:]-1) #sum values in last row in order to get total return

In [31]:
final

0.21917057037353516