In [1]:
import pandas as pd
import yfinance as yf
import pandas_ta as ta
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import coint
from sklearn.model_selection import train_test_split
from itertools import permutations
import numpy as np

In [4]:

url = 'https://coinmarketcap.com/all/views/all/'

html = pd.read_html(url)

df = html[2]

names = df['Name']

symbol = df['Symbol']

symbol = symbol.dropna()


symbol = [f'{s}-USD' for s in symbol]
#for s in symbol:
    
df = yf.download(symbol, start = '2022-02-24', end = "2022-03-24", interval="60m", progress=False)['Close']

#print(df.columns)

df.columns = [d[0:d.index('-')] for d in df.columns]

df = df.dropna()
df_train = df[int(len(df)*.7):]
df_test = df[:int(len(df)*.3)]

In [5]:


def find_cointegrated_pairs(p1, p2):

    result = coint(p1, p2)
    
    if result[1] < 0.05:
        return True
    return False
    
def stationarity_test(X, cutoff=0.05):
    # H_0 in adfuller is unit root exists (non-stationary)
    # We must observe significant p-value to convince ourselves that the series is stationary
    pvalue = adfuller(X)[1]
    if pvalue < cutoff:
        return True
    return False
        

perms = list(permutations([i for i in df.columns], 2))

pairs = []

for p in perms:
    
    if find_cointegrated_pairs(df_train[p[0]], df_train[p[1]]) and stationarity_test(df_train[p[0]]/df_train[p[1]]):
        pairs.append(p)
        

In [6]:

pairs


[('BNB', 'BTC'),
 ('BTC', 'BNB'),
 ('BTC', 'WBTC'),
 ('BUSD', 'DAI'),
 ('BUSD', 'LEO'),
 ('BUSD', 'USDC'),
 ('BUSD', 'USDT'),
 ('CRO', 'SHIB'),
 ('DAI', 'BUSD'),
 ('DAI', 'LEO'),
 ('DAI', 'USDC'),
 ('DAI', 'USDT'),
 ('LEO', 'USDT'),
 ('MATIC', 'SOL'),
 ('SHIB', 'CRO'),
 ('SOL', 'CRO'),
 ('SOL', 'DOGE'),
 ('SOL', 'MATIC'),
 ('USDC', 'BUSD'),
 ('USDC', 'DAI'),
 ('USDC', 'LEO'),
 ('USDC', 'USDT'),
 ('WBTC', 'BNB'),
 ('WBTC', 'BTC')]

In [10]:

def run_strategy_1(data, p, lookback, width):
    
    initial_capital = 1000
    
    hr_lookback = 63

    df = pd.DataFrame()
    
    df[p[0]] = data[p[0]]
    df[p[1]] = data[p[1]]
    
    
    df['hedge_ratio'] = df[p[0]].rolling(hr_lookback).corr(df[p[1]]) * df[p[0]].rolling(hr_lookback).std() / df[p[1]].rolling(hr_lookback).std()
    df['spread'] = df[p[0]] - df['hedge_ratio'] * df[p[1]]
    
    df['rolling_spread'] = df['spread'].rolling(lookback).mean() #lookback-day SMA of spread
    df['rolling_spread_std'] = df['spread'].rolling(lookback).std() #lookback-day rolling STD of spread
    df['upper_band'] = df['rolling_spread'] + (width * df['rolling_spread_std']) #upper = SMA + width * STD
    df['lower_band'] = df['rolling_spread'] - (width * df['rolling_spread_std']) #lower = SMA - width * STD
    
    df['Position 1'] = np.nan #create a column for position 1 so we can fill it
    #df['Position 2'] = np.nan #create a column for position 2 so we can fill it
    
    stop_loss = .05
    short_val_1 = None
    long_val_1 = None
    short_val_2 = None
    long_val_2 = None
    
    for num, date in enumerate(df.index): #note: this for-loop method is easier to follow, but using np.where() gives a more efficient solution. Look up the documentation!
        
        if df.loc[date, 'spread'] > df.loc[date, 'upper_band']:
            
            # -1 * (initial_capital//max(df[p[0]][:date]))
            df.loc[date, 'Position 1'] = -1 * (initial_capital//max(df[p[0]][:date]))
            
            #if df['Position 1'][num-1] == 0:
                #print('short_val')
                #short_val = df.iloc[num]
                #short_val_1 = df.iloc[num]
                #short_val_2 = df[p[1]].iloc[num]
            
        elif df.loc[date, 'spread'] < df.loc[date, 'lower_band']:
            df.loc[date, 'Position 1'] = 1 * (initial_capital//max(df[p[0]][:date]))
            
            #if df['Position 1'][num-1] == 0:
                #print('long_val')
                #long_val = df.iloc[num]
                #long_val_1 = df.iloc[num]
            
        elif (df.loc[date, 'spread'] >= df.loc[date, 'lower_band']) & (df.loc[date, 'spread'] <= df.loc[date, 'upper_band']):
            df.loc[date, 'Position 1'] = 0
            #short_val_1 = None
            #long_val_1 = None

        # STOP LOSS SHORT
        #elif (short_val_1) and df['Position 1'][num-1] < -1 and short_val_1 + short_val_1*stop_loss < df.loc[date, 'Close']:
            #print('short-stop')
            #df.loc[date, 'Position 1'] = 0
        
        # STOP LOSS LONG  
        #elif (long_val_1) and df['Position 1'][num-1] > 1 and long_val_1 - long_val_1*stop_loss > df.loc[date, 'Close']:
            #print('long-stop')
            #df.loc[date, 'Position 1'] = 0
        
        #else:
            
            #if (num > 0) and not short_val_1 and not long_val_1:
                #df.loc[date, 'Position 1'] = df['Position 1'][num-1]
            
            #if (num > 0) and not short_val_2 and not long_val_2:
                #df.loc[date, 'Position 2'] = df['Position 2'][num-1]  

   #this is the impact of the hedge ratio. Make sure you intuitively understand why this one-line solution works (the negative sign, the hedge ratio's role, the dependence on the other asset's position)
    df['Position 2'] = -df['hedge_ratio'] * df['Position 1']
    
    df['Position 1'].fillna(0, inplace=True)
    df['Position 2'].fillna(0, inplace=True)
    
    df['capital'] = initial_capital

    df['capital'] -= (df['Position 2'].diff() * data[p[1]]).cumsum()
    df['capital'] -= (df['Position 1'].diff() * data[p[0]]).cumsum()

    df['position_value'] = (df['Position 1'] * data[p[0]].shift(-1) + df['Position 2'] * data[p[1]].shift(-1))
    df['portfolio_value'] = df['capital'] + df['position_value']
    
    df['Returns'] = (df['portfolio_value'] - initial_capital) / initial_capital
    df['P&L'] = df['Returns'].diff().fillna(0)
    
    #(df['Returns'], p[2])
    return df


In [11]:

rets = {}

for num, pai in enumerate(pairs):
    rets[pai] = run_strategy_1(df_test, pairs[num], 8, 0.5)['Returns'][-2]


In [12]:


rets



{('BNB', 'BTC'): -0.0476861540954518,
 ('BTC', 'BNB'): 0.0,
 ('BTC', 'WBTC'): 0.0,
 ('BUSD', 'DAI'): 0.004421068671457306,
 ('BUSD', 'LEO'): 0.030112713999456447,
 ('BUSD', 'USDC'): -0.00102945886232294,
 ('BUSD', 'USDT'): 0.00809783826346711,
 ('CRO', 'SHIB'): 0.035278874370982975,
 ('DAI', 'BUSD'): -0.0024545416991277307,
 ('DAI', 'LEO'): 0.00928212630920268,
 ('DAI', 'USDC'): -0.0008202426356810975,
 ('DAI', 'USDT'): 0.0045380383536578394,
 ('LEO', 'USDT'): 0.03746819251377019,
 ('MATIC', 'SOL'): -0.01919035963297938,
 ('SHIB', 'CRO'): -0.03195937904255413,
 ('SOL', 'CRO'): -0.08576503912861085,
 ('SOL', 'DOGE'): -0.0977295243785452,
 ('SOL', 'MATIC'): -0.012275033178045078,
 ('USDC', 'BUSD'): 0.0014272509860887793,
 ('USDC', 'DAI'): 0.0029195210959962878,
 ('USDC', 'LEO'): 0.012227947543017877,
 ('USDC', 'USDT'): 0.007027239447679904,
 ('WBTC', 'BNB'): 0.0,
 ('WBTC', 'BTC'): 0.0}