In [1]:
import pandas as pd
import numpy as np
from yahoo_fin.stock_info import get_data
import statsmodels.formula.api as smf
from pypfopt.efficient_frontier import EfficientFrontier
import datetime

### Fetch historical data of Jan 2018 - Dec 2020. Test date starts from Jan 2020. 
### Reason why to choose Jan 2018 as the START is all the 72 CCs ICO before 2018, which means ALL data after 2018 is accessible. 
### Drawbacks: Jan 2018 is the first peak in CC history which may result in low Sharpe Ratio and Return of this strategy (in further research, try to include data from 2017 and solve data missing of some CCs.

In [2]:
START = TRAIN_START = datetime.datetime(2018,1,1)
TEST_START = datetime.datetime(2020,1,5)
END = datetime.datetime(2020,12,31)

### Download cryptocurrency(CC) market 100 index from [Bitwise](https://www.bitwiseinvestments.com/indexes/Bitwise-100) which covers 93% of the CC market.

In [3]:
CC100_Index = pd.read_csv("indexReturns-BITW100.csv")
CC100_Index['date'] = pd.to_datetime(CC100_Index['date']).dt.tz_localize(None)
CC100_Index = CC100_Index.sort_values('date').set_index('date')
CC100_Index = CC100_Index.truncate(before = START,after = END)
# CC100_Index = CC100_Index.resample('W').last()[:-1]
CC100_Return = np.log(CC100_Index/ CC100_Index.shift(1)).fillna(axis = 0, method ='bfill').rename(columns = {"MarketIndex":"MarketReturn"})
CC100 = pd.concat([CC100_Index, CC100_Return],axis =1)

In [4]:
CC100

Unnamed: 0_level_0,MarketIndex,MarketReturn
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-01 00:00:00,26323.73,0.002073
2018-01-02 00:00:00,26378.36,0.002073
2018-01-03 00:00:00,28729.58,0.085383
2018-01-04 00:00:00,31423.55,0.089630
2018-01-05 00:00:00,31643.76,0.006983
...,...,...
2020-12-26 21:00:00,24145.53,0.069255
2020-12-27 21:00:00,23984.01,-0.006712
2020-12-28 21:00:00,24895.42,0.037296
2020-12-29 21:00:00,24925.16,0.001194


In [5]:
TEST_TimeList = CC100.loc[TEST_START:].index

#### VET Deleted due to late ICO (in Aug 2018)

In [6]:
CC_string = "ADA,ADX,AE,ANT,ARDR,ARK,BAT,BCH,BCN,BNB,BNT,BTC,BTG,BTS,CVC,DASH,DCR,DGB,DGD,DNT,DOGE,EDG,EOS,ETC,ETH,FUN,GAS,GBYTE,GNO,HC,ICX,KIN,KMD,KNC,LINK,LRC,LSK,LTC,MAID,MCO,MIOTA,MLN,MONA,MTL,NANO,NEO,NXS,OMG,PIVX,PPT,QRL,QTUM,REP,RLC,SALT,SC,SNT,STEEM,STORJ,SYS,TRX,VERI,WAVES,WTC,XEM,XLM,XMR,XRP,XVG,ZEC,ZEN,ZRX"
CC_temp_list = CC_string.split(",")
CC_list = [i+'-USD' for i in CC_temp_list]

### Fetching the following data may take some time. Thus, "data_copy" is used to protect "data_all" from being changed.

In [7]:
data_all = pd.DataFrame()
for i in CC_list:
    data = get_data(i,start_date = START,end_date = END).resample('W').last()[:-1]
    data_all = pd.concat([data_all,data])
data_all.index.names = ['date']
data_all

Unnamed: 0_level_0,open,high,low,close,adjclose,volume,ticker
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-07,1.024150,1.043960,0.991391,1.008520,1.008520,226067008.0,ADA-USD
2018-01-14,0.881961,0.881961,0.749514,0.802262,0.802262,160712000.0,ADA-USD
2018-01-21,0.706391,0.706391,0.590793,0.613163,0.613163,847731008.0,ADA-USD
2018-01-28,0.620560,0.681946,0.620560,0.670286,0.670286,325145984.0,ADA-USD
2018-02-04,0.433865,0.480140,0.369823,0.379303,0.379303,814753984.0,ADA-USD
...,...,...,...,...,...,...,...
2020-11-29,0.415439,0.419485,0.405365,0.414702,0.414702,37263535.0,ZRX-USD
2020-12-06,0.407349,0.410595,0.394628,0.400748,0.400748,33594446.0,ZRX-USD
2020-12-13,0.392727,0.403993,0.386825,0.396126,0.396126,31834561.0,ZRX-USD
2020-12-20,0.424108,0.424108,0.390759,0.395337,0.395337,45781167.0,ZRX-USD


### Example:  "date" is the first date of "TEST_TimeList".  Find the top10 liquid CCs.

In [10]:
data_copy = data_all.copy()
date = TEST_TimeList[0]
top10_liquid = data_copy.loc[date].sort_values('volume',ascending= False).iloc[:10]

data_multiIndex = data_copy.copy().reset_index().set_index(['ticker','date'])

price_top10_liquid = pd.DataFrame()
for ticker in top10_liquid['ticker']:
    price_top10_liquid[ticker] = data_multiIndex.loc[ticker][:date]['adjclose']
returns_top10_liquid = np.log(price_top10_liquid/ price_top10_liquid.shift(1)).fillna(axis = 0, method ='bfill').rename(columns=lambda s: s.replace('-USD',''))

returns_top10withMarket =returns_top10_liquid.copy()
returns_top10withMarket['MarketReturn'] = CC100.loc[:date]['MarketReturn']

returns_top10withMarket

KeyError: Timestamp('2020-01-05 21:00:00')

### CAPM Regressoin

In [9]:
regression = pd.DataFrame(columns=['beta', 'rse'])
for ticker in top10_liquid['ticker']:
    ticker = ticker.split('-')[0]
    CAPM_model = smf.ols(formula = ticker+' ~ MarketReturn', data = returns_top10withMarket)
    CAPM_fit = CAPM_model.fit()
    beta = CAPM_fit.params['MarketReturn']
    rse = CAPM_fit.bse['MarketReturn']
    dict = {'beta': beta, 'rse': rse} 
    df = pd.DataFrame(dict, index = [ticker])
    regression = regression.append(df)
regression

NameError: name 'top10_liquid' is not defined

### Calculate the Expected retrun using mean Market Return
## Qusestion: should use the latest market return OR the mean to forecast the next week's return (Expected Return)?

In [None]:
data = returns_top10withMarket.mean()['MarketReturn']*regression['beta']
ExpectedReturns = pd.DataFrame(data)
ExpectedReturns.columns = ['returns']
ExpectedReturns

In [None]:
CovarianceMatrix = returns_top10_liquid.cov()
CovarianceMatrix

In [None]:
ExpectedReturns['returns']

## risk_free_rate must be less than average return

In [None]:
ef1 = EfficientFrontier(ExpectedReturns['returns'], CovarianceMatrix, weight_bounds=(0,1))
weights = ef1.max_sharpe(risk_free_rate= 0.0)
weights = ef1.clean_weights()

weights