# Three Factor Model

### Data Processing

In [1]:
import pandas as pd
from statsmodels.formula.api import ols as sm_ols
import numpy as np
import seaborn as sns
from statsmodels.iolib.summary2 import summary_col
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

In [2]:
market_return = pd.read_csv('input_data/cci30_OHLCV.csv')
market_return['ret'] = market_return['ret'] * 100
market_return = market_return[['Date', 'ret']]

risk_free_rate = pd.read_csv('input_data/risk_free_rate.csv')
risk_free_rate = risk_free_rate[['Date', '1 mo']]

crypto_return = pd.read_csv('input_data/market_crypto_return.csv')
crypto_return = crypto_return.drop(['sp500_rtn', 'DJ30_rtn'],axis=1)

crypto_volume = pd.read_csv('input_data/cryptos_volume.csv')

In [3]:
crypto_volume_date = crypto_volume['Date']
crypto_volume = crypto_volume.iloc[:,1:]

crypto_volume=(crypto_volume-crypto_volume.mean())/crypto_volume.std()

crypto_volume = pd.merge(crypto_volume_date, crypto_volume,
        left_index = True, right_index = True)

In [4]:
market_excess_rtn = pd.merge(market_return, risk_free_rate,
        how = 'inner',
         on = 'Date',
         validate = '1:1'
        )
market_excess_rtn['mkt_premi'] = market_excess_rtn['ret'] - market_excess_rtn['1 mo']
market_excess_rtn['mkt_premi'] = market_excess_rtn['mkt_premi']/100
market_excess_rtn = market_excess_rtn[['Date','mkt_premi']]

In [5]:
crypto_return['max_rtn']=crypto_return[crypto_return.columns[1:]].max(axis=1)
crypto_return['min_rtn']=crypto_return[crypto_return.columns[1:]].min(axis=1)
crypto_return['lmh'] = crypto_return['min_rtn']- crypto_return['max_rtn']

In [6]:
return_df = pd.merge(market_excess_rtn, crypto_return,
         how = "inner",
         on = "Date",
         validate = '1:1'
        )

In [7]:
three_factor_model = pd.merge(return_df, crypto_volume,
         how = "inner",
         on = "Date"
        )

### Regression

In [8]:
training = three_factor_model.iloc[:395,:]
testing = three_factor_model.iloc[396:,:]

In [9]:
training

Unnamed: 0,Date,mkt_premi,BTC_rtn,ETH_rtn,BNB_rtn,XRP_rtn,USDT_rtn,DOGE_rtn,ADA_rtn,BCH_rtn,...,BTC,ETH,BNB,XRP,USDT,DOGE,ADA,BCH,LTC,LINK
0,2019-03-01,,,,,,,,,,...,-1.135892,-1.086839,-0.411767,-0.574669,-1.074814,-0.213440,-0.467255,-1.106220,-1.148750,-0.167969
1,2019-03-04,-0.057928,-0.022255,-0.033840,0.000697,-0.023743,0.001383,-0.008504,-0.040414,-0.048905,...,-1.072226,-1.061317,-0.428722,-0.596357,-1.033575,-0.213973,-0.464777,-1.078129,-1.121298,-0.167962
2,2019-03-05,0.039832,0.035841,0.078641,0.196261,0.037276,-0.000458,0.001514,0.064283,0.059032,...,-1.018938,-0.984694,-0.340654,-0.567611,-0.987920,-0.213272,-0.463665,-1.082239,-0.815926,-0.167497
3,2019-03-06,-0.020879,0.001942,0.007016,0.051114,0.001656,-0.000565,-0.001008,-0.008539,0.005729,...,-1.065425,-1.002496,-0.368105,-0.575150,-1.016404,-0.213808,-0.466251,-1.089220,-0.816511,-0.167901
4,2019-03-07,-0.018570,0.001932,-0.005439,0.043253,-0.007144,-0.002086,0.004034,0.002357,-0.008343,...,-1.046396,-1.027796,-0.340956,-0.582175,-1.010652,-0.213650,-0.465357,-1.089196,-0.650279,-0.167372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2020-09-21,-0.069601,0.007283,0.007950,0.035794,0.005510,-0.001813,0.009886,0.016487,0.030530,...,-0.393054,-0.177871,-0.279201,-0.519453,-0.201616,-0.208802,-0.330115,-0.792014,-1.187779,-0.012157
391,2020-09-22,0.015813,-0.029662,-0.069426,-0.050955,-0.050382,0.001206,-0.034639,-0.060229,-0.053942,...,-0.422610,-0.247291,-0.299408,-0.521158,-0.247605,-0.210499,-0.340085,-0.805256,-1.086002,0.055148
392,2020-09-23,-0.046934,0.050821,0.089746,0.076482,0.051255,-0.000270,0.031201,0.078941,0.040971,...,-0.421177,-0.104154,-0.282258,-0.458095,-0.233640,-0.210244,-0.304630,-0.797509,-1.167125,0.187923
393,2020-09-24,0.064595,-0.004026,0.008030,0.002288,0.037710,0.000262,0.023449,0.169057,-0.000158,...,-0.504254,-0.226305,-0.294267,-0.442986,-0.287554,-0.208002,-0.176254,-0.818637,-1.155152,0.170286


In [10]:
testing

Unnamed: 0,Date,mkt_premi,BTC_rtn,ETH_rtn,BNB_rtn,XRP_rtn,USDT_rtn,DOGE_rtn,ADA_rtn,BCH_rtn,...,BTC,ETH,BNB,XRP,USDT,DOGE,ADA,BCH,LTC,LINK
396,2020-09-29,0.014066,-0.005642,0.000164,0.016572,-0.003405,0.001374,-0.014547,0.000099,-0.004951,...,-0.526265,-0.477496,-0.244253,-0.490000,-0.453349,-0.209922,-0.294290,-0.763453,-1.176111,0.032409
397,2020-09-30,0.000091,-0.015229,-0.018863,-0.063422,-0.016301,-0.002600,-0.010220,-0.032310,0.000250,...,-0.227529,-0.215531,-0.223563,-0.463350,-0.259847,-0.208931,-0.247579,-0.653283,-1.002578,0.043582
398,2020-10-01,-0.017446,-0.003593,-0.018964,-0.005452,-0.017448,0.001991,-0.011855,-0.050726,-0.034930,...,-0.416043,-0.156238,-0.212818,-0.430432,-0.242073,-0.206046,-0.201224,-0.605293,-0.895624,0.066130
399,2020-10-02,-0.022977,-0.001858,0.002277,0.034804,-0.002976,0.000607,0.008127,0.008298,-0.002026,...,-0.696871,-0.596383,-0.273914,-0.553829,-0.600168,-0.210860,-0.322375,-0.877106,-1.164442,-0.046864
400,2020-10-05,0.004963,-0.016877,-0.037417,-0.031110,-0.020072,0.000415,-0.006471,-0.048911,-0.008733,...,-0.456204,-0.302902,-0.273343,-0.185554,-0.246701,-0.209986,-0.119481,-0.612099,-0.856804,-0.003543
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
536,2021-04-26,0.117013,0.018721,0.050655,0.063401,0.023711,0.000020,0.005594,0.059082,0.045196,...,0.808966,1.801120,1.934776,2.080386,1.513576,0.723168,0.874082,0.316541,0.235260,0.092168
537,2021-04-27,0.036161,-0.003787,0.031363,-0.010270,-0.030232,-0.000010,0.189185,0.021990,0.024760,...,0.741589,2.002940,1.694962,1.188200,1.687342,3.574486,0.872269,0.419195,0.085971,0.071294
538,2021-04-28,0.014152,-0.023157,0.003822,0.065894,0.030001,-0.000086,-0.057195,-0.020703,-0.032072,...,0.652617,1.831717,3.447395,0.848959,1.317628,1.099912,1.330779,-0.099708,-0.188251,0.054998
539,2021-04-29,-0.011204,0.078332,0.005923,0.040643,0.137430,0.000043,0.106145,0.033971,0.128040,...,0.946158,1.548089,2.943490,2.199599,1.466091,1.168886,0.741225,0.398868,-0.049883,0.072565


In [11]:
reg1 = sm_ols('BTC_rtn ~  mkt_premi + BTC + lmh ', data = training).fit()
reg2 = sm_ols('ETH_rtn ~  mkt_premi + ETH + lmh', data = training).fit()
reg3 = sm_ols('BNB_rtn ~  mkt_premi + BNB + lmh', data = training).fit()
reg4 = sm_ols('XRP_rtn ~  mkt_premi + XRP + lmh', data = training).fit()
reg5 = sm_ols('USDT_rtn ~  mkt_premi + USDT + lmh', data = training).fit()
reg6 = sm_ols('DOGE_rtn ~  mkt_premi + DOGE + lmh', data = training).fit()
reg7 = sm_ols('ADA_rtn ~  mkt_premi + ADA + lmh', data = training).fit()
reg8 = sm_ols('BCH_rtn ~  mkt_premi + BCH + lmh', data = training).fit()
reg9 = sm_ols('LTC_rtn ~  mkt_premi + LTC + lmh', data = training).fit()
reg10 = sm_ols('LINK_rtn ~  mkt_premi + LINK + lmh', data = training).fit()

In [12]:
# now I'll format an output table
# I'd like to include extra info in the table (not just coefficients)
info_dict={'No. observations' : lambda x: f"{int(x.nobs):d}"}

# q4b1 and q4b2 name the dummies differently in the table, so this is a silly fix
#reg4.model.exog_names[1:] = reg5.model.exog_names[1:]  

# This summary col function combines a bunch of regressions into one nice table
print('='*100)
print('                  y = return of crypto')
print(summary_col(results=[reg1,reg2,reg3,reg4,reg5], # list the result obj here
                  float_format='%0.6f',
                  stars = True, # stars are easy way to see if anything is statistically significant
                  model_names=['BTC', 'ETH', 'BNB', 'XRP', 'USDT'],
                  info_dict=info_dict,
                  regressor_order=['Intercept', 'mkt_premi', 'lmh','BTC', 'ETH', 'BNB', 'XRP', 'USDT']
                  )
     )

                  y = return of crypto

                     BTC         ETH         BNB         XRP         USDT    
-----------------------------------------------------------------------------
Intercept        0.003458    0.005335    0.036983*** 0.004468    -0.001423** 
                 (0.003382)  (0.004155)  (0.010757)  (0.005807)  (0.000564)  
mkt_premi        0.393533*** 0.501961*** 0.426201*** 0.365891*** -0.045865***
                 (0.043801)  (0.051683)  (0.051819)  (0.046369)  (0.006597)  
lmh              -0.033688   -0.038121   0.014712    -0.043983   -0.010817***
                 (0.026932)  (0.032107)  (0.033136)  (0.029352)  (0.004058)  
BTC              -0.000562                                                   
                 (0.003685)                                                  
ETH                          0.000304                                        
                             (0.004328)                                      
BNB                     

In [13]:
# now I'll format an output table
# I'd like to include extra info in the table (not just coefficients)
info_dict={'No. observations' : lambda x: f"{int(x.nobs):d}"}

# q4b1 and q4b2 name the dummies differently in the table, so this is a silly fix
#reg4.model.exog_names[1:] = reg5.model.exog_names[1:]  

# This summary col function combines a bunch of regressions into one nice table
print('='*100)
print('                  y = return of crypto')
print(summary_col(results = [reg6,reg7,reg8,reg9,reg10], # list the result obj here
                  float_format='%0.6f',
                  stars = True, # stars are easy way to see if anything is statistically significant
                  model_names=['DOGE', 'ADA', 'BCH', 'LTC', 'LINK'],
                  info_dict=info_dict,
                  regressor_order=['Intercept', 'mkt_premi', 'lmh','DOGE', 'ADA', 'BCH', 'LTC', 'LINK']
                  )
     )

                  y = return of crypto

                     DOGE         ADA         BCH          LTC         LINK    
-------------------------------------------------------------------------------
Intercept        0.115516***  0.009144    -0.002778    0.002381    -0.018986***
                 (0.019824)   (0.016534)  (0.004447)   (0.004394)  (0.007209)  
mkt_premi        0.280015***  0.474342*** 0.591985***  0.482093*** 0.407971*** 
                 (0.051748)   (0.062349)  (0.061014)   (0.053080)  (0.079247)  
lmh              -0.158907*** -0.063747   -0.131852*** -0.071832** -0.403503***
                 (0.032258)   (0.039640)  (0.037960)   (0.034017)  (0.049493)  
DOGE             0.642887***                                                   
                 (0.098328)                                                    
ADA                           0.014030                                         
                              (0.037958)                                       


### Machine Learning

In [14]:
print("R2 score of BTC is ", r2_score(testing['BTC_rtn'], reg1.predict(testing[['mkt_premi', 'BTC', 'lmh']])))
print("R2 score of ETH is ", r2_score(testing['ETH_rtn'], reg2.predict(testing[['mkt_premi', 'ETH', 'lmh']])))
print("R2 score of BNB is ", r2_score(testing['BNB_rtn'], reg3.predict(testing[['mkt_premi', 'BNB', 'lmh']])))
print("R2 score of XRP is ", r2_score(testing['XRP_rtn'], reg4.predict(testing[['mkt_premi', 'XRP', 'lmh']])))
print("R2 score of USDT is ", r2_score(testing['USDT_rtn'], reg5.predict(testing[['mkt_premi', 'USDT', 'lmh']])))
print("R2 score of DOGE is ", r2_score(testing['DOGE_rtn'], reg6.predict(testing[['mkt_premi', 'DOGE', 'lmh']])))
print("R2 score of ADA is ", r2_score(testing['ADA_rtn'], reg7.predict(testing[['mkt_premi', 'ADA', 'lmh']])))
print("R2 score of BCH is ", r2_score(testing['BCH_rtn'], reg8.predict(testing[['mkt_premi', 'BCH', 'lmh']])))
print("R2 score of LTC is ", r2_score(testing['LTC_rtn'], reg9.predict(testing[['mkt_premi', 'LTC', 'lmh']])))
print("R2 score of LINK is ", r2_score(testing['LINK_rtn'], reg10.predict(testing[['mkt_premi', 'LINK', 'lmh']])))

R2 score of BTC is  0.4006199703784705
R2 score of ETH is  0.41750930227655114
R2 score of BNB is  -2.543347953281119
R2 score of XRP is  0.09849557070126214
R2 score of USDT is  -10.196437492805122
R2 score of DOGE is  -23.215169427941472
R2 score of ADA is  0.10877411725646291
R2 score of BCH is  0.10614256122909482
R2 score of LTC is  0.20740004540025914
R2 score of LINK is  -3.179304113335408
