In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import datetime 

In [2]:
# Sample window used to truncate the market-index series.
# START and TRAIN_START are two names for the same datetime object.
TRAIN_START = datetime.datetime(year=2018, month=1, day=7)
START = TRAIN_START
END = datetime.datetime(year=2020, month=12, day=31)

In [3]:
# Load the BITW100 index file, keep the [START, END] window and reduce it to
# one observation per week (last value in each 'W' bin).
CC100_Index = (
    pd.read_csv("indexReturns-BITW100.csv")
    # Parse timestamps and strip any timezone so they compare with the naive
    # START / END datetimes.
    .assign(date=lambda frame: pd.to_datetime(frame['date']).dt.tz_localize(None))
    .sort_values('date')
    .set_index('date')
    .truncate(before=START, after=END)
    .resample('W')
    .last()
    # Drop the final weekly bin (presumably an incomplete week past END --
    # TODO confirm).
    .iloc[:-1]
)

# Week-over-week percentage change of the index.
# pct_change() leaves the first row NaN; bfill copies the *second* week's
# return into it, so the first two rows are identical by construction.
# `.bfill()` replaces the deprecated `fillna(method='bfill')` spelling.
CC100_Return = (
    CC100_Index
    .pct_change()
    .bfill(axis=0)
    .rename(columns={"MarketIndex": "MarketReturn"})
)

In [4]:
# Per-ticker weekly observations.
raw_data = pd.read_csv('Data.csv')

# Universe of tickers; the file stores them as e.g. "BTC-USD", so strip the
# quote-currency suffix. regex=False makes the literal match explicit instead
# of relying on the pandas-version-dependent default.
listOfCC = pd.read_csv("WorldOfCC.csv")
listOfCC['List'] = listOfCC['List'].str.replace('-USD', '', regex=False)

In [5]:
# Tickers excluded from both the universe and the raw observations.
error_list = ['VET', 'SNT', 'MONA', 'MCO', 'EDG']

# Keep only the 'List' column for tickers that are not excluded
# (listOfCC becomes a Series from here on).
listOfCC = listOfCC.loc[~listOfCC['List'].isin(error_list), 'List']

# Drop excluded tickers and every row dated 2017-12-31.
rows_to_keep = ~raw_data['Ticker'].isin(error_list) & (raw_data['Date'] != '2017-12-31')
raw_data = raw_data[rows_to_keep]

In [6]:
# Show the ticker universe that remains after the exclusions.
listOfCC

0       BTC
1       ETH
2       XRP
3       ADA
4       LTC
      ...  
67      WTC
68      QRL
69      NXS
70      GAS
71    GBYTE
Name: List, Length: 68, dtype: object

In [7]:
# Two views of the same observations: one keyed ticker-first, one date-first.
tickerData = raw_data.set_index(['Ticker', 'Date'])
dateData = raw_data.set_index(['Date', 'Ticker'])

# Reference list of dates, taken from ADA's rows in raw-file order
# (read before the frames are sorted).
date_list = tickerData.loc['ADA'].index

# Sort both views by their index and replace every missing value with 0.
tickerData = tickerData.sort_index().fillna(0)
dateData = dateData.sort_index().fillna(0)

In [8]:
# Replace the market-return index with date_list so it uses the same date
# labels as the per-ticker data. Rows are matched by position, not by value.
CC100_Return = CC100_Return.set_index(date_list, drop=True)

In [9]:
# Start from the market return and add one column of returns per ticker.
Returns_data = CC100_Return.copy()

for ticker in listOfCC:
    try:
        # Assignment aligns the ticker's returns to Returns_data by date label.
        Returns_data[ticker] = tickerData.loc[ticker]['Returns']
    except KeyError:
        # Ticker is in the universe but has no rows in tickerData: report it
        # and move on. Only KeyError is caught, so interrupts and unrelated
        # failures are no longer silently swallowed.
        print(ticker)


In [10]:
# NVT ratio per (date, ticker): market cap divided by traded volume.
# NOTE(review): missing values were filled with 0 earlier, so a zero Volume
# yields inf (or NaN for 0/0) here -- confirm that is acceptable downstream.
dateData['NVT'] = dateData['Market Cap'].div(dateData['Volume'])

In [11]:
# Empty container for the per-date size (SMB) and value (HML) factors.
factors = pd.DataFrame(data=None, columns=['SMB', 'HML'])

In [12]:
# Build the SMB (size) and HML (NVT-based) factors, one row per date.
#
# The rows are collected in a list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and grew the frame quadratically.
# Rebuilding `factors` from scratch also makes this cell safe to re-run.

# Number of coins in each extreme portfolio (presumably ~10% / ~30% of the
# universe -- TODO confirm the intended cut-offs).
N_SIZE_BUCKET = 7
N_VALUE_BUCKET = 21

factor_dates = []
factor_rows = []
for date in date_list:
    # Cross-section of all tickers on this date.
    day = dateData.loc[date]

    # Size factor: mean return of the smallest caps minus the largest caps.
    by_cap = day.sort_values(by=['Market Cap'], ascending=False)
    smallCap = by_cap['Returns'].iloc[-N_SIZE_BUCKET:].mean()
    largeCap = by_cap['Returns'].iloc[:N_SIZE_BUCKET].mean()
    SMB_factor = smallCap - largeCap

    # Value factor: mean return of the highest-NVT coins minus the lowest-NVT
    # coins. The NVT sort is applied to the cap-sorted frame, as before.
    # NOTE(review): inf/NaN NVT values (zero volume) are ranked like any other
    # value here -- confirm whether they should be excluded.
    by_nvt = by_cap.sort_values(by=['NVT'], ascending=False)
    lowValue = by_nvt['Returns'].iloc[-N_VALUE_BUCKET:].mean()
    highValue = by_nvt['Returns'].iloc[:N_VALUE_BUCKET].mean()
    HML_factor = highValue - lowValue

    factor_dates.append(date)
    factor_rows.append({'SMB': SMB_factor, 'HML': HML_factor})

factors = pd.DataFrame(factor_rows, index=factor_dates, columns=['SMB', 'HML'])

In [13]:
# Show the per-date SMB / HML factor table.
factors

Unnamed: 0,SMB,HML
2018-01-07,0.222946,-0.088459
2018-01-14,-0.038391,-0.139609
2018-01-21,0.027981,-0.000495
2018-01-28,0.053776,0.057487
2018-02-04,-0.103240,-0.017334
...,...,...
2020-11-29,-0.031264,-0.035727
2020-12-06,0.105984,-0.004967
2020-12-13,0.026056,0.007889
2020-12-20,0.029114,-0.029640


In [14]:
# Attach the SMB / HML factor columns to the returns table.
# Any factor columns left over from a previous run of this cell are dropped
# first, so re-running does not create duplicate 'SMB' / 'HML' columns
# (duplicates would silently feed two-column regressors into the regressions).
Returns_data = pd.concat(
    [Returns_data.drop(columns=factors.columns, errors='ignore'), factors],
    axis=1,
)
Returns_data

Unnamed: 0_level_0,MarketReturn,BTC,ETH,XRP,ADA,LTC,BCH,LINK,BNB,XLM,...,SALT,PIVX,MTL,WTC,QRL,NXS,GAS,GBYTE,SMB,HML
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-07,-0.102655,0.128197,0.526801,0.060606,0.260674,0.144450,0.042065,0.930674,1.175538,0.326753,...,0.213386,0.109448,-0.269191,0.319457,0.121094,3.175549,0.543281,-0.061865,0.222946,-0.088459
2018-01-14,-0.102655,-0.093606,0.122160,-0.318367,-0.126867,-0.085687,-0.002716,-0.357135,0.081728,-0.045264,...,-0.290071,0.001686,-0.135854,0.710562,0.198606,-0.417417,0.803099,0.194521,-0.038391,-0.139609
2018-01-21,-0.156306,-0.237330,-0.247640,-0.221557,-0.293108,-0.253908,-0.352142,-0.125447,-0.365736,-0.220644,...,-0.313528,-0.263468,-0.134522,0.178829,-0.427326,-0.271907,-0.483794,-0.196698,0.027981,-0.000495
2018-01-28,-0.117886,0.015108,0.169174,0.015385,0.019128,-0.003557,0.005153,-0.147971,-0.035660,0.169488,...,0.009321,0.072000,-0.102996,0.286054,0.076142,0.026549,0.466709,0.032247,0.053776,0.057487
2018-02-04,-0.202010,-0.254726,-0.286193,-0.363461,-0.333191,-0.158133,-0.294769,-0.369881,-0.329662,-0.324919,...,-0.398417,-0.390192,-0.319415,-0.362602,-0.410377,-0.465517,-0.381267,-0.286873,-0.103240,-0.017334
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-11-29,-0.001030,-0.011999,0.025405,0.363294,0.145443,-0.045136,-0.017509,-0.083276,0.004336,0.873533,...,-0.137197,0.237871,0.072265,0.037594,-0.013079,0.094521,0.152174,0.068232,-0.031264,-0.035727
2020-12-06,0.047348,0.063600,0.047360,0.022176,-0.046622,0.054474,0.003648,0.011261,-0.020591,-0.096875,...,0.168625,0.032339,0.051332,0.043308,0.108680,0.203948,0.081761,-0.041891,0.105984,-0.004967
2020-12-13,-0.013224,-0.009025,-0.019353,-0.174754,-0.030212,-0.016301,-0.038273,-0.040089,-0.014920,-0.003889,...,-0.086348,-0.073543,0.027937,-0.074515,-0.056937,-0.047783,-0.069767,-0.004762,0.026056,0.007889
2020-12-20,0.220847,0.228022,0.083345,0.096629,0.053285,0.400634,0.265419,0.010828,0.176936,0.011191,...,0.249293,-0.008615,0.033906,0.003147,0.241919,0.191103,0.106250,0.124837,0.029114,-0.029640


In [15]:
# Three-factor regression per ticker: ticker ~ MarketReturn + SMB + HML.
#
# Results are collected in lists and assembled once at the end;
# DataFrame.append was removed in pandas 2.0.
regressed_tickers = []
regression_rows = []
for ticker in listOfCC:
    # Q("...") quotes the column name so tickers that are not valid Python
    # identifiers (leading digit, '-', ...) still parse in the formula.
    French_model = smf.ols(
        formula=f'Q("{ticker}") ~ MarketReturn + SMB + HML',
        data=Returns_data,
    )
    French_fit = French_model.fit()

    beta = French_fit.params['MarketReturn']
    # TODO: the 'rse' column currently stores the *adjusted R-squared*, not a
    # residual standard error -- convert back to rse after testing.
    rse = French_fit.rsquared_adj

    regressed_tickers.append(ticker)
    regression_rows.append({'beta': beta, 'rse': rse})

regression = pd.DataFrame(regression_rows, index=regressed_tickers, columns=['beta', 'rse'])


In [16]:
# Largest value in the 'rse' column (which currently holds adjusted R-squared).
regression['rse'].max() 

0.8702478597811818

In [17]:
# Smallest value in the 'rse' column (which currently holds adjusted R-squared).
regression['rse'].min()

0.11608331828196383

In [18]:
# Per-ticker values of the 'rse' column (currently adjusted R-squared).
print(regression['rse'])

BTC      0.870248
ETH      0.609836
XRP      0.432322
ADA      0.577027
LTC      0.686464
           ...   
WTC      0.287603
QRL      0.365265
NXS      0.116083
GAS      0.366245
GBYTE    0.410642
Name: rse, Length: 68, dtype: float64
