In [183]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import datetime 
from statsmodels.iolib.summary2 import summary_col
from stargazer.stargazer import Stargazer
from IPython.core.display import HTML
from prettytable import PrettyTable

# Train Data

In [184]:
START = datetime.datetime(2018,1,1)
END = datetime.datetime(2020,12,31)

Read data for market returns

In [185]:
CC100_Index = pd.read_csv("indexReturns-BITW100.csv")
CC100_Index['date'] = pd.to_datetime(CC100_Index['date']).dt.tz_localize(None)
CC100_Index = CC100_Index.sort_values('date').set_index('date')
CC100_Index = CC100_Index.truncate(before = START,after = END)
CC100_Index = CC100_Index.resample('W').last()
CC100_Return = CC100_Index.pct_change().fillna(axis = 0, method ='bfill').rename(columns = {"MarketIndex":"MarketReturn"})
CC100_Return

Unnamed: 0_level_0,MarketReturn
date,Unnamed: 1_level_1
2018-01-07,-0.102655
2018-01-14,-0.102655
2018-01-21,-0.156306
2018-01-28,-0.117886
2018-02-04,-0.202010
...,...
2020-12-06,0.047348
2020-12-13,-0.013224
2020-12-20,0.220847
2020-12-27,0.033810


Read data for CC used in this project.
The CC have data as from Jan 2018 to present day.
There is a total of 68 CC. 

In [186]:
listOfCC = pd.read_csv("Symbols.csv")
listOfCC['symbol']

0       zrx
1       adx
2        ae
3       ant
4      ardr
      ...  
63      xvg
64      wtc
65    waves
66      zec
67      zen
Name: symbol, Length: 68, dtype: object

Data is resampled for each CC: The daily data is transformed into weekly data. For the market cap and price, the last value of the day in a week is taken, for the market cap, the mean value for the week is taken and for the volume, the sum of all the volume of each day is taken.

In [187]:
ALL_DATA = pd.DataFrame()

for ticker in listOfCC['symbol']:
    
    filename = "CoinGecko_Data/" + ticker + '-usd-max.csv'
    temp = pd.read_csv(filename, parse_dates=['snapped_at'])
    temp = temp.rename(columns = {"snapped_at":"date"})
    temp['date'] = pd.to_datetime(temp['date']).dt.tz_localize(None)
    temp = temp.sort_values('date').set_index('date')
    temp = temp.truncate(before = START,after = END)
    
    temp1 = pd.to_numeric(temp['price'], downcast="float")
    temp2 = pd.to_numeric(temp['market_cap'], downcast="float")
    temp3 = pd.to_numeric(temp['total_volume'], downcast="float")
        
    temp1 = temp1.resample('W').last()
    temp2 = temp2.resample('W').mean()
    temp3 = temp3.resample('W').sum()
    temp4 = pd.DataFrame(index = [temp1.index])
    temp4 = pd.concat([temp1, temp2, temp3], axis=1)
    
    temp4 = temp4.fillna(axis = 0, method ='bfill')
    temp4['Returns'] = temp4['price'].pct_change()
    temp4['Ticker'] = ticker
    ALL_DATA = pd.concat([ALL_DATA,temp4])

In [188]:
ALL_DATA = ALL_DATA.fillna(0)
ALL_DATA

Unnamed: 0_level_0,price,market_cap,total_volume,Returns,Ticker
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-07,1.429257,5.478657e+08,200383264.0,0.000000,zrx
2018-01-14,2.090971,1.082455e+09,505867104.0,0.462978,zrx
2018-01-21,1.742747,8.778975e+08,215137936.0,-0.166537,zrx
2018-01-28,1.979192,9.018653e+08,225045584.0,0.135673,zrx
2018-02-04,1.376865,8.179149e+08,142141824.0,-0.304330,zrx
...,...,...,...,...,...
2020-12-06,12.489144,1.420835e+08,112744968.0,-0.147160,zen
2020-12-13,10.172314,1.121735e+08,65833872.0,-0.185508,zen
2020-12-20,12.075764,1.237890e+08,86296248.0,0.187121,zen
2020-12-27,11.915156,1.114705e+08,80495400.0,-0.013300,zen


In [189]:
ALL_DATA.reset_index(drop=False, inplace=True)
tickerData = ALL_DATA.set_index(['Ticker','date'])
dateData = ALL_DATA.set_index(['date', 'Ticker'])
tickerData.sort_index(inplace=True)
dateData.sort_index(inplace=True)

In [190]:
tickerData

Unnamed: 0_level_0,Unnamed: 1_level_0,price,market_cap,total_volume,Returns
Ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ada,2018-01-07,1.052854,2.605216e+10,2.750787e+09,0.000000
ada,2018-01-14,0.841890,2.257185e+10,1.857563e+09,-0.200374
ada,2018-01-21,0.629033,1.781683e+10,1.034086e+09,-0.252832
ada,2018-01-28,0.630576,1.587981e+10,5.482682e+08,0.002453
ada,2018-02-04,0.448043,1.274673e+10,6.180009e+08,-0.289471
...,...,...,...,...,...
zrx,2020-12-06,0.408543,3.085792e+08,3.243042e+08,-0.017129
zrx,2020-12-13,0.392484,2.005443e+08,2.680847e+08,-0.039307
zrx,2020-12-20,0.423716,2.881971e+08,3.250699e+08,0.079574
zrx,2020-12-27,0.353898,2.756317e+08,4.010995e+08,-0.164775


In [191]:
date_list = tickerData.loc['btc'].index
CC100_Return.set_index(date_list, drop=True, inplace=True)

Returns_data = CC100_Return.copy()

for ticker in listOfCC:
    try:
        Returns_data[ticker] = tickerData.loc[ticker][:]['Returns']
    except:
        print(ticker)

id
symbol


In [192]:
# Calculate NVT for each date
# NVT = Market Cap/ Volume
dateData['NVT'] = dateData['market_cap'] / dateData['total_volume']
dateData

Unnamed: 0_level_0,Unnamed: 1_level_0,price,market_cap,total_volume,Returns,NVT
date,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-07,ada,1.052854,2.605216e+10,2.750787e+09,0.000000,9.470802
2018-01-07,adx,3.287920,1.825394e+08,1.972941e+08,0.000000,0.925215
2018-01-07,ae,2.737842,4.608177e+08,2.592159e+07,0.000000,17.777370
2018-01-07,ant,8.023694,1.965826e+08,2.687493e+07,0.000000,7.314721
2018-01-07,ardr,1.817910,1.751584e+09,1.332934e+08,0.000000,13.140822
...,...,...,...,...,...,...
2021-01-03,xrp,0.212070,1.097121e+10,3.017823e+10,-0.282052,0.363547
2021-01-03,xvg,0.007829,1.283332e+08,1.938374e+07,0.097947,6.620659
2021-01-03,zec,63.972191,7.189166e+08,2.031491e+09,-0.037638,0.353886
2021-01-03,zen,10.836555,1.251109e+08,5.605896e+07,-0.090523,2.231774


In [194]:
factors = pd.DataFrame(columns=['SMB', 'NVT'])

for date in date_list:
    tempData = (dateData.loc[date]).copy()
    
    m1 = tempData['market_cap'].quantile(0.1)
    m2 = tempData['market_cap'].quantile(0.9)
    
    n1 = tempData['NVT'].quantile(0.1)
    n2 = tempData['NVT'].quantile(0.9)
    
    SMB_factor = tempData.Returns.loc[(tempData['market_cap'] <= m1)].mean()
    - tempData.Returns.loc[(tempData['market_cap'] >= m2)].mean()
    
    NVT_factor = tempData.Returns.loc[(tempData['NVT'] >= n2)].mean()
    - tempData.Returns.loc[(tempData['NVT'] <= n1)].mean()
    
    df = pd.DataFrame({'SMB': SMB_factor, 'NVT' : NVT_factor}, index = [date])
    factors = factors.append(df)


In [196]:
factors = pd.concat([factors, CC100_Return], axis=1)

In [197]:
def calc_vif(X):
    from statsmodels.stats.outliers_influence import variance_inflation_factor

    # Calculating VIF
    vif = pd.DataFrame()
    vif["variables"] = X.columns
    vif["VIF"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]

    return(vif)

In [198]:
x2 = PrettyTable(["","SMB", "NVT","Market"])
x2.padding_width = 1
y2 = round(calc_vif(factors)['VIF'], 4).tolist()
y2 = ["VIF"] + y2
x2.add_row(y2)

print(x2)

+-----+--------+--------+--------+
|     |  SMB   |  NVT   | Market |
+-----+--------+--------+--------+
| VIF | 3.1221 | 4.8746 | 3.1681 |
+-----+--------+--------+--------+


Calculate returns of each portfolio

In [199]:
columns_name = ['SMB1', 'SMB2', 'SMB3', 'SMB4', 'SMB5', 'NVT1', 'NVT2', 'NVT3', 'NVT4', 'NVT5']
portfolio_returns = pd.DataFrame(columns= columns_name)

In [200]:
for date in date_list:
    tempData = (dateData.loc[date]).copy()
    
    m1 = tempData['market_cap'].quantile(0.2)
    m2 = tempData['market_cap'].quantile(0.4)
    m3 = tempData['market_cap'].quantile(0.6)
    m4 = tempData['market_cap'].quantile(0.8)
    
    n1 = tempData['NVT'].quantile(0.2)
    n2 = tempData['NVT'].quantile(0.4)
    n3 = tempData['NVT'].quantile(0.6)
    n4 = tempData['NVT'].quantile(0.8)
    
    SMB1 = tempData.Returns.loc[(tempData['market_cap'] <= m1)].mean()
    SMB2 = tempData.Returns.loc[(tempData['market_cap'] <= m2) & (tempData['market_cap'] > m1)].mean()
    SMB3 = tempData.Returns.loc[(tempData['market_cap'] <= m3) & (tempData['market_cap'] > m2)].mean()
    SMB4 = tempData.Returns.loc[(tempData['market_cap'] <= m4) & (tempData['market_cap'] > m3)].mean()
    SMB5 = tempData.Returns.loc[(tempData['market_cap'] > m4)].mean()
    
    NVT1 = tempData.Returns.loc[(tempData['NVT'] <= n1)].mean()
    NVT2 = tempData.Returns.loc[(tempData['NVT'] <= n2) & (tempData['NVT'] > n1)].mean()
    NVT3 = tempData.Returns.loc[(tempData['NVT'] <= n3) & (tempData['NVT'] > n2)].mean()
    NVT4 = tempData.Returns.loc[(tempData['NVT'] <= n4) & (tempData['NVT'] > n3)].mean()
    NVT5 = tempData.Returns.loc[(tempData['NVT'] > n4)].mean()
    
    df = pd.DataFrame({'SMB1':SMB1, 'SMB2':SMB2, 'SMB3':SMB3, 'SMB4':SMB4, 'SMB5':SMB5,
                       'NVT1':NVT1, 'NVT2':NVT2, 'NVT3':NVT3,'NVT4':NVT4, 'NVT5':NVT5},
                       index = [date])
    portfolio_returns = portfolio_returns.append(df)

In [201]:
portfolio_returns

Unnamed: 0,SMB1,SMB2,SMB3,SMB4,SMB5,NVT1,NVT2,NVT3,NVT4,NVT5
2018-01-07,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2018-01-14,-0.047185,-0.003107,-0.075791,-0.112003,-0.044925,-0.003358,-0.054365,0.033228,-0.122742,-0.140202
2018-01-21,-0.249425,-0.232047,-0.248672,-0.270017,-0.223426,-0.217415,-0.279271,-0.236500,-0.259926,-0.233127
2018-01-28,0.032719,0.135872,0.106964,0.066650,0.014568,0.043296,0.015173,0.119768,0.029561,0.137704
2018-02-04,-0.306758,-0.271310,-0.200478,-0.267003,-0.275624,-0.210466,-0.286925,-0.282367,-0.265925,-0.276528
...,...,...,...,...,...,...,...,...,...,...
2020-12-06,0.091747,0.087508,0.058307,0.032436,0.051728,0.095126,0.046269,0.010428,0.063384,0.105783
2020-12-13,-0.052792,-0.102789,-0.071850,-0.078546,-0.054445,-0.071304,-0.099832,-0.072238,-0.054850,-0.060294
2020-12-20,0.161581,0.088575,0.149523,0.122617,0.170883,0.215877,0.109294,0.076045,0.148537,0.146757
2020-12-27,0.004933,-0.165198,-0.087038,-0.068230,-0.090599,-0.027529,-0.135722,-0.088807,-0.107666,-0.047119


In [202]:
portfolio_returns = pd.concat([portfolio_returns, factors], axis=1)

Mean returns of each portfolio

In [203]:
x2 = PrettyTable(["","Smallest", "2","3", "4","Largest"])
x2.padding_width = 1
y2 = round(portfolio_returns.loc[:,'SMB1': 'SMB5'].mean(), 4).tolist()
y2 = ["Mean"] + y2
x2.add_row(y2)

print(x2)

+------+----------+---------+--------+--------+---------+
|      | Smallest |    2    |   3    |   4    | Largest |
+------+----------+---------+--------+--------+---------+
| Mean | -0.0006  | -0.0029 | 0.0005 | 0.0005 |  0.0026 |
+------+----------+---------+--------+--------+---------+


In [204]:
x1 = PrettyTable(["","Lowest", "2","3", "4","Highest"])
x1.padding_width = 1
y1 = round(portfolio_returns.loc[:,'NVT1': 'NVT5'].mean(), 4).tolist()
y1 = ["Mean"] + y1
x1.add_row(y1)
print(x1)

+------+--------+--------+---------+-------+---------+
|      | Lowest |   2    |    3    |   4   | Highest |
+------+--------+--------+---------+-------+---------+
| Mean | 0.0207 | 0.0021 | -0.0002 | -0.01 |  -0.013 |
+------+--------+--------+---------+-------+---------+


## CAPM

In [205]:
trial_result = []
for ticker in columns_name:
    French_model = smf.ols(formula = ticker+' ~ MarketReturn', data = portfolio_returns)
    French_fit = French_model.fit()
    trial_result.append(French_fit)

In [206]:
stargazer = Stargazer(trial_result[0:5])
stargazer.show_model_numbers(False)
stargazer.custom_columns(["Smallest", "2","3", "4","Largest"], [1,1,1,1,1])
stargazer.rename_covariates({'MarketReturn': 'Market Return'})
HTML(stargazer.render_html())

0,1,2,3,4,5
,,,,,
,,,,,
,Smallest,2,3,4,Largest
,,,,,
Intercept,-0.004,-0.006,-0.003,-0.003,-0.001
,(0.006),(0.007),(0.006),(0.006),(0.004)
Market Return,0.942***,0.918***,0.961***,0.958***,1.027***
,(0.060),(0.063),(0.058),(0.056),(0.042)
Observations,157,157,157,157,157
R2,0.613,0.579,0.638,0.657,0.792


In [207]:
stargazer = Stargazer(trial_result[5:10])
stargazer.show_model_numbers(False)
stargazer.custom_columns(["Lowest", "2","3", "4","Highest"], [1,1,1,1,1])
stargazer.rename_covariates({'MarketReturn': 'Market Return'})
HTML(stargazer.render_html())

0,1,2,3,4,5
,,,,,
,,,,,
,Lowest,2,3,4,Highest
,,,,,
Intercept,0.017**,-0.001,-0.004,-0.013**,-0.016***
,(0.007),(0.006),(0.006),(0.005),(0.005)
Market Return,1.054***,0.972***,0.957***,0.904***,0.919***
,(0.064),(0.054),(0.055),(0.051),(0.049)
Observations,157,157,157,157,157
R2,0.633,0.676,0.658,0.669,0.691


## Fama-French 3 Factor

In [208]:
trial_result = []
for ticker in columns_name:
    French_model = smf.ols(formula = ticker+' ~ MarketReturn + SMB + NVT', data = portfolio_returns)
    French_fit = French_model.fit()
    trial_result.append(French_fit)

In [209]:
stargazer = Stargazer(trial_result[0:5])
stargazer.show_model_numbers(False)
stargazer.custom_columns(["Smallest", "2","3", "4","Largest"], [1,1,1,1,1])
stargazer.rename_covariates({'MarketReturn': 'Market Return'})
HTML(stargazer.render_html())

0,1,2,3,4,5
,,,,,
,,,,,
,Smallest,2,3,4,Largest
,,,,,
Intercept,0.001,0.003,0.005,0.004,0.005
,(0.003),(0.005),(0.005),(0.005),(0.004)
Market Return,0.106**,0.098,0.253***,0.353***,0.572***
,(0.044),(0.081),(0.080),(0.082),(0.063)
NVT,0.304***,0.733***,0.645***,0.548***,0.449***
,(0.051),(0.093),(0.091),(0.094),(0.072)


In [210]:
stargazer = Stargazer(trial_result[5:10])
stargazer.show_model_numbers(False)
stargazer.custom_columns(["Lowest", "2","3", "4","Highest"], [1,1,1,1,1])
stargazer.rename_covariates({'MarketReturn': 'Market Return'})
HTML(stargazer.render_html())

0,1,2,3,4,5
,,,,,
,,,,,
,Lowest,2,3,4,Highest
,,,,,
Intercept,0.021***,0.006,0.004,-0.006,-0.006***
,(0.006),(0.004),(0.004),(0.004),(0.002)
Market Return,0.535***,0.300***,0.255***,0.226***,0.071**
,(0.102),(0.072),(0.072),(0.063),(0.033)
NVT,0.282**,0.542***,0.553***,0.537***,0.750***
,(0.117),(0.082),(0.082),(0.072),(0.038)


In [234]:
out_of_sample_returns = 0

for i in range(0,156): 
    # long SMB1 and short SMB5
    
    
    tempData_currentWeek = (dateData.loc[date_list[i]]).copy()
    tempData_nextWeek = (dateData.loc[date_list[i+1]]).copy()
    
    n1 = tempData_currentWeek['market_cap'].quantile(0.2)
    n2 = tempData_currentWeek['market_cap'].quantile(0.8)
    
    SMB1 = (tempData_currentWeek.loc[(tempData_currentWeek['market_cap'] <= n1)]).index.to_list() 
    SMB5 = (tempData_currentWeek.loc[(tempData_currentWeek['market_cap'] >= n2)]).index.to_list() 
    
    out_of_sample_returns += tempData_nextWeek.Returns.loc[SMB1].mean() - tempData_nextWeek.Returns.loc[SMB5].mean()
    df = pd.DataFrame({'Large': tempData_nextWeek.Returns.loc[SMB5].mean(), 'Small' : tempData_nextWeek.Returns.loc[SMB1].mean()}, index = [i])
    X = X.append(df)
out_of_sample_returns /= 156 
print(out_of_sample_returns)


0.006499246216009898


In [239]:
out_of_sample_returns = 0
for i in range(0,156): 
    # long NVT1 and short NVT5
    
    
    tempData_currentWeek = (dateData.loc[date_list[i]]).copy()
    tempData_nextWeek = (dateData.loc[date_list[i+1]]).copy()
    n1 = tempData_currentWeek['NVT'].quantile(0.2)
    n2 = tempData_currentWeek['NVT'].quantile(0.8)
    
    NVT1 = (tempData_currentWeek.loc[(tempData_currentWeek['NVT'] <= n1)]).index.to_list() 
    NVT5 = (tempData_currentWeek.loc[(tempData_currentWeek['NVT'] >= n2)]).index.to_list() 
    
    out_of_sample_returns += tempData_nextWeek.Returns.loc[NVT5].mean() - tempData_nextWeek.Returns.loc[NVT1].mean()
out_of_sample_returns /= 156
print(out_of_sample_returns)

0.007350698960064433


In [237]:
out_of_sample_returns = 0
for i in range(0,156):     
    
    tempData_currentWeek = (dateData.loc[date_list[i]]).copy()
    tempData_nextWeek = (dateData.loc[date_list[i+1]]).copy()
    
    m1 = tempData_currentWeek['market_cap'].quantile(0.2)
    m2 = tempData_currentWeek['market_cap'].quantile(0.8)
    n1 = tempData_currentWeek['NVT'].quantile(0.2)
    n2 = tempData_currentWeek['NVT'].quantile(0.8)
    
    NVT1 = (tempData_currentWeek.Returns.loc[(tempData_currentWeek['NVT'] <= n1)]).index.to_list()
    NVT5 = (tempData_currentWeek.Returns.loc[(tempData_currentWeek['NVT'] >= n2)]).index.to_list()
    
    SMB1 = (tempData_currentWeek.Returns.loc[(tempData_currentWeek['market_cap'] <= m1)]).index.to_list()
    SMB5= (tempData_currentWeek.Returns.loc[(tempData_currentWeek['market_cap'] >= m2)]).index.to_list()
    
    investment_list1 = set(NVT5).intersection(SMB1)
    investment_list2 = set(NVT1).intersection(SMB5)
    
    out_of_sample_returns += tempData_nextWeek.Returns.loc[investment_list1].mean() - tempData_nextWeek.Returns.loc[investment_list2].mean()
    
out_of_sample_returns /= 156
print(out_of_sample_returns)

0.009574256367975464
