In [None]:
import matplotlib.pyplot as mpl
import scipy.cluster.hierarchy as sch
import random, numpy as np, pandas as pd
from scipy.optimize import minimize
import scipy.cluster
import math
import statistics

In [None]:
#Equally Weighted Portfolio
def ewPortfolio(cov,**kargs):
    """Return equal weights (1/N each) for the N assets described by ``cov``.

    Only ``len(cov)`` is used; extra keyword arguments are accepted and ignored
    so the function shares a call signature with the other allocators.
    """
    asset_count = len(cov)
    equal_share = 1/asset_count
    return [equal_share]*asset_count

In [None]:
#Risk Parity
def calculate_risk_contribution(w,V):
    """Return each asset's contribution to portfolio volatility.

    ``w`` is converted with ``np.matrix`` (a 1-D input becomes a row matrix),
    the marginal contribution is ``V @ w.T`` and the result is the element-wise
    product of that with ``w.T``, divided by the portfolio volatility.
    """
    weights_row = np.matrix(w)
    port_sigma = np.sqrt(calculate_portfolio_var(weights_row,V))
    weights_col = weights_row.T
    # Marginal risk contribution of every asset
    marginal = np.dot(V,weights_col)
    # Scale by the weight itself and normalise by portfolio volatility
    return np.multiply(marginal,weights_col)/port_sigma

def risk_budget_objective(x,pars):
    """Sum of squared gaps between actual and targeted risk contributions.

    ``pars[0]`` is the covariance table and ``pars[1]`` the risk budget, i.e.
    the share of portfolio volatility each asset should carry.
    """
    cov_matrix, budget = pars[0], pars[1]
    port_sigma = np.sqrt(calculate_portfolio_var(x,cov_matrix))
    # Target contribution per asset = budget share * portfolio volatility
    target_rc = np.asmatrix(np.multiply(port_sigma,budget))
    actual_rc = calculate_risk_contribution(x,cov_matrix)
    squared_gap = np.square(actual_rc-target_rc.T)
    # Built-in sum on purpose: it adds the rows of the column matrix
    return sum(squared_gap)

def risk_parity(cov,**kargs):
    """Equal-risk-contribution weights found with SLSQP.

    Starts from equal weights, uses an equal risk budget for every asset and
    constrains the weights to sum to one and to be non-negative.
    Returns ``res.x`` from ``scipy.optimize.minimize``.
    """
    n_assets = len(cov)
    equal_share = 1/n_assets
    x_t = np.array([equal_share]*n_assets)  # risk budget: equal share of total risk
    w0 = np.array([equal_share]*n_assets)   # starting point of the optimiser
    cons = (
        {'type': 'eq', 'fun': total_weight_constraint},
        {'type': 'ineq', 'fun': long_only_constraint},
    )
    res = minimize(
        risk_budget_objective,
        w0,
        args=[cov,x_t],
        tol=1e-15,
        method='SLSQP',
        constraints=cons,
    )
    return res.x

In [None]:
#Global minimum variance
def GMVPortfolio(cov,**kargs):
    """Unconstrained global minimum-variance weights.

    Computes ``inv(cov) @ 1 / (1' @ inv(cov) @ 1)``. Weights sum to one and may
    be negative (short positions).

    Parameters
    ----------
    cov : square array-like (ndarray or DataFrame)
        Covariance matrix.
    **kargs : ignored, accepted for a uniform allocator signature.

    Returns
    -------
    numpy.ndarray of length ``len(cov)``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``cov`` is singular.
    """
    ones = np.ones(len(cov))
    # Solve cov @ z = 1 once instead of building the explicit inverse twice.
    unscaled = np.linalg.solve(np.asarray(cov, dtype=float), ones)
    return unscaled/np.dot(ones, unscaled)

In [None]:
#Global minimum variance - long only
def GMVLOPortfolio(cov,**kargs):
    """Long-only global minimum-variance weights found with SLSQP.

    Minimises ``calculate_portfolio_var`` from an equal-weight start, subject to
    the weights summing to one and being non-negative. Returns ``res.x``.
    """
    n_assets = len(cov)
    start = pd.Series([1/n_assets]*n_assets)
    constraints = (
        {'type': 'eq', 'fun': total_weight_constraint},
        {'type': 'ineq', 'fun': long_only_constraint},
    )
    result = minimize(
        calculate_portfolio_var,
        start,
        args=cov,
        tol=1e-15,
        method='SLSQP',
        constraints=constraints,
    )
    return result.x

In [None]:
#Inverse variance portfolio (Risk parity from De Prado)
def getIVP(cov,**kargs):
    """Inverse-variance portfolio: weights proportional to 1/variance.

    Uses only the diagonal of ``cov`` and returns an array that sums to one.
    """
    inverse_variance = 1/np.diag(cov)
    return inverse_variance/inverse_variance.sum()

In [None]:
#Maximum diversification
def calculate_portfolio_var(w,V):
    """Portfolio variance ``w @ V @ w.T`` for weights ``w`` and covariance ``V``."""
    weighted_cov = np.dot(w,V)
    return np.dot(weighted_cov,w.T)

def calc_diversification_ratio(w, V):
    """Negative diversification ratio of weights ``w`` under covariance ``V``.

    The ratio is the weighted average of the asset volatilities divided by the
    portfolio volatility. The sign is flipped so that a minimiser maximises it.
    """
    asset_vols = np.sqrt(np.diag(V))
    weighted_avg_vol = np.dot(asset_vols, w.T)
    portfolio_vol = np.sqrt(calculate_portfolio_var(w, V))
    return -(weighted_avg_vol/portfolio_vol)

def total_weight_constraint(x):
    """Equality constraint for the optimiser: zero when the weights sum to one."""
    total = np.sum(x)
    return total-1

def long_only_constraint(x):
    """Inequality constraint for the optimiser: the weights themselves.

    Used as an ``'ineq'`` constraint in this file, which SLSQP treats as
    "every returned element must be >= 0".
    """
    return x

def max_div_port(cov,**kargs):
    """Long-only maximum-diversification weights found with SLSQP.

    Minimises the negative diversification ratio from an equal-weight start,
    with no per-asset bounds, subject to the weights summing to one and being
    non-negative. Returns ``res.x``.
    """
    n_assets = len(cov)
    start = np.array([1/n_assets]*n_assets)
    # Full-investment constraint plus the long-only constraint (always on here)
    constraints = (
        {'type': 'eq', 'fun': total_weight_constraint},
        {'type': 'ineq', 'fun':  long_only_constraint},
    )
    result = minimize(
        calc_diversification_ratio,
        start,
        bounds=None,
        args=cov,
        method='SLSQP',
        constraints=constraints,
    )
    return result.x

In [None]:
#HRP2
#This version of HRP divides the weights between clusters

import collections
import collections.abc

def flatten(x):
    """Flatten an arbitrarily nested iterable into a flat list, depth first.

    Non-iterable values are returned as a one-element list. Strings and bytes
    are treated as atoms: iterating a one-character string yields itself, which
    would otherwise recurse without end.
    """
    # collections.Iterable was removed in Python 3.10; the ABC lives in collections.abc.
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, (str, bytes)):
        return [leaf for item in x for leaf in flatten(item)]
    return [x]
    
def get_cluster_dict(link):
    """Map every cluster id of a linkage matrix to its two children.

    The row at position ``k`` of ``link`` gets the id ``n + k`` where ``n`` is
    ``link[-1, 3]`` (the item count stored in the last row). Each value is a
    two-element list; a child that is itself a cluster is represented by the
    (shared) list already built for it, so the values form nested lists.
    """
    n_items = int(link[-1,3])
    new_ids = np.arange(n_items, n_items*2-1).reshape(-1, 1)
    augmented = np.append(link, new_ids, axis=1)
    first_cluster_id = augmented[0,-1]
    tree = {}
    for row in augmented[:,0:5].astype(int):
        members = []
        tree[row[4]] = members
        for child in (row[0], row[1]):
            if child >= first_cluster_id:
                # child is an earlier merge: nest its member list
                members.append(tree[child])
            else:
                # child is an original item
                members.append(child)
    return tree


def recClusterVar(cluster_dict,link, cov):
    """HRP2 allocation: split weight top-down along the dendrogram's own merges.

    Walks the linkage matrix from the last merge to the first. At every merge
    the weight of all items below it is split between the two children in
    inverse proportion to their cluster variances (see ``getClusterVar``).

    Parameters
    ----------
    cluster_dict : dict
        Output of ``get_cluster_dict(link)``: cluster id -> nested children.
    link : numpy.ndarray
        Linkage matrix; leaf ids are positions 0..n-1.
    cov : pandas.DataFrame
        Covariance matrix whose row order matches the leaf positions.

    Returns
    -------
    pandas.Series of weights indexed by ``cov.index``.

    Notes
    -----
    Leaf positions are translated to the labels of ``cov``. The previous
    version used the positions themselves as labels, which only worked when
    ``cov`` happened to be labelled 0..n-1.
    """
    n_items = int(link[-1,3])
    cluster_ids = np.arange(n_items, n_items*2-1).reshape(-1, 1)
    merges = np.append(link, cluster_ids, axis=1).astype(int)
    labels = list(cov.index)
    # Float from the start: the weights are multiplied by fractional alphas.
    w = pd.Series(1.0, index=labels)

    def _leaf_labels(node):
        # ids >= n_items are merges; smaller ids are original items
        members = cluster_dict[node] if node >= n_items else node
        return [labels[position] for position in flatten(members)]

    for merge in merges[::-1]:
        cluster1 = _leaf_labels(merge[0])
        cluster2 = _leaf_labels(merge[1])
        c1_var = getClusterVar(cov, cluster1)
        c2_var = getClusterVar(cov, cluster2)
        alpha = 1-c1_var/(c1_var+c2_var)
        w.loc[cluster1] *= alpha # weight 1
        w.loc[cluster2] *= 1-alpha # weight 2
    return w

In [None]:
#HRP
def getClusterVar(cov,cItems):
    """Variance of the inverse-variance portfolio built on the items ``cItems``.

    ``cov`` is sliced by label with ``.loc`` on both axes, so ``cItems`` must be
    labels of ``cov``.
    """
    sub_cov = cov.loc[cItems,cItems]
    ivp_column = getIVP(sub_cov).reshape(-1,1)
    quadratic_form = np.dot(np.dot(ivp_column.T,sub_cov),ivp_column)
    return quadratic_form[0,0]

def getQuasiDiag(link):
    """Return the leaf order implied by a linkage matrix (quasi-diagonalisation).

    Starts from the two children of the final merge and repeatedly replaces
    every cluster id by its two children, keeping them adjacent, until only
    original item ids (values below ``link[-1, 3]``) remain.

    Parameters
    ----------
    link : numpy.ndarray
        Linkage matrix as produced by ``scipy.cluster.hierarchy.linkage``.

    Returns
    -------
    list of int
        Item positions in dendrogram order.
    """
    link = link.astype(int)
    sortIx = pd.Series([link[-1,0], link[-1,1]])
    numItems = link[-1,3] # number of original items
    while sortIx.max() >= numItems:
        sortIx.index = range(0, sortIx.shape[0]*2, 2) # make space
        clusters = sortIx[sortIx >= numItems] # entries that are still clusters
        i = clusters.index
        j = clusters.values-numItems # row of each cluster in the linkage matrix
        sortIx.loc[i] = link[j,0] # item 1 replaces the cluster in place
        second_children = pd.Series(link[j,1], index=i+1) # item 2 goes right after
        # Series.append was removed in pandas 2.0; pd.concat is the replacement.
        sortIx = pd.concat([sortIx, second_children]).sort_index()
        sortIx.index = range(sortIx.shape[0]) # re-index
    return sortIx.tolist()

def getRecBipart(cov,sortIx):
    """HRP allocation by recursive bisection of the ordered item list.

    The list ``sortIx`` is repeatedly cut in half. For every pair of halves the
    weights of their items are scaled by ``alpha`` and ``1 - alpha``, where
    ``alpha = 1 - var0 / (var0 + var1)`` and the variances come from
    ``getClusterVar``.

    Parameters
    ----------
    cov : pandas.DataFrame
        Covariance matrix labelled with the entries of ``sortIx``.
    sortIx : list
        Item labels in quasi-diagonal order.

    Returns
    -------
    pandas.Series of weights, sorted by index.
    """
    # Float dtype up front: writing fractional weights into an int64 Series is
    # deprecated upcasting in recent pandas.
    w = pd.Series(1.0, index=sortIx)
    cItems = [sortIx] # initialize all items in one cluster
    while len(cItems) > 0:
        # bi-section: every cluster with more than one item becomes two halves
        cItems = [i[j:k] for i in cItems for j,k in ((0,len(i)//2),(len(i)//2,len(i))) if len(i)>1]
        for i in range(0,len(cItems),2):
            cItems0 = cItems[i] # cluster 1
            cItems1 = cItems[i+1] # cluster 2
            cVar0 = getClusterVar(cov,cItems0)
            cVar1 = getClusterVar(cov,cItems1)
            alpha = 1-cVar0/(cVar0+cVar1)
            w.loc[cItems0] *= alpha # weight 1
            w.loc[cItems1] *= 1-alpha # weight 2
    return w.sort_index()
    

def correlDist(corr):
    """Turn a correlation matrix into the distance ``sqrt((1 - corr) / 2)``.

    For correlations in [-1, 1] the result lies in [0, 1].
    """
    half_gap = (1-corr)/2.
    return half_gap**.5


def plotCorrMatrix(path,corr,labels=None):
    """Save a heatmap of ``corr`` to ``path`` and reset the pyplot state.

    ``labels`` are used as tick labels on both axes; with ``None`` the ticks
    are left unlabelled.
    """
    tick_labels = [] if labels is None else labels
    # Ticks sit in the middle of each cell
    tick_positions = np.arange(.5,corr.shape[0]+.5)
    mpl.pcolor(corr)
    mpl.colorbar()
    mpl.yticks(tick_positions,tick_labels)
    mpl.xticks(tick_positions,tick_labels)
    mpl.savefig(path)
    # reset pylab
    mpl.clf()
    mpl.close()
    return
    
def generateData(nObs,size0,size1,sigma1,x=np.empty((0,1))):
    """Seeded synthetic data: ``size0`` independent series plus ``size1`` noisy copies.

    Both random generators are reseeded with 12345 on every call. When ``x`` is
    empty, ``size0`` standard-normal columns are drawn. Then ``size1`` source
    columns are picked at random and copied with added N(0, sigma1) noise.

    Returns ``(frame, cols)``: a DataFrame whose columns are labelled 1..N and
    the list of 0-based source column positions of the copies.
    """
    np.random.seed(seed=12345)
    random.seed(12345)
    base = x
    if len(base)==0:
        # 1) uncorrelated data, one column per variable
        base = np.random.normal(0,1,size=(nObs,size0))
    # 2) correlated variables: noisy copies of randomly chosen columns
    cols = [random.randint(0,size0-1) for _ in range(size1)]
    noise = np.random.normal(0,sigma1,size=(nObs,len(cols)))
    combined = np.append(base,base[:,cols]+noise,axis=1)
    frame = pd.DataFrame(combined,columns=range(1,combined.shape[1]+1))
    return frame,cols

def generateAutocorrelatedData(nObs,correlation,size):
    """Simulate ``size`` AR(1)-style series: row t = correlation * row t-1 + N(0, 1).

    The first row is pure standard-normal noise. Returns an array with one row
    per observation (a single row when ``nObs`` is 1 or less).
    """
    rows = [np.random.normal(0,1,size=(1,size))]
    for _ in range(nObs-1):
        shock = np.random.normal(0,1,size=(1,size))
        rows.append(correlation*rows[-1]+shock)
    return np.concatenate(rows,axis=0)

def generateCauchyDistData(nObs,size):
    """Return a DataFrame of ``nObs`` x ``size`` standard-Cauchy draws."""
    return pd.DataFrame(np.random.standard_cauchy(size=(nObs,size)))

def generateT_DistData(nObs,size,df):
    """Return a DataFrame of ``nObs`` x ``size`` Student-t draws with ``df`` degrees of freedom."""
    return pd.DataFrame(np.random.standard_t(df,size=(nObs,size)))
    
def findCorrelatedCols(colnbs,size0):
    """Group derived columns under the base column they were copied from.

    ``colnbs`` holds ``(base, derived)`` pairs. Every base column 1..size0 gets
    an entry even when nothing was derived from it. Returns a dict, in sorted
    key order, mapping each base column to ``[base, derived...]``.
    """
    base_columns = sorted(set(pair[0] for pair in colnbs) | set(range(1,size0+1)))
    clusters = {}
    for base in base_columns:
        clusters[base] = [base]
    for pair in colnbs:
        clusters[pair[0]].append(pair[1])
    return clusters

def clusterWeights(clusters, hrp):
    """Total weight per cluster.

    For every key of ``clusters`` (in dict order) the weights ``hrp.loc[j]`` of
    its members are summed. Returns the sums as a list.
    """
    return [sum([hrp.loc[member] for member in clusters[key]]) for key in clusters]
    
    
def main():
    """Run the HRP / HRP2 demo on synthetic data and return the HRP weights.

    Generates 10 base series plus 10 correlated copies, prints which columns
    belong together, saves the correlation heatmap before and after
    quasi-diagonalisation ('HRP3_corr0.png', 'HRP3_corr1.png'), computes the HRP
    and HRP2 allocations, shows the weight bar charts of all allocators and
    draws the dendrogram.
    """
    #1) Generate correlated data
    nObs, size0,size1,sigma1,correlation = 1000,10,10,0.75,-1
    #x = generateAutocorrelatedData(nObs,correlation,size0)
    x, cols=generateData(nObs,size0,size1,sigma1)
    print(findCorrelatedCols([(j+1,size0+i) for i,j in enumerate(cols,1)],size0))
    cov,corr=x.cov(),x.corr()
    # 2) compute and plot correl matrix
    #corr=pd.DataFrame(np.array([[1,0.7,0.2],[0.7,1,-0.2],[0.2,-0.2,1]]))
    plotCorrMatrix('HRP3_corr0.png',corr,labels=corr.columns)
    # 3) cluster
    # NOTE(review): `dist` is a square matrix; scipy's linkage documents a
    # condensed distance vector or an observation matrix as input - confirm
    # which interpretation is intended here.
    dist=correlDist(corr)
    link=sch.linkage(dist,'single')
    sortIx=getQuasiDiag(link)
    sortIx=corr.index[sortIx].tolist() #recover labels
    df0=corr.loc[sortIx,sortIx] #re-order
    plotCorrMatrix('HRP3_corr1.png',df0,labels=df0.columns)
    #4) Capital allocation
    hrp=getRecBipart(cov,sortIx)
    #5) Capital allocation HRP2
    cluster_dict = get_cluster_dict(link)
    hrp2 = recClusterVar(cluster_dict,link, cov)
    plot_weights(hrp, hrp2, cov, x)
    # Leaf labels are 1-based to match the column labels from generateData
    scipy.cluster.hierarchy.dendrogram(link, labels=[i+1 for i in range(size0+size1)])
    
    return hrp


def testStability():
    """Recompute HRP and HRP2 on rolling windows of synthetic data.

    Uses 528 observations of 5 base + 5 correlated series, a window of 264
    observations and a step of 22 observations. Prints the true cluster
    membership once.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        One row of HRP weights and one row of HRP2 weights per window.

    Notes
    -----
    The previous body ended with a second, unreachable
    ``return pd.DataFrame(clusterweights)``; it and the per-window cluster
    weight bookkeeping that only fed it were removed. The returned value is
    unchanged.
    """
    nObs, size0, size1, sigma1, recalc_time, samplesize = 528, 5, 5, 0.5, 22, 264
    x, cols = generateData(nObs,size0,size1,sigma1)
    clusters = findCorrelatedCols([(j+1,size0+i) for i,j in enumerate(cols,1)],size0)
    print(clusters)
    weights = []
    weights2 = []
    for i in range(int((nObs-samplesize)/recalc_time)+1):
        x_sample = x.iloc[i*recalc_time:samplesize+recalc_time*i]
        cov, corr = x_sample.cov(), x_sample.corr()
        link = sch.linkage(correlDist(corr),'single')
        sortIx = corr.index[getQuasiDiag(link)].tolist() # recover labels
        hrp = getRecBipart(cov,sortIx)
        hrp2 = recClusterVar(get_cluster_dict(link),link, cov)
        weights.append(list(hrp.sort_index().values))
        weights2.append(hrp2.values)
    return pd.DataFrame(weights), pd.DataFrame(weights2)

In [None]:
def risk_contribution(weights,cov):
    """Share of portfolio variance attributable to each asset.

    Element ``i`` is ``weights[i] * (cov @ weights)[i] / (weights @ cov @ weights.T)``,
    so the returned values sum to one.

    Parameters
    ----------
    weights : array with a ``.T`` attribute, indexable by position
    cov : square array-like covariance matrix

    Returns
    -------
    list with one entry per weight.
    """
    portvar = np.dot(np.dot(weights,cov),weights.T)
    # Computed once; the original recomputed this product for every asset.
    marginal = np.dot(cov,weights)
    return [(weights[i]*marginal[i])/portvar for i in range(len(weights))]

In [None]:
#Barplots of allocations
def _plot_weight_bars(weights, title, ylim, ylabel="Weight"):
    """Show one bar chart of ``weights`` sorted from largest to smallest."""
    weights.sort_values(ascending=False).plot.bar(figsize = (15,7))
    mpl.title(title)
    mpl.ylabel(ylabel)
    mpl.xlabel("Asset")
    mpl.ylim(ylim)
    mpl.show()


def plot_weights(hrp, hrp2, cov, data):
    """Show one weight bar chart per allocation method.

    Parameters
    ----------
    hrp, hrp2 : pandas.Series
        Pre-computed HRP and HRP2 weights, plotted with their own index.
    cov : covariance matrix
        Passed to the remaining allocators (inverse variance, risk parity,
        GMV, long-only GMV, maximum diversification), which are computed here.
    data : pandas.DataFrame
        Only its column labels are used, as asset names for those allocators.
    """
    index = list(data.columns)

    _plot_weight_bars(hrp, "HRP", (0, 0.5))
    _plot_weight_bars(hrp2, "HRP2", (0, 0.5))

    # Naive risk parity = inverse-variance portfolio
    ivp = pd.Series(getIVP(cov), index=index)
    _plot_weight_bars(ivp, "Naive Risk parity (IVP)", (0,0.5))

    rp = pd.Series(risk_parity(cov), index=index)
    _plot_weight_bars(rp, "Risk Parity", (0, 0.5))

    # Unconstrained GMV can go short, hence the negative lower axis limit
    gmv = pd.Series(GMVPortfolio(cov), index=index)
    _plot_weight_bars(gmv, "GMV", (-0.2, 0.5))

    gmvlo = pd.Series(GMVLOPortfolio(cov), index=index)
    # y-label text kept exactly as this chart had it before the refactor
    _plot_weight_bars(gmvlo, "GMV Long-only", (0,0.5), ylabel="weight")

    mdv = pd.Series(max_div_port(cov), index=index)
    _plot_weight_bars(mdv, "Maximum Diversification Portfolio", (-0.1, 0.5))

In [None]:
# Run the synthetic-data demo; the returned HRP weights are the cell output.
main()

In [None]:
# Data from the Excel file arp_strategies: sheets S_1..S_5 are joined side by
# side on their row index into one frame.
for i in range(5):
    arp_data = pd.read_excel("arp_strategies.xlsx", sheet_name="S_{}".format(i+1))
    arp_data = arp_data.iloc[2611:] # some strategies only start in 2005, so the first 2611 rows are removed
    arp_data = arp_data.drop(['Date'], axis=1)
    if i == 0:
        arp_data_samlet = arp_data
    else:
        arp_data_samlet = pd.merge(arp_data_samlet,arp_data,right_index = True, left_index = True)

# Keep only the combined frame under the name arp_data
arp_data = arp_data_samlet    
del arp_data_samlet
arp_data.head()

In [None]:
# Data from the Excel file "Betting Against Beta".
# The factor series that starts latest begins on 7/3/1989, which is a different
# row in some of the data sets - hence the different skiprows values below.
dates = pd.read_excel('Betting Against Beta Equity Factors Daily.xlsx','MKT',skiprows=17020,usecols='A', header = 0)
dates.columns=['Date']
dates=pd.Series(dates['Date'])
dd= pd.to_datetime(dates)

risk_free_rates = pd.read_excel('Betting Against Beta Equity Factors Daily.xlsx','RF',skiprows=17588, usecols='A')

# One column (Excel column AC) per factor sheet, joined on the row index
sheet_names=['MKT','SMB','HML FF','HML Devil']
for i in sheet_names:
    faktor_data = pd.read_excel('Betting Against Beta Equity Factors Daily.xlsx',i, skiprows=17021, usecols='AC')
    if i == 'MKT':
        data_samlet = faktor_data
    else:
        data_samlet = pd.merge(data_samlet,faktor_data,right_index = True, left_index = True)

# Load UMD
umd_data = pd.read_excel('Betting Against Beta Equity Factors Daily.xlsx','UMD', skiprows=16871, usecols='AC')
data_samlet = pd.merge(data_samlet,umd_data,right_index = True, left_index = True)

# Load BAB
bab_data = pd.read_excel('Betting Against Beta Equity Factors Daily.xlsx','BAB Factors', skiprows=15712, usecols='AC')
data_samlet = pd.merge(data_samlet,bab_data,right_index = True, left_index = True)

# Load QMJ
qmj_data = pd.read_excel('Quality Minus Junk Factors Daily.xlsx','QMJ Factors', skiprows=8112, usecols='AC')
data_samlet = pd.merge(data_samlet,qmj_data,right_index = True, left_index = True)

# Name the merged columns after the sheets they came from, in merge order
sheet_names=['MKT','SMB','HML FF','HML Devil', 'UMD', 'BAB Factors','QMJ Factors']
faktor_data = data_samlet        
faktor_data.columns = sheet_names
faktor_data.head()

In [None]:
# Country-level MKT data. This cell rebinds `dates`, `dd` and `risk_free_rates`.
dates = pd.read_excel('Betting Against Beta Equity Factors Daily.xlsx','MKT',skiprows=18434,usecols='A', header = 0)
dates.columns=['Date']
dates=pd.Series(dates['Date'])
# Drop the first 520 dates and renumber from 0
# NOTE(review): 520 equals sample_size in main_3 - presumably the estimation window; confirm.
dd= pd.to_datetime(dates[520:]).reset_index(drop=True)

risk_free_rates = pd.read_excel('Betting Against Beta Equity Factors Daily.xlsx','RF',skiprows=19001, usecols='B')
risk_free_rates = (risk_free_rates[520:-18])
# Load all countries in MKT
mkt_data = pd.read_excel('Betting Against Beta Equity Factors Daily.xlsx','MKT', skiprows=18434, usecols='B:Y')
mkt_global = pd.read_excel('Betting Against Beta Equity Factors Daily.xlsx','MKT', skiprows=18434, usecols='Z', header=0) # Global as benchmark?
mkt_data.columns = ['AUS', 'AUT', 'BEL', 'CAN', 'CHE', 'DEU', 'DNK', 'ESP', 'FIN', 'FRA', 'GBR', 'GRC', 'HKG', 'IRL', 'ISR', 'ITA', 'JPN', 'NLD', 'NOR', 'NZL', 'PRT', 'SGP', 'SWE', 'USA']

In [None]:
# Use the country-level MKT data as the backtest universe.
# The original left-hand side was `arp_strategies.xlsx_strategies.xlsx_data`,
# an attribute chain on an undefined name that raises NameError.
arp_data = mkt_data
arp_data.head()

In [None]:
# Use the country-level MKT data as the universe read by main_2 and main_3.
arp_data = mkt_data
arp_data.head()

In [None]:
# Load data.csv and keep only the columns that are not listed below
unused_columns = [
    'gvkey', 'iid', 'datadate', 'cusip', 'conm', 'cheqv', 'divd', 'divsp',
    'ajexdi', 'cshoc', 'cshtrd', 'prccd', 'prchd', 'prcod', 'trfd', 'ggroup',
    'gind', 'gsector', 'gsubind', 'naics', 'sic',
]
data2 = pd.read_csv('data.csv').drop(unused_columns, axis=1)
data2.head()

In [None]:
def main_2():
    """Run the HRP / HRP2 pipeline on the global ``arp_data`` and return the HRP weights.

    Same steps as ``main`` but on real data and with 'median' linkage: saves the
    correlation heatmap before and after quasi-diagonalisation, computes HRP and
    HRP2, shows the weight bar charts of all allocators and draws the
    dendrogram (without labels).
    """
    #1) data
    x = arp_data
    cov,corr=x.cov(),x.corr()
    # 2) compute and plot correl matrix
    #corr=pd.DataFrame(np.array([[1,0.7,0.2],[0.7,1,-0.2],[0.2,-0.2,1]]))
    plotCorrMatrix('HRP3_corr0.png',corr,labels=corr.columns)
    # 3) cluster
    # NOTE(review): `dist` is a square matrix; scipy's linkage documents a
    # condensed distance vector or an observation matrix as input - confirm
    # which interpretation is intended here.
    dist=correlDist(corr)
    link=sch.linkage(dist,'median')
    sortIx=getQuasiDiag(link)
    sortIx=corr.index[sortIx].tolist() #recover labels
    df0=corr.loc[sortIx,sortIx] #re-order
    plotCorrMatrix('HRP3_corr1.png',df0,labels=df0.columns)
    #4) Capital allocation
    hrp=getRecBipart(cov,sortIx)
    
    # HRP2: split weights along the dendrogram's own merges
    cluster_dict = get_cluster_dict(link)
    hrp2 = recClusterVar(cluster_dict,link, cov)
    
    plot_weights(hrp, hrp2, cov, x)
    mpl.figure(figsize=[20,10])
    scipy.cluster.hierarchy.dendrogram(link)#, labels=sortIx)
    
    
    return hrp

In [None]:
# Run the pipeline on arp_data; the returned HRP weights are the cell output.
main_2()


In [None]:
def main_3():
    """Rolling out-of-sample backtest of every allocation method on ``arp_data``.

    The purpose is to see how the portfolio weights evolve over time. Every
    ``re_calc_time`` rows the covariance and correlation are estimated on the
    preceding ``sample_size`` rows, all allocators are re-run, and the weights
    are held for the next ``re_calc_time`` rows. For each holding period the
    realised risk contributions and diversification ratios are computed from
    the covariance of that period.

    Returns
    -------
    tuple of 32 elements, in this order:

    * daily return DataFrames: HRP, inverse variance (named ``RP``), GMV,
      long-only GMV, maximum diversification, equal weight, HRP2, risk parity
      (named ``RiskP``);
    * ``pd.DataFrame(returns)`` - ``returns`` is never filled, so this is empty;
    * weight history DataFrames: hrp, hrp2, gmv, mdv, ivp, rp, gmvlo;
    * realised risk-contribution lists: RP, HRP, HRP2, GMV, GMVLO, IVP, MD, EW;
    * realised diversification-ratio lists: GMV, GMVLO, HRP, HRP2, IVP, MD,
      RP, EW. These hold the value returned by ``calc_diversification_ratio``,
      i.e. the negative of the ratio.
    """
    re_calc_time,sample_size = 22,520
    x = pd.DataFrame(arp_data)
    # Daily portfolio returns per method
    HRP_portfolio_return=[]
    RP_portfolio_return=[]
    RiskP_portfolio_return=[]
    GMV_portfolio_return=[]
    GMVLO_portfolio_return=[]
    MD_portfolio_return=[]
    EW_portfolio_return=[]
    HRP2_portfolio_return=[]

    # Weights chosen at every rebalancing date
    hrp_weights=[]
    hrp2_weights=[]
    gmv_weights=[]
    mdv_weights=[]
    ivp_weights=[]
    rp_weights=[]
    gmvlo_weights=[]

    # Realised risk contributions per holding period
    realisedRC_RP = []
    realisedRC_HRP = []
    realisedRC_HRP2 = []
    realisedRC_GMV = []
    realisedRC_MD = []
    realisedRC_IVP = []
    realisedRC_GMVLO = []
    realisedRC_EW = []

    # Realised (negative) diversification ratios per holding period
    realisedDR_RP = []
    realisedDR_HRP = []
    realisedDR_HRP2 = []
    realisedDR_GMV = []
    realisedDR_MD = []
    realisedDR_IVP = []
    realisedDR_GMVLO = []
    realisedDR_EW = []

    EW_weight=[1/len(arp_data.columns)]*len(arp_data.columns)
    returns=[]
    # Previously range(520, len(x)-22, ...): the literals duplicated the two
    # parameters above and would silently go stale if either was changed.
    pointers = range(sample_size,len(x)-re_calc_time,re_calc_time)
    for pointer in pointers:
        print("pointer= {}".format(pointer))
        # Fetch the estimation window
        x_sample = x.iloc[pointer-sample_size:pointer]
        cov,corr=x_sample.cov().reset_index(drop=True) ,x_sample.corr().reset_index(drop=True)
        # Relabel rows and columns 0..n-1 so positions and labels coincide
        cov.columns, corr.columns = [i for i in range(len(cov))], [i for i in range(len(cov))]
        #HRP
        dist=correlDist(corr)
        link=sch.linkage(dist,'complete')
        sortIx=getQuasiDiag(link)
        sortIx=corr.index[sortIx].tolist() #recover labels
        #df0=corr.loc[sortIx,sortIx]
        #plotCorrMatrix('HRP3_corr{}.png'.format(i),df0,labels=df0.columns)
        hrp=getRecBipart(cov,sortIx).sort_index()
        #HRP2
        cluster_dict = get_cluster_dict(link)
        hrp2 = recClusterVar(cluster_dict,link, cov).sort_index()
        #IVP
        ivp = getIVP(cov)
        #GMV
        gmv = GMVPortfolio(cov)
        #GMV Long-only
        gmvlo = GMVLOPortfolio(cov)
        #MD
        mdv = pd.Series(max_div_port(cov))
        #Risk Parity
        rp = pd.Series(risk_parity(cov))
        hrp_weights.append(hrp)
        hrp2_weights.append(hrp2)
        gmv_weights.append(gmv)
        gmvlo_weights.append(gmvlo)
        mdv_weights.append(mdv)
        ivp_weights.append(ivp)
        rp_weights.append(rp)

        #realised risk contribution
        # calculate_risk_contribution already divides by volatility once;
        # dividing by it again expresses each contribution as a share of variance.
        cov_out = x.iloc[pointer:pointer+re_calc_time].cov()
        realisedRC_RP.append(calculate_risk_contribution(rp, cov_out)/np.sqrt(calculate_portfolio_var(rp,cov_out)))
        realisedRC_HRP.append(calculate_risk_contribution(hrp, cov_out)/np.sqrt(calculate_portfolio_var(hrp,cov_out)))
        realisedRC_HRP2.append(calculate_risk_contribution(hrp2, cov_out)/np.sqrt(calculate_portfolio_var(hrp2,cov_out)))
        realisedRC_GMV.append(calculate_risk_contribution(gmv, cov_out)/np.sqrt(calculate_portfolio_var(gmv,cov_out)))
        realisedRC_MD.append(calculate_risk_contribution(mdv, cov_out)/np.sqrt(calculate_portfolio_var(mdv,cov_out)))
        realisedRC_IVP.append(calculate_risk_contribution(ivp, cov_out)/np.sqrt(calculate_portfolio_var(ivp,cov_out)))
        realisedRC_GMVLO.append(calculate_risk_contribution(gmvlo, cov_out)/np.sqrt(calculate_portfolio_var(gmvlo,cov_out)))
        realisedRC_EW.append(calculate_risk_contribution(EW_weight, cov_out)/np.sqrt(calculate_portfolio_var(pd.Series(EW_weight),cov_out)))


        #realised diversification ratio
        realisedDR_RP.append(calc_diversification_ratio(rp, cov_out))
        realisedDR_HRP.append(calc_diversification_ratio(hrp, cov_out))
        realisedDR_HRP2.append(calc_diversification_ratio(hrp2, cov_out))
        realisedDR_GMV.append(calc_diversification_ratio(gmv, cov_out))
        realisedDR_MD.append(calc_diversification_ratio(mdv, cov_out))
        realisedDR_IVP.append(calc_diversification_ratio(ivp, cov_out))
        realisedDR_GMVLO.append(calc_diversification_ratio(gmvlo, cov_out))
        realisedDR_EW.append(calc_diversification_ratio(pd.Series(EW_weight), cov_out))

        # Hold the weights for the next re_calc_time rows
        for j in range(re_calc_time):
            HRP_portfolio_return.append(np.dot(hrp,arp_data.iloc[pointer+j]))
            # "RP" is the naive risk parity, i.e. the inverse-variance weights
            RP_portfolio_return.append(np.dot(ivp,arp_data.iloc[pointer+j]))
            GMV_portfolio_return.append(np.dot(gmv,arp_data.iloc[pointer+j]))
            GMVLO_portfolio_return.append(np.dot(gmvlo,arp_data.iloc[pointer+j]))
            MD_portfolio_return.append(np.dot(mdv,arp_data.iloc[pointer+j]))
            EW_portfolio_return.append(np.dot(EW_weight,arp_data.iloc[pointer+j]))
            HRP2_portfolio_return.append(np.dot(hrp2,arp_data.iloc[pointer+j]))
            RiskP_portfolio_return.append(np.dot(rp,arp_data.iloc[pointer+j]))
    return (
        pd.DataFrame(HRP_portfolio_return),
        pd.DataFrame(RP_portfolio_return),
        pd.DataFrame(GMV_portfolio_return),
        pd.DataFrame(GMVLO_portfolio_return),
        pd.DataFrame(MD_portfolio_return),
        pd.DataFrame(EW_portfolio_return),
        pd.DataFrame(HRP2_portfolio_return),
        pd.DataFrame(RiskP_portfolio_return),
        pd.DataFrame(returns),
        pd.DataFrame(hrp_weights),
        pd.DataFrame(hrp2_weights),
        pd.DataFrame(gmv_weights),
        pd.DataFrame(mdv_weights),
        pd.DataFrame(ivp_weights),
        pd.DataFrame(rp_weights),
        pd.DataFrame(gmvlo_weights),
        realisedRC_RP,
        realisedRC_HRP,
        realisedRC_HRP2,
        realisedRC_GMV,
        realisedRC_GMVLO,
        realisedRC_IVP,
        realisedRC_MD,
        realisedRC_EW,
        realisedDR_GMV,
        realisedDR_GMVLO,
        realisedDR_HRP,
        realisedDR_HRP2,
        realisedDR_IVP,
        realisedDR_MD,
        realisedDR_RP,
        realisedDR_EW,
    )



In [None]:
def value_at_risk(returns, confidence_level=.05):
    """Empirical quantile of ``returns`` at ``confidence_level``.

    When the quantile falls between two observations the higher one is taken.
    """
    return returns.quantile(q=confidence_level, interpolation='higher')


def expected_shortfall(returns, confidence_level=.05):
    """Mean of the returns that lie strictly below the value at risk."""
    threshold = value_at_risk(returns, confidence_level)
    tail = returns[returns.lt(threshold)]
    return tail.mean()


In [None]:
# Run the backtest and unpack its 32 results.
# Naming caveat: the first eight targets end in `_weights` but receive main_3's
# daily portfolio return frames; the lower-case `*_weights` names further along
# are the actual weight histories.
HRP_weights,RP_weights,GMV_weights,GMVLO_weights,MD_weights,EW_weights,HRP2_weights, RiskP_weights, returns, hrp_weights, hrp2_weights, gmv_weights, mdv_weights, ivp_weights, rp_weights, gmvlo_weights, realisedRC_RP, realisedRC_HRP, realisedRC_HRP2, realisedRC_GMV, realisedRC_GMVLO, realisedRC_IVP, realisedRC_MD, realisedRC_EW, realisedDR_GMV, realisedDR_GMVLO, realisedDR_HRP, realisedDR_HRP2, realisedDR_IVP, realisedDR_MD, realisedDR_RP, realisedDR_EW = main_3()

In [None]:
def _cumulative_by_date(daily_returns):
    """Compound daily returns into a growth-of-1 frame indexed by the dates in `dd`."""
    cumulative = (daily_returns + 1).cumprod()
    cumulative['dates'] = dd
    cumulative.set_index('dates', inplace=True, drop=True)
    return cumulative


# The *_weights names below hold daily portfolio returns (see the main_3 unpack)
HRP_portfolio_returns = HRP_weights
HRP2_portfolio_returns = HRP2_weights
RP_portfolio_returns = RP_weights        # naive risk parity (inverse variance)
RiskP_portfolio_returns = RiskP_weights  # risk parity
GMV_portfolio_returns = GMV_weights
GMVLO_portfolio_returns = GMVLO_weights
MD_portfolio_returns = MD_weights
EW_portfolio_returns = EW_weights

HRP_cumulative_returns = _cumulative_by_date(HRP_portfolio_returns)
HRP2_cumulative_returns = _cumulative_by_date(HRP2_portfolio_returns)
RP_cumulative_returns = _cumulative_by_date(RP_portfolio_returns)
RiskP_cumulative_returns = _cumulative_by_date(RiskP_portfolio_returns)
GMV_cumulative_returns = _cumulative_by_date(GMV_portfolio_returns)
GMVLO_cumulative_returns = _cumulative_by_date(GMVLO_portfolio_returns)
MD_cumulative_returns = _cumulative_by_date(MD_portfolio_returns)
EW_cumulative_returns = _cumulative_by_date(EW_portfolio_returns)

# Global market benchmark: skip the first 520 rows and renumber from 0 so the
# rows line up with `dd`
mkt_global_cumulative_returns = _cumulative_by_date(mkt_global[520:].reset_index(drop=True))

In [None]:
# All cumulative return curves on one axis; the plotting order must match the
# legend entries below.
ax = HRP_cumulative_returns.plot(figsize=[15,10])
remaining_curves = [
    RP_cumulative_returns,
    RiskP_cumulative_returns,
    GMV_cumulative_returns,
    GMVLO_cumulative_returns,
    MD_cumulative_returns,
    EW_cumulative_returns,
    HRP2_cumulative_returns,
    mkt_global_cumulative_returns,
]
for curve in remaining_curves:
    curve.plot(figsize=[15,10], ax=ax)

mpl.legend(['HRP', 'Naive RP','RP', 'GMV','GMVLO', 'MD','EW','HRP2', 'Global'])
mpl.ylabel("Accumulated returns (Index 1)")
mpl.show()
# Open question: how do we get the right dates on the x axis?

In [None]:
######### returns for sharpe-ratio
risk_free_rates = risk_free_rates.reset_index(drop=True)


def _excess_growth(portfolio_returns):
    """Compounded growth of the daily returns in excess of the risk-free rate."""
    excess = np.array(portfolio_returns)-np.array(risk_free_rates)
    return pd.Series((excess+1).cumprod())


sr_HRP = _excess_growth(HRP_portfolio_returns)
sr_HRP2 = _excess_growth(HRP2_portfolio_returns)
sr_RP = _excess_growth(RP_portfolio_returns)
sr_RiskP = _excess_growth(RiskP_portfolio_returns)
sr_GMV = _excess_growth(GMV_portfolio_returns)
sr_GMVLO = _excess_growth(GMVLO_portfolio_returns)
sr_MD = _excess_growth(MD_portfolio_returns)
sr_EW = _excess_growth(EW_portfolio_returns)

In [None]:
# Build the table that compares the allocation strategies.
# Column order everywhere: HRP, HRP2, naive RP, GMV, GMVLO, MD, risk parity, EW.
l=len(HRP_cumulative_returns)-1
strategy_cumulative_returns = [HRP_cumulative_returns, HRP2_cumulative_returns, RP_cumulative_returns, GMV_cumulative_returns, GMVLO_cumulative_returns, MD_cumulative_returns, RiskP_cumulative_returns, EW_cumulative_returns]
strategy_daily_returns = [HRP_portfolio_returns, HRP2_portfolio_returns, RP_portfolio_returns, GMV_portfolio_returns, GMVLO_portfolio_returns, MD_portfolio_returns, RiskP_portfolio_returns, EW_portfolio_returns]
allo_comp = [
    # Annualised return: final growth factor to the power 1/24.
    # NOTE(review): 24 is presumably the sample length in years - confirm.
    # .iloc[-1] picks the last row by position; plain [-1] on a date-indexed
    # Series relies on a deprecated positional fallback.
    [cumulative[0].iloc[-1]**(1/24)-1 for cumulative in strategy_cumulative_returns],
    # Annualised standard deviation, using 250 periods per year
    [daily.std()[0]*np.sqrt(250) for daily in strategy_daily_returns],
    # Daily expected shortfall at the default 5% level
    [expected_shortfall(daily)[0] for daily in strategy_daily_returns],
]


In [None]:
allo_comp=pd.DataFrame(allo_comp, columns=['HRP', 'HRP2','Naive RP','GMV','GMVLO','MD','Risk Parity','EW'],index=['Annualized return','Annualized standard deviance','Expected shortfall'])
# Add the Sharpe ratio to the table (annualised return over annualised standard deviation)
allo_comp.loc['Sharpe Ratio']=allo_comp.loc['Annualized return']/allo_comp.loc['Annualized standard deviance']
# Expected shortfall over 14 days: sum the daily returns in consecutive blocks of N rows
N = 14

HRP_portfolio_returns14 = HRP_portfolio_returns.groupby(HRP_portfolio_returns.index // N).sum()
# Fixed: this was computed from HRP_portfolio_returns, so the HRP2 column
# silently repeated the HRP figure.
HRP2_portfolio_returns14 = HRP2_portfolio_returns.groupby(HRP2_portfolio_returns.index // N).sum()
RP_portfolio_returns14 = RP_portfolio_returns.groupby(RP_portfolio_returns.index // N).sum()
GMV_portfolio_returns14 = GMV_portfolio_returns.groupby(GMV_portfolio_returns.index // N).sum()
GMVLO_portfolio_returns14 = GMVLO_portfolio_returns.groupby(GMVLO_portfolio_returns.index // N).sum()
MD_portfolio_returns14 = MD_portfolio_returns.groupby(MD_portfolio_returns.index // N).sum()
RiskP_portfolio_returns14 = RiskP_portfolio_returns.groupby(RiskP_portfolio_returns.index // N).sum()
EW_portfolio_returns14 = EW_portfolio_returns.groupby(EW_portfolio_returns.index // N).sum()
allo_comp.loc['Expected shortfall 14 days'] = [expected_shortfall(HRP_portfolio_returns14)[0], expected_shortfall(HRP2_portfolio_returns14)[0], expected_shortfall(RP_portfolio_returns14)[0], expected_shortfall(GMV_portfolio_returns14)[0], expected_shortfall(GMVLO_portfolio_returns14)[0], expected_shortfall(MD_portfolio_returns14)[0], expected_shortfall(RiskP_portfolio_returns14)[0],expected_shortfall(EW_portfolio_returns14)[0]]

In [None]:
# Add the realised risk contribution and diversification ratio to the table.
# Order matches the table columns: HRP, HRP2, naive RP (IVP), GMV, GMVLO, MD, risk parity (RP), EW.
# NOTE(review): portfolio_risk_deviance is not defined in the part of the
# notebook visible here - confirm it is defined before this cell runs.
allo_comp.loc['Realised deviance risk contribution'] = [portfolio_risk_deviance(realisedRC_HRP), portfolio_risk_deviance(realisedRC_HRP2), portfolio_risk_deviance(realisedRC_IVP), portfolio_risk_deviance(realisedRC_GMV), portfolio_risk_deviance(realisedRC_GMVLO), portfolio_risk_deviance(realisedRC_MD), portfolio_risk_deviance(realisedRC_RP), portfolio_risk_deviance(realisedRC_EW)]
# abs() undoes the sign flip applied by calc_diversification_ratio
allo_comp.loc['Realised diversification ratio'] = [abs(statistics.mean(realisedDR_HRP)), abs(statistics.mean(realisedDR_HRP2)), abs(statistics.mean(realisedDR_IVP)), abs(statistics.mean(realisedDR_GMV)), abs(statistics.mean(realisedDR_GMVLO)), abs(statistics.mean(realisedDR_MD)), abs(statistics.mean(realisedDR_RP)), abs(statistics.mean(realisedDR_EW))]
allo_comp

In [None]:
# Per-asset statistics of the HRP weights across rebalancing dates.
# `hrp_weights` is the weight history; `HRP_weights` (capitalised) holds the
# one-column daily return frame, so labelling it with the asset names fails.
# Work on a copy so re-running the cell does not depend on earlier mutation.
hrp_weight_history = hrp_weights.copy()
hrp_weight_history.columns = arp_data.columns
weight_stats = pd.DataFrame({
    'mean': hrp_weight_history.mean(),
    'std.dev': hrp_weight_history.std(),
    'variance': hrp_weight_history.var(),
})
weight_stats.T

In [None]:
# "Pure strategies": growth of 1 for every column of arp_data, skipping the first 262 rows.
rene_strategier=(arp_data.iloc[262:]+1).cumprod()

In [None]:
# Cumulative growth of each individual column of arp_data
rene_strategier.plot(figsize=(15,7))

In [None]:
# GMV weights at each rebalancing date, one line per asset
gmv_weights.plot(figsize=(15,7))

In [None]:
# HRP2 weights at each rebalancing date, one line per asset
hrp2_weights.plot(figsize=(15,7))

In [None]:
# HRP weights at each rebalancing date, one line per asset
hrp_weights.plot(figsize=(15,7))

In [None]:
# Maximum-diversification weights at each rebalancing date, one line per asset
mdv_weights.plot(figsize=(15,7))

In [None]:
# Risk-parity weights at each rebalancing date, one line per asset
rp_weights.plot(figsize=(15,7))

In [None]:
# Long-only GMV weights at each rebalancing date, one line per asset
gmvlo_weights.plot(figsize=(15,7))

In [None]:
############# Monte Carlo :-)
import time

def generateData(nObs,sLength,size0,size1,mu0,sigma0,sigma1F):
    """Simulate a matrix of correlated series containing a few large shocks.

    Parameters
    ----------
    nObs : number of observations (rows).
    sLength : lowest row index at which a shock may be placed.
    size0 : number of independent base series (columns).
    size1 : number of additional series, each a noisy copy of a base series.
    mu0, sigma0 : mean and standard deviation of the base series.
    sigma1F : the noise added to each copy has standard deviation sigma0*sigma1F.

    Returns
    -------
    (x, cols) : x is an array of shape (nObs, size0+size1) with one series per
        column; cols lists, for each copied column, the base column it was
        copied from.
    """
    # 1) Independent normal draws: one row per observation, one column per series.
    base = np.random.normal(mu0, sigma0, size=(nObs, size0))
    # 2) Choose the base columns to copy (with replacement) and add noise to the copies.
    cols = []
    for _ in range(size1):
        cols.append(random.randint(0, size0 - 1))
    noise = np.random.normal(0, sigma0 * sigma1F, size=(nObs, len(cols)))
    x = np.concatenate((base, base[:, cols] + noise), axis=1)
    # 3) Common shock: on two random rows, hit both the first copied base column
    #    and its copy (column size0) with -0.5 and then +2.
    common_rows = np.random.randint(sLength, nObs - 1, size=2)
    x[np.ix_(common_rows, [cols[0], size0])] = np.array([[-.5, -.5], [2, 2]])
    # 4) Specific shock: on two random rows, hit only the last copied base column.
    specific_rows = np.random.randint(sLength, nObs - 1, size=2)
    x[specific_rows, cols[-1]] = np.array([-.5, 2])
    return x, cols

def getHRP(cov,corr):
    """Build an HRP allocation from a covariance and a correlation matrix.

    The correlation matrix is turned into distances with correlDist, clustered
    with single linkage, ordered with getQuasiDiag and allocated with
    getRecBipart. The result of getRecBipart is returned sorted by its index.
    """
    cov_df = pd.DataFrame(cov)
    corr_df = pd.DataFrame(corr)
    linkage_matrix = sch.linkage(correlDist(corr_df), 'single')
    # Translate the positional ordering back into the labels of the correlation matrix.
    ordered_labels = corr_df.index[getQuasiDiag(linkage_matrix)].tolist()
    allocation = getRecBipart(cov_df, ordered_labels)
    return allocation.sort_index()

def getHRP2(cov,corr):
    """Build the second hierarchical allocation variant.

    Uses the same single-linkage clustering of the correlDist distances as
    getHRP, but the weights come from recClusterVar applied to the cluster
    dictionary produced by get_cluster_dict.
    """
    cov_df = pd.DataFrame(cov)
    corr_df = pd.DataFrame(corr)
    linkage_matrix = sch.linkage(correlDist(corr_df), 'single')
    clusters = get_cluster_dict(linkage_matrix)
    return recClusterVar(clusters, linkage_matrix, cov_df)


def hrpMC(numIters=10000,nObs=520,size0=5,size1=5,mu0=0,sigma0=0.01, sigma1F=.25,sLength=260,rebal=22):
    """Monte Carlo comparison of the allocation methods on simulated data.

    For every iteration a data set is drawn with generateData. At each
    rebalancing point the covariance and correlation of the previous sLength
    rows are passed to every method, and the resulting weights are evaluated
    on the following (up to) rebal rows.

    Parameters
    ----------
    numIters : number of simulated data sets.
    nObs, size0, size1, mu0, sigma0, sigma1F : passed on to generateData.
    sLength : number of rows in the in-sample estimation window.
    rebal : number of rows between two rebalancing points.

    Returns
    -------
    (divratio, rc) : two dicts keyed by method name. divratio[name] is a Series
        of out-of-sample diversification ratios (one per rebalancing point and
        iteration). rc[name] is a Series of out-of-sample risk contributions
        divided by portfolio volatility (one value per asset, rebalancing point
        and iteration). The indices of the individual pieces are kept, so index
        labels repeat.

    Side effects
    ------------
    Prints the iteration counter and elapsed time, writes the terminal returns
    of every iteration to 'stats.csv' and prints their standard deviation,
    variance and variance relative to getHRP.
    """
    start_time = time.time()
    methods = [getHRP, getHRP2, getIVP, risk_parity, GMVPortfolio,
               GMVLOPortfolio, max_div_port, ewPortfolio]
    names = [func.__name__ for func in methods]
    pointers = range(sLength, nObs, rebal)
    # Results are collected in plain lists and concatenated once at the end:
    # Series.append no longer exists in pandas >= 2.0 and copied the whole
    # Series on every call.
    terminal_returns = {name: [] for name in names}
    divratio_parts = {name: [] for name in names}
    rc_parts = {name: [] for name in names}
    numIter = 0
    while numIter < numIters:
        print(numIter)
        # 1) Prepare data for one experiment
        x, _ = generateData(nObs, sLength, size0, size1, mu0, sigma0, sigma1F)
        returns_parts = {name: [] for name in names}
        for pointer in pointers:
            # 2) Estimate the inputs in-sample
            x_in = x[pointer - sLength:pointer]
            cov_, corr_ = np.cov(x_in, rowvar=0), np.corrcoef(x_in, rowvar=0)
            # 3) Compute performance out-of-sample
            x_out = x[pointer:pointer + rebal]
            cov_out = pd.DataFrame(np.cov(x_out, rowvar=0))
            for func in methods:
                name = func.__name__
                w_ = pd.Series(func(cov=cov_, corr=corr_))
                returns_parts[name].append(pd.Series(np.dot(x_out, w_)))
                divratio_ = calc_diversification_ratio(w_, cov_out)
                divratio_parts[name].append(pd.Series(divratio_))
                # calculate_risk_contribution already divides by volatility once;
                # dividing again expresses each contribution as a share of the total.
                rc_ = (np.squeeze(np.asarray(calculate_risk_contribution(w_, cov_out)))
                       / np.sqrt(calculate_portfolio_var(w_, cov_out)))
                rc_parts[name].append(pd.Series(rc_))
        # 4) Evaluate and store results
        for name in names:
            r_ = pd.concat(returns_parts[name], ignore_index=True)
            p_ = (1 + r_).cumprod()
            terminal_returns[name].append(p_.iloc[-1] - 1)  # terminal return
        print("Vi har nu kørt i", time.time() - start_time, "sekunder")
        numIter += 1
    # 5) Report results
    stats = pd.DataFrame(terminal_returns, columns=names)
    stats.to_csv('stats.csv')
    df0, df1 = stats.std(), stats.var()
    print(pd.concat([df0, df1, df1 / df1['getHRP'] - 1], axis=1))
    divratio = {name: pd.concat(parts) if parts else pd.Series(dtype=float)
                for name, parts in divratio_parts.items()}
    rc = {name: pd.concat(parts) if parts else pd.Series(dtype=float)
          for name, parts in rc_parts.items()}
    return divratio, rc
        

In [None]:
# Run the Monte Carlo experiment with hrpMC's default arguments (numIters=10000) and
# keep the two dicts it returns: diversification ratios and risk contributions per method.
divratios, rcs = hrpMC()

In [None]:
# Plot the 'getHRP' entry of vaegte ("vægte" is Danish for "weights").
# NOTE(review): vaegte is not assigned in any visible cell; confirm where it is defined.
vaegte['getHRP'].plot()

In [None]:
# Plot the 'getHRP2' entry of vaegte ("vægte" is Danish for "weights").
# NOTE(review): vaegte is not assigned in any visible cell; confirm where it is defined.
vaegte['getHRP2'].plot()

In [None]:
# Plot the 'getIVP' entry of vaegte ("vægte" is Danish for "weights").
# NOTE(review): vaegte is not assigned in any visible cell; confirm where it is defined.
vaegte['getIVP'].plot()

In [None]:
# Plot the 'risk_parity' entry of vaegte ("vægte" is Danish for "weights").
# NOTE(review): vaegte is not assigned in any visible cell; confirm where it is defined.
vaegte['risk_parity'].plot()

In [None]:
# Plot the 'GMVPortfolio' entry of vaegte ("vægte" is Danish for "weights").
# NOTE(review): vaegte is not assigned in any visible cell; confirm where it is defined.
vaegte['GMVPortfolio'].plot()

In [None]:
# Plot the 'GMVLOPortfolio' entry of vaegte ("vægte" is Danish for "weights").
# NOTE(review): vaegte is not assigned in any visible cell; confirm where it is defined.
vaegte['GMVLOPortfolio'].plot()

In [None]:
# Plot the 'max_div_port' entry of vaegte ("vægte" is Danish for "weights").
# NOTE(review): vaegte is not assigned in any visible cell; confirm where it is defined.
vaegte['max_div_port'].plot()

In [None]:
# Ad-hoc comparison of three allocations computed from the data in `a`, each shown as a bar chart.
# NOTE(review): `a` is not assigned in any visible cell; it must provide .corr() and
# .cov() (presumably a DataFrame of returns) — confirm where it is defined.
dist=correlDist(a.corr()) 
print(dist)
# Complete linkage is used here, whereas getHRP and getHRP2 cluster with 'single'.
link=sch.linkage(dist,'complete')
cluster_dict = get_cluster_dict(link)
print(cluster_dict)
# Allocation 1: cluster-based variant (recClusterVar).
hrp2 = recClusterVar(cluster_dict,link, a.cov())
hrp2.plot.bar()
mpl.show()
# Allocation 2: quasi-diagonal ordering followed by recursive bisection (getRecBipart).
sortIx=getQuasiDiag(link) 
sortIx=a.cov().index[sortIx].tolist() # recover labels 
hrp=getRecBipart(a.cov(),sortIx)
hrp.plot.bar()
mpl.show()
# Allocation 3: risk parity on the same covariance matrix.
pd.DataFrame(risk_parity(a.cov())).plot.bar()
mpl.show()

In [None]:
# Print the mean of column '0' of each saved diversification-ratio file, one line per method.
for divratio_csv in (
    'divratios_stats_risk_parity_single.csv',
    'divratios_stats_max_div_port_single.csv',
    'divratios_stats_GMVPortfolio_single.csv',
    'divratios_stats_GMVLOPortfolio_single.csv',
    'divratios_stats_GETIVP_single.csv',
    'divratios_stats_GETHRP2_single.csv',
    'divratios_stats_GETHRP_single.csv',
    'divratios_stats_ewPortfolio_single.csv',
):
    print(pd.read_csv(divratio_csv)['0'].mean())


In [None]:
def portfolio_risk_deviance(rcs,mean=True):
    """Measure how far risk contributions are from an equal split.

    For every sequence in rcs, the absolute differences between each element
    and 1/len(sequence) are added up.

    Parameters
    ----------
    rcs : iterable of sized sequences of numbers (one sequence per portfolio).
    mean : if True return the average deviance over all sequences, otherwise
        return the individual deviances.

    Returns
    -------
    A scalar when mean is True, otherwise a numpy array with one entry per sequence.
    """
    per_portfolio = np.array([
        sum(abs(contribution - 1/len(contributions)) for contribution in contributions)
        for contributions in rcs
    ])
    return per_portfolio.mean() if mean else per_portfolio



In [None]:
# Write every entry of rcs to its own file named 'rcs <key>'.
# NOTE(review): the file name has no '.csv' extension.
for i in rcs:
    rcs[i].to_csv('rcs {}'.format(i))

In [None]:
# Write every entry of divratios to its own file named 'divratios <key>'.
# NOTE(review): these names have no '.csv' extension and do not match the
# 'divratios_stats_<name>_single.csv' files read in another cell — confirm
# how those files are produced.
for i in divratios:
    divratios[i].to_csv('divratios {}'.format(i))