#Install and import packages

In [None]:
!pip install scikit-learn==0.24.2

In [None]:
!pip install factor_analyzer

In [None]:
!pip install nolds

In [None]:
!pip install PyPortfolioOpt

In [None]:
!git clone https://github.com/josemiotto/pylevy

# navigate to the cloned pylevy directory
%cd pylevy

# get modifications made on the repo
!git pull origin master

# install packages requirements
#!pip install -r requirements.txt

# install package
!python setup.py install



In [None]:
%cd /content

In [None]:
# Import packages
import numpy as np
import pandas as pd
import scipy.stats as st
from scipy.stats import norm, gumbel_l
import sklearn as skl
from sklearn.linear_model import LinearRegression 
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.decomposition import PCA, FactorAnalysis
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from nolds import hurst_rs
import random
import seaborn as sb
import levy
import factor_analyzer as fa
import pypfopt
from pypfopt import EfficientFrontier

# Preparation

In [None]:
#Features
features = ['Sharpe', 'std', 'skew', 'kurt', 'VaR0.05', 'ES', 'beta', 'VaR0.95', 'EU', 'autocor', 'Hurst', 'stab_a', 'stab_g'] 
num_feat = len(features) #number of features  

#monthly update
num_updates=118 #2360:20=118

#Feature Matrix
Data_mon = np.zeros((num_stocks, num_feat, num_updates))

In [None]:
logreturns = pd.read_csv('logreturns_stocks.csv', index_col=0)


In [None]:
logreturns_np = np.array(logreturns.transpose())

In [None]:
dates_rightorder = logreturns.index.values


In [None]:
num_stocks = logreturns.shape[1] #number of stocks (including market average)
tp = logreturns.shape[0] #number of timepoints

In [None]:
# Fill Data_mon[:, :, k] with the features of rolling window k.
for k in range(num_updates):
  #rolling window of logreturns and logreturns_np: 250 observations, shifted by 20 per update
  first, last = k*20, 250+k*20
  logr = logreturns.iloc[first:last,:]
  logr_np = logreturns_np[:, first:last]

  #Column-wise features (one value per stock)
  Data_mon[:,0,k] = logr.mean(axis=0)/logr.std(axis=0)  #Sharpe as mean/std (no risk-free rate)
  Data_mon[:,1,k] = logr.std(axis=0)
  Data_mon[:,2,k] = logr.skew(axis=0)
  Data_mon[:,3,k] = logr.kurtosis(axis=0)
  Data_mon[:,4,k] = logr.quantile(q=0.05, axis=0)               #VaR0.05
  Data_mon[:,5,k] = logr[logr < Data_mon[:,4,k]].mean(axis=0)   #ES: mean of returns below VaR0.05
  Data_mon[:,7,k] = logr.quantile(q=0.95, axis=0)               #VaR0.95
  Data_mon[:,8,k] = logr[logr > Data_mon[:,7,k]].mean(axis=0)   #EU: mean of returns above VaR0.95

  #CAPM beta: slope of each stock's returns regressed ON the market returns
  #(market is the regressor, stock is the target). The last column is the market average.
  #One multi-target fit handles all stocks at once; coef_ then has shape (num_stocks, 1).
  window_returns = logr_np.transpose()            #(timepoints in window, num_stocks)
  r_market = window_returns[:, [num_stocks-1]]    #keep 2-D: (timepoints in window, 1)
  reg = LinearRegression().fit(r_market, window_returns)
  Data_mon[:,6,k] = reg.coef_[:,0]

  for i in range(num_stocks):
    series = logr.iloc[:,i]
    #autocorrelation coefficient
    Data_mon[i,9,k]= series.autocorr()
    #Hurst
    Data_mon[i,10,k] = hurst_rs(series) #using nolds package
    #fit levy stable distribution
    levystab = levy.fit_levy(series)
    #alpha stable (entry 0 of the fitted parameter vector)
    Data_mon[i,11,k] = levystab[0].get()[0]
    #gamma stable (entry 3 of the fitted parameter vector)
    Data_mon[i,12,k] = levystab[0].get()[3]

# Video of Correlation Matrix

In [None]:
# Save one correlation-matrix heatmap per update (frames for a video).
for k in range(Data_mon.shape[2]):
  Data = Data_mon[:,:,k]
  # Standardised features without the Sharpe column (index 0) and without the market_avg row (last row)
  ScData = StandardScaler().fit_transform(Data[:num_stocks -1,1:])

  fig, ax = plt.subplots(figsize=(10,10))
  # Draw explicitly on this figure's axes instead of relying on the "current axes"
  sb.heatmap(data=pd.DataFrame(ScData, columns=features[1:]).corr(), annot=True, cmap="vlag", ax=ax)
  ax.set_title('Correlation matrix')
  # File names are numbered from 100 upwards (k+100)
  place = '/content/drive/MyDrive/VideosMA/StocksCorrMat/plot' + str(k+100) + '.png'
  fig.savefig(place)
  # Release the figure once it is on disk; otherwise every frame stays open in memory
  plt.close(fig)

# PCA / FA and Clustering

In [None]:
#Goal: find best k stocks with k is number of clusters
#Candidate numbers of clusters
num_clust = list(range(3, 11)) + [15, 20]

#One name per (number of clusters, input, algorithm) combination, e.g. 'km03_PC2'.
#Ordering: cluster count varies slowest, then the input (PC2, PC3, FA2, FA3),
#and for each input K-Means ('km') comes directly before agglomerative clustering ('ac').
model_names = [
    '%s%02d_%s' % (algorithm, n_clusters, reduced_input)
    for n_clusters in num_clust
    for reduced_input in ('PC2', 'PC3', 'FA2', 'FA3')
    for algorithm in ('km', 'ac')
]



In [None]:
#Cluster assignment per (stock, model, update): the label of the cluster the stock was put in.
#The first axis has num_stocks-1 entries because market_avg is left out.
Ass_Clusters = np.zeros([num_stocks - 1, len(model_names), num_updates], dtype=float)

In [None]:
#Cluster-quality scores, one value per (model, update):
#  Sil_coef  - Silhouette coefficient, between -1 (bad) and 1 (good); close to zero: overlapping clusters
#  VarRatC   - Calinski-Harabasz index = Variance Ratio Criterion: higher is better
#  Dav_Bould - Davies-Bouldin index: close to zero is best
Sil_coef, VarRatC, Dav_Bould = (
    np.zeros((len(model_names), num_updates)) for _ in range(3)
)

In [None]:
# For every update: reduce the standardised features with PCA / factor analysis,
# save a loadings figure, then cluster each reduced data set with K-Means and
# agglomerative clustering for every candidate number of clusters.
for k in range(num_updates):
  Data = Data_mon[:,:,k]
  ScData = StandardScaler().fit_transform(Data[:num_stocks-1,1:]) # Data without Sharpe Ratio and without market_avg

  #Perform PCA (seeded, in case scikit-learn selects a randomized solver, so reruns give the same scores)
  pca2 = PCA(2, random_state=0)
  pca3 = PCA(3, random_state=0)
  Y_2 = pca2.fit_transform(ScData)
  Y_3 = pca3.fit_transform(ScData)
  PrinComp = pd.DataFrame(pca3.components_).transpose()
  PrinComp.index = features[1:]

  #Perform FA with varimax
  fac2_vari = FactorAnalysis(n_components=2, rotation = 'varimax')#FA with 2 factors and Varimax rotation
  fac3_vari = FactorAnalysis(n_components=3, rotation = 'varimax')#FA with 3 factors and Varimax rotation
  F_2V = fac2_vari.fit_transform(ScData)
  F_3V = fac3_vari.fit_transform(ScData)


  #Plots for Video: loadings of the three PCs and of the 2- and 3-factor solutions
  fig, axes = plt.subplots(1, 3, figsize=(15,5))
  axes[0].set_title('PCA')
  sb.heatmap(ax=axes[0], data=PrinComp, annot=True, cmap="vlag", vmin=-1, vmax=1, cbar=False)
  axes[1].set_title('FA with 2 factors with Varimax')
  sb.heatmap(ax=axes[1], data=pd.DataFrame(fac2_vari.components_, columns=features[1:]).transpose(), annot =True, cmap="vlag", vmin=-1, vmax=1, cbar=False)
  sb.heatmap(ax=axes[2], data=pd.DataFrame(fac3_vari.components_, columns=features[1:]).transpose(), annot =True, cmap="vlag", vmin=-1, vmax=1)
  axes[2].set_title('FA with 3 factors with Varimax')
  place = '/content/drive/MyDrive/VideosMA/StocksFactors/plot' + str(k+100) + '.png'
  fig.savefig(place)
  # Release the figure once it is on disk; otherwise one figure per update stays open
  plt.close(fig)

  Input_Data = [Y_2, Y_3, F_2V, F_3V]

  #Clustering
  #model_num walks through model_names: for each cluster count and each input,
  #the K-Means model comes first and the agglomerative model directly after it.
  model_num = 0 #for selecting column
  for c in num_clust:
    for Input in Input_Data:
      # K-Means starts from random centroids; the fixed seed makes the run repeatable
      km_labels = KMeans(n_clusters=c, random_state=0).fit(Input).labels_
      ac_labels = AgglomerativeClustering(n_clusters=c).fit_predict(Input)

      for labels in (km_labels, ac_labels):
        Ass_Clusters[:,model_num,k] = labels
        #Evaluation
        Sil_coef[model_num, k] = metrics.silhouette_score(Input, labels)
        VarRatC[model_num, k] = metrics.calinski_harabasz_score(Input, labels)
        Dav_Bould[model_num, k] = metrics.davies_bouldin_score(Input, labels)
        model_num = model_num+1


# Take best stock from each cluster based on Sharpe Ratio

In [None]:
#Number of clustering models = length of the model axis of Ass_Clusters
num_models = np.size(Ass_Clusters, 1)

#Selection indicator per (stock, update, model), one portfolio per model;
#market_avg is not included, hence num_stocks-1 rows.
Weights_ones = np.zeros([num_stocks - 1, num_updates, num_models])

In [None]:
#Ass_Clusters has axes (stock without market_avg, model, update)
#Data_mon has axes (stock incl. market_avg, feature, update); feature 0 is the Sharpe ratio

#For every update and model, mark the stock with the highest Sharpe ratio in each
#sufficiently large cluster with a 1 in Weights_ones.
for k in range(num_updates):
  Sharpe = Data_mon[:num_stocks-1,0,k]   #same for all models of this update
  for m in range(num_models):
    y_true = Ass_Clusters[:,m,k]
    nc = int(y_true.max())+1 #number of clusters, +1 because Clusters are named 0,1,2,3...
    for j in range(nc):
      members = np.flatnonzero(y_true == j)   #row indices of the stocks in cluster j
      if members.size >= 5: #only clusters with 5 or more elements are considered
        #argmax is taken over the members only, so a stock outside the cluster can
        #never be selected, whatever the Sharpe values are (no sentinel value needed)
        idxmax = members[Sharpe[members].argmax()]
        Weights_ones[idxmax,k,m]=1

# Weights of different strategies

In [None]:
#Equal-weight strategy: every selected stock gets 1/(number of selected stocks).
#The selection count is taken per (update, model) and broadcast over the stock axis.
Weights_EW = Weights_ones / Weights_ones.sum(axis=0, keepdims=True)

In [None]:
# Scratch check: the selection mask of one (update, model) pair, padded with one trailing
# False for the market_avg row, has num_stocks entries and can index logreturns_np.
np.append(Weights_ones[:,1,1]==1, False).shape

In [None]:
#Optimised ("Markowitz") weights per (stock, update, model):
#Weights_MV for the minimum-volatility and Weights_MS for the maximum-Sharpe portfolio.
markowitz_shape = (num_stocks-1, num_updates, num_models)
Weights_MV = np.zeros(markowitz_shape)
Weights_MS = np.zeros(markowitz_shape)

In [None]:
# Optimise the weights of the stocks selected in Weights_ones, separately for every
# update k and model m, using the same rolling window the features were computed on.
for k in range(num_updates):
  window = slice(k*20, 250+k*20)
  for m in range(num_models):
    selected = Weights_ones[:,k,m]==1
    n_selected = int(selected.sum())

    if n_selected == 0:
      #nothing was selected for this model/update: leave the weights at zero
      continue
    if n_selected == 1:
      #a single asset needs no optimisation (np.cov would not even return a matrix)
      Weights_MV[selected, k, m] = 1.0
      Weights_MS[selected, k, m] = 1.0
      continue

    #first: get relevant part of logreturns_np.
    #append False because logreturns_np includes market_avg, but Weights_ones doesn't
    rel_logr = logreturns_np[np.append(selected, False), window]
    expected_returns = rel_logr.mean(axis=1)
    cov_matrix = np.cov(rel_logr)

    #One optimiser instance per problem, so the second solve does not depend on
    #state left behind by the first.
    mv = EfficientFrontier(expected_returns, cov_matrix).min_volatility()

    ef = EfficientFrontier(expected_returns, cov_matrix)
    #risk_free_rate is passed explicitly as 0.0: the inputs are per-period log returns and
    #the Sharpe feature used for stock selection is plain mean/std. Without it the library
    #default applies (0.02 in PyPortfolioOpt releases of this period - TODO confirm for the
    #installed version), which is out of scale for per-period returns.
    ms = ef.nonconvex_objective(
        pypfopt.objective_functions.sharpe_ratio,
        objective_args=(ef.expected_returns, ef.cov_matrix, 0.0),
        weights_sum_to_one=True,
    )

    Weights_MV[selected, k, m]= np.fromiter(mv.values(), dtype=float)
    Weights_MS[selected, k, m]= np.fromiter(ms.values(), dtype=float)

# Portfolio based on clustering according to the criteria above

In [None]:
#8 "cluster models" = 4 inputs x 2 algorithms:
#'km_PC2', 'ac_PC2', 'km_PC3', 'ac_PC3', 'km_FA2', 'ac_FA2', 'km_FA3', 'ac_FA3'
num_clustermodels = 8

#The weights of every strategy already exist (Weights_EW, Weights_MV, Weights_MS,
#each with axes (stock, update, model)). Here we only pick, per update and cluster
#model, the number of clusters that scores best under each criterion
#(Sil_coef, VarRatC, Dav_Bould, each with axes (model, update)).

#Position of the best number of clusters within num_clust (0, 1, 2, ... not 3, 4, 5, ...)
best_position_shape = (num_updates, num_clustermodels)
optclustnum_sil = np.zeros(best_position_shape)
optclustnum_vrc = np.zeros(best_position_shape)
optclustnum_db = np.zeros(best_position_shape)

#Selected weights per criterion (sil / vrc / db) and strategy (eq / mv / ms)
selected_shape = (num_stocks-1, num_updates, num_clustermodels)
Weights_optclust_sil_eq = np.zeros(selected_shape)
Weights_optclust_vrc_eq = np.zeros(selected_shape)
Weights_optclust_db_eq = np.zeros(selected_shape)
Weights_optclust_sil_mv = np.zeros(selected_shape)
Weights_optclust_vrc_mv = np.zeros(selected_shape)
Weights_optclust_db_mv = np.zeros(selected_shape)
Weights_optclust_sil_ms = np.zeros(selected_shape)
Weights_optclust_vrc_ms = np.zeros(selected_shape)
Weights_optclust_db_ms = np.zeros(selected_shape)

#(source weights, target for silhouette, target for variance ratio, target for Davies-Bouldin)
strategy_targets = (
    (Weights_EW, Weights_optclust_sil_eq, Weights_optclust_vrc_eq, Weights_optclust_db_eq),
    (Weights_MV, Weights_optclust_sil_mv, Weights_optclust_vrc_mv, Weights_optclust_db_mv),
    (Weights_MS, Weights_optclust_sil_ms, Weights_optclust_vrc_ms, Weights_optclust_db_ms),
)

for k in range(num_updates):
  for i in range(num_clustermodels):
    #model indices of cluster model i for all cluster counts: i, i+8, i+16, ...
    ran = np.arange(i, num_models, num_clustermodels)

    #silhouette and variance ratio: larger is better; Davies-Bouldin: smaller is better
    sil_am = Sil_coef[ran,k].argmax()
    vrc_am = VarRatC[ran,k].argmax()
    db_am = Dav_Bould[ran,k].argmin()
    optclustnum_sil[k,i] = sil_am
    optclustnum_vrc[k,i] = vrc_am
    optclustnum_db[k,i] = db_am

    #Weights: copy the column of the winning model for every strategy
    for source, by_sil, by_vrc, by_db in strategy_targets:
      by_sil[:,k,i] = source[:,k,ran[sil_am]]
      by_vrc[:,k,i] = source[:,k,ran[vrc_am]]
      by_db[:,k,i] = source[:,k,ran[db_am]]



In [None]:
#Translate the stored positions (0..9) into the actual numbers of clusters
#(3..10, 15, 20) by looking them up in num_clust; results stay float arrays.
cluster_counts = np.array(num_clust, dtype=float)

optclustnum_sil_true = cluster_counts[optclustnum_sil.astype(int)]
optclustnum_db_true = cluster_counts[optclustnum_db.astype(int)]
optclustnum_vrc_true = cluster_counts[optclustnum_vrc.astype(int)]

In [None]:
#Label of the first observation after each rolling window: positions 250, 270, 290, ...
dates_updates = dates_rightorder[250 + 20*np.arange(118)]
dates_updates

#Figure: Best number of clusters according to the criteria

In [None]:
#New figure: best number of clusters over time, one panel per cluster model
fontsize=25
plt.rc('font', size=fontsize)                              #default text size
plt.rc('axes', titlesize=fontsize, labelsize=fontsize)     #title and x/y label size
plt.rc('xtick', labelsize=20)                              #x tick label size
plt.rc('ytick', labelsize=20)                              #y tick label size
plt.rc('legend', fontsize=fontsize)                        #legend text size

panel_titles = ['K-Means with PC2', 'Agg. Clustering with PC2',
                'K-Means with PC3', 'Agg. Clustering with PC3',
                'K-Means with FA2', 'Agg. Clustering with FA2',
                'K-Means with FA3', 'Agg. Clustering with FA3']

fig, ax = plt.subplots(8,1, figsize=(15,25))
plt.tight_layout(h_pad=3.0)
for panel, panel_title in zip(ax, panel_titles):
  panel.set_title(panel_title)

#Per panel, in this order: Silhouette, Variance Ratio, Davies Bouldin (no legend is drawn)
for k in range(8):
  for best_k in (optclustnum_sil_true, optclustnum_vrc_true, optclustnum_db_true):
    ax[k].plot(dates_updates, best_k[:,k], 'o')

# Cumulative Portfolio Wealth

In [None]:
#Benchmark: equally weighted portfolio of all stocks (market_avg row excluded).
#Sized from num_stocks / num_updates so it always matches the data and the other CPW arrays.
n_oos_days = num_updates*20                       #out-of-sample observations
weights_allequal = np.ones(num_stocks-1)*(1/(num_stocks-1))

TS_MarkAvg = np.zeros(n_oos_days)                 #weighted log return per out-of-sample step
CPW_MarkAvg = np.zeros(n_oos_days+1)              #running sum of those returns, starting at 1
CPW_MarkAvg[0]=1
for t in range(n_oos_days):
  #column 250+t is the t-th observation after the first rolling window
  np_in_MarkAvg = np.inner(weights_allequal,logreturns_np[:num_stocks-1,249+t+1])
  TS_MarkAvg[t] = np_in_MarkAvg   #store per step (plain assignment would replace the array by a scalar)
  CPW_MarkAvg[t+1] = CPW_MarkAvg[t]+ np_in_MarkAvg

In [None]:
#Cumulative Portfolio Wealth per model for the three strategies:
#EW = equal weights, MV = minimum volatility, MS = maximum Sharpe.
CPW_EW, CPW_MV, CPW_MS = (np.zeros((2361, num_models)) for _ in range(3))
for cpw_init in (CPW_EW, CPW_MV, CPW_MS):
  cpw_init[0,:] = 1 #Initial CPW is 1

#Timeseries of sums of weighted logreturns
TS_EW, TS_MV, TS_MS = (np.zeros((2360, num_models)) for _ in range(3))

#Same accumulation for each strategy: weights of update k are held for 20 steps
for w_strat, ts_strat, cpw_strat in ((Weights_EW, TS_EW, CPW_EW),
                                     (Weights_MV, TS_MV, CPW_MV),
                                     (Weights_MS, TS_MS, CPW_MS)):
  for m in range(num_models):
    for k in range(num_updates):
      for j in range(20):
        t = k*20+j #i.e. t from 0 to 117*20+19 = 2359. Note: CPW[0]=1
        step_return = np.inner(w_strat[:,k,m], logreturns_np[:num_stocks-1, 250+t])
        ts_strat[t,m] = step_return
        cpw_strat[t+1,m] = cpw_strat[t,m] + step_return
     

In [None]:
#Cumulative wealth for the criterion-selected portfolios.
#Axes: (timestep, cluster model 0..7, criterion: 0=sil 1=vrc 2=db, strategy: 0=EW 1=MV 2=MS)
CPW_opt_clust = np.zeros((2361, num_clustermodels, 3, 3))
CPW_opt_clust[0] = 1 #Initial CPW is 1

TS_opt_clust = np.zeros((2360, num_clustermodels, 3, 3))

#optclust_weights[strategy][criterion], in the axis order documented above
optclust_weights = (
    (Weights_optclust_sil_eq, Weights_optclust_vrc_eq, Weights_optclust_db_eq),
    (Weights_optclust_sil_mv, Weights_optclust_vrc_mv, Weights_optclust_db_mv),
    (Weights_optclust_sil_ms, Weights_optclust_vrc_ms, Weights_optclust_db_ms),
)

for strat, weights_by_criterion in enumerate(optclust_weights):
  for crit, w_sel in enumerate(weights_by_criterion):
    for m in range(num_clustermodels):
      for k in range(num_updates):
        for j in range(20):
          t = k*20+j
          #weights of update k applied to the observation at column 250+t
          np_in = np.inner(w_sel[:,k,m], logreturns_np[:num_stocks-1, 250+t])
          TS_opt_clust[t,m,crit,strat] = np_in
          CPW_opt_clust[t+1,m,crit,strat] = CPW_opt_clust[t,m,crit,strat] + np_in

      

# Portfolio Diversification Index

In [None]:
def WeightedLogret(WeightMat, logret, window=250, hold=20):
  """Weight the out-of-sample log returns of every asset with its portfolio weight.

  Parameters
  ----------
  WeightMat : array of shape (n_assets, n_updates, n_models)
      Portfolio weights; the weights of update ``k`` are held for ``hold`` steps.
  logret : array of shape (>= n_assets, >= window + n_updates*hold)
      Log returns with assets in rows and time in columns. Only the first
      ``n_assets`` rows are used; trailing rows and columns are ignored.
  window : int, default 250
      Number of leading columns skipped (the first in-sample window).
  hold : int, default 20
      Number of consecutive columns each update's weights apply to.

  Returns
  -------
  ndarray of shape (n_assets, n_updates*hold, n_models)
      Entry ``[i, t, m]`` is ``WeightMat[i, t // hold, m] * logret[i, window + t]``.

  Raises
  ------
  ValueError
      If ``logret`` has too few rows or columns for ``WeightMat``.
  """
  WeightMat = np.asarray(WeightMat)
  logret = np.asarray(logret)
  n_assets, n_updates, _ = WeightMat.shape
  n_steps = n_updates*hold

  # Bounded slice: the result does not depend on how many columns follow the last step
  oos_returns = logret[:n_assets, window:window+n_steps]
  if oos_returns.shape != (n_assets, n_steps):
    raise ValueError(
        'logret must provide at least %d rows and %d columns, got shape %s'
        % (n_assets, window+n_steps, logret.shape)
    )

  # Repeat every update's weights `hold` times along the time axis
  W = np.repeat(WeightMat, hold, axis=1)
  # Broadcast the returns over the model axis
  return W*oos_returns[:, :, np.newaxis]

In [None]:
#Weighted log returns for the per-model portfolios (equal weight and max Sharpe) ...
WL_EW, WL_MS = (
    WeightedLogret(weight_set, logreturns_np) for weight_set in (Weights_EW, Weights_MS)
)

#... and for the criterion-selected portfolios.
#Note: no arrays are built for the minimum-variance (MV) weights in this cell.
(WL_optclust_db_eq, WL_optclust_db_ms,
 WL_optclust_vrc_eq, WL_optclust_vrc_ms,
 WL_optclust_sil_eq, WL_optclust_sil_ms) = (
    WeightedLogret(weight_set, logreturns_np)
    for weight_set in (Weights_optclust_db_eq, Weights_optclust_db_ms,
                       Weights_optclust_vrc_eq, Weights_optclust_vrc_ms,
                       Weights_optclust_sil_eq, Weights_optclust_sil_ms)
)

In [None]:
def PDI(WL):
  """Portfolio Diversification Index for every portfolio in ``WL``.

  Parameters
  ----------
  WL : array of shape (n_assets, n_timesteps, n_portfolios)
      Weighted log returns, e.g. as returned by ``WeightedLogret``.

  Returns
  -------
  ndarray of shape (n_portfolios,)
      ``2 * sum_i(i * w_i) - 1``, where ``w_i`` is the share of the total variance
      explained by the i-th principal component (components in the order PCA
      returns them, numbered from 1).
  """
  n_portfolios = WL.shape[2]
  pdi_values = np.zeros(n_portfolios)
  for k in range(n_portfolios):
    #PCA input is (n_samples, n_features): timesteps as samples, assets as features
    pca = skl.decomposition.PCA()
    pca.fit(WL[:,:,k].transpose())
    variances = pca.explained_variance_
    #number the components from the fit itself instead of assuming exactly 960 of them
    ranks = np.arange(1, variances.size+1)
    pdi_values[k] = 2*np.sum(variances/np.sum(variances)*ranks) - 1
  return pdi_values

In [None]:
#Portfolio Diversification Index of every weighted-return array (one value per model)
(PDI_WL_EW,
 PDI_WL_db_eq, PDI_WL_db_ms,
 PDI_WL_sil_eq, PDI_WL_sil_ms,
 PDI_WL_vrc_eq, PDI_WL_vrc_ms,
 PDI_WL_MS) = map(PDI, (WL_EW,
                        WL_optclust_db_eq, WL_optclust_db_ms,
                        WL_optclust_sil_eq, WL_optclust_sil_ms,
                        WL_optclust_vrc_eq, WL_optclust_vrc_ms,
                        WL_MS))

#Figures

In [None]:
#CPW arrays as DataFrames: one column per model plus the market-average benchmark,
#indexed by the labels from position 250 onwards.
cpw_index = dates_rightorder[250:]

df_CPW_MarkAv = pd.DataFrame(CPW_MarkAvg, columns=['mark_avg'], index=cpw_index)

df_CPW_EW, df_CPW_MV, df_CPW_MS = (
    pd.DataFrame(cpw_values, columns=model_names, index=cpw_index).join(df_CPW_MarkAv)
    for cpw_values in (CPW_EW, CPW_MV, CPW_MS)
)

In [None]:
#Overview
#New Figure: rows = selection criterion, columns = weighting strategy.
#Panel [r, c] shows CPW_opt_clust[:, model, r, c] for the 8 cluster models plus the market average.
criterion_titles = ['Silhouette Coef', 'Variance Ratio Crit', 'Davies Bouldin']
strategy_titles = ['1/n Portfolio', 'MV', 'MS']

#Every series is drawn against the same labels the market-average DataFrame is
#indexed by, so all lines in all panels share one x-axis type.
x_labels = dates_rightorder[250:]

fig, ax = plt.subplots(3,3, figsize=(20,15), sharey=True)
for r, criterion_title in enumerate(criterion_titles):
  for c, strategy_title in enumerate(strategy_titles):
    panel = ax[r,c]
    panel.set_title(criterion_title + ', ' + strategy_title)
    panel.plot(df_CPW_EW.loc[:, ['mark_avg']])
    for k in range(8):
      panel.plot(x_labels, CPW_opt_clust[:,k,r,c])

#keep only every 4th x tick
for panel in ax.flat:
  panel.set_xticks(panel.get_xticks()[::4])

#one legend for the whole figure, anchored below the bottom-left panel
ax[2,0].legend(['mark_avg', 'KM with PC2', 'AC with PC2', 'KM with PC3', 'AC with PC3', 
                'KM with FA2', 'AC with FA2', 'KM with FA3', 'AC with FA3'], ncol=3,
               bbox_to_anchor=(0,0,2,-0.2))

In [None]:
#EW: criterion-selected 1/n portfolios (strategy index 0 of CPW_opt_clust)
from cycler import cycler
custom_cycler = (cycler(color=['c', 'm', 'y', 'g']))

fig, ax = plt.subplots(2,3, figsize=(20,10), sharey=True)
fig.suptitle('1/n Portfolio')

#columns = criterion (index into axis 2 of CPW_opt_clust), rows = clustering algorithm
column_titles = ['Silhouette Coef', 'Variance Ratio Crit.', 'Davies Bouldin']
row_specs = [('K-Means', [0,2,4,6]), ('Agg. Cluster', [1,3,5,7])]

for col, column_title in enumerate(column_titles):
  for row, (algorithm, cluster_models) in enumerate(row_specs):
    panel = ax[row,col]
    panel.set_prop_cycle(custom_cycler)
    panel.set_title(column_title + ', ' + algorithm)
    panel.plot(df_CPW_EW.loc[:, ['mark_avg']], color='k')   #benchmark in black
    for k in cluster_models:                                 #PC2, PC3, FA2, FA3
      panel.plot(dates_rightorder[250:], CPW_opt_clust[:,k,col,0])

#keep only every 3rd x tick
for panel in ax.flat:
  panel.set_xticks(panel.get_xticks()[::3])

ax[1,0].legend(['market avg', 'PC2','PC3','FA2','FA3'], ncol=5,
               bbox_to_anchor=(0,0,2,-0.2))

In [None]:
#MV: criterion-selected minimum-variance portfolios (strategy index 1 of CPW_opt_clust)
custom_cycler = (cycler(color=['c', 'm', 'y', 'g']))

fig, ax = plt.subplots(2,3, figsize=(20,10), sharey=True)
fig.suptitle('Minimum Variance Portfolio')

#columns = criterion (index into axis 2 of CPW_opt_clust), rows = clustering algorithm
column_titles = ['Silhouette Coef', 'Variance Ratio Crit.', 'Davies Bouldin']
row_specs = [('K-Means', [0,2,4,6]), ('Agg. Cluster', [1,3,5,7])]

for col, column_title in enumerate(column_titles):
  for row, (algorithm, cluster_models) in enumerate(row_specs):
    panel = ax[row,col]
    panel.set_prop_cycle(custom_cycler)
    panel.set_title(column_title + ', ' + algorithm)
    panel.plot(df_CPW_EW.loc[:, ['mark_avg']], color='k')   #benchmark in black
    for k in cluster_models:                                 #PC2, PC3, FA2, FA3
      panel.plot(dates_rightorder[250:], CPW_opt_clust[:,k,col,1])

#keep only every 3rd x tick
for panel in ax.flat:
  panel.set_xticks(panel.get_xticks()[::3])

ax[1,0].legend(['market avg', 'PC2','PC3','FA2','FA3'], ncol=5,
               bbox_to_anchor=(0,0,2,-0.2))

In [None]:
#MS: criterion-selected maximum-Sharpe portfolios (strategy index 2 of CPW_opt_clust)
custom_cycler = (cycler(color=['c', 'm', 'y', 'g']))

fig, ax = plt.subplots(2,3, figsize=(20,10), sharey=True)
fig.suptitle('Maximum Sharpe Portfolio')

#columns = criterion (index into axis 2 of CPW_opt_clust), rows = clustering algorithm
column_titles = ['Silhouette Coef', 'Variance Ratio Crit.', 'Davies Bouldin']
row_specs = [('K-Means', [0,2,4,6]), ('Agg. Cluster', [1,3,5,7])]

for col, column_title in enumerate(column_titles):
  for row, (algorithm, cluster_models) in enumerate(row_specs):
    panel = ax[row,col]
    panel.set_prop_cycle(custom_cycler)
    panel.set_title(column_title + ', ' + algorithm)
    panel.plot(df_CPW_EW.loc[:, ['mark_avg']], color='k')   #benchmark in black
    for k in cluster_models:                                 #PC2, PC3, FA2, FA3
      panel.plot(dates_rightorder[250:], CPW_opt_clust[:,k,col,2])

#keep only every 3rd x tick
for panel in ax.flat:
  panel.set_xticks(panel.get_xticks()[::3])

ax[1,0].legend(['market avg', 'PC2','PC3','FA2','FA3'], ncol=5,
               bbox_to_anchor=(0,0,2,-0.2))

In [None]:
#1/n portfolio: cumulative wealth of every model, one panel per algorithm/input pair,
#one line per number of clusters plus the market average.
fig, ax = plt.subplots(2,4, figsize=(20,10), sharey=True)
fig.suptitle('1/n Portfolio')

cluster_tags = ('03', '04', '05', '06', '07', '08', '09', '10', '15', '20')
algorithms = (('KM', 'km'), ('AC', 'ac'))          #(title label, column-name prefix), one per row
reduced_inputs = ('PC2', 'PC3', 'FA2', 'FA3')      #one per column

for row, (algorithm_label, prefix) in enumerate(algorithms):
  for col, reduced_input in enumerate(reduced_inputs):
    #e.g. ['km03_PC2', ..., 'km20_PC2', 'mark_avg']
    panel_columns = [prefix + tag + '_' + reduced_input for tag in cluster_tags] + ['mark_avg']
    ax[row,col].set_title(algorithm_label + ' with ' + reduced_input)
    ax[row,col].plot(df_CPW_EW.loc[:, panel_columns])

#keep only every 4th x tick
for panel in ax.flat:
  panel.set_xticks(panel.get_xticks()[::4])

ax[1,0].legend(['3 clusters', '4 clusters', '5 clusters', '6 clusters', '7 clusters',
                '8 clusters', '9 clusters', '10 clusters', '15 clusters', '20 clusters', 'mark_avg'], 
                bbox_to_anchor=(0, 0, 4.0, -0.2), ncol=5)

In [None]:
# Minimum-variance portfolios on a 2x4 grid: one row per clustering prefix
# ('km', 'ac'), one column per feature-reduction suffix. Each panel draws the
# matching df_CPW_MV columns for every cluster count, plus 'mark_avg'.
grid_cluster_counts = [3, 4, 5, 6, 7, 8, 9, 10, 15, 20]

fig, ax = plt.subplots(2, 4, figsize=(20, 10), sharey=True)
fig.suptitle('Minimum Variance Portfolio')

for row, prefix in enumerate(('km', 'ac')):
  for col, suffix in enumerate(('PC2', 'PC3', 'FA2', 'FA3')):
    # Column names look like 'km03_PC2': prefix, zero-padded count, suffix.
    panel_columns = [f'{prefix}{n:02d}_{suffix}' for n in grid_cluster_counts]
    panel_columns.append('mark_avg')
    ax[row, col].set_title(f'{prefix.upper()} with {suffix}')
    ax[row, col].plot(df_CPW_MV.loc[:, panel_columns])

# Thin the x axis of every panel down to each fourth tick.
for k in range(4):
  for panel in (ax[0, k], ax[1, k]):
    panel.set_xticks(panel.get_xticks()[::4])

# Single legend attached to the lower-left panel; the labels follow the order
# in which the columns were plotted above.
legend_labels = [f'{n} clusters' for n in grid_cluster_counts] + ['mark_avg']
ax[1, 0].legend(legend_labels, bbox_to_anchor=(0, 0, 4.0, -0.2), ncol=5)

In [None]:
# Maximum-Sharpe portfolios on a 2x4 grid: one row per clustering prefix
# ('km', 'ac'), one column per feature-reduction suffix. Each panel draws the
# matching df_CPW_MS columns for every cluster count, plus 'mark_avg'.
grid_cluster_counts = [3, 4, 5, 6, 7, 8, 9, 10, 15, 20]

fig, ax = plt.subplots(2, 4, figsize=(20, 10), sharey=True)
fig.suptitle('Maximum Sharpe Portfolio')

for row, prefix in enumerate(('km', 'ac')):
  for col, suffix in enumerate(('PC2', 'PC3', 'FA2', 'FA3')):
    # Column names look like 'ac15_FA3': prefix, zero-padded count, suffix.
    panel_columns = [f'{prefix}{n:02d}_{suffix}' for n in grid_cluster_counts]
    panel_columns.append('mark_avg')
    ax[row, col].set_title(f'{prefix.upper()} with {suffix}')
    ax[row, col].plot(df_CPW_MS.loc[:, panel_columns])

# Thin the x axis of every panel down to each fourth tick.
for k in range(4):
  for panel in (ax[0, k], ax[1, k]):
    panel.set_xticks(panel.get_xticks()[::4])

# Single legend attached to the lower-left panel; the labels follow the order
# in which the columns were plotted above.
legend_labels = [f'{n} clusters' for n in grid_cluster_counts] + ['mark_avg']
ax[1, 0].legend(legend_labels, bbox_to_anchor=(0, 0, 4.0, -0.2), ncol=5)

In [None]:
# Reduced view of the 1/n (equal-weight) portfolios: only 3, 5 and 10 clusters
# are drawn per panel, next to 'mark_avg'. Rows follow the clustering prefix
# ('km', 'ac'), columns the feature-reduction suffix.
subset_cluster_counts = [3, 5, 10]

fig, ax = plt.subplots(2, 4, figsize=(20, 10), sharey=True)

for row, prefix in enumerate(('km', 'ac')):
  for col, suffix in enumerate(('PC2', 'PC3', 'FA2', 'FA3')):
    # Column names look like 'km05_PC3': prefix, zero-padded count, suffix.
    panel_columns = [f'{prefix}{n:02d}_{suffix}' for n in subset_cluster_counts]
    panel_columns.append('mark_avg')
    ax[row, col].set_title(f'{prefix.upper()} with {suffix}, 1/n Portfolio')
    ax[row, col].plot(df_CPW_EW.loc[:, panel_columns])

# Only the top-left panel carries a legend; every panel plots its columns in
# the same order. The trailing ';' keeps the Legend repr out of the cell output.
legend_labels = [f'{n} clusters' for n in subset_cluster_counts] + ['mark_avg']
ax[0, 0].legend(legend_labels);

In [None]:
# Reduced view of the minimum-volatility portfolios: only 3, 5 and 10 clusters
# are drawn per panel, next to 'mark_avg'. Rows follow the clustering prefix
# ('km', 'ac'), columns the feature-reduction suffix.
subset_cluster_counts = [3, 5, 10]

fig, ax = plt.subplots(2, 4, figsize=(20, 10), sharey=True)

for row, prefix in enumerate(('km', 'ac')):
  for col, suffix in enumerate(('PC2', 'PC3', 'FA2', 'FA3')):
    # Column names look like 'km05_PC3': prefix, zero-padded count, suffix.
    panel_columns = [f'{prefix}{n:02d}_{suffix}' for n in subset_cluster_counts]
    panel_columns.append('mark_avg')
    ax[row, col].set_title(f'{prefix.upper()} with {suffix}, Min volatility Portfolio')
    ax[row, col].plot(df_CPW_MV.loc[:, panel_columns])

# Only the top-left panel carries a legend; every panel plots its columns in
# the same order. The trailing ';' keeps the Legend repr out of the cell output.
legend_labels = [f'{n} clusters' for n in subset_cluster_counts] + ['mark_avg']
ax[0, 0].legend(legend_labels);

In [None]:
# Reduced view of the maximum-Sharpe portfolios: only 3, 5 and 10 clusters
# are drawn per panel, next to 'mark_avg'. Rows follow the clustering prefix
# ('km', 'ac'), columns the feature-reduction suffix.
subset_cluster_counts = [3, 5, 10]

fig, ax = plt.subplots(2, 4, figsize=(20, 10), sharey=True)

for row, prefix in enumerate(('km', 'ac')):
  for col, suffix in enumerate(('PC2', 'PC3', 'FA2', 'FA3')):
    # Column names look like 'km05_PC3': prefix, zero-padded count, suffix.
    panel_columns = [f'{prefix}{n:02d}_{suffix}' for n in subset_cluster_counts]
    panel_columns.append('mark_avg')
    ax[row, col].set_title(f'{prefix.upper()} with {suffix}, Max Sharpe Portfolio')
    ax[row, col].plot(df_CPW_MS.loc[:, panel_columns])

# Only the top-left panel carries a legend; every panel plots its columns in
# the same order. The trailing ';' keeps the Legend repr out of the cell output.
legend_labels = [f'{n} clusters' for n in subset_cluster_counts] + ['mark_avg']
ax[0, 0].legend(legend_labels);

# Sharpe Ratio

In [None]:
# Sharpe ratio of each return series: mean divided by standard deviation, both
# taken along axis 0. No risk-free rate is subtracted.
Sharpe_EW, Sharpe_MV, Sharpe_MS, Sharpe_Opt = (
    series.mean(axis=0) / series.std(axis=0)
    for series in (TS_EW, TS_MV, TS_MS, TS_opt_clust)
)

# Certainty Equivalent

In [None]:
# Certainty equivalent: mean - 0.5 * lambda * variance, where lambda is the
# risk-aversion coefficient; lambda = 1 is used here. Mean and variance are
# taken along axis 0 of each return series.
CerE_EW, CerE_MV, CerE_MS = (
    series.mean(axis=0) - 0.5 * series.var(axis=0)
    for series in (TS_EW, TS_MV, TS_MS)
)

# Adjusted Sharpe Ratio

In [None]:
# Adjusted Sharpe ratio: SR * (1 + skew * SR / 6 - kurtosis * SR^2 / 24), with
# skewness and kurtosis taken along axis 0 of each return series.
# `st` is the notebook's alias for scipy.stats (see the import cell); the bare
# name `scipy` is not bound by `import scipy.stats as st`, so the statistics
# are reached through the alias. st.kurtosis is called with its defaults.
ASR_EW = Sharpe_EW * (
    1
    + st.skew(TS_EW, axis=0) * Sharpe_EW / 6
    - st.kurtosis(TS_EW, axis=0) * Sharpe_EW * Sharpe_EW / 24
)
ASR_MV = Sharpe_MV * (
    1
    + st.skew(TS_MV, axis=0) * Sharpe_MV / 6
    - st.kurtosis(TS_MV, axis=0) * Sharpe_MV * Sharpe_MV / 24
)
ASR_MS = Sharpe_MS * (
    1
    + st.skew(TS_MS, axis=0) * Sharpe_MS / 6
    - st.kurtosis(TS_MS, axis=0) * Sharpe_MS * Sharpe_MS / 24
)

In [None]:
# Adjusted Sharpe ratio of the optimal-cluster series:
# SR * (1 + skew * SR / 6 - kurtosis * SR^2 / 24), statistics along axis 0.
# `st` is the notebook's alias for scipy.stats (see the import cell); the bare
# name `scipy` is not bound by `import scipy.stats as st`.
ASR_Opt = Sharpe_Opt * (
    1
    + st.skew(TS_opt_clust, axis=0) * Sharpe_Opt / 6
    - st.kurtosis(TS_opt_clust, axis=0) * Sharpe_Opt * Sharpe_Opt / 24
)