In [2]:
import pandas as pd
import numpy as np
from numpy.random import normal
from numpy.random import uniform
from numpy.random import random
from warnings import catch_warnings
from warnings import simplefilter
from sklearn.gaussian_process import GaussianProcessRegressor
from datetime import datetime
from scipy.stats import norm

In [3]:
returns=pd.read_csv("asset_class_returns.csv")
returns = pd.DataFrame(returns).set_index("Year")
returns=returns.apply(lambda num : num.str.strip('%'))
for col in returns.columns:
    returns[col]=returns[col].astype(str).astype(float)
cols=returns.columns

Portfolio Optimization to Backtest - Note: This is purposeful overfitting to show the value of diversification to assets with uncorrelated returns. This can be seen through an optimization of the sharpe ratio.

In [4]:
def backtest(wealth,weights,col, start_date, end_date, data, loss):
    colin=[0]*len(weights)
    port=[0]*len(weights)
    start=wealth
    worth=[start]
    cash=returns["Cash"]
    data=data[col].copy()
    data=pd.merge(data,cash,on="Year").dropna()
    data = data.iloc[(start_date-1995):(end_date-1995)]

    for n in range(len(weights)):
        colin[n] = data.columns.get_loc(col[n])
        port[n]=start*weights[n]

    for n in range (len(data)):
        for i in range(len(weights)):
            port[i]=port[i]*(1+data.iloc[n,colin[i]]/100)
            
        yr_end = sum(port)
        worth.append(yr_end)
        for i in range(len(weights)): port[i] = yr_end * weights[i]
        
    p_rtn = pd.DataFrame(worth).pct_change()[1:].reset_index().drop(["index"],axis=1)
    p_rtn = p_rtn.join(data["Cash"].reset_index().drop(["Year"],axis=1)/100)
    p_rtn=p_rtn[0]-p_rtn["Cash"]
    sharpe= round((p_rtn.mean()/p_rtn.std()),4)
    yearly_rtn = round((worth[-1]/worth[0])**(1/len(data))-1,4)
    
    if loss == 0:
        score = sharpe
    elif loss ==1:
        score = np.sqrt(sharpe*yearly_rtn)
    elif loss ==2:
        score = yearly_rtn
    
    return(score, sharpe, yearly_rtn)

In [5]:
#1 sample of features(N) which sum to 1. Sample M times
def constrained_samp(M,N):
    H=np.zeros([M,N+1])
    U=np.zeros([M,N])
    for j in range (0,M):
        for i in range(1,N): 
            H[j,i] = round(uniform(0,1),2)
        H[j,N] = 1
        H.sort()
        for i in range(1,N+1):
            U[j,i-1] = H[j,i] - H[j,i-1]
    return(U)

In [6]:
# surrogate or approximation for the objective function
def surrogate(model, X):
    # catch any warning generated when making a prediction
    with catch_warnings():
        # ignore generated warnings
        simplefilter("ignore")
        return model.predict(X, return_std=True)

In [7]:
# probability of improvement acquisition function
def acquisition(X, Xsamples, model):
    # calculate the best surrogate score found so far
    yhat, _ = surrogate(model, X)
    best = max(yhat)
    # calculate mean and stdev via surrogate function
    mu, std = surrogate(model, Xsamples)
    mu = mu[:, 0]
    # calculate the probability of improvement
    probs = norm.cdf((mu - best) / (std+1E-9))
    return probs

In [8]:
# optimize the acquisition function
def opt_acquisition(X, y, model,search_samp):
    # random search, generate random samples
    Xsamples = constrained_samp(search_samp,asset_classes)
    # calculate the acquisition function for each sample
    scores = acquisition(X, Xsamples, model)
    # locate the index of the largest scores
    ix = np.argmax(scores)
    return Xsamples[ix]

In [9]:
def Bayes_opt(asset_classes, starting_data, search_samp, optimization_steps, start_year, end_year, wealth, col, loss):

    #samples
    X = constrained_samp(starting_data,asset_classes)
    y = np.asarray([backtest(wealth,x,col,start_year,end_year,returns, loss) for x in X])[:,0].reshape(-1,1)  #to expand to allow for expected returns too
    # reshape into rows and cols
    y = y.reshape(len(y), 1)
    # define the model
    model = GaussianProcessRegressor()
    # fit the model
    model.fit(X, y)

    # perform the optimization process
    error=[]
    for i in range(optimization_steps):
        # select the next point to sample
        x = opt_acquisition(X, y, model,search_samp).reshape(1,-1)
        # sample the point
        actual,_,_ = backtest(wealth,x[0],col,start_year,end_year,returns, loss)    #expand to include expected returns
        # summarize the finding
        est, _ = surrogate(model, x)
                #print('>x=%s, f()=%3f, actual=%.3f' % (tuple(x), est, actual))
                #print(i)
        wrongness=(actual-est)/est
        error.append(wrongness)
        # add the data to the dataset
        X = np.vstack((X, x))
        y = np.vstack((y, [[actual]]))
        # update the model
        model.fit(X, y)
    
    # best result
    np.round(X,2)
    ix = np.argmax(y)
            #print('Best Result: \nWeights=%s, \nSharpe=%.3f' % (tuple(X[ix]), y[ix]))
            #print('\nMean Error=%.3f' % (abs(np.mean(error))))
    _, sharpe, yr_rtn = backtest(wealth,tuple(X[ix]),col,start_year,end_year,returns, loss)
    return(sharpe, yr_rtn, tuple(X[ix]))

In [10]:
def multi_run(asset_classes, starting_data, search_samp, optimization_steps, start_year, end_year, wealth, col, loss, algo_runs):

    sharpe = []
    annual = []
    weights = np.zeros([algo_runs,11])
    for i in range(algo_runs):
        shpe, anul, weights[i] = Bayes_opt(asset_classes, starting_data, search_samp, optimization_steps, start_year, end_year, wealth, col, loss)
        sharpe.append(shpe)
        annual.append(anul)
    index = np.argmax(sharpe)
    print("max sharpe:        ", np.max(sharpe), annual[index], weights[index])
    print("ave of" ,algo_runs, "algo runs:", np.round(np.mean(sharpe),4), np.round(np.mean(annual),4), np.round(np.mean(weights[:2],axis=0),2))
    

In [11]:
def one_asset_ports(wealth, col, start_year, end_year, returns, loss):
    #single asset portfolios
    test_weights = np.zeros([11,11])
    col_width = max(len(row) for row in col) + 2  # padding
    print(" ".ljust(col_width), "Sharpe Ratio      Annual Geo-Mean(CAGR)")
    i=0
    for row in col:
        test_weights[i,i]=1
        _, shpe, anul = backtest(wealth, test_weights[i], col, start_year, end_year, returns, loss)
        print(row.ljust(col_width), str(shpe).ljust(17), anul)
        i+=1

In [12]:
# Settings
asset_classes=11
starting_data=500   #norm is 300
search_samp=300     #norm is 250
optimization_steps=200   #norm is 50, been doing 100
start_year=1995     # 1995-2020 final year excluded
end_year=2021      # 1996-2021. final year excluded
wealth=1e3
#loss = 0
algo_runs = 1
col=['US Large Cap', 'US Mid Cap', 'US Small Cap', 'US Micro Cap', 'Intl Developed ex-US Market',
    'Emerging Markets', '10-year Treasury', 'Total US Bond Market', 'High Yield Corporate Bonds', 'Gold', 'Precious Metals']

In [13]:
#see how sharpe ratio, expected return and weights change as we change the loss function 

In [14]:
for i in range (3):
    print("")
    if i == 0:
        print("cost function = sharpe."," Backtest years:", start_year, "-", end_year-1)
        print("                    Sharpe Return")
    elif i == 1:
        print("cost function = geometric mean of sharpe and expt rtn.", " Backtest years:", start_year, "-", end_year-1)
        print("                    Sharpe Return")
    elif i == 2:
        print("cost function = expt rtn", " Backtest years:", start_year, "-", end_year-1)
        print("                    Sharpe Return")

    multi_run(asset_classes, starting_data, search_samp, optimization_steps, start_year, end_year, wealth, col, i, algo_runs)
    print(" ")

#can I beat it with a portfolio of a single asset over the same time period? No!
print("show outperformance relative to all single asset portfolios")
one_asset_ports(wealth, col, start_year, end_year, returns, 1)


cost function = sharpe.  Backtest years: 1995 - 2020
                    Sharpe Return
max sharpe:         1.0378 0.0747 [0.04 0.03 0.08 0.   0.01 0.09 0.65 0.03 0.03 0.03 0.01]
ave of 1 algo runs: 1.0378 0.0747 [0.04 0.03 0.08 0.   0.01 0.09 0.65 0.03 0.03 0.03 0.01]
 

cost function = geometric mean of sharpe and expt rtn.  Backtest years: 1995 - 2020
                    Sharpe Return
max sharpe:         1.0122 0.0818 [0.07 0.15 0.04 0.01 0.01 0.02 0.55 0.05 0.02 0.02 0.06]
ave of 1 algo runs: 1.0122 0.0818 [0.07 0.15 0.04 0.01 0.01 0.02 0.55 0.05 0.02 0.02 0.06]
 

cost function = expt rtn  Backtest years: 1995 - 2020
                    Sharpe Return
max sharpe:         0.5632 0.1082 [0.17 0.33 0.03 0.38 0.   0.01 0.   0.   0.02 0.03 0.03]
ave of 1 algo runs: 0.5632 0.1082 [0.17 0.33 0.03 0.38 0.   0.01 0.   0.   0.02 0.03 0.03]
 
show outperformance relative to all single asset portfolios
                              Sharpe Ratio      Annual Geo-Mean(CAGR)
US Large Cap          

In [None]:
#error model should be finding us mid cap annual geo mean of .1118 but creates a .1082 through various equity
#componants certainly favoring mid an micro cap which have the highest expected return
#This means an error of 3.3%