# Robust Portfolio Optimization
We use the returns of the six portfolios formed on size and book-to-market ratio (2$\times$3) obtained from Kenneth French's website. 
We use monthly returns from February 1984 through January 2014. 
This gives us a total of $N=360$ return realizations for each of the six portfolios.

Let $\mathbf{a}\in \mathbb{R}^6$ be an investment strategy on the six portfolios, subject to the constraints $a_i\geq 0, \sum^6_{i=1}a_i=1$. Let $\mathbf{R}$ be a $360\times 6$ matrix with $\mathbf{r}_1,\ldots,\mathbf{r}_{360}\in \mathbb{R}^6$ the vectors of realizations of returns for all six portfolios. For each realization $\mathbf{r}_i$, the wealth level after a single period is defined as $1+\mathbf{a}^T\mathbf{r}_i$. We consider the following worst-case risk evaluation minimization problem: 
\begin{align}
\begin{split}
\min_{\mathbf{a}\in \mathbb{R}^{6},\mathbf{a}\geq \mathbf{0}, c\in \mathbb{R}}\left\{c~\middle|~\sup_{(\mathbf{q},\bar{\mathbf{q}})\in U_{\phi,h}(\mathbf{p})}-\sum^{360}_{i=1}\bar{q}_iu(1+\mathbf{a}^T\mathbf{r}_i)\leq c,\sum^6_{i=1}a_i=1\right\},   
\end{split}
\end{align}
where we choose the modified chi-squared function $\phi(x)=(x-1)^2$ with radius $r=\frac{1}{N}\chi^2_{359,0.95}$. Furthermore, we choose $h(p)=1-(1-p)^2,~p\in[0,1]$ as the distortion function and $u(x)=1-e^{-x/\lambda}$ with $\lambda=10$ the exponential utility function.
The solution of this problem will be compared to that of the nominal risk evaluation minimization problem, where the nominal probability is the uniform distribution $p_i=\frac{1}{360}$:
\begin{align}
\begin{split}
\min_{\mathbf{a}\in \mathbb{R}^6,\mathbf{a}\geq \mathbf{0}, c\in \mathbb{R}}\left\{c~\middle|~\sup_{\bar{\mathbf{q}}\in M_h(\mathbf{p})}-\sum^{360}_{i=1}\bar{q}_iu(1+\mathbf{a}^T\mathbf{r}_i)\leq c, \sum^6_{i=1}a_i=1\right\}.
\end{split}
\end{align}

In [8]:
import numpy as np
import pandas as pd
import cvxpy as cp
import mosek
import matplotlib.pyplot as plt
import datetime as date
from datetime import datetime as dt
from dateutil.relativedelta import *
import distortion_function as hf
import phi_divergence as phi
import affine_approx as af
import Utility_functions as ut
import Cutting_plane as ct
from time import process_time
import importlib
import scipy.stats

In [3]:
# Re-import Cutting_plane so that edits made to the module file are picked up
# without restarting the kernel.
importlib.reload(ct)

<module 'Cutting_plane' from 'C:\\Users\\gjin\\Robust\\Cutting_plane.py'>

In [4]:

############# Sanity check of the local MOSEK installation
# NOTE(review): this cell only constructs (and closes) a MOSEK environment.
# Whether that alone validates the licence cannot be seen from this notebook --
# confirm against the MOSEK documentation; a licence problem may only surface
# at the first prob.solve(solver=cp.MOSEK) call.

import mosek

# Try to initialize MOSEK
with mosek.Env() as env:
    pass


In [5]:
### Load the Kenneth French "6 Portfolios 2x3" return file
# The first 15 lines of the file are skipped and only the first 1144 data rows are kept.
df_returns6 = pd.read_csv('6_Portfolios_2x3.csv', skiprows=15)
df_returns = df_returns6.iloc[:1144].copy()
# Dates are stored as YYYYMM, so every parsed timestamp falls on the first of the month.
df_returns['Date'] = pd.to_datetime(df_returns['Date'], format='%Y%m')
# Every column after the date column is converted to a numeric dtype.
for col in df_returns.columns[1:]:
    df_returns[col] = pd.to_numeric(df_returns[col])
df_returns

Unnamed: 0,Date,SMALL LoBM,ME1 BM2,SMALL HiBM,BIG LoBM,ME2 BM2,BIG HiBM
0,1926-07-01,1.0874,0.9349,-0.0695,5.7168,1.9620,1.4222
1,1926-08-01,0.7030,1.2300,5.3842,2.7154,2.6930,6.3154
2,1926-09-01,-2.9117,-0.1303,-0.4374,1.4287,0.0704,-0.7967
3,1926-10-01,-3.8196,-4.5860,-2.0112,-3.5898,-2.3398,-4.0970
4,1926-11-01,3.1806,3.7233,2.0944,3.1292,2.8952,3.4614
...,...,...,...,...,...,...,...
1139,2021-06-01,5.6058,0.4400,-1.0979,4.8188,-1.2594,-4.0036
1140,2021-07-01,-5.5593,-1.8623,-3.6521,3.1048,-0.0099,-2.3000
1141,2021-08-01,2.3903,1.5124,2.6680,3.5667,1.4122,3.0371
1142,2021-09-01,-4.3421,-3.4661,0.6445,-5.4525,-3.8570,-0.2526


In [6]:
### Keep the observations dated between startdate and enddate (both bounds inclusive).
### The raw figures are percentages, hence the division by 100.
# Observations are stamped on the first of each month, so a lower bound of
# 3 Jan 1984 drops the January 1984 row while 3 Jan 2014 keeps January 2014.
startdate = dt(1984, 1, 3)
enddate = dt(2014, 1, 3)
in_window = (df_returns.Date >= startdate) & (df_returns.Date <= enddate)
X = df_returns.loc[in_window, df_returns.columns[1:7]].reset_index(drop=True)
R = X.to_numpy() / 100
# Initial wealth
W0 = 1

In [9]:
### phi-divergence ingredients (modified chi-squared variants from phi_divergence)
phi_func = phi.mod_chi2_cut
phi_conj = phi.mod_chi2_conj
phi_dot = 2

### distortion-function ingredients (from distortion_function)
h_func = hf.h_spw_cut
h_conj = hf.h_spw_conj
h_eva = hf.h_spw_eva
### parameter of the h function: h(p)=1-(1-p)^2, hence h_par = 2
h_par = 2

### utility-function handles (from Utility_functions)
utility = ut.exp_utility
utility_eva = ut.exp_utility_eva

### N = number of return realizations (rows of R), I = number of assets (columns of R);
### p is the uniform nominal probability vector
N, I = R.shape
p = np.full(N, 1/N)

### radius of the phi-divergence uncertainty set
alpha_phi_set = 0.95
r = phi_dot/(2*N)*scipy.stats.chi2.ppf(alpha_phi_set, N-1)

In [10]:
###### Cutting-plane robust risk minimization solution

# Tolerance handed to the cutting-plane routine
# (presumably the stopping gap between the printed lb and ub -- confirm in Cutting_plane).
e_tol = 0.0001
# process_time() counts CPU time of this process, not wall-clock time.
tb = process_time()
# The distortion parameter is passed as h_par (instead of a literal 2) so that this
# call stays in sync with the nominal run and the robustness checks, which all use h_par.
res_rob = ct.cut_rob_pmin(R,p,e_tol,utility,utility_eva,h_func,phi_func,h_eva,r,W0, par=h_par, par_u=10)
te = process_time()
print('time', te-tb, 'seconds')
# res_rob prints as a flat tuple of alternating labels and values
# ('sol:', weights, 'lb:', ..., 'ub:', ..., 'iter:', ...).
print(res_rob)

lb: -0.08786117865952753 ub: [-0.09629894] iter: 0
lb: -0.08919039635560265 ub: [-0.09044157] iter: 1
lb: -0.08940651729343968 ub: [-0.09009738] iter: 2
lb: -0.08948381007093242 ub: [-0.08966657] iter: 3
lb: -0.08948143999397146 ub: [-0.08952706] iter: 4
time 110.0625 seconds
('sol:', array([3.54055574e-11, 1.71782966e-10, 1.15974067e-10, 2.50944809e-01,
       7.49055190e-01, 3.12074820e-10]), 'lb:', -0.0895270557704481, 'ub:', -0.08948143999397146, 'iter:', 4)


In [11]:
###### Cutting-plane nominal risk minimization solution
# Tolerance handed to the cutting-plane routine
# (presumably the stopping gap between the printed lb and ub -- confirm in Cutting_plane).
e_tol = 0.0001
# process_time() counts CPU time of this process, not wall-clock time.
tb = process_time()
res_nom = ct.cut_nom_pmin(R,p,e_tol,utility,utility_eva,h_eva,W0,par=h_par,par_u=10) 
te = process_time()
print('time', te-tb, 'seconds')
# res_nom prints as a flat tuple of alternating labels and values
# ('sol:', weights, 'lb:', ..., 'ub:', ..., 'iter:', ...).
print(res_nom)

lb: -0.0937186092796091 ub: [-0.09629894] iter: 0
lb: -0.09393146527231497 ub: [-0.09439286] iter: 1
lb: -0.09387991226421565 ub: [-0.09414542] iter: 2
lb: -0.09397411716061542 ub: [-0.09408978] iter: 3
lb: -0.09396835079972576 ub: [-0.0940359] iter: 4
time 61.4375 seconds
('sol:', array([6.12265646e-10, 1.40642318e-07, 1.43623410e-01, 3.84557777e-01,
       4.71818668e-01, 3.77053986e-09]), 'lb:', -0.09403589599270797, 'ub:', -0.09396835079972576, 'iter:', 4)


In [12]:
#### Worst-case risk of the nominal solution obtained from the cutting-plane method
# res_nom is a flat tuple of labels and values; position 1 holds the weight vector.
w_nom = res_nom[1]
# Evaluate the exponential utility (parameter 10) for the nominal weights.
x_nom = ut.exp_utility_eva(R, w_nom, W0, 10)
# Only the first entry of the robustcheck result is reported.
worst_case_nom = ct.robustcheck(x_nom, p, h_func, phi_func, r, h_par)
print(worst_case_nom[0])

-0.08937877108425232


In [13]:
### Nominal risk of the robust solution obtained from the cutting-plane method
# res_rob is a flat tuple of labels and values; position 1 holds the weight vector.
w_rob = res_rob[1]
# Evaluate the exponential utility (parameter 10) for the robust weights.
x_rob = ut.exp_utility_eva(R, w_rob, W0, 10)
# nominal_risk returns a pair; only the first element (the risk value) is printed.
risk_robnom, qb_nomrb = ct.nominal_risk(x_rob, p, h_eva, h_par)
print(risk_robnom)

-0.0939484452866081


In [14]:
### Precision of the piecewise-linear (PL) approximation
e_pl = 0.001

### Slopes and constants that constitute the PL approximation of h(p)=1-(1-p)^2,
### built from the points returned by affine_approx_hspw for the chosen precision
x_points = af.affine_approx_hspw(h_par, e_pl)
slope, const = af.makepoints(af.sing_pw, x_points, par=h_par)
                                                               

In [15]:
### The following function solves the robust problem using the piecewise-linear approximation method

def af_rob_exp_pmin(p,R,r,phi_conj,slope,const,W0,par = 2, par_u = 1):
    """Solve the robust risk minimization problem with the PL approximation.

    Builds a CVXPY model over non-negative portfolio weights that sum to one,
    minimizes the epigraph variable ``c`` and solves it with MOSEK.

    Args:
        p: Nominal probability vector; its length N is the number of scenarios.
        R: Return matrix with one row per scenario and one column per asset.
        r: Radius of the uncertainty set; multiplies ``gamma`` in the final
            constraint.
        phi_conj: Callable invoked once per scenario as
            ``phi_conj(gamma, s[i], t[i], w[i], constraints)``; it must return
            the constraint list to keep using (presumably the same list with
            the conjugate constraints appended -- see phi_divergence).
        slope: Slopes of the K affine pieces of the PL approximation.
        const: Constants (intercepts) of the K affine pieces.
        W0: Initial wealth; also used as the upper bound on every weight.
        par: Not used in this function; kept so existing calls keep working.
        par_u: Divisor in the exponent of the exponential utility.

    Returns:
        Tuple ``(a.value, prob.value)``: the optimal weights and the optimal
        objective value as reported by CVXPY.

    Raises:
        ValueError: If ``R`` is not two-dimensional, if its number of rows
            differs from ``len(p)``, or if ``slope`` and ``const`` differ in
            length.
    """
    R = np.asarray(R)
    N = len(p)
    K = len(slope)
    # Fail early on inconsistent inputs: the scenario loop below is driven by
    # len(p), so additional rows of R would otherwise be ignored silently.
    if R.ndim != 2:
        raise ValueError('R must be a two-dimensional array (scenarios x assets)')
    if R.shape[0] != N:
        raise ValueError('R has %d rows but p has %d entries' % (R.shape[0], N))
    if len(const) != K:
        raise ValueError('slope and const must have the same length')
    I = R.shape[1]

    c = cp.Variable(1)
    lbda = cp.Variable((N,K), nonneg = True)
    a = cp.Variable(I, nonneg = True)
    v = cp.Variable(K, nonneg = True)
    alpha = cp.Variable(1)
    beta = cp.Variable(1)
    gamma = cp.Variable(1,nonneg = True)
    t = cp.Variable(N)
    s = cp.Variable(N)
    w = cp.Variable(N)
    constraints = [cp.abs(a)<= W0, cp.sum(a) == 1]

    # Exponent of the utility for every scenario, -W0*(1 + r_i^T a)/par_u,
    # built once instead of re-creating R @ a in each loop iteration.
    arg = -W0*(1 + R @ a)/par_u
    for i in range(N):
        # Negative exponential utility of scenario i, bounded via the dual variables.
        constraints.append(-(1-cp.exp(arg[i])) - cp.sum(lbda[i]) - beta <= 0)
        constraints.append(s[i] == -alpha + lbda[i]@slope)
        constraints.append(lbda[i] <= v)
        # phi_conj extends the list with its own constraints on (gamma, s[i], t[i], w[i]).
        constraints = phi_conj(gamma,s[i],t[i],w[i],constraints)
    constraints.append(alpha + beta + gamma * r  + v@const + p@t <= c)

    obj = cp.Minimize(c)
    prob = cp.Problem(obj,constraints)
    prob.solve(solver=cp.MOSEK)
    return(a.value, prob.value)



### The following function solves the nominal problem using the piecewise-linear approximation method

def af_nom_exp_pmin(p,R,slope,const,W0,par = 2, par_u = 1):
    """Solve the nominal risk minimization problem with the PL approximation.

    Builds a CVXPY model over non-negative portfolio weights that sum to one,
    minimizes the epigraph variable ``c`` and solves it with MOSEK.

    Args:
        p: Nominal probability vector; its length N is the number of scenarios.
        R: Return matrix with one row per scenario and one column per asset.
        slope: Slopes of the K affine pieces of the PL approximation.
        const: Constants (intercepts) of the K affine pieces.
        W0: Initial wealth; also used as the upper bound on every weight.
        par: Not used in this function; kept so existing calls keep working.
        par_u: Divisor in the exponent of the exponential utility.

    Returns:
        Tuple ``(a.value, prob.value)``: the optimal weights and the optimal
        objective value as reported by CVXPY.

    Raises:
        ValueError: If ``R`` is not two-dimensional, if its number of rows
            differs from ``len(p)``, or if ``slope`` and ``const`` differ in
            length.
    """
    R = np.asarray(R)
    N = len(p)
    K = len(slope)
    # Fail early on inconsistent inputs: the scenario loop below is driven by
    # len(p), so additional rows of R would otherwise be ignored silently.
    if R.ndim != 2:
        raise ValueError('R must be a two-dimensional array (scenarios x assets)')
    if R.shape[0] != N:
        raise ValueError('R has %d rows but p has %d entries' % (R.shape[0], N))
    if len(const) != K:
        raise ValueError('slope and const must have the same length')
    I = R.shape[1]

    c = cp.Variable(1)
    lbda = cp.Variable((N,K), nonneg = True)
    a = cp.Variable(I, nonneg = True)
    v = cp.Variable(K, nonneg = True)
    beta = cp.Variable(1)
    constraints = [cp.abs(a)<= W0, cp.sum(a) == 1]

    # Exponent of the utility for every scenario, -W0*(1 + r_i^T a)/par_u,
    # built once instead of re-creating R @ a in each loop iteration.
    arg = -W0*(1 + R @ a)/par_u
    for i in range(N):
        # Negative exponential utility of scenario i, bounded via the dual variables.
        constraints.append(-(1-cp.exp(arg[i])) - cp.sum(lbda[i]) - beta <= 0)
        constraints.append(lbda[i] <= v)
    constraints.append(beta  + v@const + p@lbda@slope <= c)

    obj = cp.Minimize(c)
    prob = cp.Problem(obj,constraints)
    prob.solve(solver=cp.MOSEK)
    return(a.value, prob.value)

In [16]:
# First solve: PL approximation with the constants `const` as computed;
# its optimal value is reported under the label 'lb'.
t1_af_res = process_time()
af_res = af_rob_exp_pmin(p,R,r,phi_conj,slope,const,W0,par = h_par, par_u = 10)
t2_af_res = process_time()
### the solution and objective value of the piecewise-linear approximation method
print('sol:',af_res[0], 'lb:', af_res[1])   
### CPU time spent in the call above (model construction and solve);
### process_time() does not measure wall-clock time
print('time', t2_af_res- t1_af_res, 'seconds')


# Second solve: identical, except that every constant is shifted up by the
# PL precision e_pl; its optimal value is reported under the label 'ub'.
t1_af_res_ub = process_time()
af_res_ub = af_rob_exp_pmin(p,R,r,phi_conj,slope,const+e_pl,W0,par = h_par, par_u = 10)
t2_af_res_ub = process_time()

print('sol:',af_res_ub[0], 'ub:', af_res_ub[1])   
### CPU time spent in the call above (model construction and solve)
print('time', t2_af_res_ub- t1_af_res_ub, 'seconds')

sol: [7.13420613e-10 3.78209461e-09 2.51930935e-09 3.79717189e-01
 6.20282798e-01 6.19859795e-09] lb: -0.08951029319587173
time 12.671875 seconds
sol: [1.87609197e-09 9.98318381e-09 1.30755000e-08 3.76560089e-01
 6.23439871e-01 1.45642906e-08] ub: -0.08948262687815382
time 12.46875 seconds


In [17]:
# First solve: PL approximation with the constants `const` as computed;
# its optimal value is reported under the label 'lb'.
# The initial wealth is passed as W0 (rather than a literal 1) so that this cell
# uses the same wealth as the robust PL run and the cutting-plane runs.
t1_af_nom = process_time()
af_nom = af_nom_exp_pmin(p,R,slope,const,W0,par = h_par, par_u = 10)
t2_af_nom = process_time()
### the solution and objective value of the piecewise-linear approximation method
print('sol:',af_nom[0], 'lb:', af_nom[1])
### CPU time spent in the call above (model construction and solve);
### process_time() does not measure wall-clock time
print('time', t2_af_nom- t1_af_nom, 'seconds')


# Second solve: identical, except that every constant is shifted up by the
# PL precision e_pl; its optimal value is reported under the label 'ub'.
t1_af_nom_ub = process_time()
af_nom_ub = af_nom_exp_pmin(p,R,slope,const+e_pl,W0,par = h_par, par_u = 10)
t2_af_nom_ub = process_time()

print('sol:',af_nom_ub[0], 'ub:', af_nom_ub[1])
### CPU time spent in the call above (model construction and solve)
print('time', t2_af_nom_ub- t1_af_nom_ub, 'seconds')

sol: [1.00618210e-09 8.59290405e-02 2.07493483e-01 1.30601768e-01
 5.75975698e-01 8.89528303e-09] lb: -0.09400888974788543
time 5.984375 seconds
sol: [1.50352434e-09 7.84892167e-02 2.07079630e-01 1.29813633e-01
 5.84617505e-01 1.41332539e-08] ub: -0.09397821096000436
time 6.4375 seconds
