In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sps
import tqdm
%matplotlib inline

In [2]:
def euclidean_proj_simplex(v, s=1):
    n, = v.shape  # will raise ValueError if v is not 1-D
    # check if we are already on the simplex
    if v.sum() == s and np.alltrue(v >= 0):
        # best projection: itself!
        return v
    # get the array of cumulative sums of a sorted (decreasing) copy of v
    u = np.sort(v)[::-1]
    cssv = np.cumsum(u)
    # get the number of > 0 components of the optimal solution
    rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
    # compute the Lagrange multiplier associated to the simplex constraint
    theta = (cssv[rho] - s) / (rho + 1.0)
    # compute the projection by thresholding v using theta
    w = (v - theta).clip(min=0)
    return w


def ternary_search(func, left, right, precision):
    # finds argmax of convex function f
    # on the segment [left; right] with given precision
    if abs(right - left) < precision:
        return left

    left_third = (2*left + right)/3
    right_third = (left + 2*right)/3
    if func(left_third) == func(right_third):
        return ternary_search(func, left_third, right_third, precision) 
    if func(left_third) < func(right_third):
        return ternary_search(func, left_third, right, precision) 
    else:
        return ternary_search(func, left, right_third, precision)


def search(f, estimate, precision):
    # finds argmax of convex function f
    # on the line with given precision
    curr = estimate
    delta = 1
    while f(curr + delta) > f(curr):
        curr = curr + delta
        delta *= 2
    curr_left = estimate
    delta_left = 1
    while f(curr_left - delta_left) > f(curr_left):
        curr_left = curr_left - delta_left
        delta_left *= 2
    return ternary_search(f, curr_left - delta_left, curr + delta, precision)
    
    
def get_step_size(x, grad, precision):
    # steepest descent
    def h(alpha):
        return f(euclidean_proj_simplex(x + alpha * grad))
    optimal_alpha = search(h, 0, precision)
    return optimal_alpha

def get_constant_step_size(alpha):
    def get_size(x, grad, precision):
        return alpha
    return get_size


In [3]:
# Gradient descent method

def proj_grad_descent(x0, alpha_generator, precision, max_iters):
    cur_x = x0
    previous_step_size = 1
    iters = 0
    while previous_step_size > precision and iters < max_iters:
        prev_x = cur_x
        alpha = alpha_generator(cur_x, df(cur_x), 1e-7)
        cur_x = euclidean_proj_simplex(prev_x + alpha * df(prev_x))
        previous_step_size = np.linalg.norm(cur_x - prev_x)
        iters += 1
        
    np.set_printoptions(precision=4)
    print("The local maximum of proj grad occurs at", cur_x)
    print("Number of iterations: ", iters)
    return cur_x

# Quasi-Newton method

def proj_quasi_newton_method (x0, alpha, precision, max_iters):
    cur_x = x0
    previous_step_size = 1
    iters = 0
    H = np.eye(np.size(x0))
    alpha = 1
    while previous_step_size > precision and iters < max_iters:
        prev_x = cur_x
        h = -H @ df(prev_x)
        alpha = get_step_size(prev_x, h, 1e-7)
        cur_x = euclidean_proj_simplex(prev_x + alpha * h)
        s = np.atleast_2d(cur_x - prev_x).T
        y = np.atleast_2d(df(cur_x) - df(prev_x)).T
        #print(s, y)
        H = H - (H @ y @ y.T @ H) / (y.T @ H @ y) + (s @ s.T) / (y.T @ s)
        previous_step_size = np.linalg.norm(cur_x - prev_x) 
        iters += 1
        
    np.set_printoptions(precision=4)
    print("The local maximum of quasi newton occurs at", cur_x)
    print("Number of iterations: ", iters)
    return cur_x

# Newton method 

def proj_newton_method (x0, alpha, precision, max_iters):
    cur_x = x0
    previous_step_size = 1
    iters = 0
    while previous_step_size > precision and iters < max_iters:
        prev_x = cur_x
        h = - np.linalg.inv(ddf(prev_x)) @ df(prev_x)
        alpha = get_step_size(prev_x, h, 1e-7)
      
        cur_x = euclidean_proj_simplex(prev_x + alpha * h)
        previous_step_size = np.linalg.norm(cur_x - prev_x) 
        iters += 1
        
    np.set_printoptions(precision=4)
    print("The local maximum of newton occurs at", cur_x)
    print("Number of iterations: ", iters)
    return cur_x

In [89]:
t_matrix = np.random.rand(5,3) 
t_matrix /= t_matrix.sum(axis=1)[:,None]
t_matrix = t_matrix.T
c_matrix = (t_matrix * np.log2(t_matrix)).sum(axis=0)

In [90]:
def f(x):
    array = (t_matrix @ x) * np.log2(t_matrix @ x)
    return c_matrix @ x - array.sum() 

def df(x):
    k = t_matrix.shape[0]
    array = np.array([t_matrix[i] * (np.log2(t_matrix[i] @ x) + 1) for i in range(k)])
    return c_matrix - array.sum(axis=0)

def ddf(x):
    k = t_matrix.shape[0]
    a = t_matrix @ x
    array = np.array([np.atleast_2d(t_matrix[i]).T @ np.atleast_2d(t_matrix[i]) for i in range(k)])
    result = - np.array([array[i] / a[i] for i in range(k)]).sum(axis=0)
    return result

In [91]:
print(df(res.x))

[-0.9156 -0.4405 -0.9256 -0.9626 -0.7207]


In [95]:
x0 = np.random.rand(5)
x0 /= x0.sum()
precision = 10**-6
iters = 10**4

print('Starting point:', x0)

x1 = proj_grad_descent(x0, get_step_size, precision, iters)
print("max proj grad capacity:", f(x1))
x2 = proj_grad_descent(x0, get_constant_step_size(0.1), precision, iters)
print("max proj grad capacity with constant step:", f(x2))
x3 = proj_quasi_newton_method (x0, get_step_size, precision * 10, iters)
print("max proj quasi newton capacity:", f(x3))
x4 = proj_newton_method (x0, get_step_size, precision, iters)
print("max proj newton capacity:", f(x4))

Starting point: [0.3876 0.1979 0.0704 0.1998 0.1444]
The local maximum of proj grad occurs at [0.4689 0.5311 0.     0.     0.    ]
Number of iterations:  4
max proj grad capacity: 0.22969701685043664
The local maximum of proj grad occurs at [0.4689 0.5311 0.     0.     0.    ]
Number of iterations:  87
max proj grad capacity with constant step: 0.22969701680355303
The local maximum of quasi newton occurs at [3.6612e-01 5.1730e-01 7.4156e-02 3.6189e-09 4.2429e-02]
Number of iterations:  5
max proj quasi newton capacity: 0.21870935895421062


LinAlgError: Singular matrix

In [62]:
t_matrix = np.matrix([[0.9, 0.1], [0.1, 0.9]])
t_matrix = np.array(t_matrix)

In [63]:
print(1 + 0.1*np.log2(0.1) + 0.9*np.log2(0.9))

0.5310044064107187


In [64]:
c_matrix = (t_matrix * np.log2(t_matrix)).sum(axis=0)

In [79]:
x0 = np.random.rand(5)
x0 /= x0.sum()
print(x0)
alpha = 0.1
precision = 10**-8
iters = 10**5
print(df(x0))

x1 = proj_grad_descent(x0, get_step_size, precision, iters)
print("max proj grad capacity:", f(x1))
x2 = proj_grad_descent(x0, get_constant_step_size(0.1), precision, iters)
print("max proj grad capacity with constant step:", f(x2))
x3 = proj_quasi_newton_method (x0, get_step_size, precision * 10, iters)
print("max proj quasi newton capacity:", f(x3))
x4 = proj_newton_method (x0, get_step_size, precision, iters)
print("max proj newton capacity:", f(x4))

[0.3141 0.1933 0.0976 0.2305 0.1645]
[-0.8062 -0.7965 -0.2735 -0.8624 -0.7303]
The local maximum of proj grad occurs at [0.5057 0.0247 0.4695 0.     0.    ]
Number of iterations:  27
max proj grad capacity: 0.3703796969965967
The local maximum of proj grad occurs at [0.5057 0.0247 0.4695 0.     0.    ]
Number of iterations:  308
max proj grad capacity with constant step: 0.37037969699658957
The local maximum of quasi newton occurs at [2.8927e-01 1.0367e-01 4.1905e-01 8.8516e-11 1.8802e-01]
Number of iterations:  6
max proj quasi newton capacity: 0.36090216749134485


LinAlgError: Singular matrix

In [80]:
from scipy.optimize import minimize

def g(x):
    return -f(x)
cons = ({'type': 'eq', 'fun': lambda x: x.sum() - 1},
        {'type': 'ineq', 'fun': lambda x: x.min()})
res = minimize(g, x0, constraints = cons)
print(res)

     fun: -0.3702487009987574
     jac: array([1.0692, 1.0864, 1.0746, 1.2656, 1.1128])
 message: 'Optimization terminated successfully.'
    nfev: 136
     nit: 15
    njev: 15
  status: 0
 success: True
       x: array([4.9358e-01, 4.1363e-02, 4.6505e-01, 8.3861e-17, 1.1186e-05])
