In [4]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sps
import tqdm
%matplotlib inline

In [63]:
def euclidean_proj_simplex(v, s=1):
    n, = v.shape  # will raise ValueError if v is not 1-D
    # check if we are already on the simplex
    if v.sum() == s and np.alltrue(v >= 0):
        # best projection: itself!
        return v
    # get the array of cumulative sums of a sorted (decreasing) copy of v
    u = np.sort(v)[::-1]
    cssv = np.cumsum(u)
    # get the number of > 0 components of the optimal solution
    rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
    # compute the Lagrange multiplier associated to the simplex constraint
    theta = (cssv[rho] - s) / (rho + 1.0)
    # compute the projection by thresholding v using theta
    w = (v - theta).clip(min=0)
    return w

In [341]:
# Gradient descent method

def grad_descent(x0, alpha, precision, max_iters):
    cur_x = x0
    previous_step_size = 1
    iters = 0
    
    while previous_step_size > precision and iters < 1000:
        prev_x = cur_x
        cur_x = np.abs(prev_x + alpha * df(prev_x))
        print(f(cur_x))
        previous_step_size = np.linalg.norm(cur_x - prev_x)
        iters += 1
        
    print("The local maximum of proj grad occurs at", cur_x)
    print("Number of iterations: ", iters)
    return cur_x

def ternary_search(func, left, right, precision):
    if abs(right - left) < precision:
        return (left + right) / 2

    left_third = (2*left + right)/3
    right_third = (left + 2*right)/3

    if func(left_third) < func(right_third):
        return ternary_search(func, left_third, right, precision) 
    else:
        return ternary_search(func, left, right_third, precision)

def get_step_size(x, grad, precision):
    zvalue = f(x)
    def h(alpha):
        return f(euclidean_proj_simplex(x + alpha * grad))
    
    return ternary_search(h, 0, 1000000, precision)
    
def proj_grad_descent(x0, alpha, precision, max_iters):
    cur_x = x0
    previous_step_size = 1
    iters = 0
    while previous_step_size > precision and iters < max_iters:
        prev_x = cur_x
        alpha = get_step_size(cur_x, df(cur_x), precision)
        print(alpha, f(cur_x), cur_x)
        cur_x = euclidean_proj_simplex(prev_x + alpha * df(prev_x))
        
        previous_step_size = np.linalg.norm(cur_x - prev_x)
        iters += 1
        
    print("The local maximum of proj grad occurs at", cur_x)
    print("Number of iterations: ", iters)
    return cur_x

# Quasi-Newton method

def proj_quasi_newton_method (x0, alpha, precision, max_iters):
    cur_x = x0
    previous_step_size = 1
    iters = 0
    H = np.eye(np.size(x0))
    while previous_step_size > precision and iters < max_iters:
        prev_x = cur_x
        h = -H @ df(prev_x)
        alpha = get_step_size(cur_x, df(cur_x), precision)
        print(alpha, cur_x, f(cur_x))
        cur_x = euclidean_proj_simplex(prev_x + alpha * h)
        s = np.atleast_2d(cur_x - prev_x).T
        y = np.atleast_2d(df(cur_x) - df(prev_x)).T
        #print(s, y)
        H = H - (H @ y @ y.T @ H) / (y.T @ H @ y) + (s @ s.T) / (y.T @ s)
        previous_step_size = np.linalg.norm(cur_x - prev_x) 
        iters += 1
    
    print("The local maximum of quasi newton occurs at", cur_x)
    print("Number of iterations: ", iters)
    return cur_x

# Newton method 

def proj_newton_method (x0, alpha, precision, max_iters):
    cur_x = x0
    previous_step_size = 1
    iters = 0
    while previous_step_size > precision and iters < max_iters:
        prev_x = cur_x
        h =  - np.linalg.inv(ddf(prev_x)) @ df(prev_x)
        alpha = get_step_size(cur_x, h, precision)
        cur_x = euclidean_proj_simplex(prev_x + alpha * h)
        previous_step_size = np.linalg.norm(cur_x - prev_x) 
        iters += 1
    
    print("The local maximum of newton occurs at", cur_x)
    print("Number of iterations: ", iters)
    return cur_x

In [342]:
t_matrix = np.random.rand(5,5) 
t_matrix /= t_matrix.sum(axis=1)[:,None]

c_matrix = (t_matrix * np.log2(t_matrix)).sum(axis=0)

In [333]:
#f = lambda x: c_matrix.T @ x - ((t_matrix @ x) * np.log2(t_matrix @ x)).sum(axis=-1)

#df = lambda x: c_matrix.T - (t_matrix.T * (np.log2(t_matrix @ x) + 1)).sum(axis=-1)

def f(x):
    array = (t_matrix @ x) * np.log2(t_matrix @ x)
    return c_matrix @ x - array.sum() 

def df(x):
    k = t_matrix.shape[0]
    array = np.array([t_matrix[i] * (np.log2(t_matrix[i] @ x) + 1) for i in range(k)])
    return c_matrix - array.sum(axis=0)

def ddf(x):
    k = t_matrix.shape[0]
    a = t_matrix @ x
    array = np.array([np.atleast_2d(t_matrix[i]).T @ np.atleast_2d(t_matrix[i]) for i in range(k)])
    result = - np.array([a[i] * array[i] for i in range(k)]).sum(axis=0)
    return result

In [305]:
x0 = np.random.rand(5)
x0 /= x0.sum()
alpha = 0.01
precision = 10**-5
iters = 10**5

x1 = proj_grad_descent(x0, alpha, precision, iters)
print("max proj grad capacity:", f(x1))
x2 = proj_newton_method(x0, alpha, precision, iters)
print("max proj newton capacity:", f(x2))
x3 = proj_quasi_newton_method(x0, alpha, precision, iters)
print("max proj quasi newton capacity:", f(x3))

3.183889024768779 0.1878149646467988 [0.17246696 0.19326059 0.19246463 0.24736365 0.19444416]
0.6447574154632922 0.2529902145555407 [0.43731771 0.38229515 0.17432424 0.0060629  0.        ]
0.30236196354157285 0.2549692344938801 [0.41309623 0.3351782  0.22032477 0.03140079 0.        ]
0.20033908909249531 0.2550786357402699 [0.41028645 0.32130277 0.23240641 0.03600437 0.        ]
0.16814950140508084 0.25510872655663186 [0.40931219 0.31362248 0.23915097 0.03791436 0.        ]
0.1721976011535587 0.2551246393204343 [0.40883667 0.30786091 0.24427793 0.03902449 0.        ]
0.21640982156986088 0.25513786685208784 [0.40858404 0.30247722 0.24913882 0.03979993 0.        ]
0.3528849591204743 0.2551544087627211 [0.40851655 0.29630842 0.25480831 0.04036672 0.        ]
0.8920151498878677 0.255187287393988 [0.40882873 0.28734643 0.26326366 0.04056118 0.        ]
5.087857253207961 0.25531736694305884 [0.41097124 0.26863043 0.28187341 0.03852492 0.        ]
9.535865027967928 0.2576602925310567 [4.355482



In [343]:
t_matrix = np.matrix([[0.9, 0.1], [0.1, 0.9]])
t_matrix = np.array(t_matrix)

In [344]:
print(1 + 0.2*np.log2(0.2) + 0.8*np.log2(0.8))

0.27807190511263774


In [345]:
c_matrix = (t_matrix * np.log2(t_matrix)).sum(axis=0)

In [348]:
x0 = np.array([0.45, 0.55])
x0 /= x0.sum()
print(x0)
alpha = 0.1
precision = 10**-8
iters = 10**5
print(df(x0))

x3 = proj_quasi_newton_method(x0, alpha, precision, iters)
print("max proj quasi grad cap:", f(x3))

[0.45 0.55]
[-0.37183391 -0.55689435]
0.5403639815795979 [0.45 0.55] 0.5263828452309445
31.47103936854503 [0.4 0.6] 0.5124583024673183
0.3943310945706878 [1. 0.] 0.0
The local maximum of quasi newton occurs at [1. 0.]
Number of iterations:  3
max proj quasi grad cap: 0.0


