In [3]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sps
import tqdm
%matplotlib inline

In [52]:
t_matrix = np.random.rand(5,5) 
t_matrix /= t_matrix.sum(axis=1)[:,None]

print(t_matrix)

c_matrix = -(t_matrix * np.log2(t_matrix)).sum(axis=-1)

df = lambda x: -c_matrix.T - (t_matrix * (np.log2(t_matrix) + 1)).sum(axis=-1)

[[0.12262093 0.08957625 0.29903679 0.22751046 0.26125558]
 [0.20821353 0.28187651 0.28598962 0.07636855 0.14755178]
 [0.43080048 0.07232964 0.06015189 0.28996765 0.14675034]
 [0.09371345 0.2694553  0.246365   0.06036239 0.33010386]
 [0.06982142 0.17156186 0.12172916 0.59989012 0.03699744]]


In [53]:
def grad_descent(x0, alpha, precision, max_iters):
    cur_x = x0
    previous_step_size = 1
    iters = 0
    while previous_step_size > precision and iters < max_iters:
        prev_x = cur_x
        cur_x = cur_x + alpha * df(prev_x)
        previous_step_size = np.linalg.norm(cur_x - prev_x) 
        iters += 1
    
    print("The local maximum occurs at", cur_x)
    
def proj_grad_descent(x0, alpha, precision, max_iters):
    cur_x = x0
    previous_step_size = 1
    iters = 0
    while previous_step_size > precision and iters < max_iters:
        prev_x = cur_x
        cur_x = euclidean_proj_simplex(cur_x + alpha * df(prev_x))
        previous_step_size = np.linalg.norm(cur_x - prev_x) 
        iters += 1
    
    print("The local maximum occurs at", cur_x)
    
def euclidean_proj_simplex(v, s=1):
    n, = v.shape  # will raise ValueError if v is not 1-D
    # check if we are already on the simplex
    if v.sum() == s and np.alltrue(v >= 0):
        # best projection: itself!
        return v
    # get the array of cumulative sums of a sorted (decreasing) copy of v
    u = np.sort(v)[::-1]
    cssv = np.cumsum(u)
    # get the number of > 0 components of the optimal solution
    rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
    # compute the Lagrange multiplier associated to the simplex constraint
    theta = (cssv[rho] - s) / (rho + 1.0)
    # compute the projection by thresholding v using theta
    w = (v - theta).clip(min=0)
    return w

In [54]:
x0 = np.random.rand(5) * 5
alpha = 0.1
precision = 10**-2
iters = 10**5

grad_descent(x0, alpha, precision, iters)
proj_grad_descent(x0, alpha, precision, iters)

The local maximum occurs at [-9997.42239241 -9999.86711875 -9999.36347971 -9999.88287137
 -9998.25518535]
The local maximum occurs at [9.16396466e-01 2.77555756e-17 5.55111512e-17 8.32667268e-17
 8.36035337e-02]
