In [120]:
import numpy as np
import scipy.optimize as spopt

# 3.1 
(i) $$ f(x) = 2x_1^3 - x_1^2 x_2 + 2x_2^2 $$

$$
\nabla f(x) = \begin{bmatrix}
                6x_1^2 - 2x_1x_2 \\
                - x_1^2 + 4x_2
                \end{bmatrix}, \ \ 
\nabla^2 f(x) = \begin{bmatrix}
12x_1 - 2x_2 & -2x_1 \\
-2x_1 & 4 
\end{bmatrix} 
$$

(ii) $$ f(x) = (x_1 - 3)^2 + x_1x_2 + \frac{1}{16}x_2^4 + (x_2 - 1)^2 $$

$$
\nabla f(x) = \begin{bmatrix}
                    2x_1 + x_2 - 6 \\
                    \frac{1}{4}x_2^3 + 2x_2 + x_1 - 2
              \end{bmatrix}, \ \ 
\nabla^2 f(x) = \begin{bmatrix}
                    2 & 1 \\
                    1 & \frac{3}{4}x_2^2 + 2 
                \end{bmatrix} 
$$

(iii) $$ f(x) = \| Ax-b \|_2^2 + \gamma \|x\|_2^2 $$

$$
\nabla f(x) = 2(A'A + \gamma I)x - 2A'b, \ \ 
\nabla^2 f(x) = 2(A'A + \gamma I)
$$

(iv) $$ f(x) = \sum_{j = 1}^n \log(1 + \exp(-x_j)) $$

$$
\nabla f(x) = \begin{bmatrix}
                    \frac{-\exp(-x_1)}{1 + \exp(-x_1)} \\
                    \vdots \\
                    \frac{-\exp(-x_n)}{1 + \exp(-x_n)}
              \end{bmatrix}, \ \ 
\nabla^2 f(x) = \begin{bmatrix}
                    \frac{\exp(-x_1)}{(1 + \exp(-x_1))^2} &  & O \\
                      & \ddots & \\
                    O &  & \frac{\exp(-x_n)}{(1 + \exp(-x_n))^2}
                \end{bmatrix} 
$$


(v) $$ f(x) = \log\left( \sum_{j = 1}^n \exp(x_j) \right) $$

$$
\nabla f(x) = \begin{bmatrix}
                    \frac{\exp(x_1)}{\sum_{j = 1}^n \exp(x_j)} \\
                    \vdots \\
                    \frac{\exp(x_n)}{\sum_{j = 1}^n \exp(x_j)}
              \end{bmatrix}, \ \ 
\nabla^2 f(x) = (A_{ij}) = \begin{cases}
                              \frac{\exp(x_i) \sum_{l = 1}^n \exp(x_l) - \exp(x_i) \exp(x_j)}{\left( \sum_{l = 1}^n \exp(x_l) \right)^2} \ \ \text{if} \ \ i = j \\
                              -\frac{\exp(x_i) \exp(x_j)}{\left( \sum_{l = 1}^n \exp(x_l) \right)^2} \ \ \text{if} \ \ i \neq j
                          \end{cases} 
$$


# 3.2

(3.33) は $\min \frac{1}{2}x'Ax - b'x$ なので、 $f(x) = \frac{1}{2}x'Ax - b'x$ とおくと、最適条件では $\nabla f(x) = 0$ が必要となる。

$A$ の対称性より、$\nabla f(x) = 0 \Leftrightarrow Ax = b$

今回、 $A$ は正定値なので、 $Ax = b$ を満たす $x = x^*$ が (3.33) の解となるため、 (3.32) と一致する。

# 3.3


In [121]:
def gradient_descent(x0, df, alpha_gd, n):
    """Run ``n`` fixed-step gradient-descent updates, printing every iterate.

    Parameters
    ----------
    x0 : object with a ``copy`` method (e.g. ``numpy.ndarray``)
        Starting point. It is copied first, so the caller's object is
        never modified.
    df : callable
        Gradient of the objective; called as ``df(x)`` on the current iterate.
    alpha_gd : float
        Step size that multiplies the gradient.
    n : int
        Number of updates to perform.

    Returns
    -------
    None
        Iterates are reported only through ``print``.
    """
    current = x0.copy()
    for done in range(n):
        # Move against the gradient by a fixed step length.
        descent_step = alpha_gd * df(current)
        current = current - descent_step
        # Iterations are reported 1-based.
        print(f'iteration {done + 1}: x = {current}')

In [122]:
def newton_method(x0, df, d2f, alpha_nm, n):
    """Run ``n`` (damped) Newton updates, printing every iterate.

    At each iterate ``x`` the Newton direction ``d`` is the solution of the
    linear system ``d2f(x) @ d = -df(x)``. The system is solved directly with
    ``np.linalg.solve`` rather than with an iterative nonlinear root finder,
    so the direction does not depend on an initial guess or on solver
    convergence.

    Parameters
    ----------
    x0 : numpy.ndarray
        Starting point. It is copied first, so the caller's array is never
        modified.
    df : callable
        Gradient of the objective; ``df(x)`` returns a 1-D array.
    d2f : callable
        Hessian of the objective; ``d2f(x)`` returns a square 2-D array.
    alpha_nm : float
        Step size applied to the Newton direction (``1.0`` gives the pure
        Newton method).
    n : int
        Number of updates to perform.

    Returns
    -------
    None
        Iterates are reported only through ``print``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the Hessian at an iterate is singular, so no Newton direction
        exists.
    """
    x = x0.copy()
    for i in range(1, n + 1):
        # Newton direction: solve  d2f(x) d = -df(x)  as a linear system.
        d = np.linalg.solve(d2f(x), -df(x))
        x = x + alpha_nm * d
        print(f'iteration {i}: x = {x}')

(i)

In [125]:
# Exercise 3.3 (i): two iterations of each method from the same start point.
n = 2            # number of iterations per method
alpha_gd = 0.1   # step size for gradient descent
alpha_nm = 1.0   # step size for Newton's method
x0 = np.array([0.5, 0.5])


def df(x):
    """Gradient used for case (i), evaluated at the 2-vector ``x``."""
    return np.array([
        6*(x[0]**2) - 2*x[0]*x[1],
        -(x[0]**2) + 4*x[1],
    ])


def d2f(x):
    """Hessian used for case (i), evaluated at the 2-vector ``x``."""
    return np.array([
        [12*x[0] - 2*x[1], -2*x[0]],
        [-2*x[0], 4],
    ])


print("Grandient descent")
gradient_descent(x0, df, alpha_gd, n)

print("Newton method")
newton_method(x0, df, d2f, alpha_nm, n)


Grandient descent
iteration 1: x = [0.4   0.325]
iteration 2: x = [0.33  0.211]
Newton method
iteration 1: x = [ 0.19736842 -0.01315789]
iteration 2: x = [0.09978658 0.00010879]


(ii)

In [126]:
# Exercise 3.3 (ii): two iterations of each method from the same start point.
n = 2            # number of iterations per method
alpha_gd = 0.1   # step size for gradient descent
alpha_nm = 1.0   # step size for Newton's method
x0 = np.array([0.5, 0.5])


def df(x):
    """Gradient used for case (ii), evaluated at the 2-vector ``x``."""
    return np.array([
        2*x[0] + x[1] - 6,
        x[0] + (1/4)*(x[1]**3) + 2*x[1] - 2,
    ])


def d2f(x):
    """Hessian used for case (ii), evaluated at the 2-vector ``x``."""
    return np.array([
        [2, 1],
        [1, (3/4)*(x[1]**2) + 2],
    ])


print("Grandient descent")
gradient_descent(x0, df, alpha_gd, n)

print("Newton method")
newton_method(x0, df, d2f, alpha_nm, n)

Grandient descent
iteration 1: x = [0.95     0.546875]
iteration 2: x = [1.3053125  0.53841112]
Newton method
iteration 1: x = [ 3.27777778 -0.55555556]
iteration 2: x = [ 3.31352743 -0.62705486]


# 3.4
(i)

\begin{align*}
    & \min \ \frac{1}{2}\| Ax - b \|_2^2 \\
    &  \ \text{s.t. } \ x \geq 0
\end{align*}

\begin{align*}
    & A'Ax - A'b - \lambda = 0, \\
    & \lambda'x = 0, \ x \geq 0, \ \lambda \geq 0
\end{align*}

(ii)

\begin{align*}
    & \min \ \sum_{j = 1}^n x_j \log x_j\\
    & \text{s.t. } \ \sum_{j = 1}^n x_j = 1
\end{align*}

\begin{align*}
    & \log x_i + 1 - \lambda_i + \mu = 0 \ \ \text{for} \ \ i = 1, \cdots, n, \\
    & \sum_{j = 1}^n x_j  - 1 = 0, \\
    & \lambda_i x_i = 0, \ x_i \geq 0, \ \lambda_i \geq 0 \ \ \text{for} \ \ i = 1, \cdots, n 
\end{align*}

# 3.5

\begin{align*}
    & \min \ x_2 \\
    &  \ \ \text{s.t. } \ (x_1 + 1)^2 + x_2^2 \leq 1 \\
    &  \ \ \ \ \ \ \ \ \ \ (x_1 - 1)^2 + x_2^2 \leq 1
\end{align*}

KKT条件は、
\begin{align*}
    & 2\lambda_1(x_1 + 1) + 2\lambda_2(x_1 - 1) = 0, \\
    & 1 + 2\lambda_1x_2 + 2\lambda_2x_2 = 0, \\
    & \lambda_1\left\{ (x_1 + 1)^2 + x_2^2 - 1 \right\} = 0, \\
    & \lambda_2\left\{ (x_1 - 1)^2 + x_2^2 - 1 \right\} = 0, \\
    & \lambda_1 \geq 0, \ \lambda_2 \geq 0
\end{align*}

実行可能解は $(0, 0)$ のみなので最適解は $(0, 0)$ だが、このとき制約式の勾配はそれぞれ $(2, 0)'$, $(-2, 0)'$ となって一次従属であり、KKT条件の第2式が $1 = 0$ となるため、KKT条件を満たす $\lambda_1, \ \lambda_2$ は存在しない。


# 3.6

目的関数を $x_l$ についてまとめると、定数 $C$ を用いて



$$
\frac{1}{2}x_l^2a_l'a_l + \sum_{j \neq l}x_lx_ja_j'a_l - b'x_la_l + \gamma |x_l| + C  =
        \begin{cases}
            \frac{1}{2}x_l^2a_l'a_l + \sum_{j \neq l}x_lx_ja_j'a_l - b'x_la_l + \gamma x_l + C \ \ \text{if} \ \ x_l \geq 0 \\
            \frac{1}{2}x_l^2a_l'a_l + \sum_{j \neq l}x_lx_ja_j'a_l - b'x_la_l - \gamma x_l + C \ \ \text{if} \ \ x_l \leq 0
        \end{cases}
$$
これは、二次関数の場合分け問題に帰着するので、最小値をとる $x_l$ が不等式制約を満たすか場合分けすれば、
$$
x_l =
    \begin{cases}
        \frac{1}{\| a_l \|_2^2} \left( \left( b - \sum_{j \neq l}x_ja_j \right)' a_l - \gamma \right) \ \ & \text{if} \ \ \left( b - \sum_{j \neq l}x_ja_j \right)' a_l - \gamma > 0 \\
        0 & \text{if} \ \ - \gamma \leq \left( b - \sum_{j \neq l}x_ja_j \right)' a_l \leq \gamma \\
        \frac{1}{\| a_l \|_2^2} \left( \left( b - \sum_{j \neq l}x_ja_j \right)' a_l + \gamma \right)  \ \ & \text{if} \ \ \left( b - \sum_{j \neq l}x_ja_j \right)' a_l + \gamma < 0
    \end{cases}
$$