# Градиентный спуск

In [122]:
import numpy as np

In [123]:
def f(X, Thetta):
    return X @ Thetta

In [213]:
# Кол-во объектов
cnt_obj = 5

# Матрица Объекты-Признаки
X = np.array([[0, 1, 1],
              [1, 1, 1],
              [1, 0, 1],
              [-0.5, 0.5, 1],
              [0, -0.5, 1]])

# Таргет
Y = np.array([1., 1., 1., -1., -1.])

# Начальные веса
theta_0 = np.array([1., 1., 0.])


$f(x,\theta) = x_1\theta_1 + x_2\theta_2 + \theta_3 = \theta^TX$, Где $X = (x_1, x_2, 1)$ 

Градиент функции ошибки можно представить как:
$$\nabla_\theta L(\theta) = \nabla_\theta L_1(\theta) + \nabla_\theta L_2(\theta)$$
Где
$$L_1(\theta) = 0.1\|\theta\|^2 = 0.1\cdot \theta^T\theta$$
$$L_2(\theta) = \frac{1}{N}\sum_{i=1}^Nmax(0, 1-y_if(x_i,\theta)) = \frac{1}{N}\sum_{i=1}^Nmax(0, 1-y_i\theta^TX_i)$$
Получаем, что
$$\nabla_\theta L_1(\theta) = 0.2\cdot \theta$$
$$\nabla_\theta L_2(\theta) = \frac{1}{N}\sum_{i=1}^N\begin{cases}
  0,  & 1-y_i\theta^TX_i < 0 \\
  -y_iX_i, & 1-y_i\theta^TX_i \geq 0
\end{cases}$$
В итоге градиент функции ошибки равен:
$$\nabla_\theta L(\theta) = 0.2*\theta + \frac{1}{N}\sum_{i=1}^N\begin{cases}
  0,  & 1-y_i\theta^TX_i < 0 \\
  -y_iX_i, & 1-y_i\theta^TX_i \geq 0
\end{cases}$$


In [231]:
num_steps = 100
step_size = 0.01

Градиентный спуск
$\theta^{n} = \theta^{n-1} - \alpha_i\cdot\nabla_\theta L(\theta^{n-1})$

In [232]:
theta = theta_0.copy()
theta_list = [theta.copy()]

for _ in range(num_steps):
    summ = np.array([0., 0., 0.])
    for j in range(cnt_obj):
        if (1-Y[j]*(theta @ X[j])) > 0:
            summ += Y[j] * X[j]
            
    summ = 1/cnt_obj * summ # Градиент L_2
    theta -= step_size * (0.2*theta-summ)
    theta_list.append(theta.copy())

theta_list = np.array(theta_list)
theta_list

array([[ 1.        ,  1.        ,  0.        ],
       [ 0.999     ,  0.998     , -0.004     ],
       [ 1.000002  ,  0.998004  , -0.003992  ],
       [ 1.001002  ,  0.99800799, -0.00398402],
       [ 1.00199999,  0.99801198, -0.00397605],
       [ 1.00299599,  0.99801595, -0.0039681 ],
       [ 1.00399   ,  0.99801992, -0.00396016],
       [ 1.00298202,  0.99802388, -0.00595224],
       [ 1.00397606,  0.99802783, -0.00594033],
       [ 1.0049681 ,  0.99803178, -0.00592845],
       [ 1.00595817,  0.99803571, -0.0059166 ],
       [ 1.00494625,  0.99803964, -0.00790476],
       [ 1.00593636,  0.99804356, -0.00788895],
       [ 1.00692449,  0.99804748, -0.00787318],
       [ 1.00791064,  0.99805138, -0.00785743],
       [ 1.00689482,  0.99805528, -0.00984172],
       [ 1.00788103,  0.99805917, -0.00982203],
       [ 1.00886526,  0.99806305, -0.00980239],
       [ 1.00984753,  0.99806692, -0.00978278],
       [ 1.00882784,  0.99807079, -0.01176322],
       [ 1.00981018,  0.99807465, -0.011

In [235]:
loss = 0
summ = 0
for j in range(cnt_obj):
    if (1-Y[j]*(theta @ X[j])) > 0:
        summ += 1-Y[j]*(theta @ X[j])
        
    summ = 1/cnt_obj * summ
    loss += 0.1*(sum(theta**2))+summ
print(f"loss {loss}")

loss 1.3722376627818789


In [234]:
f(X[0], theta)

0.9513221409776224