<div style="font-size:18pt; padding-top:20px; text-align:center; line-height: 1.5;">СЕМИНАР. <b>Оптимизация. Часть 1.</b> Метод наименьших квадратов, градиентный спуск и <span style="font-weight:bold; color:green">NumPy/SciPy</span></div><hr>
<div style="text-align:right;">Папулин С.Ю. <span style="font-style: italic;font-weight: bold;">(papulin.study@yandex.ru)</span></div>

<a name="0"></a>
<div><span style="font-size:14pt; font-weight:bold">Содержание</span>
    <ol>
        <li><a href="#1">Постановка задачи</a></li>
        <li><a href="#2">Производная</a></li>
        <li><a href="#3">Метод наименьших квадратов</a>
            <ol style = "list-style-type:lower-alpha">
                <li><a href="#3a">Матричная форма записи системы уравнений</a></li>
                <li><a href="#3b">Метод наименьших квадратов</a></li>
                <li><a href="#3c">Метод наименьших квадратов в задаче линейной регрессии</a></li>
            </ol>
        </li>
        <li><a href="#4">Градиентный спуск</a>
            <ol style = "list-style-type:lower-alpha">
                <li><a href="#4a">Градиентный спуск</a></li>
                <li><a href="#4b">Градиентный спуск в задаче линейной регрессии</a></li>
            </ol>
        </li>
    </ol>
</div>

<a name="1"></a>
<div style="display:table; width:100%; padding-top:10px; padding-bottom:10px; border-bottom:1px solid lightgrey">
    <div style="display:table-row">
        <div style="display:table-cell; width:80%; font-size:14pt; font-weight:bold">1. Постановка задачи</div>
    	<div style="display:table-cell; width:20%; text-align:center; background-color:whitesmoke; border:1px solid lightgrey"><a href="#0">К содержанию</a></div>
    </div>
</div>

In [None]:
import numpy as np
import pandas as pnd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
%matplotlib inline

<p>Создаем DataFrame с исходными данными</p>

In [None]:
# Read the space-delimited sample file and preview its first five rows.
df1 = pnd.read_csv("SAT_GPA.csv", delimiter=" ")
df1.head()

In [None]:
# Predictor and response columns used throughout the notebook.
sx, sy = df1["high_GPA"], df1["univ_GPA"]

<p>Отображаем исходные данные</p>

In [None]:
# Scatter plot of the raw sample with a single observation annotated.
plt.figure("1", figsize=[10, 6])
ax = plt.subplot(1, 1, 1)

ax.scatter(sx, sy)
ax.set_xlabel("High_GPA")
ax.set_ylabel("Univ_GPA")
ax.axis([2, 4, 2, 4])
ax.grid(True)
ax.set_axisbelow(True)  # keep the grid underneath the markers

# Observation stored under index label 36 is used as the annotated example.
xA = df1.loc[36, "high_GPA"]
yA = df1.loc[36, "univ_GPA"]
xyA = "$({!s},{!s})$".format(xA, yA)

arrow_style = dict(
    arrowstyle="->",
    connectionstyle="arc,angleA=0,armA=0,angleB=-90,armB=15,rad=7",
)
ax.annotate(
    xyA,
    xy=(xA, yA),
    xytext=(50, -100),
    xycoords="data",
    textcoords="offset points",
    arrowprops=arrow_style,
)
plt.show()

In [None]:
# Coefficients of three hand-picked candidate lines y = slope * x + intercept.
slope_1, slope_2, slope_3 = 2.0, 0.7, 0.4
intercept_1, intercept_2, intercept_3 = -3.0, 1.1, 2.0

In [None]:
def f_pred_1(x):
    """Candidate line 1: slope_1 * x + intercept_1 (globals read at call time)."""
    return slope_1 * x + intercept_1


def f_pred_2(x):
    """Candidate line 2: slope_2 * x + intercept_2 (globals read at call time)."""
    return slope_2 * x + intercept_2


def f_pred_3(x):
    """Candidate line 3: slope_3 * x + intercept_3 (globals read at call time)."""
    return slope_3 * x + intercept_3

In [None]:
# Sample scatter plot together with the three hand-picked candidate lines.
x_line = np.array([1.5, 4.5])

plt.figure("2", figsize=[10, 6])

ax = plt.subplot(1,1,1)

plt.scatter(sx, sy, label="(High_GPA-Univ_GPA)")

# Legend labels are generated from the actual coefficients so they cannot
# drift away from the lines that are drawn (the first label used to read
# 0.9*x+0.3 while the plotted line was slope_1*x+intercept_1 = 2.0*x-3.0).
candidate_lines = (
    (f_pred_1, slope_1, intercept_1),
    (f_pred_2, slope_2, intercept_2),
    (f_pred_3, slope_3, intercept_3),
)
for predict, slope, intercept in candidate_lines:
    plt.plot(x_line, predict(x_line), "-",
             label="${}*x{:+}$".format(slope, intercept))

plt.xlabel("High_GPA")
plt.ylabel("Univ_GPA")

plt.axis([2, 4, 2, 4])

plt.grid(True)
ax.set_axisbelow(True)  # keep the grid underneath the markers

plt.legend()

# Annotate the observation stored under index label 36.
xA = df1.loc[36, "high_GPA"]
yA = df1.loc[36, "univ_GPA"]
xyA = "$("+str(xA)+","+str(yA)+")$"
plt.annotate(xyA, xy=(xA, yA), xytext=(50, -100), xycoords='data', textcoords='offset points', 
             arrowprops=dict(arrowstyle="->", connectionstyle="arc,angleA=0,armA=0,angleB=-90,armB=15,rad=7"),)
plt.show()

Какая функция лучше определяет зависимость между High_GPA и Univ_GPA?

In [None]:
# Predictions of each candidate line for every observed x.
y_pred_1, y_pred_2, y_pred_3 = (
    predict(sx) for predict in (f_pred_1, f_pred_2, f_pred_3)
)

In [None]:
# Sum of squared residuals for each candidate line.
err_1, err_2, err_3 = (
    ((y_hat - sy) ** 2).sum() for y_hat in (y_pred_1, y_pred_2, y_pred_3)
)

err_1, err_2, err_3

$$\hat w_0, \hat w_1 =\operatorname*{arg\,min}_{w_0, w_1} 
\displaystyle\sum_{i=1}^{N} ((w_0+w_1 x_i) - y_i)^2$$

<a name="2"></a>
<div style="display:table; width:100%; padding-top:10px; padding-bottom:10px; border-bottom:1px solid lightgrey">
    <div style="display:table-row">
        <div style="display:table-cell; width:80%; font-size:14pt; font-weight:bold">2. Производная</div>
    	<div style="display:table-cell; width:20%; text-align:center; background-color:whitesmoke; border:1px solid lightgrey"><a href="#0">К содержанию</a></div>
    </div>
</div>

In [None]:
try:
    from scipy.misc import derivative
except ImportError:
    # scipy.misc.derivative is not available in recent SciPy releases, so
    # fall back to the 3-point central differences this notebook relies on.
    def derivative(func, x0, dx=1.0, n=1):
        """Approximate the n-th derivative of func at x0 by central differences.

        Only n=1 and n=2 are supported (3-point stencil with spacing dx).
        x0 may be a scalar or a NumPy array; func is evaluated at x0 - dx,
        x0 and x0 + dx.

        Raises:
            ValueError: if n is neither 1 nor 2.
        """
        if n == 1:
            return (func(x0 + dx) - func(x0 - dx)) / (2.0 * dx)
        if n == 2:
            return (func(x0 + dx) - 2.0 * func(x0) + func(x0 - dx)) / dx ** 2
        raise ValueError("fallback derivative supports only n=1 or n=2")

In [None]:
# Point at which the derivatives are evaluated.
x0 = -4


def f(x):
    """Return x squared."""
    return x ** 2

In [None]:
# First derivative of f at x0 (numerical approximation).
df_x0 = derivative(func=f, x0=x0, n=1)
df_x0

In [None]:
# Second derivative of f at x0 (numerical approximation).
ddf_x0 = derivative(func=f, x0=x0, n=2)
ddf_x0

In [None]:
# Integer grid on which the derivatives are tabulated (x_end is excluded).
x_st, x_end, step = -4, 5, 1

x = np.arange(x_st, x_end, step)
x

In [None]:
# First derivative of f at every grid point.
df = derivative(func=f, x0=x, n=1)
df

In [None]:
# Second derivative of f at every grid point.
ddf = derivative(func=f, x0=x, n=2)
ddf

In [None]:
def f_neg(x):
    """Return the negated square of x."""
    return -(x ** 2)

In [None]:
# First and second derivatives of f_neg on the grid.
df_neg, ddf_neg = (derivative(f_neg, x, n=order) for order in (1, 2))

df_neg, ddf_neg

In [None]:
plt.figure("4", figsize=[15, 6])

# Left panel: f with the previously computed df and ddf.
ax1 = plt.subplot(1, 2, 1)
ax1.plot(x, f(x), "-o", label="$f(x)=x^2$")
ax1.plot(x, df, "-o", label="$f'(x)$")
ax1.plot(x, ddf, "-o", label="$f''(x)$")
ax1.set_title("$f(x)=x^2$")
ax1.set_xlabel("x")
ax1.set_ylabel("y")
ax1.grid(True)
ax1.legend()

# Right panel: f_neg with the previously computed df_neg and ddf_neg.
ax2 = plt.subplot(1, 2, 2)
ax2.plot(x, f_neg(x), "-o", label="$f_{neg}(x)=-x^2$")
ax2.plot(x, df_neg, "-o", label="$f'_{neg}(x)$")
ax2.plot(x, ddf_neg, "-o", label="$f''_{neg}(x)$")
ax2.set_title("$f(x)=-x^2$")
ax2.set_xlabel("x")
ax2.set_ylabel("y")
ax2.grid(True)
ax2.legend()

plt.show()

In [None]:
# Grid with step 0.1 covering [-10, 10).
x = np.arange(-10, 10, 0.1)


def f(x):
    """Return x**2 + 10*sin(x)."""
    return x ** 2 + 10 * np.sin(x)

In [None]:
# Numerical first and second derivatives of f on the grid.
df, ddf = (derivative(f, x, n=order) for order in (1, 2))

In [None]:
# Grid positions where the first derivative lies in [-0.35, 0.25].
indx = np.where((df >= -0.35) & (df <= 0.25))

In [None]:
# f, its derivatives, and the grid points where the first derivative is
# close to zero.
plt.figure("4", figsize=[8,6])

ax1 = plt.subplot(1,1,1)

# The label and title now name the function that is actually plotted
# (f is x^2 + 10*sin(x) in this part of the notebook, not x^2).
plt.plot(x, f(x), "-", label=r"$f(x)=x^2+10\sin(x)$")
plt.plot(x, df, "-", label="$f'(x)$")
plt.plot(x, ddf, "-", label="$f''(x)$")

# Mark the selected points and drop dashed vertical guides through them.
plt.plot(x[indx], f(x[indx]), "o", color="darkblue")

plt.vlines(x[indx], ymin=-20, ymax=100, color="grey", linestyle="dashed", linewidth=1)

plt.title(r"$f(x)=x^2+10\sin(x)$")

plt.xlabel("x")
plt.ylabel("y")

plt.grid(True)

plt.legend()

plt.show()

<a name="3"></a>
<div style="display:table; width:100%; padding-top:10px; padding-bottom:10px; border-bottom:1px solid lightgrey">
    <div style="display:table-row">
        <div style="display:table-cell; width:80%; font-size:14pt; font-weight:bold">3. Метод наименьших квадратов</div>
    	<div style="display:table-cell; width:20%; text-align:center; background-color:whitesmoke; border:1px solid lightgrey"><a href="#0">К содержанию</a></div>
    </div>
</div>

<a name = "3a"></a>
<div style = "display:table; width:100%">
    <div style = "display:table-row">
        <div style = "display:table-cell; width:80%; font-style:italic; font-weight:bold; font-size:12pt">
            a. Матричная форма записи системы уравнений
        </div>
        <div style="display:table-cell; border:1px solid lightgrey; width:20%">
            <div style = "display:table-cell; width:10%; text-align:center; background-color:whitesmoke;">
                <a href="#3">Назад</a>
            </div>
            <div style = "display:table-cell; width:10%; text-align:center;">
                <a href="#3b">Далее</a>
            </div>
        </div>
    </div>
</div>

<a name = "3b"></a>
<div style = "display:table; width:100%">
    <div style = "display:table-row">
        <div style = "display:table-cell; width:80%; font-style:italic; font-weight:bold; font-size:12pt">
            b. Метод наименьших квадратов
        </div>
        <div style="display:table-cell; border:1px solid lightgrey; width:20%">
            <div style = "display:table-cell; width:10%; text-align:center; background-color:whitesmoke;">
                <a href="#3a">Назад</a>
            </div>
            <div style = "display:table-cell; width:10%; text-align:center;">
                <a href="#3c">Далее</a>
            </div>
        </div>
    </div>
</div>

<a name = "3c"></a>
<div style = "display:table; width:100%">
    <div style = "display:table-row">
        <div style = "display:table-cell; width:80%; font-style:italic; font-weight:bold; font-size:12pt">
            c. Метод наименьших квадратов в задаче линейной регрессии
        </div>
        <div style="display:table-cell; border:1px solid lightgrey; width:20%">
            <div style = "display:table-cell; width:10%; text-align:center; background-color:whitesmoke;">
                <a href="#3b">Назад</a>
            </div>
            <div style = "display:table-cell; width:10%; text-align:center;">
                <a href="#4">Далее</a>
            </div>
        </div>
    </div>
</div>

In [None]:
# Design matrix: first column holds high_GPA, second column is all ones.
gpa_values = df1["high_GPA"].values
X = np.asmatrix(np.column_stack((gpa_values, np.ones(gpa_values.size))))
X[:5, :2]

In [None]:
# Response as a column matrix with one row per observation.
y = np.asmatrix(df1["univ_GPA"].values.reshape(-1, 1))
y[:5]

In [None]:
# Normal-equation solution: w = (X^T X)^(-1) X^T y.
gram = X.T * X
w = gram ** (-1) * X.T * y

# Option 2
# w = np.linalg.inv(X.T * X) * X.T * y
w

In [None]:
# Cross-check with NumPy's least-squares solver.
# rcond=None is passed explicitly so the result does not depend on the
# NumPy version's default (older versions warn when rcond is omitted).
np.linalg.lstsq(X, y, rcond=None)

In [None]:
# Row 0 of w pairs with the high_GPA column of X (slope); row 1 pairs with
# the column of ones (intercept).
w1, w0 = w[0, 0], w[1, 0]

In [None]:
def f_pred_ols(x):
    """Least-squares line w1 * x + w0 (coefficients are read at call time)."""
    return w1 * x + w0

In [None]:
# Sample, the three candidate lines and the least-squares line.
x_line = np.array([1.5, 4.5])

plt.figure("3", figsize=[10, 6])

ax = plt.subplot(1,1,1)

plt.scatter(sx, sy, label="(High_GPA-Univ_GPA)")

# Legend labels are generated from the actual coefficients so they match
# the plotted lines (f_1 used to be labelled 0.9*x+0.3 while the line drawn
# is slope_1*x+intercept_1).
candidate_lines = (
    (1, f_pred_1, slope_1, intercept_1),
    (2, f_pred_2, slope_2, intercept_2),
    (3, f_pred_3, slope_3, intercept_3),
)
for number, predict, slope, intercept in candidate_lines:
    plt.plot(x_line, predict(x_line), "-",
             label="$f_{}(x)={}*x{:+}$".format(number, slope, intercept))

plt.plot(x_line, f_pred_ols(x_line), "-", linewidth=4, 
         label="$f_{OLS}(x)$")

plt.xlabel("High_GPA")
plt.ylabel("Univ_GPA")

plt.axis([2, 4, 2, 4])

plt.grid(True)
ax.set_axisbelow(True)  # keep the grid underneath the markers

plt.legend()

plt.show()

In [None]:
# Sum of squared residuals of the least-squares line.
err_ols = (f_pred_ols(sx) - sy).pow(2).sum()
err_ols

In [None]:
# Squared-error sums of the three candidate lines and of the OLS line.
err_1, err_2, err_3, err_ols

<a name="4"></a>
<div style="display:table; width:100%; padding-top:10px; padding-bottom:10px; border-bottom:1px solid lightgrey">
    <div style="display:table-row">
        <div style="display:table-cell; width:80%; font-size:14pt; font-weight:bold">4. Градиентный спуск</div>
    	<div style="display:table-cell; width:20%; text-align:center; background-color:whitesmoke; border:1px solid lightgrey"><a href="#0">К содержанию</a></div>
    </div>
</div>

<a name = "4a"></a>
<div style = "display:table; width:100%">
    <div style = "display:table-row">
        <div style = "display:table-cell; width:80%; font-style:italic; font-weight:bold; font-size:12pt">
            a. Градиентный спуск
        </div>
        <div style="display:table-cell; border:1px solid lightgrey; width:20%">
            <div style = "display:table-cell; width:10%; text-align:center; background-color:whitesmoke;">
                <a href="#4">Назад</a>
            </div>
            <div style = "display:table-cell; width:10%; text-align:center;">
                <a href="#4b">Далее</a>
            </div>
        </div>
    </div>
</div>

$$ \mathbf{x}^{(i+1)} = \mathbf{x}^{(i)}-\alpha \cdot \nabla f \left( \mathbf{x}^{(i)}\right)$$

<p>Функция с одной переменной</p>

$$f(x) = x^2 + 10 \sin(x)$$
$$f^{'}(x) = 2x + 10 \cos(x)$$

In [None]:
def f(x):
    """Objective function: x**2 + 10*sin(x)."""
    return 10 * np.sin(x) + x**2


def df(x):
    """Analytic derivative of f: 2*x + 10*cos(x)."""
    return 10 * np.cos(x) + 2*x


# Argument values: step 0.1 on [-10, 10)
x = np.arange(-10, 10, 0.1)

In [None]:
# Plot f and its analytic derivative on the same axes.
plt.figure("1")

plt.plot(x, f(x))
plt.plot(x, df(x))
# Raw strings keep the LaTeX backslashes literal: "\s" and "\p" are not
# valid Python escape sequences and trigger warnings in non-raw strings.
plt.title(r"$f(x) = x^2+10\sin(x)$")
plt.xlabel("x")
plt.ylabel("f(x)")
plt.legend(("$f(x)$", r"$f^{\prime}(x)$"), loc="lower right")
plt.grid(True)

plt.show()

<p>Исследование влияния значения коэффициента альфа</p>

<p><b><i>Начальная точка 1</i></b></p>

$$x_0 = -8$$
$$\alpha \in \{ 0.02, 0.05, 0.1, 0.2, 0.4, 0.6\}$$
$$err_{min} = 10^{-3}$$
$$iteration_{max} = 20$$

<img src="img/gd-one-var-alpha-left.png">

<p><b><i>Начальная точка 2</i></b></p>

$$x_0 = 8$$
$$\alpha \in \{ 0.02, 0.05, 0.1, 0.2, 0.4, 0.6\}$$
$$err_{min} = 10^{-3}$$
$$iteration_{max} = 20$$

<img src="img/gd-one-var-alpha-right.png">

<p>Функция с двумя переменными</p>

$$f(x_1, x_2) = 2x_1^2 + x_2^2 + x_1x_2$$
$$\frac {\partial f(x_1, x_2)}{\partial x_1}  = 4x_1 + x_2$$
$$\frac {\partial f(x_1, x_2)}{\partial x_2}  = 2x_2 + x_1$$

In [None]:
def f(x1, x2):
    """Two-variable objective: 2*x1**2 + x2**2 + x1*x2."""
    return 2*x1**2 + x2**2 +x1*x2


def dfx1(x1, x2):
    """Partial derivative of f with respect to x1."""
    return 4*x1 + x2


def dfx2(x1, x2):
    """Partial derivative of f with respect to x2."""
    return 2*x2 + x1


# Axis values with step 0.1 on [-4, 5)
coord_x1 = np.arange(-4, 5, 0.1)
coord_x2 = np.arange(-4, 5, 0.1)

# 2-D coordinate grids used for surface, contour and quiver plots
x1, x2 = np.meshgrid(coord_x1, coord_x2)

In [None]:
fig = plt.figure(1, figsize=(10, 10))

# Function values on the full grid, shared by the surface and contour panels.
z = f(x1, x2)

# Only every 5th grid node gets a gradient arrow.
x1_q, x2_q = x1[0::5, 0::5], x2[0::5, 0::5]
grad_x1_q = dfx1(x1_q, x2_q)
grad_x2_q = dfx2(x1_q, x2_q)

# Panel 1: 3-D surface of f.
ax0 = fig.add_subplot(2, 2, 1, projection="3d")
ax0.plot_surface(x1, x2, z, rstride=1, cstride=1, cmap=cm.coolwarm,
                 linewidth=0, antialiased=True)
ax0.set_xlabel("$x_1$")
ax0.set_ylabel("$x_2$")
ax0.set_zlabel("$f(x_1,x_2)$")

# Panel 2: filled contours with a colour bar.
ax1 = plt.subplot(2, 2, 2)
cf = ax1.contourf(x1, x2, z, 50, alpha=0.5, cmap=cm.coolwarm)
plt.colorbar(cf)
ax1.set_xlabel("$x_1$")
ax1.set_ylabel("$x_2$")

# Panel 3: gradient field only.
ax2 = plt.subplot(2, 2, 3)
ax2.set_xlabel("$x_1$")
ax2.set_ylabel("$x_2$")
ax2.quiver(x1_q, x2_q, grad_x1_q, grad_x2_q, scale=100)

# Panel 4: gradient field drawn over the filled contours.
ax3 = plt.subplot(2, 2, 4)
ax3.set_xlabel("$x_1$")
ax3.set_ylabel("$x_2$")
ax3.contourf(x1, x2, z, 50, cmap=cm.coolwarm)
ax3.quiver(x1_q, x2_q, grad_x1_q, grad_x2_q, scale=100)

plt.tight_layout()

plt.show()

<p>Исследование влияния значения коэффициента альфа</p>

<p><b><i>Начальная точка 1</i></b></p>

$$x_{1,0} = 3$$
$$x_{2,0} = 0$$
$$\alpha \in \{ 0.02, 0.05, 0.1, 0.2, 0.3, 0.45\}$$
$$err_{min} = 10^{-3}$$
$$iteration_{max} = 20$$

<img src="img/gd-two-var-alpha-right.png">

<p><b><i>Начальная точка 2</i></b></p>

$$x_{1,0} = -3$$
$$x_{2,0} = -2$$
$$\alpha \in \{ 0.02, 0.05, 0.1, 0.2, 0.3, 0.45\}$$
$$err_{min} = 10^{-3}$$
$$iteration_{max} = 20$$

<img src="img/gd-two-var-alpha-left.png">

<a name="4b"></a>
<div style="display:table; width:100%">
    <div style="display:table-row">
        <div style="display:table-cell; width:80%; font-style:italic; font-weight:bold; font-size:12pt">
            b. Градиентный спуск в задаче линейной регрессии
        </div>
        <div style="display:table-cell; border:1px solid lightgrey; width:20%">
            <div style="display:table-cell; width:10%; text-align:center; background-color:whitesmoke;">
                <a href="#4a">Назад</a>
            </div>
            <div style="display:table-cell; width:10%; text-align:center;">
                <a href="#5">Далее</a>
            </div>
        </div>
    </div>
</div>

In [None]:
# Re-bind the predictor and response columns for the regression section.
sx, sy = df1["high_GPA"], df1["univ_GPA"]

<p>Определяем исходную функцию потерь и её частные производные</p>

In [None]:
# Number of observations in the sample
n = len(sx)


def err(x, y, w0, w1):
    """Mean squared error of the line w1 * x + w0 over the sample (x, y).

    The average is taken over the length of the sample actually passed in,
    and observations are paired by position rather than by index label.
    w0 and w1 may be scalars or equally shaped arrays (e.g. a meshgrid), in
    which case the loss is evaluated element-wise for every (w0, w1) pair.
    """
    xs, ys = np.asarray(x), np.asarray(y)
    return 1 / len(xs) * sum((w1 * xi + w0 - yi) ** 2 for xi, yi in zip(xs, ys))


def derr_w0(x, y, w0, w1):
    """Partial derivative of err with respect to the intercept w0."""
    xs, ys = np.asarray(x), np.asarray(y)
    return 2 / len(xs) * sum(1 * (w1 * xi + w0 - yi) for xi, yi in zip(xs, ys))


def derr_w1(x, y, w0, w1):
    """Partial derivative of err with respect to the slope w1."""
    xs, ys = np.asarray(x), np.asarray(y)
    return 2 / len(xs) * sum(xi * (w1 * xi + w0 - yi) for xi, yi in zip(xs, ys))

<p>Задаем критерии поиска</p>

In [None]:
# Stopping criteria: iteration cap and minimum change of the loss
iter_num, min_err = 200, 0.0001

# Step size (alpha)
alpha = 0.05

<p>Выполняем градиентный спуск для поиска коэффициентов линейной регрессии</p>

In [None]:
# Gradient descent on the raw data, starting from (w0, w1) = (4, 4).
w0_gd, w1_gd = 0, 0
w0_prev, w1_prev = 4, 4

i = 0
while i < iter_num:
    # Both updates use the gradient evaluated at the previous point.
    step_w0 = alpha * derr_w0(sx, sy, w0_prev, w1_prev)
    step_w1 = alpha * derr_w1(sx, sy, w0_prev, w1_prev)
    w0_gd, w1_gd = w0_prev - step_w0, w1_prev - step_w1

    # Stop once the loss changes by no more than min_err between iterations.
    loss_change = err(sx, sy, w0_gd, w1_gd) - err(sx, sy, w0_prev, w1_prev)
    if abs(loss_change) <= min_err:
        break

    w0_prev, w1_prev = w0_gd, w1_gd
    i += 1

w0_gd, w1_gd, i

<p>Задаем линию регрессии с учетом найденных коэффициентов</p>

In [None]:
def f_pred_gd(x):
    """Gradient-descent line w1_gd * x + w0_gd (coefficients read at call time)."""
    return w1_gd * x + w0_gd

<p>Отображаем линию регрессии</p>

In [None]:
# Sample, the three candidate lines, the least-squares line and the
# gradient-descent line.
x_line = np.array([1.5, 4.5])

plt.figure("3", figsize=[10, 6])

ax = plt.subplot(1,1,1)

plt.scatter(sx, sy, label="(High_GPA-Univ_GPA)")

# Legend labels are generated from the actual coefficients so they match
# the plotted lines (f_1 used to be labelled 0.9*x+0.3 while the line drawn
# is slope_1*x+intercept_1).
candidate_lines = (
    (1, f_pred_1, slope_1, intercept_1),
    (2, f_pred_2, slope_2, intercept_2),
    (3, f_pred_3, slope_3, intercept_3),
)
for number, predict, slope, intercept in candidate_lines:
    plt.plot(x_line, predict(x_line), "-",
             label="$f_{}(x)={}*x{:+}$".format(number, slope, intercept))

plt.plot(x_line, f_pred_ols(x_line), "-", linewidth=4, 
         label="$f_{OLS}(x)$")
plt.plot(x_line, f_pred_gd(x_line), "-", linewidth=2, color="black",
         label="$f_{GD}(x)$")

plt.xlabel("High_GPA")
plt.ylabel("Univ_GPA")

plt.axis([2, 4, 2, 4])

plt.grid(True)
ax.set_axisbelow(True)  # keep the grid underneath the markers

plt.legend()

plt.show()

<p>Определяем ошибку при градиентном спуске</p>

In [None]:
# Sum of squared residuals of the gradient-descent line.
err_gd = (f_pred_gd(sx) - sy).pow(2).sum()
err_gd

<p>Выводим все значения полученных ошибок</p>

In [None]:
# Squared-error sums: three candidate lines, OLS, gradient descent.
err_1, err_2, err_3, err_ols, err_gd

<p>Построим контурные графики для функции потерь (ошибки)</p>

In [None]:
# Coefficient axes: w0 and w1 values with step 0.1 on [-4, 5)
coord_w0 = np.arange(-4, 5, 0.1)
coord_w1 = np.arange(-4, 5, 0.1)

# Grid of (w0, w1) pairs on which the loss is evaluated
W0, W1 = np.meshgrid(coord_w0, coord_w1)

In [None]:
# Much wider coefficient range: integers from -400 to 499 on both axes.
coord_w0_large = np.arange(-400, 500, 1)
coord_w1_large = np.arange(-400, 500, 1)

W0_large, W1_large = np.meshgrid(coord_w0_large, coord_w1_large)

In [None]:
# Loss contours on the small and on the large coefficient grid.
plt.figure("12", figsize=[12, 4])

plt.subplot(1, 2, 1)
loss_small = err(sx, sy, W0, W1)
contours_small = plt.contour(W0, W1, loss_small, 20, cmap=cm.bwr, alpha=0.5)
plt.grid(True)
plt.colorbar(contours_small)

plt.subplot(1, 2, 2)
loss_large = err(sx, sy, W0_large, W1_large)
contours_large = plt.contour(W0_large, W1_large, loss_large, 20, cmap=cm.bwr, alpha=0.5)
plt.grid(True)
plt.colorbar(contours_large)

plt.show()

<p>Стандартизируем иcходные значения выборки</p>

In [None]:
# Standardize x: subtract the sample mean and divide by the sample std.
sx_mean, sx_std = sx.mean(), sx.std()

sx_stand = (sx - sx_mean) / sx_std
sx_stand.head()

In [None]:
# Standardize y: subtract the sample mean and divide by the sample std.
sy_mean, sy_std = sy.mean(), sy.std()

sy_stand = (sy - sy_mean) / sy_std
sy_stand.head()

In [None]:
x_line = np.array([1.5, 4.5])

plt.figure("3", figsize=[12, 4])

# Left panel: data in original units, fixed axis limits.
ax = plt.subplot(1, 2, 1)
ax.scatter(sx, sy, label="(High_GPA-Univ_GPA)")
ax.set_title("Initial")
ax.set_xlabel("High_GPA")
ax.set_ylabel("Univ_GPA")
ax.axis([2, 4, 2, 4])
ax.grid(True)
ax.set_axisbelow(True)
ax.legend()

# Right panel: standardized data, automatic axis limits.
ax = plt.subplot(1, 2, 2)
ax.scatter(sx_stand, sy_stand, label="(High_GPA-Univ_GPA)")
ax.set_title("Standardized")
ax.set_xlabel("High_GPA")
ax.set_ylabel("Univ_GPA")
ax.grid(True)
ax.set_axisbelow(True)
ax.legend()

plt.show()

In [None]:
# Loss contours for the raw and for the standardized data on the same grid.
plt.figure("12", figsize=[12, 4])

plt.subplot(1, 2, 1)
contours_initial = plt.contour(W0, W1, err(sx, sy, W0, W1), 20, cmap=cm.bwr, alpha=0.5)
plt.title("Initial")
plt.xlabel("w0")
plt.ylabel("w1")
plt.grid(True)
plt.colorbar(contours_initial)

plt.subplot(1, 2, 2)
contours_stand = plt.contour(W0, W1, err(sx_stand, sy_stand, W0, W1), 20, cmap=cm.bwr, alpha=0.5)
plt.title("Standardized")
plt.xlabel("w0")
plt.ylabel("w1")
plt.grid(True)
plt.colorbar(contours_stand)

plt.show()

In [None]:
# Stopping criteria for the standardized run: iteration cap and minimum
# change of the loss
iter_num_stand, min_err_stand = 100, 0.0001

# Step size (alpha)
alpha_stand = 0.05

In [None]:
# Gradient descent on the standardized data, starting from (w0, w1) = (4, 4).
w0_gd_stand, w1_gd_stand = 0, 0
w0_prev, w1_prev = 4, 4

i = 0
while i < iter_num_stand:
    # Both updates use the gradient evaluated at the previous point.
    step_w0 = alpha_stand * derr_w0(sx_stand, sy_stand, w0_prev, w1_prev)
    step_w1 = alpha_stand * derr_w1(sx_stand, sy_stand, w0_prev, w1_prev)
    w0_gd_stand, w1_gd_stand = w0_prev - step_w0, w1_prev - step_w1

    # Stop once the loss changes by no more than min_err_stand.
    loss_change = (err(sx_stand, sy_stand, w0_gd_stand, w1_gd_stand)
                   - err(sx_stand, sy_stand, w0_prev, w1_prev))
    if abs(loss_change) <= min_err_stand:
        break

    w0_prev, w1_prev = w0_gd_stand, w1_gd_stand
    i += 1

w0_gd_stand, w1_gd_stand, i

In [None]:
def f_pred_gr_stand(x):
    """Fitted line in standardized units: w1_gd_stand * x + w0_gd_stand."""
    return w1_gd_stand * x + w0_gd_stand

In [None]:
# Standardized sample with the line fitted by gradient descent.
x_line_stand = np.array([-2, 2])

plt.figure("3", figsize=[10, 6])
ax = plt.subplot(1, 1, 1)

ax.scatter(sx_stand, sy_stand, label="(High_GPA-Univ_GPA)")
ax.plot(
    x_line_stand,
    f_pred_gr_stand(x_line_stand),
    "-",
    linewidth=2,
    color="black",
    label="$f_{StndGD}(x)$",
)

ax.set_xlabel("High_GPA")
ax.set_ylabel("Univ_GPA")
ax.grid(True)
ax.set_axisbelow(True)
ax.legend()

plt.show()

In [None]:
def f_pred_gr_stand_recover(x):
    """Map the standardized fit back to the original units.

    The standardized model is y_s = w1_gd_stand * x_s + w0_gd_stand with
    x_s = (x - sx_mean) / sx_std and y_s = (y - sy_mean) / sy_std, hence
    y = sy_std * (w1_gd_stand * x_s + w0_gd_stand) + sy_mean.
    The intercept w0_gd_stand must be kept: gradient descent stops on a
    tolerance, so it is generally not exactly zero.
    """
    x_stand = (x - sx_mean) / sx_std
    return sy_std * (w1_gd_stand * x_stand + w0_gd_stand) + sy_mean

In [None]:
# Sample with every fitted line: three candidates, OLS, gradient descent
# and gradient descent on standardized data mapped back to original units.
x_line = np.array([1.5, 4.5])

plt.figure("3", figsize=[10, 6])

ax = plt.subplot(1,1,1)

plt.scatter(sx, sy, label="(High_GPA-Univ_GPA)")

# Legend labels are generated from the actual coefficients so they match
# the plotted lines (f_1 used to be labelled 0.9*x+0.3 while the line drawn
# is slope_1*x+intercept_1).
candidate_lines = (
    (1, f_pred_1, slope_1, intercept_1),
    (2, f_pred_2, slope_2, intercept_2),
    (3, f_pred_3, slope_3, intercept_3),
)
for number, predict, slope, intercept in candidate_lines:
    plt.plot(x_line, predict(x_line), "-",
             label="$f_{}(x)={}*x{:+}$".format(number, slope, intercept))

plt.plot(x_line, f_pred_ols(x_line), "-", linewidth=4, 
         label="$f_{OLS}(x)$")
plt.plot(x_line, f_pred_gd(x_line), "-", linewidth=2, color="black",
         label="$f_{GD}(x)$")
plt.plot(x_line, f_pred_gr_stand_recover(x_line), "-", linewidth=2, color="cyan",
         label="$f_{StdGD}(x)$")

plt.xlabel("High_GPA")
plt.ylabel("Univ_GPA")

plt.axis([2, 4, 2, 4])

plt.grid(True)
ax.set_axisbelow(True)  # keep the grid underneath the markers

plt.legend()

plt.show()

In [None]:
# Sum of squared residuals of the recovered standardized-GD line.
err_gr_stand = (f_pred_gr_stand_recover(sx) - sy).pow(2).sum()
err_gr_stand

In [None]:
# Squared-error sums: three candidate lines, OLS, gradient descent, and
# gradient descent on standardized data.
err_1, err_2, err_3, err_ols, err_gd, err_gr_stand

<p>Грубая проверка перебором</p>

In [None]:
# Brute-force check: smallest loss on the (W0, W1) grid for the raw and for
# the standardized data. The regression loss err is used here: at this point
# f is bound to the two-variable quadratic defined earlier, and f_stand is
# not defined anywhere in the notebook.
err(sx, sy, W0, W1).min(), err(sx_stand, sy_stand, W0, W1).min()

In [None]:
# Grid indices of the minimum of the regression loss on the raw data
# (err is used because f is bound to an unrelated two-variable quadratic
# at this point of the notebook).
loss_grid = err(sx, sy, W0, W1)
inxd_ws = np.argwhere(loss_grid == loss_grid.min())
inxd_ws

In [None]:
# (w0, w1) at the first grid node listed in inxd_ws.
row, col = inxd_ws[0]
W0[row, col], W1[row, col]

In [None]:
# Grid indices of the minimum of the regression loss on the standardized
# data (f_stand is not defined anywhere in the notebook; err evaluated on
# the standardized sample is used instead).
loss_grid_stand = err(sx_stand, sy_stand, W0, W1)
inxd_ws_stand = np.argwhere(loss_grid_stand == loss_grid_stand.min())
inxd_ws_stand

In [None]:
# (w0, w1) at the first grid node listed in inxd_ws_stand.
row_stand, col_stand = inxd_ws_stand[0]
W0[row_stand, col_stand], W1[row_stand, col_stand]