In [6]:
import numpy as np

In [32]:
def calc_mae(y, y_pred):
    err = np.mean(np.abs(y - y_pred))
    return err

def calc_mse(y, y_pred):
    err = np.mean((y - y_pred)**2)
    return err

In [33]:
X = np.array([[ 1,  1],
              [ 1,  1],
              [ 1,  2],
              [ 1,  5],
              [ 1,  3],
              [ 1,  0],
              [ 1,  5],
              [ 1, 10],
              [ 1,  1],
              [ 1,  2]])

In [34]:
y = np.array([45, 55, 50, 55, 60, 35, 75, 80, 50, 60])

## Task 1. Подберите скорость обучения (eta) и количество итераций

In [58]:
n = X.shape[0]

eta = 0.02 
n_iter = 250

W = np.array([1, 0.5])
print(f'Number of objects = {n} \
       \nLearning rate = {eta} \
       \nInitial weights = {W} \n')

for i in range(n_iter):
    y_pred = np.dot(X, W)
    err = calc_mse(y, y_pred)
    for k in range(W.shape[0]):
        W[k] -= eta * (1/n * 2 * X[:, k] @ (y_pred - y))
    if i % 100 == 0:
        eta /= 1.1
        print(f'Iteration #{i}: W_new = {W}, MSE = {round(err, 7)}')

Number of objects = 10        
Learning rate = 0.02        
Initial weights = [1.  0.5] 

Iteration #0: W_new = [3.16 8.04], MSE = 3047.75
Iteration #100: W_new = [37.30195981  5.21975431], MSE = 73.2992455
Iteration #200: W_new = [43.36331425  4.12062114], MSE = 45.3705525


## Task 2. В этом коде мы избавляемся от итераций по весам, но тут есть ошибка, исправьте ее

In [62]:
n = X.shape[0]

eta = 1e-2 
n_iter = 100

W = np.array([1, 0.5])
print(f'Number of objects = {n} \
       \nLearning rate = {eta} \
       \nInitial weights = {W} \n')

for i in range(n_iter):
    y_pred = np.dot(X, W)
    err = calc_mse(y, y_pred)
    # Было
    #W -= eta * (1/n * 2 * np.dot(X, y_pred - y))
    # Стало (вектор X надо транспонировать было)
    W -= eta * (1/n * 2 * np.dot(X.T, y_pred - y))
    if i % 10 == 0:
        print(f'Iteration #{i}: W_new = {W}, MSE = {round(err,2)}')

Number of objects = 10        
Learning rate = 0.01        
Initial weights = [1.  0.5] 

Iteration #0: W_new = [2.08 4.27], MSE = 3047.75
Iteration #10: W_new = [ 7.0011236 10.6169007], MSE = 738.65
Iteration #20: W_new = [10.3486292  10.10603105], MSE = 622.03
Iteration #30: W_new = [13.38789582  9.55618391], MSE = 525.24
Iteration #40: W_new = [16.16088505  9.05336203], MSE = 444.66
Iteration #50: W_new = [18.69110735  8.59454545], MSE = 377.58
Iteration #60: W_new = [20.99981865  8.17589626], MSE = 321.72
Iteration #70: W_new = [23.10641138  7.79389815], MSE = 275.22
Iteration #80: W_new = [25.02858024  7.44534246], MSE = 236.5
Iteration #90: W_new = [26.78247081  7.12730145], MSE = 204.27


## Task 3. Вместо того, чтобы задавать количество итераций, задайте другое условие остановки алгоритма - когда веса перестают изменяться меньше определенного порога  𝜖

In [60]:
num_objects = X.shape[0] # 
eta = 1e-3 # изначальный шаг обучения
epsilon = 1e-6 # порог для разницы
weight_dist = np.inf # изначальная разница в весах
W = np.array([1, 0.5]) # начальные веса

mse = [] # список для ошибок
w_list = [W] # список для весов
iter_num = 0 # счетчик итераций


while weight_dist > epsilon:
    # learning rate будет считаться по 'Step Decay', где отсечение в 30%, снижение скорости обучения каждые 20 эпох
    learning_rate = eta + 0.3**(iter_num/ 20) 
    
    # предсказываем целевое значение
    y_pred = np.dot(X,W)
    
    # считаем градиент
    Qd = 2 / num_objects * X.T @ (y_pred - y)
    
    # Считаем новый вектор весов и записываем в список
    new_w = W - learning_rate * Qd
    w_list.append(new_w)

    # Считаем разницу по весам
    weight_dist = np.linalg.norm(new_w - W, ord=2)
    
    # Считаем MSE
    err = calc_mse(y, y_pred)
    mse.append(err)
    
    print(f'Iteration {iter_num}: MSE - {error}, weights: {new_W}, learning_rate: {round(learning_rate, 7)}')
    
    iter_num += 1
    W = new_W
    
print(f'MSE: {round(mse[-1], 7)} \nWeights: {w_list[-1]} \nTotal iterations: {iter_num}')

Iteration 0: MSE - 43.968750005400665, weights: [45.06239389  3.81251924], learning_rate: 1.001
Iteration 1: MSE - 43.968750005400665, weights: [45.06239389  3.81251924], learning_rate: 0.9425775
Iteration 2: MSE - 43.968750005400665, weights: [45.06239389  3.81251924], learning_rate: 0.8875682
Iteration 3: MSE - 43.968750005400665, weights: [45.06239389  3.81251924], learning_rate: 0.8357726
Iteration 4: MSE - 43.968750005400665, weights: [45.06239389  3.81251924], learning_rate: 0.7870031
Iteration 5: MSE - 43.968750005400665, weights: [45.06239389  3.81251924], learning_rate: 0.7410828
Iteration 6: MSE - 43.968750005400665, weights: [45.06239389  3.81251924], learning_rate: 0.6978453
Iteration 7: MSE - 43.968750005400665, weights: [45.06239389  3.81251924], learning_rate: 0.6571338
Iteration 8: MSE - 43.968750005400665, weights: [45.06239389  3.81251924], learning_rate: 0.6188009
Iteration 9: MSE - 43.968750005400665, weights: [45.06239389  3.81251924], learning_rate: 0.5827074
Iter