In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# для регрессии
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from scipy.spatial import distance
%matplotlib inline

# Load the samples and keep only the rows whose x_train lies strictly between 1 and 5.
data = pd.read_csv('3.10_non_linear.csv', sep=',')
data = data.loc[(data['x_train'] > 1) & (data['x_train'] < 5)].copy()

# Features as an (n, 1) column vector, targets as a flat array.
X = data['x_train'].values[:, np.newaxis]
y = data['y_train'].values

# Hold out 20% of the rows for validation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=10
)

data.head()

Unnamed: 0,x_train,y_train
5,1.182421,1.860341
6,1.251605,1.878928
7,1.270474,2.430015
8,1.402553,2.327856
9,1.427711,2.203649


In [2]:
def gradient(X, y, w) -> tuple:
    """Compute the gradient of the mean squared error of a linear model.

    The prediction is ``y_hat = X @ w`` and the loss is
    ``mean((y - y_hat) ** 2)``.

    Args:
        X: Feature matrix of shape (n, m).
        y: Targets, either a flat array of length n or a column of shape (n, 1).
        w: Weights, either a flat array of length m or a row of shape (1, m).

    Returns:
        A ``(grad, error)`` tuple. ``grad`` is a flat array of length m with
        the partial derivatives of the loss with respect to each weight;
        ``error`` is the residual column ``y - y_hat`` of shape (n, 1).
    """
    X = np.asarray(X)
    # Number of samples in the training set.
    n = X.shape[0]
    # Force weights and targets into column form. Without this, a flat `y`
    # subtracted from the (n, 1) prediction broadcasts to an (n, n) matrix
    # and the gradient is silently wrong (or fails with a shape error).
    w_col = np.asarray(w).reshape(-1, 1)
    y_col = np.asarray(y).reshape(-1, 1)
    # Model prediction, shape (n, 1).
    y_hat = X.dot(w_col)
    # Residuals, shape (n, 1).
    error = y_col - y_hat
    # d/dw mean((y - Xw)^2) = -2/n * sum_i x_i * error_i
    grad = np.multiply(X, error).sum(axis=0) * (-1.0) * 2.0 / n
    return grad, error

def eval_w_next(X, y, eta, w_current):
    """Perform one gradient-descent step and measure how far the weights moved.

    Args:
        X: Feature matrix passed through to ``gradient``.
        y: Targets passed through to ``gradient``.
        eta: Learning rate (step size multiplier for the gradient).
        w_current: Current weights; ``gradient_descent`` passes a (1, m) row.

    Returns:
        A ``(w_next, weight_evolution, grad)`` tuple: the updated weights,
        the Euclidean distance between the old and new weights (used by the
        caller as the convergence measure), and the gradient at ``w_current``.
    """
    # Gradient of the loss at the current weights; the residuals are not needed here.
    grad, _ = gradient(X, y, w_current)
    # Descent step: move against the gradient.
    w_next = w_current - eta * grad
    # Convergence measure: Euclidean length of the update. The weights are a
    # (1, m) row, and scipy.spatial.distance.euclidean accepts only 1-D
    # vectors in recent SciPy releases, so flatten and take the norm directly.
    weight_evolution = np.linalg.norm(np.ravel(w_next) - np.ravel(w_current))
    return (w_next, weight_evolution, grad)

def gradient_descent(X, y, eta=0.01, epsilon=0.001, max_steps=None, random_state=None):
    """Fit linear-model weights by batch gradient descent.

    Starting from random weights in [0, 1), repeatedly applies
    ``eval_w_next`` until the distance between two consecutive weight
    vectors is no greater than ``epsilon``. Progress is printed every 100
    steps.

    Args:
        X: Feature matrix of shape (n, m).
        y: Targets, forwarded unchanged to ``eval_w_next``.
        eta: Learning rate.
        epsilon: Convergence threshold on the distance between consecutive
            weight vectors.
        max_steps: Optional upper bound on the number of loop iterations.
            ``None`` (the default) means no limit.
        random_state: Optional seed for the initial weights. ``None`` (the
            default) draws from NumPy's global random state.

    Returns:
        The most recent weight vector, shape (1, m).
    """
    m = X.shape[1]  # number of features == dimensionality of the gradient
    if random_state is None:
        # Global NumPy RNG, so a prior np.random.seed(...) still takes effect.
        w = np.random.random(m).reshape(1, -1)
    else:
        # Private generator: reproducible without touching the global state.
        w = np.random.RandomState(random_state).random_sample(m).reshape(1, -1)
    w_next, weight_evolution, grad = eval_w_next(X, y, eta, w)
    step = 0
    while weight_evolution > epsilon:
        if max_steps is not None and step >= max_steps:
            # Give up instead of iterating indefinitely when the step size
            # is too large or the threshold is too tight.
            break
        w = w_next
        w_next, weight_evolution, grad = eval_w_next(X, y, eta, w)
        step += 1
        if step % 100 == 0:
            print("step %s, |w-w_next|=%0.5f, rrad=%s" %(step, weight_evolution, grad))
    # w_next is the newest iterate (the one the stopping test was evaluated
    # for); returning w would throw away the final computed step.
    return w_next

In [7]:
X = data['x_train'].values.reshape(-1, 1)
n = len(X)
# Prepend a constant column of ones so the first weight acts as the intercept.
X = np.hstack([np.ones((n, 1)), X])
# Targets are passed as an (n, 1) column.
w = gradient_descent(
    X,
    data['y_train'].values.reshape(-1, 1),
    eta=0.008,
)

step 100, |w-w_next|=0.00520, rrad=[-0.62463509  0.18044467]
step 200, |w-w_next|=0.00428, rrad=[-0.51402834  0.14849256]
step 300, |w-w_next|=0.00352, rrad=[-0.42300719  0.12219836]
step 400, |w-w_next|=0.00290, rrad=[-0.34810353  0.10056019]
step 500, |w-w_next|=0.00239, rrad=[-0.28646338  0.08275357]
step 600, |w-w_next|=0.00196, rrad=[-0.2357381   0.06810005]
step 700, |w-w_next|=0.00162, rrad=[-0.19399497  0.05604129]
step 800, |w-w_next|=0.00133, rrad=[-0.15964347  0.04611782]
step 900, |w-w_next|=0.00109, rrad=[-0.13137473  0.03795155]
