### Стахостический градиентный спуск

In [1]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.utils import shuffle

In [2]:
X, y = make_regression(n_samples=100000, n_features=10, n_informative=10, random_state=42)

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

In [15]:
class MyLinearRegression:
    def __init__(self, step_size, epochs, batch_size=None):
        self.step_size = step_size
        self.epochs = epochs
        self.batch_size = batch_size

    def fit(self, X_train, y_train):
        self.w = np.zeros(X_train.shape[1] + 1)

        if self.batch_size is None or self.batch_size >= X_train.shape[0]:
            batch_size_actual = X_train.shape[0]
        else:
            batch_size_actual = self.batch_size

        num_batch = int(np.ceil(X_train.shape[0] / batch_size_actual))

        for epoch in range(self.epochs):
            X_shuffled, y_shuffled = shuffle(X_train, y_train, random_state=epoch)

            for i in range(num_batch):
                start = i * batch_size_actual
                end = min((i + 1) * batch_size_actual, X_shuffled.shape[0])

                X_batch = X_shuffled[start:end]
                y_batch = y_shuffled[start:end]

                X_batch_with_ones = np.concatenate((np.ones((X_batch.shape[0], 1)), X_batch), axis=1)

                y_pred = X_batch_with_ones @ self.w
                gradient = 2 * X_batch_with_ones.T @ (y_pred - y_batch) / X_batch_with_ones.shape[0]

                self.w = self.w - self.step_size * gradient

        self.w0 = self.w[0]
        self.w = self.w[1:]

    def predict(self, X_test):
        return X_test.dot(self.w) + self.w0



In [23]:
%%time
print("--- Классический Градиентный Спуск (Batch GD) ---")
model_classic_gd = MyLinearRegression(step_size=0.01, epochs=5000, batch_size=X_train.shape[0])
model_classic_gd.fit(X_train, y_train)
y_pred_classic_gd = model_classic_gd.predict(X_test)
mae_classic_gd = mean_absolute_error(y_test, y_pred_classic_gd)
print(f"MAE (Batch GD): {mae_classic_gd}")

--- Классический Градиентный Спуск (Batch GD) ---
MAE (Batch GD): 6.173613008612563e-13
CPU times: total: 2min 49s
Wall time: 2min 36s


In [21]:
%%time
print("\n--- Стохастический Градиентный Спуск (SGD) ---")
model_sgd = MyLinearRegression(step_size=0.01, epochs=500, batch_size=64)
model_sgd.fit(X_train, y_train)
y_pred_sgd = model_sgd.predict(X_test)
mae_sgd = mean_absolute_error(y_test, y_pred_sgd)
print(f"MAE (SGD, batch_size=64): {mae_sgd}")


--- Стохастический Градиентный Спуск (SGD) ---
MAE (SGD, batch_size=64): 2.054896671224715e-13
CPU times: total: 16.1 s
Wall time: 17.6 s


Чтобы достичь точности MAE очень близкое к нуля, для классического ГС понадобилось 2,5 минуты, в то время как стахостический ГС справился всего за 18 секунд и его точность оказалась больше