# Логистическая регрессия + Линейный слой

$$
\sigma(x) = \frac{1}{1 + e^{-x}}
$$
$$
\frac{d\sigma}{dx}(x_0) = \sigma(x_0)\,\bigl(1 - \sigma(x_0)\bigr)
$$

$$
X \to X_1 = X W_1 + b_1 \to X_2 = \sigma(X_1) \to P = X_2 W_2 + b_2 \to L = \frac{1}{N}\sum_{i = 1}^{N}(y_i - P_i)^2
$$

In [30]:
import numpy as np
from sklearn.metrics import mean_squared_error

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``e = exp(-|x|)``, which always lies in
    ``[0, 1]``: ``1 / (1 + e)`` for ``x >= 0`` and ``e / (1 + e)`` for
    ``x < 0``. The two forms are algebraically identical to the textbook
    formula but never evaluate ``exp`` on a large positive argument, so
    strongly negative inputs no longer trigger an overflow warning.

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``.
    """
    x = np.asarray(x)
    e = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays of higher rank unchanged.
    return out[()]

def forward(weights, X, y):
    """Run the network forward and score the predictions with MSE.

    The pipeline is: affine map with ``W1``/``B1``, sigmoid activation,
    affine map with ``W2``/``B2``, flatten, mean squared error against ``y``.

    Args:
        weights: Mapping with the entries ``'W1'``, ``'B1'``, ``'W2'``, ``'B2'``.
        X: Input matrix multiplied on the right by ``weights['W1']``.
        y: Targets compared element-wise with the flattened predictions.

    Returns:
        A ``(loss, cache)`` pair, where ``cache`` holds the intermediate
        values under the keys ``'X1'`` (pre-activation), ``'X2'``
        (activation) and ``'P'`` (flattened predictions).
    """
    pre_activation = X @ weights['W1'] + weights['B1']
    activation = sigmoid(pre_activation)
    predictions = (activation @ weights['W2'] + weights['B2']).flatten()
    residual = predictions - y
    loss = np.mean(residual ** 2)
    cache = {'X1': pre_activation, 'X2': activation, 'P': predictions}
    return loss, cache

In [31]:
def _bias_grad(upstream, bias):
    """Reduce an upstream gradient to the shape of the bias that was broadcast."""
    if bias is None or np.size(bias) == 1:
        # A single shared bias is added to every element, so it collects the
        # gradient of every element.
        return np.sum(upstream)
    # One bias per column: only the sample axis was broadcast.
    return np.sum(upstream, axis=0).reshape(np.shape(bias))


def backward(forward_, X, y, weights):
    """Compute MSE-loss gradients for the sigmoid-hidden / linear-output network.

    Args:
        forward_: Cache with the keys ``'X2'`` (sigmoid activations) and
            ``'P'`` (flattened predictions) from the forward pass.
        X: Input matrix used in the forward pass.
        y: Targets; ``forward_['P'] - y`` must have shape ``(len(y),)``.
        weights: Mapping with ``'W2'`` and, optionally, ``'B1'``/``'B2'``.
            The biases are only inspected for their shape: a bias with a
            single element (or a missing one) gets a scalar gradient, any
            other bias gets a gradient of its own shape.

    Returns:
        Dict with the gradients under the keys ``'W2'``, ``'B2'``, ``'W1'``
        and ``'B1'``.
    """
    P = forward_['P']
    X2 = forward_['X2']
    N = len(y)

    # d(mean((P - y)^2)) / dP
    dL_dP = (2 / N) * (P - y)
    # Guards against accidental broadcasting (e.g. a column-vector y).
    assert dL_dP.shape == (N,)
    dL_dP = dL_dP.reshape(-1, 1)

    # Output layer: P = X2 @ W2 + B2
    W2_grad = X2.T @ dL_dP
    B2_grad = _bias_grad(dL_dP, weights.get('B2'))

    # Hidden layer: X2 = sigmoid(X1), X1 = X @ W1 + B1
    dX2_dX1 = X2 * (1 - X2)
    dL_dX1 = (dL_dP @ weights['W2'].T) * dX2_dX1
    W1_grad = X.T @ dL_dX1
    B1_grad = _bias_grad(dL_dX1, weights.get('B1'))

    return {'W2': W2_grad, 'B2': B2_grad, 'W1': W1_grad, 'B1': B1_grad}

In [3]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Load the California housing dataset and split its feature matrix and target
# vector into train and test parts. random_state=10 pins the shuffle so the
# split is the same on every run.
california = fetch_california_housing()
x_train, x_test, y_train, y_test = train_test_split(california.data, california.target, random_state=10)

In [4]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into it.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [52]:
def learn(weights, x_train: np.ndarray, y_train: np.ndarray, lr=0.1, *, n_iters=1001, log_every=10) -> None:
    """Train the network with full-batch gradient descent, updating ``weights`` in place.

    Each iteration runs ``forward`` and ``backward`` on the whole training
    set and subtracts ``lr`` times the gradient from every parameter.

    Args:
        weights: Mapping with the arrays ``'W1'``, ``'W2'``, ``'B1'``, ``'B2'``;
            the arrays are modified in place.
        x_train: Training inputs passed to ``forward`` and ``backward``.
        y_train: Training targets passed to ``forward`` and ``backward``.
        lr: Step size of each gradient update.
        n_iters: Number of gradient steps to perform.
        log_every: Print the loss every this many iterations, starting with
            iteration 0; a falsy value disables printing.
    """
    for s in range(n_iters):
        # The loss is measured before this iteration's update is applied.
        loss, forward_info = forward(weights, x_train, y_train)
        backward_info = backward(forward_info, x_train, y_train, weights)
        for name in ('W1', 'W2', 'B1', 'B2'):
            weights[name] -= lr * backward_info[name]
        if log_every and s % log_every == 0:
            print(f'Iter {s}: loss = {loss:.6f}')

In [53]:
# NOTE(review): scratch cell. W1 is sized here by x_train.shape[0] (the number
# of rows), which is why the displayed shape is so large. The training cell
# rebuilds W1 from x_train.shape[1] instead, so this value is overwritten
# before it is ever used in `X @ W1`.
W1 = np.random.randn(x_train.shape[0], 4)
W1.shape

(15480, 4)

In [54]:
# Seed the global NumPy RNG so the initial weights are reproducible. The draw
# order (W1, B1, W2, B2) determines which random numbers each array receives.
np.random.seed(10)
n_features, n_hidden = x_train.shape[1], 4
W1 = np.random.randn(n_features, n_hidden)
B1 = np.random.randn(1)
W2 = np.random.randn(n_hidden, 1)
B2 = np.random.randn(1)
weights = dict(W1=W1, W2=W2, B1=B1, B2=B2)
learn(weights, x_train, y_train)

Iter 0: loss = 4.494365
Iter 10: loss = 1.431252
Iter 20: loss = 1.175610
Iter 30: loss = 1.014667
Iter 40: loss = 0.906707
Iter 50: loss = 0.833564
Iter 60: loss = 0.781889
Iter 70: loss = 0.743140
Iter 80: loss = 0.712498
Iter 90: loss = 0.687242
Iter 100: loss = 0.665718
Iter 110: loss = 0.646938
Iter 120: loss = 0.630452
Iter 130: loss = 0.615959
Iter 140: loss = 0.603035
Iter 150: loss = 0.591290
Iter 160: loss = 0.580432
Iter 170: loss = 0.570262
Iter 180: loss = 0.560669
Iter 190: loss = 0.551594
Iter 200: loss = 0.543008
Iter 210: loss = 0.534895
Iter 220: loss = 0.527243
Iter 230: loss = 0.520041
Iter 240: loss = 0.513278
Iter 250: loss = 0.506940
Iter 260: loss = 0.501009
Iter 270: loss = 0.495469
Iter 280: loss = 0.490299
Iter 290: loss = 0.485479
Iter 300: loss = 0.480986
Iter 310: loss = 0.476801
Iter 320: loss = 0.472903
Iter 330: loss = 0.469272
Iter 340: loss = 0.465889
Iter 350: loss = 0.462738
Iter 360: loss = 0.459800
Iter 370: loss = 0.457060
Iter 380: loss = 0.4545

In [57]:
def predict(x, weights):
    """Return the network's predictions for ``x`` as a one-dimensional array.

    Applies the same steps as the forward pass without computing a loss:
    affine map with ``W1``/``B1``, sigmoid, affine map with ``W2``/``B2``.

    Args:
        x: Input matrix multiplied on the right by ``weights['W1']``.
        weights: Mapping with the entries ``'W1'``, ``'B1'``, ``'W2'``, ``'B2'``.

    Returns:
        The output of the second affine map, flattened with ``ravel``.
    """
    hidden = sigmoid(x @ weights['W1'] + weights['B1'])
    output = hidden @ weights['W2'] + weights['B2']
    return output.ravel()

In [63]:
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_squared_error

# Score the trained network on the held-out split with each imported metric.
y_pred = predict(x_test, weights)
for label, metric in (
    ('MAE:', mean_absolute_error),
    ('RMSE:', root_mean_squared_error),
    ('MSE:', mean_squared_error),
):
    print(label, metric(y_test, y_pred))

MAE: 0.4541651383248917
RMSE: 0.6445131808245368
MSE: 0.4153972402565621
