In [1]:
import numpy as np

In [2]:
def generate_batches(X, y, batch_size):
    """Yield shuffled mini-batches of ``(X, y)`` of exactly ``batch_size`` rows.

    The rows are visited in a pseudo-random order drawn from a generator seeded
    with the fixed value 42, so every call produces the same order (and hence
    the same batches) for inputs of the same length. A trailing batch that
    would contain fewer than ``batch_size`` rows is dropped.

    Args:
        X: Array-like of samples; indexed along its first axis.
        y: Array-like of targets with ``len(y) == len(X)``.
        batch_size: Positive number of rows per batch.

    Yields:
        Tuples ``(X_batch, y_batch)`` of NumPy arrays, each holding
        ``batch_size`` rows selected by the same indices.

    Raises:
        AssertionError: If ``X`` and ``y`` differ in length.
        ValueError: If ``batch_size`` is not positive.
    """
    assert len(X) == len(y)
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    X = np.asarray(X)
    y = np.asarray(y)

    # A private RandomState(42) produces the same permutation as
    # np.random.seed(42) followed by np.random.permutation, but leaves the
    # global NumPy RNG untouched for the rest of the notebook.
    perm = np.random.RandomState(42).permutation(len(X))

    # Only full batches are emitted; the remainder (if any) is skipped.
    n_full_batches = len(X) // batch_size
    for batch_start in range(0, n_full_batches * batch_size, batch_size):
        batch_idx = perm[batch_start:batch_start + batch_size]
        yield X[batch_idx], y[batch_idx]

In [4]:
def logit(x, w):
    """Return the linear score ``np.dot(x, w)`` of inputs ``x`` under weights ``w``.

    Args:
        x: Feature vector or matrix of samples.
        w: Weight vector compatible with the last axis of ``x``.

    Returns:
        The dot product of ``x`` and ``w``.
    """
    linear_scores = np.dot(x, w)
    return linear_scores

def sigmoid(h):
    """Element-wise logistic function ``1 / (1 + exp(-h))``.

    Args:
        h: Scalar or NumPy array of logits.

    Returns:
        The logistic function applied to ``h``; values lie in ``[0, 1]``.
    """
    # For very negative h, exp(-h) overflows to inf and the quotient becomes
    # exactly 0.0, which is already the correctly rounded result. Only the
    # overflow RuntimeWarning is spurious, so it is silenced here; the
    # returned values are unchanged.
    with np.errstate(over="ignore"):
        return 1. / (1 + np.exp(-h))

In [7]:
class LogisticRegression:
    """Binary logistic regression trained with mini-batch gradient descent.

    The weight vector ``w`` has ``k + 1`` entries for ``k`` features:
    ``w[0]`` is the bias and ``w[1:]`` are the feature coefficients. It is
    ``None`` until the first call to :meth:`fit`.
    """

    def __init__(self):
        self.w = None

    def fit(self, X, y, epochs=10, lr=0.1, batch_size=100):
        """Train the model and return the per-batch loss history.

        A column of ones is prepended to ``X`` so that ``w[0]`` acts as the
        bias. If the weights are already set (a previous ``fit``), training
        continues from them.

        Args:
            X: 2-D array-like of shape ``(n, k)``.
            y: Targets, one per row of ``X``.
            epochs: Number of passes over the data.
            lr: Learning rate multiplying the (summed, not averaged) gradient.
            batch_size: Rows per mini-batch, forwarded to ``generate_batches``.

        Returns:
            List with one scalar loss per processed batch, in order.
        """
        X = np.asarray(X)
        n, k = X.shape
        if self.w is None:
            # Same initial weights as np.random.seed(42) + randn, but drawn
            # from a private generator so the global RNG is not reset.
            self.w = np.random.RandomState(42).randn(k + 1)

        X_train = np.concatenate((np.ones((n, 1)), X), axis=1)

        losses = []

        for _ in range(epochs):
            for X_batch, y_batch in generate_batches(X_train, y, batch_size):
                predictions = self._predict_proba_internal(X_batch)
                # Use the clipped loss: with raw log(), saturated predictions
                # (p == 0 or p == 1) produce inf, or nan via 0 * -inf.
                loss = self.__loss(y_batch, predictions)

                assert (np.array(loss).shape == tuple())

                losses.append(loss)
                self.w -= lr * self.get_grad(X_batch, y_batch, predictions)

        return losses

    def get_grad(self, X_batch, y_batch, predictions):
        """Gradient of the summed cross-entropy with respect to ``w``.

        Args:
            X_batch: Batch matrix that already contains the bias column.
            y_batch: Targets for the batch.
            predictions: Predicted probabilities for the batch.

        Returns:
            1-D array with one entry per column of ``X_batch``.
        """
        grad_basic = X_batch.T.dot(predictions - y_batch)
        assert grad_basic.shape == (X_batch.shape[1],) , "Градиенты должны быть столбцом из k_features + 1 элементов"

        return grad_basic

    def predict_proba(self, X):
        """Return predicted probabilities for raw features ``X`` (no bias column)."""
        X = np.asarray(X)
        n, k = X.shape
        X_ = np.concatenate((np.ones((n, 1)), X), axis=1)
        return self._predict_proba_internal(X_)

    def _predict_proba_internal(self, X):
        """Return probabilities for ``X`` that already includes the bias column."""
        return sigmoid(logit(X, self.w))

    def predict(self, X, threshold=0.5):
        """Return a boolean array: ``True`` where the probability is >= ``threshold``."""
        return self.predict_proba(X) >= threshold

    def get_weights(self):
        """Return a copy of the weight vector (bias first)."""
        return self.w.copy()

    def __loss(self, y, p):
        """Summed binary cross-entropy of probabilities ``p`` against targets ``y``.

        ``p`` is clipped to ``[1e-10, 1 - 1e-10]`` so the logarithms stay finite.
        """
        p = np.clip(p, 1e-10, 1 - 1e-10)
        return -np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))

In [9]:
class ElasticLogisticRegression(LogisticRegression):
    """Logistic regression whose gradient adds L1 and L2 penalty terms.

    The bias weight ``w[0]`` is excluded from both penalties.
    """

    def __init__(self, l1_coef, l2_coef):
        """Store the L1/L2 penalty strengths; weights stay unset until training."""
        self.w = None
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef

    def get_grad(self, X_batch, y_batch, predictions):
        """Return the base-class gradient plus the L1 and L2 penalty gradients.

        Args:
            X_batch: Batch matrix that already contains the bias column.
            y_batch: Targets for the batch.
            predictions: Predicted probabilities for the batch.

        Returns:
            1-D array with one entry per column of ``X_batch``.
        """
        data_term = super().get_grad(X_batch, y_batch, predictions)

        # Both penalty terms are freshly allocated arrays, so zeroing their
        # first entry (the bias position) does not modify self.w.
        l1_term = self.l1_coef * np.sign(self.w)
        l2_term = self.l2_coef * 2 * self.w
        l1_term[0] = 0
        l2_term[0] = 0

        expected_shape = (X_batch.shape[1],)
        assert l1_term[0] == l2_term[0] == 0, "Bias в регуляризационные слагаемые не входит!"
        assert data_term.shape == l1_term.shape == l2_term.shape == expected_shape, "Градиенты должны быть столбцом из k_features + 1 элементов"

        return data_term + l1_term + l2_term

In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
# Synthetic binary-classification data set; the fixed random_state keeps it reproducible.
X, y = make_classification(
    n_samples=1000,   # number of samples
    n_features=10,    # number of features
    n_classes=2,      # two classes
    n_informative=2,  # number of informative features
    n_redundant=2,    # number of redundant features
    random_state=42,  # fixed random_state for reproducibility
)

# Hold out 20% of the samples for validation.
X_train, x_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Unregularised model; its weights stay None until the first fit() call.
model = LogisticRegression()
