In [2]:
import numpy as np

In [3]:
class LogReg:
    def __init__(self, n_iter=1000, learning_rate=0.001, weights=None, reg=None, coef_l1=0.1, coef_l2=0.1, batch_size=None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.reg = reg
        self.coef_l1 = coef_l1
        self.coef_l2 = coef_l2
        self.batch_size = batch_size

    def Grad(self, X_np, y_np, w, N):
        base_grad = (X_np.T @ (self.sigmoid(X_np @ w) - y_np)) / N

        if self.reg == 'l1':
            base_grad += self.coef_l1 * np.sign(w)
        elif self.reg == 'l2':
            base_grad += self.coef_l2 * 2 * w
        return base_grad

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
        
    def LogLoss(self, y_pred, y_np, w):
        eps = 1e-15
        baseLoss = (- (y_np * np.log(y_pred + 1e-15) + (1 - y_np) * (np.log(1 - y_pred + 1e-15)))).mean() 
        if self.reg == 'l1':
            baseLoss += self.coef_l1 * np.sum(np.abs(w))
        elif self.reg == 'l2':
            baseLoss += self.coef_l2 * np.sum(w ** 2)
        return baseLoss

    def fit(self, X, y):
        X = X.copy()
        X.insert(0, 'bias', 1)
        
        X_np = X.to_numpy()
        N = X_np.shape[0]
        y_np = y.to_numpy().reshape(-1, 1)
        w = np.zeros(X_np.shape[1]).reshape(-1, 1)
        for i in range(self.n_iter):
            if self.batch_size is None:
                X_batch, y_batch = X_np, y_np
            elif self.batch_size > 1:
                indx = np.random.choice(X_np.shape[0], int(self.batch_size), replace=False)
                X_batch = X_np[indx]
                y_batch = y_np[indx]
            elif self.batch_size < 1:
                indx = np.random.choice(X_np.shape[0], int(X_np.shape[0] * self.batch_size), replace=False)
                X_batch = X_np[indx]
                y_batch = y_np[indx]
                
            z = X_batch @ w
            y_pred = self.sigmoid(z)
            w -= self.learning_rate * self.Grad(X_batch, y_batch, w, y_batch.shape[0])

            if i % 1000 == 0:
                print(self.LogLoss(y_pred, y_batch, w))
        self.weights = w
        return self 

    def predict_proba(self, X):
        X = X.copy()
        X.insert(0, 'bias', 1)
        X_np = X.to_numpy()
        z = X_np @ self.weights
        return self.sigmoid(z)
        
    def predict_class(self, X):
        threshold = 0.5
        return (self.predict_proba(X) >= threshold).astype(int)
        

        
        
        
        

In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

# 1. Генерируем игрушечные данные
X, y = make_classification(
    n_samples=500,       # 500 объектов
    n_features=4,        # 4 признака
    n_informative=3,     # 3 информативных
    n_redundant=0, 
    n_clusters_per_class=1,
    random_state=42
)

# переводим в pandas DataFrame для совместимости с LogReg
X = pd.DataFrame(X, columns=[f"x{i}" for i in range(X.shape[1])])
y = pd.Series(y)

# 2. Разделяем на train/test
train_idx = np.random.choice(len(X), size=int(0.8 * len(X)), replace=False)
test_idx = np.setdiff1d(np.arange(len(X)), train_idx)

X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

# 3. Обучаем модель
model = LogReg(n_iter=5000, learning_rate=0.05, batch_size=None, reg='l2', coef_l2=0.01)
model.fit(X_train, y_train)

# 4. Предсказываем на тесте
y_pred = model.predict_class(X_test)

# 5. Считаем метрику
acc = accuracy_score(y_test, y_pred)
print(f"\nAccuracy на тесте: {acc:.3f}")


0.69315339470542
0.30075091558620404
0.29868007690115905
0.2986186519998635
0.2986163363666221

Accuracy на тесте: 0.920


In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

# 1. Генерация данных
X, y = make_classification(
    n_samples=500,
    n_features=4,
    n_informative=3,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42
)

X = pd.DataFrame(X, columns=[f"x{i}" for i in range(X.shape[1])])
y = pd.Series(y)

# 2. Train/test split
train_idx = np.random.choice(len(X), size=int(0.8 * len(X)), replace=False)
test_idx = np.setdiff1d(np.arange(len(X)), train_idx)

X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

# 3. Твоя модель
model = LogReg(n_iter=5000, learning_rate=0.05, batch_size=None, reg='l2', coef_l2=0.01)
model.fit(X_train, y_train)
y_pred_custom = model.predict_class(X_test)
acc_custom = accuracy_score(y_test, y_pred_custom)

# 4. Модель из sklearn
sk_model = LogisticRegression(penalty='l2', C=1/0.01, solver='lbfgs', max_iter=5000)
sk_model.fit(X_train, y_train)
y_pred_sklearn = sk_model.predict(X_test)
acc_sklearn = accuracy_score(y_test, y_pred_sklearn)

# 5. Сравнение
print("\nСравнение моделей:")
print(f"Custom LogReg Accuracy : {acc_custom:.3f}")
print(f"Sklearn LogReg Accuracy: {acc_sklearn:.3f}")
print(f"Разница в предсказаниях: {(y_pred_custom != y_pred_sklearn).sum()} из {len(y_test)}")


0.6931532284603242
0.306517594195982
0.3044605308166225
0.3044048328879162
0.3044028451625117

Сравнение моделей:
Custom LogReg Accuracy : 0.920
Sklearn LogReg Accuracy: 0.970
Разница в предсказаниях: 5000 из 100
