In [496]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [498]:
data = pd.read_csv("heart/heart.csv")
data.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [500]:
data_encoded = pd.get_dummies(data, drop_first=True).astype(float)
data_encoded.head()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease,Sex_M,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_Normal,RestingECG_ST,ExerciseAngina_Y,ST_Slope_Flat,ST_Slope_Up
0,40.0,140.0,289.0,0.0,172.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1,49.0,160.0,180.0,0.0,156.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
2,37.0,130.0,283.0,0.0,98.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
3,48.0,138.0,214.0,0.0,108.0,1.5,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
4,54.0,150.0,195.0,0.0,122.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0


In [636]:
X = data_encoded.drop(columns=["HeartDisease"])
y = data_encoded["HeartDisease"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [638]:
pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=10000))
pipe.fit(X_train, y_train)

In [648]:
y_preds = pipe.predict(X_test)
y_preds_probs = pipe.predict_proba(X_test)
print(f"""
Acc     = {accuracy_score(y_test, y_preds)}
F1      = {f1_score(y_test, y_preds)}
ROC-AUC = {roc_auc_score(y_test, y_preds_probs[:, 1])}
""")


Acc     = 0.8586956521739131
F1      = 0.8807339449541284
ROC-AUC = 0.9056511056511056



In [652]:
class MyLogReg:
    def __init__(self, lr=0.001, num_iter=1000) -> None:
        self.lr = lr
        self.num_iter = num_iter
        self.scaler = StandardScaler()

    def calculate_loss(self, y, y_pred):
        return (- y * np.log(y_pred) - (1 - y) * np.log(1 - y_pred)).mean()

    def grad_w(self, y, y_pred, x):
        grad_w = (-y + y_pred) @ x / x.shape[0]
        return grad_w

    def grad_b(self, y, y_pred):
        grad_b = (-y + y_pred)
        return grad_b.mean()

    def fit(self, X_train, y_train):
        X_train = np.array(X_train)
        self.scaler.fit(X_train)
        X_train = self.scaler.transform(X_train)
        y_train = np.array(y_train)
        self.weights = np.random.rand(X_train.shape[1])
        self.b = 1
        for i in range(self.num_iter):
            pred = self.predict(X_train)
            self.weights -= self.lr * self.grad_w(y_train, pred, X_train)
            self.b -= self.lr * self.grad_b(y_train, pred)

    def predict(self, X_test):
        X_test = np.array(X_test)
        X_test = self.scaler.transform(X_test)
        x = X_test @ self.weights + self.b
        return 1 / (1 + np.exp(-x))

In [654]:
my_logreg = MyLogReg(lr=0.01, num_iter=1000)
my_logreg.fit(X_train, y_train)
y_preds = (my_logreg.predict(X_test) >= 0.5).astype(int)
y_preds_probs = my_logreg.predict(X_test)
print(f"""
Acc     = {accuracy_score(y_test, y_preds)}
F1      = {f1_score(y_test, y_preds)}
ROC-AUC = {roc_auc_score(y_test, y_preds_probs)}
""")


Acc     = 0.8152173913043478
F1      = 0.8595041322314049
ROC-AUC = 0.9277641277641278



In [656]:
class MyLogReg_:
    def __init__(self, lr=0.001, num_iter=1000, penalty=None, C=1.0):
        self.lr = lr
        self.num_iter = num_iter
        self.penalty = penalty
        self.C = C
        self.scaler = StandardScaler()

    def calculate_loss(self, y, y_pred):
        base_loss = (-y * np.log(y_pred) - (1 - y) * np.log(1 - y_pred)).mean()
        if self.penalty == 'l2':
            reg_loss = (np.sum(self.weights**2)) / (2 * self.C)
            return base_loss + reg_loss
        elif self.penalty == 'l1':
            reg_loss = (np.sum(np.abs(self.weights))) / self.C
            return base_loss + reg_loss
        else:
            return base_loss

    def grad_w(self, y, y_pred, X_train):
        grad_w = (-y + y_pred) @ X_train / X_train.shape[0]
        if self.penalty == 'l2':
            grad_w += self.weights / self.C
        elif self.penalty == 'l1':
            grad_w += np.sign(self.weights) / self.C
        return grad_w

    def grad_b(self, y, y_pred):
        grad_b = (-y + y_pred).mean()
        return grad_b

    def fit(self, X_train, y_train):
        X_train = np.array(X_train)
        self.scaler.fit(X_train)
        X_train = self.scaler.transform(X_train)
        y_train = np.array(y_train)
        self.weights = np.random.rand(X_train.shape[1])
        self.b = 1
        for i in range(self.num_iter):
            y_pred = self.predict(X_train)
            self.weights -= self.lr * self.grad_w(y_train, y_pred, X_train)
            self.b -= self.lr * self.grad_b(y_train, y_pred)

    def predict(self, X_test): 
        X_test = np.array(X_test)
        X_test = self.scaler.transform(X_test)
        logits = X_test @ self.weights + self.b
        return 1 / (1 + np.exp(-logits))

In [664]:
# Обучение вашей модели с L2 регуляризацией
my_model = MyLogReg_(lr=0.01, num_iter=1000, penalty='l1', C=0.01)
my_model.fit(X_train, y_train)

# Предсказания
y_preds = (my_logreg.predict(X_test) >= 0.5).astype(int)
y_preds_probs = my_logreg.predict(X_test)
print(f"""
Acc     = {accuracy_score(y_test, y_preds)}
F1      = {f1_score(y_test, y_preds)}
ROC-AUC = {roc_auc_score(y_test, y_preds_probs)}
""")


Acc     = 0.8152173913043478
F1      = 0.8595041322314049
ROC-AUC = 0.9277641277641278



In [689]:
pipe = make_pipeline(StandardScaler(), LogisticRegression(penalty='l2', C=0.01, solver='liblinear', max_iter=10000))
pipe.fit(X_train, y_train)

In [691]:
y_preds = pipe.predict(X_test)
y_preds_probs = pipe.predict_proba(X_test)
print(f"""
Acc     = {accuracy_score(y_test, y_preds)}
F1      = {f1_score(y_test, y_preds)}
ROC-AUC = {roc_auc_score(y_test, y_preds_probs[:, 1])}
""")


Acc     = 0.8695652173913043
F1      = 0.8888888888888888
ROC-AUC = 0.9257985257985257



In [694]:
import numpy as np
from sklearn.preprocessing import StandardScaler

class MyLogReg:
    def __init__(self, lr=0.001, num_iter=1000, penalty=None, C=1.0):
        """
        :param lr: Learning rate
        :param num_iter: Number of iterations
        :param penalty: 'l1', 'l2', or None for no regularization
        :param C: Regularization strength (higher C means weaker regularization)
        """
        self.lr = lr
        self.num_iter = num_iter
        self.penalty = penalty
        self.C = C
        self.scaler = StandardScaler()

    def calculate_loss(self, y, y_pred):
        # Log loss (cross-entropy loss)
        base_loss = (-y * np.log(y_pred) - (1 - y) * np.log(1 - y_pred)).mean()
        if self.penalty == 'l2':
            reg_loss = (np.sum(self.weights**2)) / (2 * self.C)
            return base_loss + reg_loss
        elif self.penalty == 'l1':
            reg_loss = (np.sum(np.abs(self.weights))) / self.C
            return base_loss + reg_loss
        else:
            return base_loss

    def grad_w(self, y, y_pred, X_train):
        # Gradient for weights
        grad_w = (-y + y_pred) @ X_train / X_train.shape[0]
        if self.penalty == 'l2':
            grad_w += self.weights / self.C
        elif self.penalty == 'l1':
            grad_w += np.sign(self.weights) / self.C
        return grad_w

    def grad_b(self, y, y_pred):
        # Gradient for bias
        grad_b = (-y + y_pred).mean()
        return grad_b

    def fit(self, X_train, y_train):
        # Standardize the input
        X_train = np.array(X_train)
        self.scaler.fit(X_train)
        X_train = self.scaler.transform(X_train)
        y_train = np.array(y_train)

        # Initialize weights and bias
        self.weights = np.random.rand(X_train.shape[1])
        self.b = 1

        # Gradient descent
        for i in range(self.num_iter):
            y_pred = self.predict_proba(X_train)
            self.weights -= self.lr * self.grad_w(y_train, y_pred, X_train)
            self.b -= self.lr * self.grad_b(y_train, y_pred)

    def predict_proba(self, X_test):
        # Calculate probabilities using sigmoid function
        X_test = np.array(X_test)
        X_test = self.scaler.transform(X_test)
        logits = X_test @ self.weights + self.b
        return 1 / (1 + np.exp(-logits))

    def predict(self, X_test, threshold=0.5):
        # Predict binary outcomes
        probs = self.predict_proba(X_test)
        return (probs >= threshold).astype(int)


In [716]:
model = MyLogReg(lr=0.01, num_iter=1000, penalty='l1', C=1)
model.fit(X_train, y_train)

# Предсказания и метрики
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(f"""
Acc     = {accuracy_score(y_test, y_pred)}
F1      = {f1_score(y_test, y_pred)}
ROC-AUC = {roc_auc_score(y_test, y_pred)}
""")
# Сравнение с LogisticRegression из sklearn
from sklearn.linear_model import LogisticRegression
sklearn_model = LogisticRegression(penalty='l1', solver='liblinear', max_iter=10000)
sklearn_model.fit(X_train, y_train)
y_pred_sklearn = sklearn_model.predict(X_test)
print("Sklearn Accuracy:", accuracy_score(y_test, y_pred_sklearn))
print(f"""
Acc     = {accuracy_score(y_test, y_pred_sklearn)}
F1      = {f1_score(y_test, y_pred_sklearn)}
ROC-AUC = {roc_auc_score(y_test, y_pred_sklearn)}
""")

Accuracy: 0.5978260869565217

Acc     = 0.5978260869565217
F1      = 0.7482993197278911
ROC-AUC = 0.5

Sklearn Accuracy: 0.8695652173913043

Acc     = 0.8695652173913043
F1      = 0.8909090909090909
ROC-AUC = 0.8643734643734644

