In [231]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import time
import pandas as pd

In [232]:
class MyLogisticRegression:
    def __init__(self, learning_rate=0.1, n_iter=1, optimizer='gd', beta=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.optimizer = optimizer
        self.beta = beta
        self.beta2 = beta2
        self.epsilon = epsilon
        self.weights = None
        self.bias = None

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        Eg_w = np.zeros(n_features)
        Eg_b = 0

        m_w = np.zeros(n_features)
        v_w = np.zeros(n_features)
        m_b = 0
        v_b = 0

        for t in range(1, self.n_iter + 1):
            linear_model = np.dot(X, self.weights) + self.bias
            y_pred = self.sigmoid(linear_model)

            dw = (1 / n_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / n_samples) * np.sum(y_pred - y)

            if self.optimizer == 'gd':
                self.weights -= self.learning_rate * dw
                self.bias -= self.learning_rate * db

            elif self.optimizer == 'rmsprop':
                Eg_w = self.beta * Eg_w + (1 - self.beta) * (dw ** 2)
                Eg_b = self.beta * Eg_b + (1 - self.beta) * (db ** 2)

                self.weights -= (self.learning_rate / (np.sqrt(Eg_w) + self.epsilon)) * dw
                self.bias -= (self.learning_rate / (np.sqrt(Eg_b) + self.epsilon)) * db

            elif self.optimizer == 'nadam':
                m_w = self.beta * m_w + (1 - self.beta) * dw
                v_w = self.beta2 * v_w + (1 - self.beta2) * (dw ** 2)
                m_b = self.beta * m_b + (1 - self.beta) * db
                v_b = self.beta2 * v_b + (1 - self.beta2) * (db ** 2)

                m_w_hat = m_w / (1 - self.beta ** t)
                v_w_hat = v_w / (1 - self.beta2 ** t)
                m_b_hat = m_b / (1 - self.beta ** t)
                v_b_hat = v_b / (1 - self.beta2 ** t)

                self.weights -= self.learning_rate * (self.beta * m_w_hat + (1 - self.beta) * dw / (1 - self.beta ** t)) / (np.sqrt(v_w_hat) + self.epsilon)
                self.bias -= self.learning_rate * (self.beta * m_b_hat + (1 - self.beta) * db / (1 - self.beta ** t)) / (np.sqrt(v_b_hat) + self.epsilon)

    def predict(self, X):
        y_pred_proba = self.sigmoid(np.dot(X, self.weights) + self.bias)
        return np.where(y_pred_proba >= 0.5, 1, 0)

In [233]:
iris = datasets.load_iris()
X = iris.data
y = iris.target

X = X[y != 0]
y = np.where(y[y != 0] == 1, 0, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [234]:
results = []
methods = ['gd', 'rmsprop', 'nadam']

for method in methods:
    model = MyLogisticRegression(learning_rate=10, n_iter=1500, optimizer=method)
    start_time = time.time()
    model.fit(X_train, y_train)
    end_time = time.time()
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results.append((method, acc, end_time - start_time))

df_results = pd.DataFrame(results, columns=['Метод', 'Accuracy', 'Время работы (с)'])

best_method = df_results.loc[df_results['Accuracy'].idxmax()]
df_results


Unnamed: 0,Метод,Accuracy,Время работы (с)
0,gd,0.9,0.014998
1,rmsprop,0.8,0.027006
2,nadam,0.8,0.027995


In [235]:
results = []
methods = ['gd', 'rmsprop', 'nadam']

for method in methods:
    model = MyLogisticRegression(learning_rate=0.01, n_iter=1000, optimizer=method)
    start_time = time.time()
    model.fit(X_train, y_train)
    end_time = time.time()
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results.append((method, acc, end_time - start_time))

df_results = pd.DataFrame(results, columns=['Метод', 'Accuracy', 'Время работы (с)'])

best_method = df_results.loc[df_results['Accuracy'].idxmax()]
df_results

Unnamed: 0,Метод,Accuracy,Время работы (с)
0,gd,0.85,0.008998
1,rmsprop,0.85,0.011
2,nadam,0.85,0.017


In [236]:
results = []
methods = ['gd', 'rmsprop', 'nadam']

for method in methods:
    model = MyLogisticRegression(learning_rate=0.001, n_iter=1100, optimizer=method)
    start_time = time.time()
    model.fit(X_train, y_train)
    end_time = time.time()
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results.append((method, acc, end_time - start_time))

df_results = pd.DataFrame(results, columns=['Метод', 'Accuracy', 'Время работы (с)'])

best_method = df_results.loc[df_results['Accuracy'].idxmax()]
df_results

Unnamed: 0,Метод,Accuracy,Время работы (с)
0,gd,0.4,0.01
1,rmsprop,0.85,0.014
2,nadam,0.85,0.018998


In [237]:
results = []
methods = ['gd', 'rmsprop', 'nadam']

for method in methods:
    model = MyLogisticRegression(learning_rate=10, n_iter=15, optimizer=method)
    start_time = time.time()
    model.fit(X_train, y_train)
    end_time = time.time()
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results.append((method, acc, end_time - start_time))

df_results = pd.DataFrame(results, columns=['Метод', 'Accuracy', 'Время работы (с)'])

best_method = df_results.loc[df_results['Accuracy'].idxmax()]
df_results

Unnamed: 0,Метод,Accuracy,Время работы (с)
0,gd,0.9,0.0
1,rmsprop,0.8,0.0
2,nadam,0.85,0.0


# Вывод
Судя по времени работы, а также более стабильному поведению при различный lr и iters, лучше всех был метод GD.
Но видно, что различные методы по разному себя ведут при различном числе итераций и lr. Например, при learning_rate=0.001, n_iter=1100 GD был хуже.
Поэтому в зависимости от задач нужно выбирать свои методы и для каждого метода подбирать свои оптимальные параметры.