In [121]:
import numpy as np
import pandas as pd
import random
import sklearn.metrics as skm
import sklearn.linear_model as sklm

In [123]:
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=400, n_features=14, n_informative=5, random_state=42)
X = pd.DataFrame(X)
y = pd.Series(y)
X.columns = [f'col_{col}' for col in X.columns]

In [129]:
clf = sklm.LogisticRegression(penalty=None).fit(X, y)
y_pred = clf.predict(X)
skm.accuracy_score(y, y_pred)

0.8125

In [139]:
class MyLogReg:
    def __init__(self, n_iter=100, learning_rate=0.5, weights=None, metric=None, reg=None, l1_coef=0.1, l2_coef=0.1, sgd_sample=None, random_state=42):
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.weights = weights
        self.metric = metric
        self.metric_value = None
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef
        self.learning_rate = learning_rate
        self.sgd_sample = sgd_sample
        self.random_state = random_state
        self.reg_formulas = {'l1': 'self.l1_coef * sum(abs(self.weights))',
                             'l2': 'self.l2_coef * sum(np.power(self.weights, 2))',
                             'elasticnet': 'self.l1_coef * sum(abs(self.weights)) + self.l2_coef * sum(np.power(self.weights, 2))'}
        self.reg_grad_formulas = {'l1': 'self.l1_coef * np.sign(self.weights)',
                                 'l2': 'self.l2_coef * 2 * self.weights',
                                 'elasticnet': 'self.l1_coef * np.sign(self.weights) + self.l2_coef * 2 * self.weights'}
    def __str__(self):
        return f'MyLogReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}'
        
    def fit(self, X, y, verbose=False):
        def accuracy(X, y):
            TP_TN = np.count_nonzero(y == self.predict(X))
            TP_TN_FP_FN = X.shape[0]
            return TP_TN/TP_TN_FP_FN
            
        def precision(X, y):
            TP = sum([1 if (i == j and j == 1) else 0 for i, j in zip(y, self.predict(X))])
            TP_FP = np.count_nonzero(self.predict(X) == np.ones(X.shape[0]))
            return TP/TP_FP
            
        def recall(X, y):
            TP = sum([1 if (i == j and j == 1) else 0 for i, j in zip(y, self.predict(X))])
            TP_FN = sum(y == np.ones(X.shape[0]))
            return TP/TP_FN
            
        def f1(X, y):
            return 2 * precision(X, y) * recall(X, y) / (precision(X, y) + recall(X, y))
            
        def roc_auc(X, y):
            y_pred = np.round(self.predict_proba(X), 10)
            P = np.count_nonzero(y)
            N = y.shape[0] - P
            y.reset_index(inplace=True, drop=True)
            y_table = pd.concat([pd.Series(y_pred), y], axis = 1)
            y_table.sort_values(by=[0], inplace=True, ascending=False)
            i = 0
            score = 0
            while i < y_table.shape[0]:
                if y_table.iloc[i, 1] == 0:
                    score += np.count_nonzero(y_table.iloc[:i, 1]) - 1
                    the_same = np.count_nonzero(y_table.iloc[:, 0] == y_table.iloc[i, 0])
                    score += the_same / 2
                    i += the_same
                    continue
                i += 1
            return (1 / (N * P) * score)
            
        metrics = {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1, 'roc_auc': roc_auc}
        eps = 10**(-15)
        #random.seed(self.random_state)
        flag = True
        X.reset_index(inplace=True, drop=True)
        X_ones = pd.concat([pd.DataFrame(np.ones(X.shape[0])), X], axis = 1)
        self.weights = np.ones((X_ones.shape[1]))
        if not self.sgd_sample:
            self.sgd_sample = X_ones.shape[0]
        if self.metric:
            formula = metrics[self.metric]
        for i in range(1, self.n_iter + 1):
            sample_rows_idx = range(X_ones.shape[0])#random.sample(range(X.shape[0]), self.sgd_sample if isinstance(self.sgd_sample, int) else round(self.sgd_sample * X_ones.shape[0]))
            X_ones.reset_index(inplace=True, drop=True)
            y.reset_index(inplace=True, drop=True)
            X_batch = X_ones#.reindex(sample_rows_idx)
            y_batch = y#.reindex(sample_rows_idx)
            y_logit = np.dot(X_ones, self.weights)
            y_pred = 1/(1 + np.exp(-y_logit))
            y_batch_logit = np.dot(X_batch, self.weights) #логит
            y_batch_pred = 1/(1 + np.exp(-y_batch_logit)) #предсказание
            if self.reg:
                reg_value = eval(self.reg_formulas[self.reg])
                reg_grad_value = eval(self.reg_grad_formulas[self.reg])
            else:
                reg_value = 0
                reg_grad_value = 0
            loss = - 1 / X_ones.shape[0] * sum(y * np.log(y_pred + eps) + (1 - y_batch) * np.log(1 - y_batch_pred + eps)) + reg_value
            grad = 1 / X_batch.shape[0] * np.dot((y_batch_pred - y_batch), X_batch) + reg_grad_value
            if callable(self.learning_rate):
                self.weights = self.weights - self.learning_rate(i) * grad
            else:
                self.weights = self.weights - self.learning_rate * grad
            if self.metric:
                self.metric_value = formula(X, y)
                print(f'my{self.metric_value}')
                print(f'true{skm.accuracy_score(y, self.predict(X))}')
            if verbose and (i + 1) % verbose == 0:
                if flag:
                    if self.metric:
                        print(f'start| loss: {loss}| {self.metric}: {self.metric_value}')
                    else: 
                        print(f'start| loss: {loss}')
                    flag = False
                else:
                    if self.metric:
                        print(f'{i} | loss: {loss}| {self.metric}: {self.metric_value}')
                    else:
                        print(f'{i} | loss: {loss}')
        
    def get_coef(self):
        return self.weights[1:]
        
    def predict(self, X):
        return np.array([1 if i > 0.5 else 0 for i in np.dot(pd.concat([pd.DataFrame(np.ones(X.shape[0])), X], axis = 1), self.weights)])

    def predict_proba(self, X):
        return np.dot(pd.concat([pd.DataFrame(np.ones(X.shape[0])), X], axis = 1), self.weights)
        
    def get_best_score(self):
        return self.metric_value

In [141]:
for metric in ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']:
    model = MyLogReg(metric=metric)
    model.fit(X, y)
    print(model.get_best_score())

my0.5775
true0.5775
my0.58
true0.58
my0.5875
true0.5875
my0.5925
true0.5925
my0.5925
true0.5925
my0.5975
true0.5975
my0.6
true0.6
my0.605
true0.605
my0.6075
true0.6075
my0.605
true0.605
my0.6075
true0.6075
my0.61
true0.61
my0.615
true0.615
my0.6275
true0.6275
my0.63
true0.63
my0.6325
true0.6325
my0.635
true0.635
my0.6375
true0.6375
my0.65
true0.65
my0.6525
true0.6525
my0.6575
true0.6575
my0.66
true0.66
my0.6625
true0.6625
my0.6725
true0.6725
my0.68
true0.68
my0.6875
true0.6875
my0.6925
true0.6925
my0.7
true0.7
my0.705
true0.705
my0.7025
true0.7025
my0.705
true0.705
my0.705
true0.705
my0.7125
true0.7125
my0.71
true0.71
my0.7175
true0.7175
my0.7225
true0.7225
my0.7225
true0.7225
my0.7175
true0.7175
my0.7175
true0.7175
my0.7175
true0.7175
my0.7175
true0.7175
my0.7225
true0.7225
my0.725
true0.725
my0.725
true0.725
my0.725
true0.725
my0.7325
true0.7325
my0.7325
true0.7325
my0.7325
true0.7325
my0.735
true0.735
my0.74
true0.74
my0.74
true0.74
my0.74
true0.74
my0.745
true0.745
my0.745
true0.74

In [63]:
def f1(a, b):
    return a + b 

def f2(c, d):
    return c + d

def f3(a, b, c, d):
    return (f1(a, b) + f2(c, d))

f3(1, 2, 3, 4)

10

In [49]:
np.count_nonzero(y == model.predict(X))

275

In [33]:
np.count_nonzero(y == model.predict(X))/X.shape[0]

0.6875

In [35]:
np.count_nonzero(y == model.predict(X))

275

In [39]:
True == 1

True

In [41]:
True == 0

False

In [43]:
False == 1

False

In [45]:
False == 0

True