In [306]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_regression, make_classification
from sklearn.metrics import roc_auc_score

In [180]:
# Synthetic binary-classification problem with a fixed seed so reruns match.
X, y = make_classification(
    n_samples=1000,
    n_features=14,
    n_informative=10,
    random_state=42,
)
# Wrap the arrays in pandas containers; features are named col_0 ... col_13.
X = pd.DataFrame(X, columns=[f'col_{col}' for col in range(X.shape[1])])
y = pd.Series(y)

In [181]:
def true_positive(y_true, y_pred):
    """Count samples whose true label is 1 and whose predicted label is 1."""
    actual_positive = y_true == 1
    predicted_positive = y_pred == 1
    return np.sum(actual_positive & predicted_positive)

def false_negative(y_true, y_pred):
    """Count samples whose true label is 1 but whose predicted label is 0."""
    actual_positive = y_true == 1
    predicted_negative = y_pred == 0
    return np.sum(actual_positive & predicted_negative)

def false_positive(y_true, y_pred):
    """Count samples whose true label is 0 but whose predicted label is 1."""
    actual_negative = y_true == 0
    predicted_positive = y_pred == 1
    return np.sum(actual_negative & predicted_positive)

def true_negative(y_true, y_pred):
    """Count samples whose true label is 0 and whose predicted label is 0."""
    actual_negative = y_true == 0
    predicted_negative = y_pred == 0
    return np.sum(actual_negative & predicted_negative)

In [182]:
def accuracy(y_true, y_pred):
    """Return the share of samples whose predicted label equals the true label.

    Correct predictions are the true positives plus the true negatives;
    the denominator is ``len(y_true)``.
    """
    correct = true_positive(y_true, y_pred) + true_negative(y_true, y_pred)
    total = len(y_true)
    return correct / total

In [247]:
def precision(y_true, y_pred):
    """Return TP / (TP + FP): the share of predicted positives that are correct."""
    hits = true_positive(y_true, y_pred)
    predicted_positive = hits + false_positive(y_true, y_pred)
    return hits / predicted_positive

In [255]:
def recall(y_true, y_pred):
    """Return TP / (TP + FN): the share of actual positives that were found."""
    hits = true_positive(y_true, y_pred)
    actual_positive = hits + false_negative(y_true, y_pred)
    return hits / actual_positive

In [263]:
def f1(y_true, y_pred):
    """Return the F1 score: the harmonic mean of precision and recall."""
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    numerator = 2 * p * r
    return numerator / (p + r)

In [317]:
class MyLogReg:
    """Binary logistic regression trained with full-batch gradient descent.

    ``fit`` prepends a column of ones to the features, so ``weights[0]`` is
    the intercept and ``weights[1:]`` are the per-feature coefficients.

    Parameters
    ----------
    n_iter : int
        Number of gradient-descent iterations performed by ``fit``.
    learning_rate : float or callable
        Step size. A callable is invoked with the zero-based iteration index
        and must return the step size for that iteration.
    metric : str or None
        ``'roc_auc'`` (scored on probabilities with ``roc_auc_score``) or the
        name of a module-level function ``f(y_true, y_pred)`` that is scored
        on 0/1 labels, e.g. ``'accuracy'``, ``'precision'``, ``'recall'``,
        ``'f1'``. ``None`` disables scoring.
    reg : str or None
        ``'l1'``, ``'l2'`` or ``'elasticnet'`` adds the matching penalty
        gradient; any other value adds nothing.
    l1_coef, l2_coef : float
        Penalty strengths. With the default of 0 the corresponding penalty
        contributes nothing even when ``reg`` selects it.
    """

    def __init__(self, n_iter, learning_rate, metric=None, reg=None, l1_coef=0, l2_coef=0):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = None
        self.eps = 1e-15
        self.scores = []
        self.metric = metric
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef

    def __str__(self):
        return f"MyLogReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}"

    def log_loss(self, y, y_pred):
        """Return the mean binary cross-entropy of ``y_pred`` against ``y``."""
        # Clip on both sides: merely adding eps pushes a probability of
        # exactly 1 above 1, and log(1 - y_pred) then evaluates to NaN.
        y_pred = np.clip(y_pred, self.eps, 1 - self.eps)
        return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

    def l1(self):
        """Gradient of the L1 penalty, taken over all weights (intercept included)."""
        return self.l1_coef * np.sign(self.weights)

    def l2(self):
        """Gradient of the L2 penalty, taken over all weights (intercept included)."""
        return self.l2_coef * 2 * self.weights

    def elasticnet(self):
        """Sum of the L1 and L2 penalty gradients."""
        return self.l1() + self.l2()

    def _penalty_gradient(self):
        """Return the penalty gradient selected by ``reg``, or None if there is none."""
        if self.reg == 'l1':
            return self.l1()
        if self.reg == 'l2':
            return self.l2()
        if self.reg == 'elasticnet':
            return self.elasticnet()
        return None

    def _with_intercept(self, X):
        """Return ``X`` as an array, prepending the ones column if it is missing."""
        X = np.asarray(X)
        # A 2-D matrix with one column fewer than there are weights is a raw
        # feature matrix; one that already matches the weights is used as is.
        if X.ndim == 2 and X.shape[1] + 1 == self.weights.shape[0]:
            X = np.hstack((np.ones((X.shape[0], 1)), X))
        return X

    def _sigmoid(self, X_design):
        """Apply the logistic function to ``X_design @ weights``."""
        return 1 / (1 + np.exp(-np.dot(X_design, self.weights)))

    def _score(self, y, y_proba):
        """Evaluate the configured metric; returns None when no metric is set."""
        if self.metric is None:
            return None
        if self.metric == 'roc_auc':
            return roc_auc_score(y, y_proba)
        # Any other metric is looked up by name among the module globals and
        # receives hard 0/1 labels; an unknown name raises KeyError.
        metric_fn = globals()[self.metric]
        return metric_fn(y, np.where(y_proba >= 0.5, 1, 0))

    def fit(self, X, y, verbose=False):
        """Fit the weights by gradient descent on the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix without the intercept column.
        y : array-like of shape (n_samples,)
            Binary targets (0 or 1).
        verbose : bool
            Accepted for interface compatibility; currently has no effect.

        Notes
        -----
        Weights start at one. ``scores`` is cleared on every call and, when a
        metric is configured, receives one value per iteration computed from
        the probabilities produced before that iteration's weight update.
        """
        X = np.asarray(X)
        n_samples, m_features = X.shape

        self.y = np.asarray(y)
        self.weights = np.ones(m_features + 1)
        self.X = np.hstack((np.ones((n_samples, 1)), X))
        self.scores = []

        for i in range(self.n_iter):
            y_proba = self._sigmoid(self.X)
            gradient = np.dot(self.X.T, y_proba - self.y) / n_samples

            penalty = self._penalty_gradient()
            if penalty is not None:
                gradient = gradient + penalty

            # Resolve the step size first, so a callable schedule is actually
            # invoked instead of being multiplied by the gradient.
            if callable(self.learning_rate):
                step = self.learning_rate(i)
            else:
                step = self.learning_rate

            self.weights -= step * gradient

            score = self._score(self.y, y_proba)
            if score is not None:
                self.scores.append(score)

    def get_coef(self):
        """Return the feature coefficients, i.e. the weights without the intercept."""
        return self.weights[1:]

    def predict(self, X):
        """Return 0/1 labels by thresholding the predicted probabilities at 0.5."""
        return np.where(self.predict_proba(X) >= 0.5, 1, 0)

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each row of ``X``.

        ``X`` may be a raw feature matrix or one that already carries the
        leading column of ones.
        """
        return self._sigmoid(self._with_intercept(X))

    def get_best_score(self):
        """Return the configured metric on the training data with the final weights.

        Returns None when no metric was configured.
        """
        if self.metric is None:
            return None
        return self._score(self.y, self._sigmoid(self.X))

In [318]:
# 100 iterations at a constant step of 0.1, tracking ROC AUC per iteration.
# l1_coef stays at its default of 0, so the selected L1 penalty adds nothing.
lr = MyLogReg(n_iter=100, learning_rate=0.1, metric='roc_auc', reg='l1')

In [319]:
# Train on the synthetic dataset; fit() keeps the design matrix and labels on the model.
lr.fit(X, y)

In [316]:
# Evaluate the metric chosen at construction on the training data kept by fit().
lr.get_best_score()

0.5326141304565218