In [1]:
import random

import numpy as np
import pandas as pd

In [None]:
class MyLogReg():
    """Binary logistic regression trained with (mini-batch) gradient descent.

    Parameters
    ----------
    n_iter : int
        Number of gradient steps performed by ``fit``.
    learning_rate : float | int | callable
        Either a constant step size or a callable ``f(iteration) -> step``
        that is called with the 1-based iteration number.
    w : array-like | None
        Initial value stored in ``self.w``; ``fit`` overwrites it with a
        vector of ones (bias weight first).
    metric : str | None
        One of ``'accuracy'``, ``'precision'``, ``'recall'``, ``'f1'``,
        ``'roc_auc'``; the score on the training data is stored by ``fit``.
    reg : str | None
        ``'l1'``, ``'l2'``, ``'elasticnet'`` or ``None`` (no regularization).
    l1_coef, l2_coef : float
        Regularization strengths used by ``reg_``.
    sgd_sample : int | float | None
        Mini-batch size: an ``int`` is a row count, any other non-``None``
        value is treated as a fraction of the rows, ``None`` uses all rows.
    random_state : int
        Seed passed to ``random.seed`` at the start of ``fit``.
    """

    def __init__(self, n_iter=10, learning_rate=0.1, w=None, metric=None, reg=None, l1_coef=0, l2_coef=0, sgd_sample=None, random_state=42):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.w = w
        self.metric = metric
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef
        self.sgd_sample = sgd_sample
        self.random_state = random_state
        # Training-set score for ``metric``; filled in by ``fit``.
        self.best_score = None

    def __repr__(self):
        return f'MyLogReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}'

    @staticmethod
    def _with_bias(X):
        """Return a float ndarray copy of ``X`` with a leading column of ones.

        Working on a copy keeps the caller's DataFrame untouched.
        """
        values = np.asarray(X, dtype=float)
        return np.hstack([np.ones((values.shape[0], 1)), values])

    @staticmethod
    def _sigmoid(z):
        """Logistic function applied element-wise."""
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def _safe_div(numerator, denominator):
        """Divide, returning 0.0 when the denominator is zero."""
        return numerator / denominator if denominator else 0.0

    @staticmethod
    def _take_rows(data, rows):
        """Select ``rows`` positionally from a pandas object or an array."""
        if hasattr(data, 'iloc'):
            return data.iloc[rows]
        return np.asarray(data)[rows]

    def _log_loss(self, y_true, y_prob):
        """Mean binary cross-entropy plus the current regularization penalty."""
        eps = 1e-15  # keeps log() finite when a probability hits 0 or 1
        data_term = -(y_true * np.log(y_prob + eps)
                      + (1 - y_true) * np.log(1 - y_prob + eps)).mean()
        return data_term + self.reg_()[0]

    def _score(self, features, target):
        """Evaluate ``self.metric`` on an already bias-augmented matrix."""
        probs = self._sigmoid(features @ self.w)
        labels = (probs > 0.5).astype(int)
        tp = int(np.count_nonzero((target == 1) & (labels == 1)))
        tn = int(np.count_nonzero((target == 0) & (labels == 0)))
        fp = int(np.count_nonzero((target == 0) & (labels == 1)))
        fn = int(np.count_nonzero((target == 1) & (labels == 0)))
        return self.calculate_metric(tp, tn, fp, fn, y_true=target, y_score=probs)

    def fit(self, X, y, verbose=False):
        """Train the weights by gradient descent.

        Parameters
        ----------
        X : DataFrame or 2-D array-like
            Feature matrix without a bias column; it is not modified.
        y : Series or 1-D array-like
            Binary targets (0/1).
        verbose : int | bool
            When truthy, the full-data loss is printed every ``verbose``
            iterations (before that iteration's weight update).

        Notes
        -----
        Seeds the module-level ``random`` generator with ``random_state`` so
        mini-batch selection is reproducible.
        """
        features = self._with_bias(X)
        target = np.asarray(y, dtype=float).ravel()
        random.seed(self.random_state)
        self.w = np.ones(features.shape[1])
        step_is_schedule = callable(self.learning_rate)

        for i in range(int(self.n_iter)):
            if verbose and i % verbose == 0:
                # Loss is only needed for reporting, so compute it lazily.
                loss = self._log_loss(target, self._sigmoid(features @ self.w))
                print(f'{i}| loss:{loss}')

            X_batch, y_batch = self.select_data(features, target)
            residual = self._sigmoid(X_batch @ self.w) - y_batch
            grad = residual @ X_batch / len(y_batch) + self.reg_()[1]
            step = self.learning_rate(i + 1) if step_is_schedule else self.learning_rate
            self.w = self.w - step * grad

        if self.metric is not None:
            self.best_score = self._score(features, target)
        else:
            self.best_score = None

    def get_coef(self):
        """Return the feature weights, excluding the leading bias weight."""
        return self.w[1:]

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for every row of ``X``.

        ``X`` is not modified.
        """
        return self._sigmoid(self._with_bias(X) @ self.w)

    def predict(self, X):
        """Return 0/1 integer labels using a 0.5 probability threshold.

        ``X`` is not modified.
        """
        return (self.predict_proba(X) > 0.5).astype(int)

    def get_best_score(self):
        """Return the ``metric`` value on the training data from the last ``fit``.

        Returns ``None`` when no metric was configured or ``fit`` has not run.
        """
        return self.best_score

    def calculate_metric(self, TP, TN, FP, FN, beta=1, y_true=None, y_score=None):
        """Compute ``self.metric`` from confusion-matrix counts.

        Parameters
        ----------
        TP, TN, FP, FN : int
            Confusion-matrix counts.
        beta : float
            Weight of recall relative to precision in the F-score.
        y_true, y_score : array-like, optional
            True labels and predicted probabilities; required only for
            ``'roc_auc'``, which cannot be derived from the counts alone.

        Returns
        -------
        float | None
            The metric value; ratios with a zero denominator yield 0.0, an
            undefined ROC AUC (one class missing) yields NaN, and an
            unrecognized metric name yields ``None``.

        Raises
        ------
        ValueError
            If ``metric`` is ``'roc_auc'`` and ``y_true``/``y_score`` are missing.
        """
        if self.metric == 'accuracy':
            return self._safe_div(TP + TN, TP + TN + FP + FN)
        if self.metric == 'precision':
            return self._safe_div(TP, TP + FP)
        if self.metric == 'recall':
            return self._safe_div(TP, TP + FN)
        if self.metric == 'f1':
            precision = self._safe_div(TP, TP + FP)
            recall = self._safe_div(TP, TP + FN)
            beta_sq = beta ** 2
            return self._safe_div((1 + beta_sq) * precision * recall,
                                  beta_sq * precision + recall)
        if self.metric == 'roc_auc':
            if y_true is None or y_score is None:
                raise ValueError("roc_auc requires y_true and y_score")
            labels = np.asarray(y_true).ravel()
            scores = np.asarray(y_score, dtype=float).ravel()
            pos_scores = np.sort(scores[labels == 1])
            neg_scores = scores[labels == 0]
            if pos_scores.size == 0 or neg_scores.size == 0:
                return float('nan')
            # For each negative: positives ranked strictly above count 1,
            # positives tied with it count 0.5.
            below_or_equal = np.searchsorted(pos_scores, neg_scores, side='right')
            strictly_below = np.searchsorted(pos_scores, neg_scores, side='left')
            higher = pos_scores.size - below_or_equal
            ties = below_or_equal - strictly_below
            total = higher.sum() + 0.5 * ties.sum()
            return float(total / (pos_scores.size * neg_scores.size))
        return None

    def reg_(self):
        """Return ``(penalty, gradient)`` of the configured regularizer.

        Both are 0 when ``reg`` is not one of 'l1', 'l2', 'elasticnet'.
        The penalty is applied to every weight in ``self.w``, bias included.
        """
        reg_loss = 0
        reg_grad = 0
        if self.reg == 'l1':
            reg_loss = self.l1_coef * np.sum(np.abs(self.w))
            reg_grad = self.l1_coef * np.sign(self.w)
        elif self.reg == 'l2':
            reg_loss = self.l2_coef * np.sum(np.square(self.w))
            reg_grad = 2 * self.l2_coef * self.w
        elif self.reg == 'elasticnet':
            reg_loss = self.l1_coef * np.sum(np.abs(self.w)) + self.l2_coef * np.sum(np.square(self.w))
            reg_grad = self.l1_coef * np.sign(self.w) + 2 * self.l2_coef * self.w
        return reg_loss, reg_grad

    def select_data(self, X, y):
        """Return the rows used for one gradient step.

        With ``sgd_sample`` unset the inputs are returned unchanged. Otherwise
        a random subset of rows (without replacement) is drawn: an ``int``
        ``sgd_sample`` is the row count, any other value is a fraction of the
        rows. The batch size is clamped to ``[1, n_rows]``.
        """
        if self.sgd_sample is None:
            return X, y
        n_rows = X.shape[0]
        if isinstance(self.sgd_sample, int):
            batch_size = self.sgd_sample
        else:
            batch_size = int(self.sgd_sample * n_rows)
        batch_size = max(1, min(batch_size, n_rows))
        rows = random.sample(range(n_rows), batch_size)
        return self._take_rows(X, rows), self._take_rows(y, rows)