In [1]:
import random

import numpy as np
import pandas as pd

In [None]:
class MyLineReg():
    """Linear regression trained with batch or mini-batch gradient descent.

    Parameters
    ----------
    n_iter : number of gradient steps performed by ``fit``.
    learning_rate : a number, or a callable that receives the 1-based
        iteration number and returns the step size for that iteration.
    w : initial value of the ``w`` attribute. ``fit`` always replaces it
        with a fresh vector of ones (bias weight first).
    metric : ``'mse'``, ``'mae'``, ``'rmse'``, ``'mape'``, ``'r2'`` or None.
    reg : ``'l1'``, ``'l2'``, ``'elasticnet'`` or None (no penalty).
    l1_coef, l2_coef : penalty strengths used by ``reg_``.
    sgd_sample : None (use every row on each step), an int (rows per step)
        or a float (fraction of the rows per step).
    random_state : seed given to ``random.seed`` at the start of ``fit``.
    """

    def __init__(self, n_iter=100, learning_rate=0.1, w=None, metric=None, reg=None, l1_coef=0, l2_coef=0, sgd_sample=None, random_state=42):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.w = w
        self.metric = metric
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef
        self.sgd_sample = sgd_sample
        self.random_state = random_state
        # Metric value on the training data, filled in by fit().
        self._best_score = None

    def __repr__(self):
        return f'MyLineReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}, w={self.w}, metric = {self.metric}, reg = {self.reg}, l1_coef={self.l1_coef}, l2_coef={self.l2_coef}, sgd_sample = {self.sgd_sample}, random_state={self.random_state}'

    @staticmethod
    def _design_matrix(X):
        """Return ``X`` as a float array with a leading column of ones.

        Works on a copy, so the caller's object is never modified.
        """
        values = np.asarray(X, dtype=float)
        return np.column_stack([np.ones(values.shape[0]), values])

    def _sample_indices(self, n_rows):
        """Return row positions for one gradient step, or None for all rows."""
        if self.sgd_sample is None:
            return None
        if isinstance(self.sgd_sample, (int, np.integer)):
            size = int(self.sgd_sample)
        else:
            # A non-integer value is treated as a fraction of the rows.
            size = int(self.sgd_sample * n_rows)
        return random.sample(range(n_rows), size)

    def fit(self, X, y, verbose=False):
        """Fit the weights by gradient descent on the (regularized) MSE.

        Parameters
        ----------
        X : 2-D table of features (e.g. a DataFrame); it is not modified.
        y : target values, one per row of ``X``.
        verbose : falsy to stay silent, otherwise the loss on the full data
            is printed every ``verbose`` iterations.
        """
        features = self._design_matrix(X)
        target = np.asarray(y, dtype=float).ravel()
        n_rows = features.shape[0]

        random.seed(self.random_state)
        self.w = np.ones(features.shape[1])

        for i in range(int(self.n_iter)):
            # Penalty terms are evaluated once, at the pre-update weights.
            reg_loss, reg_grad = self.reg_()

            if verbose and i % verbose == 0:
                # The loss is only needed for logging, so compute it lazily.
                MSE = np.square(target - np.dot(features, self.w)).mean() + reg_loss
                print(f'{i}| loss:{MSE}')

            rows = self._sample_indices(n_rows)
            if rows is None:
                X_batch, y_batch = features, target
            else:
                X_batch, y_batch = features[rows], target[rows]

            residual = np.dot(X_batch, self.w) - y_batch
            grad_MSE = (2 / len(y_batch)) * np.dot(residual, X_batch) + reg_grad

            # A callable learning rate is a schedule over the 1-based step.
            if callable(self.learning_rate):
                step = self.learning_rate(i + 1)
            else:
                step = self.learning_rate
            self.w = self.w - step * grad_MSE

        # Remember the final training metric for get_best_score().
        self._best_score = self.calculate_metric(target, np.dot(features, self.w))

    def get_coef(self):
        """Return the fitted feature weights, without the bias term."""
        return self.w[1:]

    def predict(self, X):
        """Return predictions for ``X`` without modifying it."""
        return np.dot(self._design_matrix(X), self.w)

    def calculate_metric(self, y, y_):
        """Return ``self.metric`` evaluated on targets ``y`` and predictions ``y_``.

        Returns None when ``self.metric`` is not one of the supported names.
        """
        actual = np.asarray(y, dtype=float)
        predicted = np.asarray(y_, dtype=float)
        error = actual - predicted

        if self.metric == 'mse':
            return np.square(error).mean()
        if self.metric == 'mae':
            return np.abs(error).mean()
        if self.metric == 'rmse':
            return np.sqrt(np.square(error).mean())
        if self.metric == 'mape':
            return 100 * np.abs(error / actual).mean()
        if self.metric == 'r2':
            return 1 - np.sum(np.square(error)) / np.sum(np.square(actual - actual.mean()))
        return None

    def get_best_score(self):
        """Return the training metric stored by the last ``fit`` call.

        The value is None before ``fit`` has run or when no supported
        ``metric`` is configured.
        """
        return self._best_score

    def reg_(self):
        """Return ``(penalty_loss, penalty_gradient)`` at the current weights.

        Both are 0 when ``self.reg`` is not 'l1', 'l2' or 'elasticnet'.
        """
        reg_loss = 0
        reg_grad = 0
        if self.reg == 'l1':
            reg_loss = self.l1_coef * np.sum(np.abs(self.w))
            reg_grad = self.l1_coef * np.sign(self.w)
        elif self.reg == 'l2':
            reg_loss = self.l2_coef * np.sum(np.square(self.w))
            reg_grad = 2 * self.l2_coef * self.w
        elif self.reg == 'elasticnet':
            reg_loss = self.l1_coef * np.sum(np.abs(self.w)) + self.l2_coef * np.sum(np.square(self.w))
            reg_grad = self.l1_coef * np.sign(self.w) + 2 * self.l2_coef * self.w
        return reg_loss, reg_grad

    def select_data(self, X, y):
        """Return the rows of ``X`` and ``y`` to use for one gradient step.

        ``X`` and ``y`` must support positional ``.iloc`` indexing when
        ``sgd_sample`` is set; with ``sgd_sample=None`` they are returned
        unchanged.
        """
        rows = self._sample_indices(X.shape[0])
        if rows is None:
            return X, y
        return X.iloc[rows, :], y.iloc[rows]