In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_regression
import random

In [2]:
# Synthetic regression data: 1000 rows, 14 features of which 10 are informative.
X, y = make_regression(
    n_samples=1000,
    n_features=14,
    n_informative=10,
    noise=15,
    random_state=42,
)
# Wrap in pandas containers; feature columns are named col_0 ... col_13.
X = pd.DataFrame(X).add_prefix('col_')
y = pd.Series(y)

In [3]:
def mse(y, y_pred):
    """Return the mean squared error between targets ``y`` and predictions ``y_pred``."""
    residuals = y - y_pred
    squared = residuals ** 2
    return np.mean(squared)

In [4]:
def mae(y, y_pred):
    """Return the mean absolute error between targets ``y`` and predictions ``y_pred``."""
    absolute_errors = np.abs(y - y_pred)
    return np.mean(absolute_errors)

In [5]:
def rmse(y, y_pred):
    """Return the root mean squared error, i.e. the square root of ``mse(y, y_pred)``."""
    mean_squared = mse(y, y_pred)
    return np.sqrt(mean_squared)

In [6]:
def r2(y, y_pred):
    """Return the coefficient of determination of ``y_pred`` with respect to ``y``.

    Computed as one minus the ratio of the residual sum of squares to the
    total sum of squares around the mean of ``y``.
    """
    residual_ss = np.sum((y - y_pred) ** 2)
    total_ss = np.sum((y - np.mean(y)) ** 2)
    return 1 - residual_ss / total_ss

In [7]:
def mape(y, y_pred):
    """Return the mean absolute percentage error (in percent) of ``y_pred`` against ``y``."""
    relative_errors = (y - y_pred) / y
    return 100 * np.mean(abs(relative_errors))

In [103]:
class MyLineReg:
    """Linear regression trained by (mini-batch) gradient descent on the MSE loss.

    Parameters
    ----------
    n_iter : int
        Number of gradient steps performed by ``fit``.
    learning_rate : float or callable
        Step size. A callable is invoked with the 0-based iteration index and
        must return the step size for that iteration.
    metric : str, callable or None
        Metric recorded in ``scores`` once per iteration. A string is looked up
        by name in this module's globals (e.g. ``'mse'``); a callable is used
        directly as ``metric(y, y_pred)``.
    reg : str or None
        ``'l1'``, ``'l2'`` or ``'elasticnet'`` adds the matching penalty
        gradient; any other value applies no penalty.
    l1_coef, l2_coef : float
        Penalty strengths used by ``l1`` and ``l2``.
    random_state : int
        Seed passed to ``random.seed`` at the start of ``fit``.
    sgd_sample : int, float or None
        Mini-batch size. An int is a row count, a float is a fraction of the
        training rows, ``None`` (default) uses every row at each step.
    """

    def __init__(self, n_iter, learning_rate, metric=None, reg=None, l1_coef=0, l2_coef=0,
                 random_state=42, sgd_sample=None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = None
        self.metric = metric
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef
        # Previously read from an undefined name, which raised NameError on a
        # fresh kernel; it is now a regular keyword argument.
        self.sgd_sample = sgd_sample
        self.random_state = random_state

        self.scores = []

    def __str__(self):
        return f"MyLineReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}"

    def l1(self):
        """Gradient of the L1 penalty for the current weights."""
        return self.l1_coef * np.sign(self.weights)

    def l2(self):
        """Gradient of the L2 penalty for the current weights."""
        return self.l2_coef * 2 * self.weights

    def elasticnet(self):
        """Sum of the L1 and L2 penalty gradients."""
        return self.l1() + self.l2()

    def get_idx(self):
        """Draw the row indices of one mini-batch from the training matrix.

        Returns every row index when ``sgd_sample`` is ``None``. A float
        ``sgd_sample`` is converted to ``int(n_rows * sgd_sample)`` rows,
        with a minimum of one.
        """
        n_rows = self.X.shape[0]
        if self.sgd_sample is None:
            return list(range(n_rows))
        if isinstance(self.sgd_sample, (float, np.floating)):
            batch_size = max(1, int(n_rows * self.sgd_sample))
        else:
            batch_size = self.sgd_sample
        return random.sample(range(n_rows), batch_size)

    def _metric_function(self):
        """Return the configured metric as a callable, or ``None`` if unset."""
        if self.metric is None:
            return None
        if callable(self.metric):
            return self.metric
        # String metrics keep the original by-name lookup (KeyError if unknown).
        return globals()[self.metric]

    def _penalty_gradient(self):
        """Return the gradient of the selected penalty (0 when none applies)."""
        if self.reg == 'l1':
            return self.l1()
        if self.reg == 'l2':
            return self.l2()
        if self.reg == 'elasticnet':
            return self.elasticnet()
        return 0

    def _check_fitted(self):
        """Raise RuntimeError when the model has not been fitted yet."""
        if self.weights is None:
            raise RuntimeError("MyLineReg is not fitted yet; call fit(X, y) first")

    def fit(self, X, y, verbose=False):
        """Fit the weights by gradient descent, starting from all-ones weights.

        Parameters
        ----------
        X : array-like of shape (n_samples, m_features)
        y : array-like of shape (n_samples,)
        verbose : bool or int
            When truthy, print the MSE (and the metric, if configured) every
            ``int(verbose)`` iterations.
        """
        random.seed(self.random_state)

        features = np.asarray(X, dtype=float)
        targets = np.asarray(y, dtype=float)
        n_samples, m_features = features.shape

        self.y = y
        # Leading column of ones so that weights[0] acts as the intercept.
        self.X = np.hstack((np.ones((n_samples, 1)), features))
        self.weights = np.ones(m_features + 1)
        # Start from a clean history so refitting does not accumulate scores.
        self.scores = []

        metric_fn = self._metric_function()
        if isinstance(self.metric, str):
            metric_name = self.metric
        else:
            metric_name = getattr(self.metric, '__name__', 'metric')
        log_every = int(verbose) if verbose else 0

        for i in range(self.n_iter):
            y_pred = np.dot(self.X, self.weights)

            # The score reflects the weights *before* this iteration's update.
            score = None
            if metric_fn is not None:
                score = metric_fn(y, y_pred)
                self.scores.append(score)

            if log_every and i % log_every == 0:
                message = f"{i} | loss: {np.mean((y_pred - targets) ** 2)}"
                if score is not None:
                    message += f" | {metric_name}: {score}"
                print(message)

            if self.sgd_sample is None:
                batch_X, residual = self.X, y_pred - targets
            else:
                batch = self.get_idx()
                batch_X, residual = self.X[batch], y_pred[batch] - targets[batch]

            gradient = (2 / batch_X.shape[0]) * np.dot(batch_X.T, residual)
            gradient = gradient + self._penalty_gradient()

            if callable(self.learning_rate):
                step = self.learning_rate(i)
            else:
                step = self.learning_rate

            self.weights -= step * gradient

    def predict(self, X):
        """Return predictions for ``X``.

        ``X`` normally has the same feature columns that were passed to
        ``fit``; the intercept column is added here. An input whose last
        dimension already equals ``len(weights)`` is treated as carrying its
        own leading column of ones and is used unchanged.
        """
        self._check_fitted()
        features = np.asarray(X, dtype=float)
        if features.shape[-1] == self.weights.shape[0]:
            design = features
        else:
            design = np.hstack((np.ones((features.shape[0], 1)), features))
        return np.dot(design, self.weights)

    def get_coef(self):
        """Return the feature weights, excluding the intercept."""
        return self.weights[1:]

    def get_best_score(self):
        """Return the configured metric evaluated on the training data with the final weights.

        Raises
        ------
        ValueError
            If no metric was configured.
        RuntimeError
            If the model has not been fitted.
        """
        metric_fn = self._metric_function()
        if metric_fn is None:
            raise ValueError("get_best_score() requires a metric, but metric is None")
        self._check_fitted()
        y_pred = np.dot(self.X, self.weights)
        return metric_fn(self.y, y_pred)

In [104]:
# Sanity check: dimensions of the feature matrix as (rows, columns).
X.shape

(1000, 14)

In [105]:
def learning_rate(iter):
    """Step-size schedule: 0.5 at iteration 0, multiplied by 0.85 on every later iteration.

    The parameter keeps its original name ``iter`` so keyword calls still work.
    """
    return 0.5 * (0.85 ** iter)

In [106]:
# 50 gradient steps, with the step size supplied by the learning_rate schedule.
lr = MyLineReg(n_iter=50, learning_rate=learning_rate)

In [107]:
# Train the model on the full synthetic dataset.
lr.fit(X=X, y=y)

In [108]:
# Average of the fitted feature weights (the intercept is excluded by get_coef).
np.mean(lr.get_coef())

30.550000309795404