In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_regression

In [2]:
# Synthetic regression problem: 1000 samples, 14 features (10 informative),
# Gaussian noise with std 15, fixed seed for reproducibility.
X, y = make_regression(
    n_samples=1000,
    n_features=14,
    n_informative=10,
    noise=15,
    random_state=42,
)
# Wrap the raw arrays in pandas containers; the default integer column labels
# 0..13 become the string labels col_0..col_13.
X = pd.DataFrame(X).add_prefix('col_')
y = pd.Series(y)

In [3]:
# Show the 1000 x 14 feature DataFrame built in the previous cell.
X

Unnamed: 0,col_0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10,col_11,col_12,col_13
0,1.191261,-0.308960,0.675137,-1.460501,0.053059,-1.886129,2.710794,-1.716033,0.865290,0.138078,-0.063745,-2.104583,-0.476876,1.677116
1,1.071476,-1.424766,-1.109750,-0.457677,0.399997,1.587401,-1.547570,0.323247,0.165859,-0.302097,0.203944,-0.212452,0.836991,0.368498
2,-0.045929,1.868603,-0.016568,-0.484258,1.089905,-1.147160,0.590744,0.683325,-0.571184,-0.802199,-0.220114,0.034808,0.043829,0.955803
3,-1.487154,2.220322,0.718332,1.682888,-0.420986,-0.054746,1.900832,-0.101198,0.090042,-0.202924,0.340865,0.606237,-0.037008,-0.841048
4,0.344054,0.657763,0.348342,-0.417430,-0.589112,1.057814,-0.487705,-0.897830,-0.935596,-1.186993,1.074333,-0.069532,-0.177918,-0.912811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,-1.422254,0.576557,-0.646573,-0.756351,-0.127918,1.119575,1.687142,-1.081548,-0.955540,3.078881,0.881640,0.311250,-1.606446,0.203464
996,0.190500,-0.132634,0.709452,0.331980,-2.172670,-0.120381,0.513106,-0.435486,0.847422,1.107081,-0.259547,-0.974529,-0.535328,-0.090533
997,-0.326648,-0.062894,2.002427,-0.650657,1.592964,-0.395284,0.360226,-0.307571,1.465211,0.658143,0.541321,-0.447878,-0.891543,0.069704
998,-1.574342,-1.610263,0.407690,1.149487,1.466442,-0.338669,-2.059160,0.581000,-1.409216,-1.082018,0.798501,0.753190,-1.532598,0.269306


In [4]:
# Show the regression target as a pandas Series.
y

0      -48.005272
1      145.801614
2      -49.114775
3       24.902238
4     -152.611643
          ...    
995     57.484793
996     19.204280
997     95.095216
998   -107.973750
999    200.616716
Length: 1000, dtype: float64

In [5]:
class MyLineReg():
    """Linear regression fitted with full-batch gradient descent on the MSE loss.

    Parameters
    ----------
    n_iter : int
        Number of gradient-descent steps performed by ``fit``.
    learning_rate : float
        Step size applied to the gradient on every iteration.
    weights : array-like or None
        Stored as the initial value of ``self.weights``. ``fit`` always
        re-initialises the weights to ones, so this only matters when
        ``predict`` is called without a prior ``fit``.
    metric : {'mae', 'mse', 'rmse', 'mape', 'r2'} or None
        Optional quality metric reported in the training log and returned by
        ``get_best_score``. ``None`` disables metric tracking.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Intercept followed by one weight per feature (set by ``fit``).
    metric_val : float or None
        Value of ``metric`` for the weights produced by the last ``fit`` call.
    """

    def __init__(self, n_iter=100, learning_rate=0.1, weights=None, metric=None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.metric = metric
        # Defined up front so get_best_score() works before fit().
        self.metric_val = None


    def __str__(self) -> str:
        # Compare with None instead of relying on truthiness: a NumPy weights
        # array has no unambiguous truth value and legitimate zeros must show.
        params = [f'{key}={value}' for key, value in self.__dict__.items() if value is not None]
        return 'MyLineReg class: ' + ', '.join(params)


    @staticmethod
    def _design_matrix(X):
        """Return a float array with a leading column of ones followed by the features of X.

        The caller's object is never modified.
        """
        if isinstance(X, pd.DataFrame):
            # Frames touched by earlier versions of this class may still carry
            # the helper column; it must not be treated as a real feature.
            X = X.drop(columns='ones_col', errors='ignore')
        features = np.asarray(X, dtype=float)
        if features.ndim == 1:
            features = features.reshape(-1, 1)
        return np.column_stack((np.ones(len(features)), features))


    def _log(self, label, X, y):
        """Print one training-log line for the current weights."""
        loss = np.mean((y - np.dot(X, self.weights)) ** 2)
        if self.metric:
            print(f'{label} | loss: {loss} | metric: {self.metric} = {self.metric_func(X=X, y=y)}')
        else:
            print(f'{label} | loss: {loss}')


    def fit(self, X, y, verbose=False):
        """Fit the weights by gradient descent.

        Parameters
        ----------
        X : pandas.DataFrame or array-like, shape (n_samples, n_features)
            Feature matrix. It is not modified.
        y : pandas.Series or array-like, shape (n_samples,)
            Target values.
        verbose : int or bool
            When truthy, print the starting loss and then the loss on every
            ``verbose``-th iteration (iteration 0 is skipped).

        Raises
        ------
        ValueError
            If ``y`` is empty, its length differs from the number of rows in
            ``X``, or ``metric`` is not a supported name.
        """
        design = self._design_matrix(X)
        target = np.asarray(y, dtype=float)
        n_samples = len(target)
        if n_samples == 0:
            raise ValueError('Cannot fit on an empty dataset.')
        if design.shape[0] != n_samples:
            raise ValueError(
                f'X has {design.shape[0]} rows but y has {n_samples} values.'
            )

        # Training always starts from a vector of ones (intercept included).
        self.weights = np.ones(design.shape[1])

        if verbose:
            self._log('start', design, target)

        for step in range(self.n_iter):
            residual = target - np.dot(design, self.weights)
            # Gradient of mean((y - Xw)^2) with respect to w.
            gradient = -2 / n_samples * np.dot(residual, design)
            self.weights = self.weights - self.learning_rate * gradient
            if verbose and step % verbose == 0 and step != 0:
                self._log(step, design, target)

        # Only the value for the final weights is kept, so compute it once.
        self.metric_val = self.metric_func(X=design, y=target)


    def get_coef(self):
        """Return the feature weights, i.e. all weights except the intercept."""
        return self.weights[1:]


    def metric_func(self, X, y):
        """Evaluate the configured metric for the current weights.

        Parameters
        ----------
        X : array-like
            Design matrix whose columns line up with ``self.weights``
            (including the column of ones for the intercept).
        y : array-like
            True target values.

        Returns
        -------
        float or None
            The metric value, or ``None`` when no metric is configured.
            'mape' is expressed in percent and is not finite when ``y``
            contains zeros.

        Raises
        ------
        ValueError
            If ``self.metric`` is not one of the supported names.
        """
        if not self.metric:
            return None

        y = np.asarray(y, dtype=float)
        residual = y - np.dot(X, self.weights)

        if self.metric == 'mae':
            return np.mean(np.absolute(residual))
        if self.metric == 'mse':
            return np.mean(residual ** 2)
        if self.metric == 'rmse':
            return np.mean(residual ** 2) ** (1 / 2)
        if self.metric == 'mape':
            return 100 * np.mean(np.absolute(residual / y))
        if self.metric == 'r2':
            return 1 - np.sum(residual ** 2) / np.sum((y - np.mean(y)) ** 2)
        raise ValueError(
            f"Unknown metric {self.metric!r}; expected one of 'mae', 'mse', 'rmse', 'mape', 'r2'."
        )


    def get_best_score(self):
        """Return the metric value recorded by the last ``fit`` call (``None`` if unavailable)."""
        return self.metric_val


    def predict(self, X):
        """Predict targets for X (same feature layout as used in ``fit``); X is not modified.

        Raises
        ------
        RuntimeError
            If the model has no weights yet.
        """
        if self.weights is None:
            raise RuntimeError('Model has no weights; call fit() before predict().')
        return np.dot(self._design_matrix(X), self.weights)

In [6]:
# Configure 500 gradient-descent iterations with MAPE as the tracked metric;
# print() goes through MyLineReg.__str__.
lr = MyLineReg(n_iter=500, metric='mape')
print(lr)

MyLineReg class: n_iter=500, learning_rate=0.1, metric=mape


In [7]:
# Train on the full dataset, logging loss and metric every 100 iterations.
lr.fit(X, y, verbose=100)

start | loss: 20621.08963877847 | metric: mape = 101.14798658984475
100 | loss: 223.6649649270134 | metric: mape = 66.03621928515888
200 | loss: 223.66496492701182 | metric: mape = 66.03621913397846
300 | loss: 223.66496492701177 | metric: mape = 66.03621913397845
400 | loss: 223.66496492701177 | metric: mape = 66.03621913397845


In [8]:
# Value of the configured metric (MAPE here) that fit() recorded for the final weights.
lr.get_best_score()

66.03621913397845

In [9]:
# Learned weights without the leading intercept weight.
lr.get_coef()

array([42.91964789, 16.59743287,  0.49967404, 65.3746772 , 47.59325115,
       61.77751229,  0.24742549, -0.10563431, 60.01750684, 53.89234101,
       -0.47496737, 14.42781303, 17.68887767, 47.31351427])