<a href="https://colab.research.google.com/github/Dicere/Machine_Learning_algorithms_from_scratch/blob/main/Linear_function.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd

In [4]:
from sklearn.datasets import load_diabetes

# Load the dataset with as_frame=True, then pull the feature table and the
# regression target out of the returned container by key.
data = load_diabetes(as_frame=True)
X = data['data']
y = data['target']

In [47]:
class MyLineReg():
    """Linear regression fitted with full-batch gradient descent on the MSE loss.

    An intercept column of ones is prepended to the features, so
    ``weights[0]`` is the bias and ``weights[1:]`` are the feature
    coefficients.

    Parameters
    ----------
    n_iter : int
        Number of gradient-descent steps.
    learning_rate : float or callable
        Constant step size, or a callable ``f(i)`` returning the step size
        for the 1-based iteration number ``i``.
    weights : array-like or None
        Stored on the instance as given; ``fit`` always re-initialises the
        weights to ones before training.
    metric : {"mae", "mse", "rmse", "r2", "mape"} or None
        Metric reported in the training log and stored as the score.
        Any other value yields a score of ``None``.
    verbose : int
        Log every ``verbose``-th iteration; a falsy value disables logging.
    score : float
        Initial value of the stored score; overwritten by ``fit``.
    reg : {"l1", "l2", "elasticnet"} or None
        Regularisation term added to the gradient.
    l1_coef, l2_coef : float
        Regularisation strengths.
    """

    def __init__(self, n_iter=100, learning_rate=0.1, weights=None, metric=None, verbose=1, score=0, reg=None, l1_coef=0, l2_coef=0):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.verbose = verbose
        self.metric = metric
        self.score = score
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef

    def __repr__(self):
        """Return ``MyLineReg class: `` followed by every instance attribute."""
        attributes = ', '.join(f"{key}={value}" for key, value in vars(self).items())
        return f"MyLineReg class: {attributes}"

    # str() and repr() intentionally produce the same text.
    __str__ = __repr__

    @staticmethod
    def _design_matrix(X):
        """Return the features as a float array with a leading column of ones."""
        features = np.asarray(X, dtype=float)
        # Built with numpy instead of DataFrame.insert so that an input which
        # already has a column called "One" does not raise.
        return np.hstack([np.ones((features.shape[0], 1)), features])

    def _metric(self, y, y_pred):
        """Evaluate ``self.metric`` on the targets/predictions; None if unknown."""
        name = self.metric
        if name == "mae":
            return np.mean(np.abs(y - y_pred))
        if name == "mse":
            return np.mean((y - y_pred) ** 2)
        if name == "rmse":
            return np.sqrt(np.mean((y - y_pred) ** 2))
        if name == "r2":
            ss_total = np.sum((y - np.mean(y)) ** 2)
            ss_residual = np.sum((y - y_pred) ** 2)
            return 1 - (ss_residual / ss_total)
        if name == "mape":
            return 100 * np.mean(np.abs((y - y_pred) / y))
        return None

    def _regularization_grad(self):
        """Return the gradient contribution of the configured penalty (0 if none)."""
        if self.reg == "l1":
            return self.l1_coef * np.sign(self.weights)
        if self.reg == "l2":
            return self.l2_coef * 2 * self.weights
        if self.reg == "elasticnet":
            return self.l1_coef * np.sign(self.weights) + self.l2_coef * 2 * self.weights
        return 0

    def fit(self, X_fit: pd.DataFrame, y_fit: pd.Series, n_iter=None, lr=None, verbose=None):
        """Train the model; the inputs are not modified.

        Parameters
        ----------
        X_fit : pd.DataFrame
            Feature table, one row per sample.
        y_fit : pd.Series
            Target values, one per row of ``X_fit``.
        n_iter, lr, verbose : optional
            When not None, replace ``self.n_iter``, ``self.learning_rate``
            and ``self.verbose`` respectively before training.

        Raises
        ------
        ValueError
            If the number of targets differs from the number of rows.
        """
        if n_iter is not None:
            self.n_iter = n_iter
        if lr is not None:
            self.learning_rate = lr
        if verbose is not None:
            self.verbose = verbose

        # Converted once, outside the loop, rather than on every iteration.
        design = self._design_matrix(X_fit)
        y = np.asarray(y_fit, dtype=float).ravel()
        n = design.shape[0]
        if y.shape[0] != n:
            raise ValueError(
                f"X_fit has {n} rows but y_fit has {y.shape[0]} values"
            )
        self.weights = np.ones(design.shape[1])

        for i in range(1, self.n_iter + 1):
            if callable(self.learning_rate):
                step = self.learning_rate(i)
            else:
                step = self.learning_rate

            y_pred = design @ self.weights
            error = y_pred - y
            grad = (2 / n) * (error @ design) + self._regularization_grad()

            # The log line reports the loss/metric of the weights this
            # iteration started with, i.e. before the step below is applied.
            if self.verbose and i % self.verbose == 0:
                print(f'{i}|{(error ** 2).sum()}|{self.metric}:{self._metric(y, y_pred)}')

            self.weights -= step * grad

        # Score the weights that training actually ended with, not the
        # predictions made before the last update.
        self.score = self._metric(y, design @ self.weights)

    def get_coef(self):
        """Return the feature coefficients (all weights except the intercept)."""
        return self.weights[1:]

    def predict(self, X_test: pd.DataFrame):
        """Return one prediction per row of ``X_test`` as a 1-D array.

        Raises
        ------
        ValueError
            If the model has no weights yet.
        """
        if self.weights is None:
            raise ValueError("MyLineReg has no weights; call fit() first")
        # The matrix-vector product is already one value per row; it must not
        # be reduced again along a second axis that does not exist.
        return self._design_matrix(X_test) @ self.weights

    def get_best_score(self):
        """Return the stored score as a float, or 0 when no score is stored."""
        if self.score is not None:
            return float(self.score)
        else:
            return 0


In [48]:
# Model that reports MSE; reg=None means no penalty term is added to the gradient.
instance = MyLineReg(reg=None, metric='mse')

In [49]:
def lam(iter):
    """Return the learning rate for iteration ``iter``: 0.5 * 0.85 ** iter.

    The rate decays geometrically, so later iterations take smaller steps.
    The parameter keeps its original name so keyword callers still work.
    """
    return 0.5 * (0.85 ** iter)

In [50]:
# Train for 100 iterations with the `lam` learning-rate schedule,
# printing a progress line every 10th iteration.
instance.fit(
    X,
    y,
    n_iter=100,
    lr=lam,
    verbose=10,
)

10|2540314.5483258236|mse:5747.31798263761
20|2523603.089247834|mse:5709.509251691932
30|2520349.113047279|mse:5702.147314586604
40|2519709.8369846754|mse:5700.700988653111
50|2519584.0319597614|mse:5700.416361899913
60|2519559.2661871887|mse:5700.360330740245
70|2519554.390518624|mse:5700.349299815891
80|2519553.430627309|mse:5700.347128116083
90|2519553.2416493953|mse:5700.346700564242
100|2519553.2044444857|mse:5700.346616390239


In [30]:
# Display the score stored on the model as a float (0 if the stored score is None).
instance.get_best_score()

5691.124931023203