<a href="https://colab.research.google.com/github/Dicere/Machine_Learning_algorithms_from_scratch/blob/main/Linear_function.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [51]:
import numpy as np
import pandas as pd
import random

In [4]:
from sklearn.datasets import load_diabetes

# Load the diabetes dataset with as_frame=True and split the returned
# bunch into the feature table and the regression target.
data = load_diabetes(as_frame=True)
X = data["data"]
y = data["target"]

In [91]:
class MyLineReg():
    """Linear regression trained with (mini-batch) gradient descent on MSE.

    Parameters
    ----------
    n_iter : number of gradient steps performed by ``fit``.
    learning_rate : a number, or a callable taking the 1-based iteration
        index and returning the step size for that iteration.
    weights : stored on the instance; ``fit`` replaces it with a vector of
        ones (intercept first) before training starts.
    metric : one of ``"mae"``, ``"mse"``, ``"rmse"``, ``"r2"``, ``"mape"``
        or ``None`` (no metric is tracked).
    verbose : log every ``verbose``-th iteration; a falsy value disables
        logging.
    score : initial value of the ``score`` attribute, overwritten by ``fit``.
    reg : ``"l1"``, ``"l2"``, ``"elasticnet"`` or anything else for no
        regularization.
    l1_coef, l2_coef : regularization strengths.
    sgd_sample : rows per gradient step. ``None`` uses every row, an int is
        an absolute count, a float is a fraction of the training rows.
    random_state : seed passed to ``random.seed`` at the start of ``fit``.
    """

    def __init__(self, n_iter=100, learning_rate=0.1, weights=None, metric=None, verbose=1, score=0, reg=None, l1_coef=0, l2_coef=0,sgd_sample=None,random_state=42):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.verbose = verbose
        self.metric = metric
        self.score = score
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef
        self.sgd_sample = sgd_sample
        self.random_state = random_state

    def __repr__(self):
        attributes = ', '.join(f"{key}={value}" for key, value in vars(self).items())
        return f"MyLineReg class: {attributes}"

    # str() and repr() produce the same text.
    __str__ = __repr__

    @staticmethod
    def _design_matrix(X):
        """Return X as a float array with a leading column of ones (intercept)."""
        values = np.asarray(X, dtype=float)
        return np.hstack([np.ones((values.shape[0], 1)), values])

    @staticmethod
    def _compute_metric(y, y_pred, name):
        """Evaluate the metric called ``name``; return None for unknown names."""
        residual = y - y_pred
        if name == "mae":
            return np.mean(np.abs(residual))
        if name == "mse":
            return np.mean(residual ** 2)
        if name == "rmse":
            return np.sqrt(np.mean(residual ** 2))
        if name == "r2":
            ss_total = np.sum((y - np.mean(y)) ** 2)
            ss_residual = np.sum(residual ** 2)
            return 1 - (ss_residual / ss_total)
        if name == "mape":
            return 100 * np.mean(np.abs(residual / y))
        return None

    def _regularization_grad(self):
        """Gradient of the penalty term w.r.t. the current weights (0 if none)."""
        if self.reg == "l1":
            return self.l1_coef * np.sign(self.weights)
        if self.reg == "l2":
            return self.l2_coef * 2 * self.weights
        if self.reg == "elasticnet":
            return self.l1_coef * np.sign(self.weights) + self.l2_coef * 2 * self.weights
        return 0

    def _batch_size(self, n_rows):
        """Translate ``sgd_sample`` into a row count for a dataset of n_rows rows."""
        if self.sgd_sample is None:
            return n_rows
        if isinstance(self.sgd_sample, float):
            # A tiny fraction must still select at least one row, otherwise
            # the gradient would be divided by zero.
            return max(1, int(n_rows * self.sgd_sample))
        return self.sgd_sample

    def fit(self, X_fit: pd.DataFrame, y_fit: pd.Series, n_iter=None, lr=None, verbose=None):
        """Train the model on ``X_fit`` / ``y_fit``.

        ``n_iter``, ``lr`` and ``verbose``, when given, overwrite the
        corresponding instance attributes before training. Weights are reset
        to ones on every call. ``self.score`` ends up holding the metric from
        the last iteration (None when no known metric is configured).
        """
        if n_iter is not None:
            self.n_iter = n_iter
        if lr is not None:
            self.learning_rate = lr
        if verbose is not None:
            self.verbose = verbose

        random.seed(self.random_state)

        # Convert once; the loop below works on plain arrays only.
        X = self._design_matrix(X_fit)
        y = np.asarray(y_fit)
        n_rows = X.shape[0]
        self.weights = np.ones(X.shape[1])

        # Resolved per call and kept local so the sgd_sample hyper-parameter
        # is not overwritten and a later fit on other data re-resolves it.
        batch_size = self._batch_size(n_rows)

        for i in range(1, self.n_iter + 1):
            batch_idx = random.sample(range(n_rows), batch_size)
            X_batch = X[batch_idx]
            y_batch = y[batch_idx]

            if callable(self.learning_rate):
                step = self.learning_rate(i)
            else:
                step = self.learning_rate

            # Full-data prediction is taken BEFORE this iteration's update, so
            # the logged loss and the stored score describe the weights as
            # they were at the start of the iteration.
            y_pred_full = X @ self.weights

            batch_error = X_batch @ self.weights - y_batch
            grad = (2 / batch_size) * (batch_error @ X_batch) + self._regularization_grad()
            self.weights -= step * grad

            self.score = self._compute_metric(y, y_pred_full, self.metric)

            if self.verbose and i % self.verbose == 0:
                print(f'{i}|{np.mean((y - y_pred_full) ** 2)}|{self.metric}:{self.score}')

    def get_coef(self):
        """Return the learned feature weights, without the intercept."""
        return self.weights[1:]

    def predict(self, X_test: pd.DataFrame):
        """Return one prediction per row of ``X_test`` as a 1-D array."""
        # The matrix-vector product is already the prediction vector; there
        # is no second axis to reduce over.
        return self._design_matrix(X_test) @ self.weights

    def get_best_score(self):
        """Return the last recorded metric as a float, or 0 if none was recorded."""
        if self.score is not None:
            return float(self.score)
        else:
            return 0


In [92]:
# Unregularized model that tracks RMSE as its metric.
instance = MyLineReg(metric="rmse", reg=None)

In [93]:
# 100 gradient steps with step size 0.01, logging every 10th iteration.
instance.fit(X, y, n_iter=100, lr=0.01, verbose=10)

10|21781.535493513606|rmse:147.58568864735363
20|16496.15491751777|rmse:128.43735795132883
30|12965.051224299325|rmse:113.86417884611176
40|10605.124985637844|rmse:102.98118753266465
50|9027.0943460392|rmse:95.01102223447131
60|7971.071606731548|rmse:89.28085800848662
70|7263.553639572946|rmse:85.22648437881821
80|6788.708344513858|rmse:82.39361834823045
90|6469.205259701399|rmse:80.43136987333611
100|6253.417923970879|rmse:79.07855539886195


In [90]:
# Metric value stored during the last training iteration (RMSE for this instance).
instance.get_best_score()

79.07855539886195