<a href="https://colab.research.google.com/github/OneSll/ML_Algorithms/blob/main/My_linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
import math
from sklearn.datasets import load_diabetes
from sklearn.datasets import make_regression

data = load_diabetes(as_frame=True)
X, y = data['data'], data['target']

In [85]:
class MyLineReg:

  def __init__(self, n_iter=100, learning_rate=0.1, metric=None):
    self.n_iter = n_iter
    self.learning_rate = learning_rate
    self.weights = None
    self.metric = metric
    self.best_score = None


  def __str__(self):
    oup_string = "MyLineReg class: "
    d = vars(self)
    for key, val in d.items():
      oup_string += str(key) + "=" + str(val) + ", "

    return oup_string

  def get_best_score(self):
      return self.best_score


  @staticmethod
  def _mse_loss(y, y_pred):
        return np.mean((y_pred.T - y) ** 2)


  def _get_metric(self, y, y_hat):
    if (self.metric == "mae"):
      return np.mean(np.fabs(y_hat.T - y))
    elif (self.metric == "mse"):
      return np.mean((y_hat.T - y) ** 2)
    elif (self.metric == "rmse"):
      return (np.mean((y_hat.T - y) ** 2))**(1/2)
    elif (self.metric == "mape"):
      return (np.mean(np.fabs((y_hat.T - y)/ y)) * 100)
    elif (self.metric == "r2"):
      return (1 - (((y_hat.T - y) ** 2).sum() /((y - y.mean()) ** 2).sum()).iloc[0])


  def fit(self, X, y, verbose=False):
    y = y.to_numpy().reshape(-1, 1)
    new_X = X.copy()
    new_X.insert(0, "x_0", [1 for i in range(len(X))]) # add additional column consists of all 1 for x_0
    self.weights = np.ones((1, X.shape[1] + 1)) #vector of weights inited by 1
    for i in range(self.n_iter):
      y_hat = self.weights @ new_X.T
      loss = self._mse_loss(y, y_hat)
      grad = (2 / len(y)) * np.dot(new_X.T, (y_hat.T - y))
      self.weights -= self.learning_rate * grad.T
      if (verbose and ((i % verbose) == 0) and self.metric is None):
        if (i == 0):
          print(f"start|loss: {loss}")
        else:
          print(f"{i}|loss:{loss}")
      elif (verbose and ((i % verbose) == 0) and self.metric is not(None)):
        if (i == 0):
          print(f"start|loss: {loss}|{self.metric}:{self._get_metric(y, y_hat)}")
        else:
          print(f"{i}|loss:{loss}|{self.metric}:{self._get_metric(y, y_hat)}")

    if self.metric is not(None):
      self.best_score = self._get_metric(y, y_hat)


  def get_coef(self):
    return np.mean(self.weights.reshape(-1,1)[1:])

  def predict(self, X):
    new_X = X.copy()
    new_X.insert(0, "x_0", [1 for i in range(len(X))]) # add additional column consists of all 1 for x_0
    y_hat = np.dot(new_X, self.weights.T)
    return y_hat.sum()

In [73]:
test = MyLineReg(50, 0.1, metric = "mae")

In [None]:
test.fit(X, y, verbose = 10)

In [75]:
test.get_best_score()

0.05234897834990915

In [77]:
X, y = make_regression(n_samples=400, n_informative=5, noise=5)
X = pd.DataFrame(X)
y = pd.Series(y)

In [76]:
test2 = MyLineReg(50, 0.1, "mae")

In [78]:
test2.fit(X, y, verbose = 10)

start|loss: 33683.54291951528|mae:144.64879493950062
10|loss:711.6438605442331|mae:21.372086633580047
20|loss:102.68128026627714|mae:8.06150675962899
30|loss:32.6365936814222|mae:4.4940711275948
40|loss:21.238048109578987|mae:3.6052381699072247


In [None]:
test2.predict(X)