In [None]:
import numpy as np

def f1_score(x: np.ndarray, y: np.ndarray, coef: np.ndarray, b: float, f1_score_obj: int = 0):
    """
    Calculate precision, recall and F1-score for a linear model whose rounded
    output is compared with the true labels.

    Parameters:
    x: Array with values of variables, one row per sample
    y: Array with the true label of each sample
    coef: Array with coefficients for variables (intercept not included)
    b: Constant intercept of the linear model
    f1_score_obj: Label treated as the positive class. Equals 0 by default

    Returns:
    A tuple (precision, recall, f1, conf_matrix, y_pred):
    - precision, recall, f1: floats; a metric whose denominator is zero is
      reported as 0.0 instead of NaN
    - conf_matrix: 2x2 integer array laid out as [[TP, FP], [FN, TN]]
    - y_pred: rounded prediction for the last sample (None when y is empty)

    Raises:
    ValueError: if x has fewer rows than y
    """
    labels = np.asarray(y)
    n_samples = len(labels)
    features = np.asarray(x)
    if len(features) < n_samples:
        raise ValueError('x must contain at least one row per element of y')
    if n_samples == 0:
        # Nothing to score: every count is zero and there is no prediction.
        return (0.0, 0.0, 0.0, np.array([[0, 0], [0, 0]]), None)

    # Only the first len(y) rows are scored. Flattening every row to 1-D lets
    # `row * coef` broadcast exactly like the per-sample `coef * x[i]` product.
    features = features[:n_samples].reshape(n_samples, -1)
    predictions = np.round(np.sum(features * np.asarray(coef), axis=1) + b)

    actual_pos = labels == f1_score_obj
    predicted_pos = predictions == f1_score_obj

    true_pos = int(np.count_nonzero(actual_pos & predicted_pos))
    false_neg = int(np.count_nonzero(actual_pos & ~predicted_pos))
    false_pos = int(np.count_nonzero(~actual_pos & predicted_pos))
    true_neg = n_samples - true_pos - false_neg - false_pos
    conf_matrix = np.array([[true_pos, false_pos], [false_neg, true_neg]])

    # Guard every division: with no predicted (or no actual) positives the
    # ratio is undefined, and plain NumPy division would yield NaN + a warning.
    predicted_total = true_pos + false_pos
    actual_total = true_pos + false_neg
    precision = true_pos / predicted_total if predicted_total else 0.0
    recall = true_pos / actual_total if actual_total else 0.0
    if precision + recall:
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        f1 = 0.0

    return (precision, recall, f1, conf_matrix, predictions[-1])


# def linear_regression(x, y, learning_speed = 0.1, echoes = 1000, approach = 'mse', f1 = False, f1_obj = 0):

#     n = len(x)
#     a = 0.0
#     b = 0.0

#     for _ in range(echoes):
#         y_pred = a * x + b

#         if approach == 'mse':
#             da = (2 / n) * np.sum((x) * (a * x + b - y))
#             db = (2 / n) * np.sum((1) * (a * x + b - y))
#         elif approach == 'mae':
#             da = (1 / n) * np.sum((x) * (a * x + b - y))
#             db = (1 / n) * np.sum((1) * (a * x + b - y))

#         a -= learning_speed * da
#         b -= learning_speed * db

#     print(f'a = {a}')
#     print(f'b = {b}')

#     if f1 == True:
#         temp = f1_score(x, y, a, b, f1_obj)

#         print(f'Precision = {temp[0]}')
#         print(f'Recall = {temp[1]}')
#         print(f'F1-score = {temp[2]}')
#         print(f'matrix = {temp[3]}')
#         print(f'matrix = {temp[4]}')

def linear_regression(
    x: np.ndarray,
    y: np.ndarray,
    learning_speed: float = 0.1,
    echoes: int = 1000,
    approach: str = 'mse',
    f1: bool = False,
    f1_obj: int = 0,
):
    """
    Fit a linear regression by batch gradient descent, print the fitted
    parameters and, if requested, the precision / recall / F1 metrics.

    Parameters:
    x: 2-D array with values of variables, one row per sample
    y: Array with values of dependent value, one entry per row of x
    learning_speed: Step size of every gradient-descent update
    echoes: Number of gradient-descent iterations
    approach: Loss to minimise, either 'mse' or 'mae'
    f1: When true, also print the metrics computed by f1_score. False by default
    f1_obj: Label treated as the positive class by f1_score. Equals 0 by default

    Returns:
    Array with the fitted coefficients followed by the intercept as its last
    element.

    Raises:
    ValueError: if approach is not 'mse' or 'mae', if x is not 2-D, or if x and
    y are empty or differ in length
    """
    if approach not in ('mse', 'mae'):
        raise ValueError(f"approach must be 'mse' or 'mae', got {approach!r}")

    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float)
    if features.ndim != 2:
        raise ValueError('x must be a 2-D array with one row per sample')
    n = len(targets)
    if n == 0 or len(features) != n:
        raise ValueError('x and y must be non-empty and have the same length')

    # One weight per column of x; the intercept is stored as the last element.
    coef = np.zeros(features.shape[1] + 1)

    for _ in range(echoes):
        # All partial derivatives are evaluated at the same parameter values
        # and applied together, so one vectorized step replaces the per-
        # coefficient, per-row loops.
        residual = features @ coef[:-1] + coef[-1] - targets
        if approach == 'mse':
            # d/dp mean(r^2) = (2 / n) * sum(r * dr/dp)
            slope = (2 / n) * residual
        else:
            # d/dp mean(|r|) = (1 / n) * sum(sign(r) * dr/dp)
            slope = (1 / n) * np.sign(residual)
        gradient = np.append(features.T @ slope, np.sum(slope))
        coef -= learning_speed * gradient

    print(f'coef = {coef[:-1]}')
    print(f'b = {coef[-1]}')

    if f1:
        # f1_score takes the weights and the intercept as separate arguments.
        temp = f1_score(features, targets, coef[:-1], coef[-1], f1_obj)
        print(f'Precision = {temp[0]}')
        print(f'Recall = {temp[1]}')
        print(f'F1-score = {temp[2]}')

    return coef




In [None]:
# Single-feature sanity check: the targets follow y = 2 * x + 1 exactly.
x = np.array([3, 5, 7, 8]).reshape(-1, 1)
y = np.array([7, 11, 15, 17])

# Alternative data kept from an earlier 1-D version (y = 5 * x):
# x = np.array([3, 5, 7, 8])
# y = np.array([15, 25, 35, 40])

linear_regression(x, y, approach='mse', learning_speed=0.025)

In [None]:
# Binary-label example: fit on 0/1 targets, then print precision, recall and
# F1-score with label 1 treated as the positive class.
x = np.array([0.75, 0.04, 0.6, 0.1, 0.8]).reshape(-1, 1)
y = np.array([0, 1, 1, 0, 1])

linear_regression(x, y, 0.025, approach='mse', f1=True, f1_obj=1)

In [None]:
# x = np.array([[2, 3], [7, 5]]) # coef1 = 4, coef2 = 0.5, b = 2
# y = np.array([11.5, 32.5])

# x = np.array([[2, 3], [7, 5]]) # coef1 = 1, coef2 = 1, b = 0
# y = np.array([5, 12])

# Synthetic two-feature data generated from known parameters, so the fitted
# values can be compared with c1, c2 and b.
c1 = 5
c2 = 0.75
b = 15

# Draw every sample in one call instead of growing the arrays with np.append
# (each append copies the whole array). The cast keeps the float64 dtype the
# appended arrays had.
x1_rnd = np.random.randint(1, 100, size=500).astype(float)
x2_rnd = np.random.randint(1, 100, size=500).astype(float)

x = np.column_stack((x1_rnd, x2_rnd))
y_rnd = c1 * x1_rnd + c2 * x2_rnd + b

# NOTE: the features are unscaled (1..99) and the step size is small, so the
# intercept converges slowly; in the recorded output below it is still far
# from 15 after 2000 iterations.
linear_regression(x, y_rnd, learning_speed = 0.0001, echoes = 2000)

coef = [5.11684285 0.8789533 ]
b = 0.9677832159795476


In [None]:
from sklearn.linear_model import LinearRegression
# Reference fit with scikit-learn on the same x / y_rnd, for comparison with
# the values printed by linear_regression.
reg = LinearRegression()
reg.fit(x, y_rnd)
for fitted_value in (reg.coef_, reg.intercept_):
    print(fitted_value)

[5.   0.75]
15.0


In [None]:
# Number of generated samples (y_rnd is a 1-D array).
y_rnd.shape[0]

500