Libraries

In [None]:

%matplotlib qt5
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.linear_model import LinearRegression

: 

In [2]:
# Load the dataset. Column 0 is used as the single feature and column 1 as the
# target (plotted below as "Years experience" and "Salary").
data = pd.read_csv("../Datasets/Salary_Data.csv")

X = np.array(data.iloc[:, 0])
y = np.array(data.iloc[:, 1])

# A fixed random_state makes the 50/50 split (and therefore every fitted
# parameter, SSR value and plot below) reproducible under Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

def draw_test_and_train_data(ax):
    """Scatter the train and test samples onto ``ax`` and label the axes.

    Reads the module-level split arrays ``X_train``/``y_train`` and
    ``X_test``/``y_test``. Train points use the default marker, test points
    use crosses; both share size and colour. A legend is added.
    """
    marker_style = {"s": 75, "c": "#1f77b4"}

    ax.scatter(X_train, y_train, label="Train", **marker_style)
    ax.scatter(X_test, y_test, label="Test", marker="x", **marker_style)

    label_font = 14
    ax.set_xlabel("Years experience", fontsize=label_font)
    ax.set_ylabel("Salary", fontsize=label_font)
    ax.legend()


In [3]:

def ssr(m, b, xs=None, ys=None):
    """Return the sum of squared residuals of the line ``y = m * x + b``.

    Parameters
    ----------
    m, b : float
        Slope and intercept of the line being evaluated.
    xs, ys : array-like, optional
        Feature and target values to evaluate on. When omitted, the
        module-level arrays ``X`` and ``y`` (the full dataset) are used, which
        matches the original two-argument behaviour.

    Returns
    -------
    float
        ``sum((ys - (m * xs + b)) ** 2)``; ``0.0`` for empty input.
    """
    xs = np.asarray(X if xs is None else xs, dtype=float)
    ys = np.asarray(y if ys is None else ys, dtype=float)

    # Vectorised residuals; also avoids shadowing the builtin ``sum``.
    residuals = ys - (m * xs + b)
    return float(np.sum(residuals ** 2))

def draw_prediction_line(ax, min, max, m, b):
    """Plot the line ``m * x + b`` on ``ax``.

    The x values are ``np.arange(min - 1, max + 1)``, i.e. unit steps starting
    one unit below ``min`` and stopping before ``max + 1``.
    """
    xs = np.arange(min - 1, max + 1)
    ax.plot(xs, m * xs + b)

In [4]:
class LinearRegressionModel():
    def __init__(self):
        self.X = None
        self.y = None

    def ssr_gradient(self, m, b):
        sum_m = 0
        sum_b = 0
        n = len(self.X)
        for i in range(n):
            error = self.y[i] - (m * self.X[i] + b)
            derivative_m = -2 * self.X[i] * error  # Derivative w.r.t. m
            derivative_b = -2 * error              # Derivative w.r.t. b
            sum_m += derivative_m
            sum_b += derivative_b

        return sum_m, sum_b
    
    def fit(self, X, y, m, b): # Gradient Descent
        self.X = X
        self.y = y

        current_pos = (m, b)
        learning_rate = 0.001
        min_step_size = 0.001
        max_steps = 10000
        current_steps = 0

        while(current_steps < max_steps):
            M_derivative, B_derivative = self.ssr_gradient(current_pos[0], current_pos[1])
            M_step_size, B_step_size = M_derivative * learning_rate, B_derivative * learning_rate

            if  abs(M_step_size) < min_step_size and abs(B_step_size) < min_step_size:
                break

            M_new, B_new = current_pos[0] - M_step_size, current_pos[1] - B_step_size
            
            current_pos = (M_new, B_new)

            print(f"Parameters: m: {current_pos[0]}; b: {current_pos[1]}")
            print(f"Epoch: {current_steps}")
            print(f"SSR: {ssr(current_pos[0], current_pos[1])}")

            current_steps += 1
        
        self.m = current_pos[0]
        self.b = current_pos[1]

    def predict(self, X_test):
        return self.m * X_test + self.b

In [5]:
# --- Train both models on the same training split ---------------------------
lin_reg_model = LinearRegressionModel()
lin_reg_model.fit(X_train, y_train, 9911, 9221)

sklearn_lin_reg_model = LinearRegression()
sklearn_lin_reg_model.fit(X_train.reshape(-1, 1), y_train)

# --- Fitted parameters (rounded for display) and their SSR ------------------
m_own = round(lin_reg_model.m, 2)
b_own = round(lin_reg_model.b, 2)
m_sklearn = round(sklearn_lin_reg_model.coef_[0], 2)
b_sklearn = round(sklearn_lin_reg_model.intercept_, 2)
ssr_own = round(ssr(m_own, b_own), 2)
ssr_sklearn = round(ssr(m_sklearn, b_sklearn), 2)

# --- One panel per model: data points plus the fitted line -------------------
plt.close("all")
fig, (ax, ax_sklearn) = plt.subplots(1, 2, figsize=(10, 5))

x_low, x_high = np.min(X), np.max(X)
panels = (
    (ax, "Own Lin-Reg-model", m_own, b_own),
    (ax_sklearn, "Sklearn Lin-Reg-model", m_sklearn, b_sklearn),
)
for panel, title, slope, intercept in panels:
    panel.set_title(title)
    draw_test_and_train_data(panel)
    draw_prediction_line(panel, x_low, x_high, slope, intercept)

# --- Residuals: a red vertical segment from each test point to its prediction
for years_experience, salary in zip(X_test, y_test):
    predicted_salary = lin_reg_model.predict(years_experience)
    sklearn_predicted_salary = sklearn_lin_reg_model.predict(years_experience.reshape(-1, 1))[0]

    for panel, prediction in ((ax, predicted_salary), (ax_sklearn, sklearn_predicted_salary)):
        panel.plot([years_experience, years_experience], [salary, prediction], lw=1, c="red")

# --- Summary ------------------------------------------------------------------
print(f"Own ssr = {ssr_own} with m = {m_own} and b = {b_own}")
print(f"Sklearn ssr = {ssr_sklearn} with m = {m_sklearn} and b = {b_sklearn}")
print(f"Sklearn score {sklearn_lin_reg_model.score(X_test.reshape(-1, 1), y_test)}")


Parameters: m: 11916.05122; b: 9592.0382
Epoch: 0
SSR: 2646257380.8947744
Parameters: m: 11477.7434462804; b: 9611.487556844
Epoch: 1
SSR: 2775367495.543277
Parameters: m: 11556.483394573122; b: 9704.778092960269
Epoch: 2
SSR: 2693275496.921913
Parameters: m: 11525.904118019149; b: 9781.899869772944
Epoch: 3
SSR: 2688691128.520905
Parameters: m: 11518.53266930173; b: 9861.900354440104
Epoch: 4
SSR: 2669404434.074282
Parameters: m: 11506.328374776172; b: 9940.752496559468
Epoch: 5
SSR: 2653501743.338522
Parameters: m: 11495.2236808936; b: 10019.31136362569
Epoch: 6
SSR: 2637096311.9894204
Parameters: m: 11483.962961866693; b: 10097.399041701185
Epoch: 7
SSR: 2621004414.5996614
Parameters: m: 11472.811455516932; b: 10175.056159525184
Epoch: 8
SSR: 2605051194.6705594
Parameters: m: 11461.712615868964; b: 10252.277089592653
Epoch: 9
SSR: 2589271384.32704
Parameters: m: 11450.677982748815; b: 10329.065974730323
Epoch: 10
SSR: 2573655455.137651
Parameters: m: 11439.704692679867; b: 10405.424

Linear regression class