In [None]:
%config IPCompleter.use_jedi = False
%config Completer.evaluation = 'limited'
import warnings
# Install a blanket "ignore" filter: every warning category is silenced from here on,
# including deprecation notices raised by imported libraries.
warnings.filterwarnings('ignore')

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

<h2>Let's Make the Linear Regression Class</h2>

In [2]:
import numpy as np

class LinearRegression:
    """Single-feature linear regression fitted with batch gradient descent.

    The model is the line ``y = m * x + b`` where ``m`` is the slope (weight)
    and ``b`` is the intercept (bias). Both are found by minimising the mean
    squared error (MSE) over the training data.

    All four attributes are ``None`` until ``gradient_descent`` has been called.
    """

    # constructor
    def __init__(self):
        self.m = None              # learned slope
        self.b = None              # learned intercept
        self.learning_rate = None  # step size of the most recent training run
        self.epochs = None         # iteration count of the most recent training run

    def gradient_descent(self, X_train, y_train, learning_rate, epochs, log_every=20):
        """Fit ``m`` and ``b`` by batch gradient descent on the MSE.

        Args:
            X_train: 1-D array-like of feature values (lists, tuples, NumPy
                arrays, pandas Series and ``(n, 1)`` column vectors are accepted).
            y_train: array-like of targets with the same number of elements.
            learning_rate: step size applied to each gradient update.
            epochs: number of full passes (gradient updates) to perform.
            log_every: print the current ``m``/``b`` every ``log_every``-th
                epoch; pass ``0`` or ``None`` to train silently.

        Raises:
            ValueError: if the training data is empty or ``X_train`` and
                ``y_train`` do not contain the same number of elements.
        """
        # Promote to flat float arrays so arithmetic is elementwise regardless of
        # the container type (``0 * [1, 2]`` on a plain list would yield ``[]``).
        features = np.asarray(X_train, dtype=float).ravel()
        targets = np.asarray(y_train, dtype=float).ravel()
        n = features.size  # size of the data
        if n == 0:
            raise ValueError("X_train must contain at least one sample")
        if targets.size != n:
            raise ValueError(
                f"X_train and y_train must have the same number of elements "
                f"(got {n} and {targets.size})"
            )

        self.learning_rate = learning_rate
        self.epochs = epochs
        # Gradient descent starts from m = b = 0.
        self.m, self.b = 0.0, 0.0

        for epoch in range(self.epochs):
            # Residuals of the current line: y - (m*x + b)
            residual = targets - (self.m * features + self.b)

            # Partial derivatives of the MSE with respect to m and b
            dm = (-2 / n) * np.dot(features, residual)
            db = (-2 / n) * residual.sum()

            # Step against the gradient
            self.m = self.m - self.learning_rate * dm
            self.b = self.b - self.learning_rate * db

            # Progress report at every `log_every`-th iteration
            if log_every and epoch % log_every == 0:
                print(f"Iteration = {epoch} - m : {self.m} - b = {self.b}")

    def predict(self, X_test):
        """Return ``m * x + b`` for every value in ``X_test``.

        Lists and tuples are converted to NumPy arrays first; any other input
        (scalar, ndarray, Series) is used as-is so its type is preserved.
        """
        if isinstance(X_test, (list, tuple)):
            X_test = np.asarray(X_test, dtype=float)
        return self.m * X_test + self.b

    @staticmethod
    def _residuals(y_test, y_pred):
        """Return ``y_test - y_pred`` as a float NumPy array (positional difference)."""
        return np.asarray(y_test, dtype=float) - np.asarray(y_pred, dtype=float)

    def mae(self, y_test, y_pred):
        """Mean absolute error between true and predicted values."""
        return np.mean(np.abs(self._residuals(y_test, y_pred)))

    def mse(self, y_test, y_pred):
        """Mean squared error between true and predicted values."""
        return np.mean(self._residuals(y_test, y_pred) ** 2)

    def rmse(self, y_test, y_pred):
        """Root mean squared error (square root of ``mse``)."""
        return np.sqrt(self.mse(y_test, y_pred))

    def r2_score(self, y_test, y_pred):
        """Coefficient of determination, ``1 - SSR / SST``.

        When ``y_test`` is constant (``SST == 0``) the ratio is undefined; in
        that case 1.0 is returned for a perfect prediction and 0.0 otherwise
        instead of ``nan`` / ``-inf``.
        """
        y_true = np.asarray(y_test, dtype=float)
        # Sum of squared errors around the regression line
        ssr = np.sum(self._residuals(y_test, y_pred) ** 2)
        # Total sum of squares (spread around the mean line)
        sst = np.sum((y_true - np.mean(y_true)) ** 2)
        if sst == 0 and y_true.size > 0:
            return 1.0 if ssr == 0 else 0.0
        return 1 - (ssr / sst)


<h2>Load the data</h2>

In [3]:
# Read the dataset from the relative path 'placement.csv' into a DataFrame,
# then preview its first rows as the cell output.
df = pd.read_csv('placement.csv')
df.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [4]:
# Feature = first column of df, target = second column, both as NumPy arrays.
X = df.iloc[:, 0].to_numpy()
y = df.iloc[:, 1].to_numpy()

In [47]:
# Display the feature array (values taken from column 0 of df).
X 

array([6.89, 5.12, 7.82, 7.42, 6.94, 7.89, 6.73, 6.75, 6.09, 8.31, 5.32,
       6.61, 8.94, 6.93, 7.73, 7.25, 6.84, 5.38, 6.94, 7.48, 7.28, 6.85,
       6.14, 6.19, 6.53, 7.28, 8.31, 5.42, 5.94, 7.15, 7.36, 8.1 , 6.96,
       6.35, 7.34, 6.87, 5.99, 5.9 , 8.62, 7.43, 9.38, 6.89, 5.95, 7.66,
       5.09, 7.87, 6.07, 5.84, 8.63, 8.87, 9.58, 9.26, 8.37, 6.47, 6.86,
       8.2 , 5.84, 6.6 , 6.92, 7.56, 5.61, 5.48, 6.34, 9.16, 7.36, 7.6 ,
       5.11, 6.51, 7.56, 7.3 , 5.79, 7.47, 7.78, 8.44, 6.85, 6.97, 6.94,
       8.99, 6.59, 7.18, 7.63, 6.1 , 5.58, 8.44, 4.26, 4.79, 7.61, 8.09,
       4.73, 6.42, 7.11, 6.22, 7.9 , 6.79, 5.83, 6.63, 7.11, 5.98, 7.69,
       6.61, 7.95, 6.71, 5.13, 7.05, 7.62, 6.66, 6.13, 6.33, 7.76, 7.77,
       8.18, 5.42, 8.58, 6.94, 5.84, 8.35, 9.04, 7.12, 7.4 , 7.39, 5.23,
       6.5 , 5.12, 5.1 , 6.06, 7.33, 5.91, 6.78, 7.93, 7.29, 6.68, 6.37,
       5.84, 6.05, 7.2 , 6.1 , 5.64, 7.14, 7.91, 7.19, 7.91, 6.76, 6.93,
       4.85, 6.17, 5.84, 6.07, 5.66, 7.57, 8.28, 6.

In [5]:
# Display the target array (values taken from column 1 of df).
y 

array([3.26, 1.98, 3.25, 3.67, 3.57, 2.99, 2.6 , 2.48, 2.31, 3.51, 1.86,
       2.6 , 3.65, 2.89, 3.42, 3.23, 2.35, 2.09, 2.98, 2.83, 3.16, 2.93,
       2.3 , 2.48, 2.71, 3.65, 3.42, 2.16, 2.24, 3.49, 3.26, 3.89, 3.08,
       2.73, 3.42, 2.87, 2.84, 2.43, 4.36, 3.33, 4.02, 2.7 , 2.54, 2.76,
       1.86, 3.58, 2.26, 3.26, 4.09, 4.62, 4.43, 3.79, 4.11, 2.61, 3.09,
       3.39, 2.74, 1.94, 3.09, 3.31, 2.19, 1.61, 2.09, 4.25, 2.92, 3.81,
       1.63, 2.89, 2.99, 2.94, 2.35, 3.34, 3.62, 4.03, 3.44, 3.28, 3.15,
       4.6 , 2.21, 3.  , 3.44, 2.2 , 2.17, 3.49, 1.53, 1.48, 2.77, 3.55,
       1.48, 2.72, 2.66, 2.14, 4.  , 3.08, 2.42, 2.79, 2.61, 2.84, 3.83,
       3.24, 4.14, 3.52, 1.37, 3.  , 3.74, 2.82, 2.19, 2.59, 3.54, 4.06,
       3.76, 2.25, 4.1 , 2.37, 1.87, 4.21, 3.33, 2.99, 2.88, 2.65, 1.73,
       3.02, 2.01, 2.3 , 2.31, 3.16, 2.6 , 3.11, 3.34, 3.12, 2.49, 2.01,
       2.48, 2.58, 2.83, 2.6 , 2.1 , 3.13, 3.89, 2.4 , 3.15, 3.18, 3.04,
       1.54, 2.42, 2.18, 2.46, 2.21, 3.4 , 3.67, 2.

In [6]:
# --- Train-test split ---
from sklearn.model_selection import  train_test_split

# Hold out 20% of the samples for testing (test_size = 0.2); random_state = 2 fixes
# the shuffle so the same split is produced on every run.
X_train , X_test , y_train , y_test = train_test_split(X , y , test_size = 0.2 , random_state = 2) 

In [7]:
# Display the training targets produced by the split.
y_train

array([3.13, 3.91, 2.25, 2.3 , 4.06, 3.18, 2.7 , 2.49, 3.15, 2.99, 4.08,
       4.14, 2.6 , 2.44, 2.71, 2.72, 1.63, 2.31, 2.51, 3.5 , 2.24, 2.58,
       2.42, 2.54, 4.37, 2.17, 3.14, 2.19, 3.54, 1.54, 2.72, 3.98, 2.46,
       3.  , 1.98, 2.65, 3.75, 3.67, 2.93, 3.08, 2.21, 2.84, 3.76, 3.03,
       2.09, 3.26, 2.16, 2.7 , 3.83, 2.88, 3.16, 3.04, 3.13, 3.04, 4.6 ,
       3.26, 2.85, 2.09, 3.31, 3.89, 1.87, 3.11, 2.4 , 3.48, 3.08, 2.76,
       2.93, 3.39, 2.35, 3.08, 2.14, 3.24, 1.73, 3.24, 3.44, 2.69, 3.33,
       2.42, 2.57, 3.55, 2.66, 1.53, 2.98, 2.84, 3.52, 3.16, 3.82, 2.6 ,
       1.61, 2.2 , 2.99, 3.12, 2.74, 2.83, 2.77, 2.35, 2.19, 3.42, 4.02,
       3.89, 3.15, 3.99, 3.42, 3.08, 2.46, 2.61, 3.49, 3.76, 2.82, 4.03,
       2.99, 2.73, 2.18, 3.09, 3.96, 2.1 , 3.33, 4.62, 3.47, 2.48, 4.43,
       4.11, 3.44, 3.18, 3.02, 4.06, 2.26, 1.48, 2.94, 2.89, 2.72, 3.09,
       2.73, 4.36, 3.  , 3.79, 2.59, 2.87, 3.57, 1.37, 3.6 , 2.43, 3.33,
       2.31, 3.4 , 3.89, 4.25, 3.26, 3.67, 2.79, 3.

In [9]:
# --- Train your model ---
# Fit the slope m and intercept b with gradient descent; progress is printed by the
# training loop while it runs.
# NOTE(review): in the recorded log b is still drifting by roughly 0.0008 every 20
# iterations, which suggests 1000 epochs at learning_rate = 0.001 stops before the
# fit has converged — consider more epochs (or centering X) and re-checking the metrics.
model = LinearRegression()
model.gradient_descent(X_train , y_train , learning_rate = 0.001 , epochs = 1000)

Iteration = 0 - m : 0.04325971874999999 - b = 0.006007875
Iteration = 20 - m : 0.38005731499038725 - b = 0.05225316617240961
Iteration = 40 - m : 0.41937825534710826 - b = 0.056887927284660245
Iteration = 60 - m : 0.4240615169140111 - b = 0.05667830859813803
Iteration = 80 - m : 0.4247115433907738 - b = 0.05590528635631451
Iteration = 100 - m : 0.4248918549362763 - b = 0.05506732825897218
Iteration = 120 - m : 0.42501738077734563 - b = 0.05422247429385271
Iteration = 140 - m : 0.4251364345241412 - b = 0.05337748192929727
Iteration = 160 - m : 0.4252546417992468 - b = 0.05253313740041286
Iteration = 180 - m : 0.42537265772146987 - b = 0.05168953166440392
Iteration = 200 - m : 0.4254905586540481 - b = 0.05084667472223462
Iteration = 220 - m : 0.4256083535710078 - b = 0.050004567149011504
Iteration = 240 - m : 0.4257260435996369 - b = 0.04916320842283719
Iteration = 260 - m : 0.42584362895349204 - b = 0.04832259789460508
Iteration = 280 - m : 0.42596110973966317 - b = 0.04748273490091975


In [10]:
# Pull the fitted slope and intercept off the model and report them.
m, b = model.m, model.b
print(f"m = {m}")
print(f"b = {b}")

m = 0.430115869083091
b = 0.017780613036703946


In [11]:
# Apply the fitted line (m * x + b) to the held-out test inputs.
y_pred = model.predict(X_test)

In [56]:
# First held-out target value, shown for a spot check.
y_test[0]

np.float64(4.1)

In [57]:
# Model prediction for the first held-out sample.
y_pred[0]

np.float64(3.7081747697696246)

In [58]:
# ---  Print metrics ---
# Score the test-set predictions with the model's own metric helpers:
# mean absolute error, mean squared error, its square root, and R².
print("MAE:", model.mae(y_test , y_pred))
print("MSE:", model.mse(y_test, y_pred))
print("RMSE:", model.rmse(y_test, y_pred))
print("R² Score:", model.r2_score(y_test, y_pred))

MAE: 0.33544554080376293
MSE: 0.15915801113153374
RMSE: 0.3989461256003544
R² Score: 0.7122773800544345
