In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_regression
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
# Synthetic regression problem: 100 samples, 2 informative features, one noisy target.
# random_state pins the generator so every Restart & Run All yields the same data
# (and therefore the same metrics and coefficients further down).
x,y = make_regression(n_samples = 100, n_features = 2, n_informative = 2, n_targets = 1, noise = 50, random_state = 42)


In [3]:
# Tabular view of the synthetic data: the two feature columns first, then the target.
df = pd.DataFrame(x, columns = ['feature1', 'feature2']).assign(target = y)

In [4]:
df.shape

(100, 3)

In [5]:
df.head()

Unnamed: 0,feature1,feature2,target
0,0.305659,0.485654,-28.635943
1,2.08312,0.120414,2.231308
2,1.307762,0.372855,26.976144
3,-0.115149,0.771275,21.863847
4,0.208092,-0.46948,-35.140534


In [6]:
fig = px.scatter_3d(df, x='feature1', y='feature2', z = 'target')

In [7]:
fig.show()

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(x, y, test_size = 0.2, random_state = 2)
x_train, x_test, y_train, y_test = split_parts

In [9]:
from sklearn.linear_model import LinearRegression

In [10]:
lr = LinearRegression()

In [11]:
lr.fit(x_train, y_train)

In [12]:
y_pred = lr.predict(x_test)

In [13]:
# Report each hold-out metric as "<label> <value>", one per line.
for label, metric in (
  ('MAE', mean_absolute_error),
  ('MSE', mean_squared_error),
  ('R2 Score', r2_score),
):
  print(label, metric(y_test, y_pred))

MAE 46.3288964220301
MSE 3437.487014987449
R2 Score 0.23839500418344717


In [16]:
# Evaluate the fitted model on a regular grid so its plane can be drawn as a surface.
# NOTE: x and y are rebound here from the dataset arrays to the grid axes; the
# plotting cell below reads x, y and z under these names.
GRID_POINTS = 10  # points per axis
x = np.linspace(-5, 5, GRID_POINTS)  # feature1 axis
y = np.linspace(-5, 5, GRID_POINTS)  # feature2 axis

# meshgrid(x, y) varies x along columns and y along rows, which is the layout
# go.Surface expects for z (z[i][j] sits at x[j], y[i]). Passing the axes in this
# order keeps the surface aligned even if the two ranges are later made different.
xGrid, yGrid = np.meshgrid(x, y)

# One row per grid node: (feature1, feature2).
final = np.column_stack((xGrid.ravel(), yGrid.ravel()))

z_final = lr.predict(final).reshape(xGrid.shape)

z = z_final



In [17]:
# Draw the raw samples as a 3-D scatter and overlay the model's predicted surface.
regression_plane = go.Surface(x = x, y = y, z = z)
fig = px.scatter_3d(df, x = 'feature1', y = 'feature2', z = 'target')
fig.add_trace(regression_plane)
fig.show()

In [18]:
lr.coef_

array([16.51994636, 24.5810567 ])

In [19]:
lr.intercept_

np.float64(2.944462048304633)

Creating a Multiple Linear Regression Class (with a scikit-learn baseline on the diabetes dataset for comparison)

In [23]:
from sklearn.datasets import load_diabetes

# Load the diabetes dataset and unpack its feature matrix and target vector.
diabetes = load_diabetes()
x, y = diabetes.data, diabetes.target

In [24]:
x

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [26]:
x.shape

(442, 10)

In [27]:
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [29]:
y.shape

(442,)

In [34]:
# 80/20 train/test split of the diabetes data with a fixed seed for repeatability.
diabetes_split = train_test_split(x, y, test_size = 0.2, random_state = 2)
x_train, x_test, y_train, y_test = diabetes_split

In [35]:
# Show the train and test feature-matrix shapes, one per line.
print(x_train.shape, x_test.shape, sep = '\n')

(353, 10)
(89, 10)


In [36]:
reg = LinearRegression()

In [37]:
reg.fit(x_train, y_train)

In [38]:
y_pred = reg.predict(x_test)

In [39]:
r2_score(y_test, y_pred)

0.4399338661568968

In [40]:
reg.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [41]:
reg.intercept_

np.float64(151.88331005254167)

In [90]:
class MLR:
  """Multiple linear regression fitted by ordinary least squares.

  Attributes (both are None until fit() has been called):
    coef_: one weight per feature column.
    intercept_: the bias term.
  """

  def __init__(self):
    self.coef_ = None
    self.intercept_ = None

  def fit(self, x_train, y_train):
    """Estimate the intercept and coefficients from training data.

    Args:
      x_train: 2-D array-like of shape (n_samples, n_features).
      y_train: array-like of targets with n_samples entries along axis 0.
    """
    x_train = np.asarray(x_train, dtype = float)
    y_train = np.asarray(y_train, dtype = float)

    # Add a bias (intercept) column of ones in front of the features
    X_b = np.insert(x_train, 0, 1, axis = 1)

    # Solve min ||X_b @ betas - y|| with a least-squares solver instead of forming
    # inv(X_b.T @ X_b): the explicit inverse breaks down (LinAlgError or wildly
    # inaccurate values) when features are collinear or nearly so, whereas lstsq
    # returns the minimum-norm solution and gives the same betas in the
    # well-conditioned case.
    betas, *_ = np.linalg.lstsq(X_b, y_train, rcond = None)

    self.intercept_ = betas[0]
    self.coef_ = betas[1:]

  def predict(self, x_test):
    """Return predictions for x_test as x_test @ coef_ + intercept_.

    Args:
      x_test: array-like whose last axis has one entry per fitted feature.
    """
    return np.dot(x_test, self.coef_) + self.intercept_

In [82]:
lr = MLR()

In [83]:
lr.fit(x_train, y_train)

In [84]:
x_train.shape

(353, 10)

In [85]:
np.insert(x_train, 0 ,1, axis = 1).shape

(353, 11)

In [86]:
y_pred = lr.predict(x_test)

In [87]:
r2_score(y_test, y_pred)

0.43993386615689634

In [88]:
lr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [89]:
lr.intercept_

np.float64(151.88331005254165)