# Multiple Linear Regression from Scratch
## Ordinary Least Square

# Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Class 

In [3]:
import numpy as np

class MultipleLinearRegression:
    def __init__(self):
        """
        Initialize a MultipleLinearRegression object.
        """
        self.coeff_ = None # beta 1 to beta n values
        self.intercept_ = None # beta 0 value

    def fit(self, X, y):
        """
        Fit the linear regression model.

        Parameters:
        X (numpy.ndarray): The input feature matrix with shape (n_samples, n_features).
        y (numpy.ndarray): The target values with shape (n_samples,).

        Computes and sets the coefficients (betas) and intercept for the linear regression model.
        """
        X = np.insert(X, 0, 1, axis=1) #Diindex ke 0 ditambah 1 semua, axis 1 = kebawah
        
        betas = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)), X.T), y) # follows the equation as above
        self.intercept_ = betas[0]
        self.coeff_ = betas[1:]

    def predict(self, X):
        """
        Make predictions using the linear regression model.

        Parameters:
        X (numpy.ndarray): The input feature matrix for which predictions are to be made with shape (n_samples, n_features).

        Returns:
        numpy.ndarray: Predicted target values with shape (n_samples,).
        """

        y_pred = np.dot(X, self.coeff_) * self.intercept_
        return y_pred

        
        

# Training Model

Splitting the dataset into training and test set.

In [4]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=22)

Fitting the MLR Model to the training set

In [5]:
mlr = MultipleLinearRegression()
mlr.fit(X_train, y_train)
mlr.coeff_, mlr.intercept_

(array([  28.58297885, -260.7460662 ,  494.21420338,  294.55710928,
        -911.28612744,  601.93675888,   64.93690777,  140.09313478,
         813.06920641,   76.14511679]),
 151.34554771656755)

Predicting the Test Result

In [6]:
y_pred = mlr.predict(X_test)

# Evaluation

Evaluation Matrix

In [7]:
from sklearn.metrics import mean_squared_error, r2_score

mae = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print('Multiple Linear Regression')
print(f'Mean Squarred Error: {mae}')
print(f'R2 Score: {r2}')

Multiple Linear Regression
Mean Squarred Error: 78726415.20740592
R2 Score: -12840.401387693682
