# Manual Implementation

There are two common methods for finding the optimal coefficients (theta) of a linear regression model:
1. Gradient Descent (Utilizing gradients; Optimization through iteration)

 ![Gradient-desc](https://images.datacamp.com/image/upload/v1649238227/image1_b93ju9.png)

2. Normal Equation (Utilizing system of linear equations; Without iteration)

 ![Normal-eq](https://images.datacamp.com/image/upload/v1649238228/image4_sckrl4.png)

Because the dataset used in this example is small, we will use the normal equation, $\theta = (X^\top X)^{-1} X^\top y$, to find the coefficients of the model.

In [None]:
import numpy as np

In [None]:
class MultipleLinearRegression:
  """Least-squares linear regression with a bias (intercept) term.

  After ``fit`` has been called, ``coefficients`` holds the bias term at
  index 0 followed by one regression coefficient per feature column.
  """

  def __init__(self):
    # Include both regression coefficients & the bias term (None until fit() runs)
    self.coefficients = None

  @staticmethod
  def _add_bias_column(X):
    """Prepend a column of ones so the bias is learned like any other coefficient."""
    return np.column_stack((np.ones(len(X)), X))

  def fit(self, X, y):
    """Estimate the coefficients (theta) that minimise the squared error.

    Args:
      X: Feature matrix with one row per sample.
      y: Target values, one per row of ``X``.
    """
    X = self._add_bias_column(X)

    # Solve the least-squares problem directly instead of evaluating the
    # normal equation as inv(X.T @ X) @ X.T @ y. When feature columns are
    # linearly dependent, X.T @ X is singular and the explicit inverse either
    # raises or returns numerically meaningless values. lstsq gives the same
    # theta for full-rank data and the minimum-norm solution otherwise.
    self.coefficients = np.linalg.lstsq(X, y, rcond=None)[0]

  def predict(self, X):
    """Return the predicted target for every row of ``X``.

    Raises:
      RuntimeError: If called before ``fit``.
    """
    if self.coefficients is None:
      raise RuntimeError("Model is not fitted yet; call fit() before predict().")

    return self._add_bias_column(X) @ self.coefficients

In [None]:
# Toy feature matrix: 4 samples (rows) x 2 features (columns).
# Note that the second feature is always the first feature plus 5,
# so the two columns are perfectly collinear.
X = np.array([[10, 15], [20, 25], [30, 35], [40, 45]])

# Target value for each sample (here: 60 + first feature)
y = np.array([70, 80, 90, 100])

In [None]:
# Instantiating the model (coefficients stay None until fit() is called)
model = MultipleLinearRegression()

# Fitting the dataset: learns the bias term and one coefficient per feature
model.fit(X, y)

# Testing the model on a single unseen sample (1 row, 2 features)
X_new = np.array([[50, 55]])
predictions = model.predict(X_new)
print("Predictions: ", predictions)

Predictions:  [-2.34375]


In [None]:
# Model coefficients: index 0 is the bias term (it multiplies the column of
# ones that fit() prepends to X); the remaining entries are the per-feature
# regression coefficients, in the column order of X.
print("Bias term: ", model.coefficients[0])
print("Regression Coefficients: ", model.coefficients[1:])

Bias term:  -384.375
Regression Coefficients:  [11.6796875 -3.671875 ]


Sandbox


---



In [None]:
# Sandbox: inspect the bias-augmented matrix, its transpose, and their product.
# The augmented matrix is built once and reused for all three views.
augmented = np.column_stack((np.ones(len(X)), X))
print(augmented, end="\n\n")
print(augmented.T, end="\n\n")
augmented @ augmented.T

[[1. 1. 2.]
 [1. 2. 3.]
 [1. 3. 4.]
 [1. 4. 5.]]

[[1. 1. 1. 1.]
 [1. 2. 3. 4.]
 [2. 3. 4. 5.]]



array([[ 6.,  9., 12., 15.],
       [ 9., 14., 19., 24.],
       [12., 19., 26., 33.],
       [15., 24., 33., 42.]])

In [None]:
# Sandbox: matrix inverse.
# Multiplying a matrix by its inverse yields the multiplicative identity
# (the identity matrix). A matrix whose determinant is 0 has no inverse.
A = np.array([[2, 1], [7, 4]])
A_inv = np.linalg.inv(A)
Identity_matrix = A @ A_inv

# Show the matrix, its inverse, and their product, separated by blank lines
print(A, end="\n\n")
print(A_inv, end="\n\n")
print(Identity_matrix)

[[2 1]
 [7 4]]

[[ 4. -1.]
 [-7.  2.]]

[[1. 0.]
 [0. 1.]]


# With library (Scikit-Learn)

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
# Load the California housing training CSV into a DataFrame.
# NOTE(review): the absolute /content/sample_data path looks like the Google
# Colab sample-data folder — adjust it when running elsewhere.
data = pd.read_csv("/content/sample_data/california_housing_train.csv")

In [None]:
# Inspecting the dataset: preview the first rows to check column names and values
data.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.82,80100.0
2,-114.56,33.69,17.0,720.0,174.0,333.0,117.0,1.6509,85700.0
3,-114.57,33.64,14.0,1501.0,337.0,515.0,226.0,3.1917,73400.0
4,-114.57,33.57,20.0,1454.0,326.0,624.0,262.0,1.925,65500.0


In [None]:
# Select the eight predictor columns; median_house_value is the target
X = data[[
    'longitude',
    'latitude',
    'housing_median_age',
    'total_rooms',
    'total_bedrooms',
    'population',
    'households',
    'median_income',
]]
y = data['median_house_value']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=22
)

In [None]:
# Model training: fit scikit-learn's LinearRegression on the training split only.
# This rebinds `model`, replacing the manual MultipleLinearRegression instance
# created earlier in the notebook.
model = LinearRegression()
model.fit(X_train, y_train)

In [None]:
# Getting the model prediction for the held-out test rows
y_predicted = model.predict(X_test)

In [None]:
# Evaluating the model and getting its learned parameters.
# error_term is the mean squared error on the test split (squared units of
# the target), reg_coef holds one coefficient per feature column of X, and
# intercept is the bias term.
error_term = mean_squared_error(y_test, y_predicted)
reg_coef = model.coef_
intercept = model.intercept_
print(error_term)
print(reg_coef)
print(intercept)

4982710407.847904
[-4.27642409e+04 -4.26348143e+04  1.14298244e+03 -6.49056111e+00
  1.08298743e+02 -4.76941413e+01  6.69712395e+01  4.02803200e+04]
-3582532.716038684
