# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Multiple Linear Regression using the scikit-learn Library

In [46]:
# Read the house-price dataset into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='/content/house_price_dataset.csv')
df.head(n=5)

Unnamed: 0,sqft,bedrooms,bathrooms,age_of_house,distance_to_city_center_km,schools_nearby,crime_rate_index,lot_size_sqft,floors,construction_quality_score,house_price
0,1698.685661,1,2,33,11.802587,3,5.911182,2182.054792,1,8,290257.373549
1,1444.69428,4,1,37,15.808072,8,4.999775,2875.83764,1,6,321478.51273
2,1759.075415,2,3,27,1.885496,5,1.132004,2734.691131,1,5,357825.874809
3,2109.211943,5,1,33,5.405359,8,6.283555,2611.762601,2,5,414323.03628
4,1406.33865,1,2,4,22.271992,5,2.515999,1730.383656,2,8,316274.083037


In [47]:
# Split the frame into the target column and the remaining predictor columns.
y = df['house_price']
X = df.drop(columns=['house_price'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [48]:
# Fit scikit-learn's estimator on the training split (fit returns the estimator
# itself, so construction and fitting can be chained), then predict the test rows.
mlr = LinearRegression().fit(X_train, y_train)
mlr.predict(X_test)

array([415052.61111613, 424250.44862859, 335559.8899877 , 370365.92679898,
       258445.24442901, 390139.16334661, 319758.06285135, 269644.43393991,
       326163.37936747, 344022.56032254, 301601.51876518, 283463.50189599,
       286723.94824944, 303208.57556837, 229555.44505219, 255247.97758933,
       265926.80069638, 396378.48006511, 360211.72859166, 332591.71069168,
       312999.54314792, 479762.80976713, 268818.4891464 , 304222.99237872,
       305896.84995384, 389165.01093058, 336605.30685428, 278761.50943535,
       364327.40294974, 274478.19484649, 274452.42104003, 436989.10030591,
       262334.71924438, 340343.75144199, 340705.80285292, 427908.57526779,
       401601.9651494 , 280897.86888998, 345040.0286031 , 470840.590574  ,
       325112.56513611, 323065.1144385 , 302104.50982078, 272736.88078835,
       310994.87033424, 277300.13309699, 418834.81756437, 364791.87609909,
       326146.89415229, 400600.55449367, 433109.88184602, 307504.09871268,
       345012.57500956, 4

In [49]:
# Coefficients fitted by the scikit-learn model; compare with the from-scratch values below.
mlr.coef_

array([ 1.24371628e+02,  1.52959625e+04,  1.88709945e+04, -8.71260432e+02,
       -2.11861824e+02,  2.49207434e+03, -1.81771312e+03,  1.04265415e+01,
        8.47532652e+03,  5.37226196e+03])

In [50]:
# Intercept (bias term) fitted by the scikit-learn model.
mlr.intercept_

np.float64(3624.5408163801767)

# Multiple Linear Regression from Scratch

In [51]:
class MyMultipleLinearRegression:
  """Ordinary least squares regression with an intercept, implemented with NumPy.

  After ``fit`` has run:
    coef_      -- fitted slopes, one entry per feature column.
    intercept_ -- fitted bias term.
  Both attributes are ``None`` until ``fit`` is called.
  """

  def __init__(self):
    self.coef_ = None
    self.intercept_ = None

  def fit(self , X_train , y_train):
    """Estimate the intercept and coefficients by least squares.

    Args:
      X_train: array-like of shape (n_samples, n_features); a 1-D input is
        treated as a single feature column.
      y_train: array-like of shape (n_samples,) holding the targets.

    The fitted values are stored on ``intercept_`` and ``coef_``; nothing is
    returned.
    """
    features = np.asarray(X_train, dtype=float)
    if features.ndim == 1:
      features = features.reshape(-1, 1)
    targets = np.asarray(y_train, dtype=float)

    # Prepend a column of ones so the first fitted parameter is the intercept.
    design = np.c_[np.ones((features.shape[0], 1)), features]

    # Solve min ||design @ b - targets|| directly instead of forming
    # inv(X^T X) @ X^T y: the explicit inverse squares the condition number
    # and breaks down when features are collinear, whereas lstsq returns the
    # minimum-norm solution in that case and the same answer otherwise.
    b, *_ = np.linalg.lstsq(design, targets, rcond=None)

    self.intercept_ = b[0]
    self.coef_ = b[1:]

  def predict(self , X_test):
    """Return predictions ``X_test @ coef_ + intercept_`` for each row of X_test."""
    return np.dot(X_test , self.coef_) + self.intercept_

In [52]:
# Fit the from-scratch implementation on the same training split and predict the
# same test rows. Note: this rebinds `mlr`, replacing the scikit-learn model above.
mlr = MyMultipleLinearRegression()
mlr.fit(X_train , y_train)
mlr.predict(X_test)

array([415052.61111616, 424250.44862859, 335559.8899877 , 370365.926799  ,
       258445.24442901, 390139.1633466 , 319758.06285134, 269644.43393995,
       326163.37936748, 344022.56032256, 301601.51876521, 283463.50189599,
       286723.94824947, 303208.57556837, 229555.44505223, 255247.97758935,
       265926.80069641, 396378.48006508, 360211.72859164, 332591.7106917 ,
       312999.54314796, 479762.80976716, 268818.48914643, 304222.9923787 ,
       305896.84995384, 389165.01093058, 336605.30685428, 278761.5094354 ,
       364327.40294971, 274478.19484652, 274452.42104006, 436989.1003059 ,
       262334.71924437, 340343.75144201, 340705.80285294, 427908.57526777,
       401601.9651494 , 280897.86888999, 345040.02860308, 470840.59057398,
       325112.56513613, 323065.11443849, 302104.5098208 , 272736.8807884 ,
       310994.87033421, 277300.133097  , 418834.81756436, 364791.87609913,
       326146.89415232, 400600.55449367, 433109.881846  , 307504.0987127 ,
       345012.57500956, 4

In [53]:
# Coefficients from the from-scratch model (`mlr` now refers to MyMultipleLinearRegression).
mlr.coef_

array([ 1.24371628e+02,  1.52959625e+04,  1.88709945e+04, -8.71260432e+02,
       -2.11861824e+02,  2.49207434e+03, -1.81771312e+03,  1.04265415e+01,
        8.47532652e+03,  5.37226196e+03])

In [54]:
# Intercept from the from-scratch model.
mlr.intercept_

np.float64(3624.540816452354)