In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

# My own linear regression class

In [4]:
class my_linear_regression:
    def __init__(self):
        self.coef=None
        self.intercept=None
    
    def fit(self,x_train,x_test):
        x_train=np.insert(x_train,0,1,axis=1)
        
        beats=np.linalg.inv(np.dot(x_train.T,x_train)).dot(x_train.T).dot(y_train)
        self.intercept=beats[0]
        self.coef=beats[1:]
        
        
    def predict(self,x_test):
        y_pred=np.dot(x_test,self.coef)+self.intercept
        return y_pred

In [5]:
data=pd.read_csv('cleaned_data.csv')

In [6]:
data.head(-5)

Unnamed: 0.1,Unnamed: 0,location,total_sqft,bath,price,BHK
0,0,1st Block Jayanagar,2850.0,4.0,428.0,4
1,1,1st Block Jayanagar,1630.0,3.0,194.0,3
2,2,1st Block Jayanagar,1875.0,2.0,235.0,3
3,3,1st Block Jayanagar,1200.0,2.0,130.0,3
4,4,1st Block Jayanagar,1235.0,2.0,148.0,2
...,...,...,...,...,...,...
10271,10271,other,1508.0,3.0,77.0,3
10272,10272,other,1200.0,2.0,70.0,2
10273,10273,other,1800.0,1.0,200.0,1
10274,10274,other,1095.0,2.0,57.0,2


In [40]:
x=data.drop(columns=['price'])
y=data['price']

In [41]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2, random_state=0)

In [42]:
column_trains=make_column_transformer((OneHotEncoder(sparse=False),['location']),remainder='passthrough')

In [43]:
scaller=StandardScaler()

In [44]:
lr_my=my_linear_regression()

In [45]:
pipe_my=make_pipeline(column_trains,scaller,lr_my)

In [46]:
pipe_my.fit(x_train,y_train)

In [47]:
y_pred_my=pipe.predict(x_test)

In [48]:
r2_score(y_test,y_pred_my)

0.7926576829005657

In [49]:
y_pred_my

array([246.33790079,   1.03119551,  64.36099934, ..., 101.95491882,
       113.07405944,  63.13834309])

In [50]:
coef_my=lr_my.coef
intercept_my=lr_my.intercept

In [51]:
coef_my

array([ 9.50448166e+00,  2.50223398e+00,  6.24145452e-01,  5.01650256e+00,
        3.39539671e-01,  5.41336505e-01,  6.18445038e-01,  9.50844071e+00,
        3.87974711e+00, -1.69702803e+00,  9.20380276e-01,  4.81025690e+00,
        9.06056311e-01,  2.46268230e+00,  2.13331710e+00,  2.50678910e-01,
       -3.88416396e+00,  2.79939402e+00,  3.62492330e+00,  7.21487209e+00,
       -2.95390976e-01,  1.87157497e-01,  6.76237486e+00,  5.13251032e+00,
        9.13669052e-01,  4.97529837e+00, -6.67667415e-01,  3.72043954e-01,
       -2.72500308e-01, -4.92493180e+00,  8.08903876e+00,  4.19037476e+00,
        3.02653005e+00,  6.15905033e+00, -6.46189354e-01,  4.04576914e+00,
       -5.58024654e-01, -1.20997964e+00,  4.88560485e+00,  6.45158423e+00,
        3.68797276e+00,  1.11815968e+00, -4.53839378e+00, -6.62523522e+00,
        2.47043510e+00,  1.47418476e+00, -7.14114403e+00,  1.52562420e+00,
        2.55150753e-02, -3.76970412e+00,  7.32460215e-01,  4.04721503e+00,
       -4.57498267e-01,  

In [52]:
intercept_my

91.4160524075876

# Using Sklearn 

In [53]:
lr=LinearRegression(normalize=True)

In [54]:
pipe=make_pipeline(column_trains,scaller,lr)

In [55]:
pipe.fit(x_train,y_train)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




In [62]:
y_pred=pipe.predict(x_test)

In [63]:
coef=lr.coef_
intercept=lr.intercept_

In [64]:
y_pred

array([246.33790079,   1.03119551,  64.36099934, ..., 101.95491882,
       113.07405944,  63.13834309])

In [65]:
coef

array([-6.92632166e+11, -1.04658778e+12, -5.85452258e+11, -4.53544542e+11,
       -5.23676274e+11, -1.33332992e+12, -9.79112487e+11, -2.25777045e+12,
       -1.56796788e+12, -1.33332992e+12, -4.53544542e+11, -1.04658778e+12,
       -1.61073710e+12, -1.16983556e+12, -1.10993872e+12, -9.43553859e+11,
       -1.07873227e+12, -1.22678432e+12, -1.35864609e+12, -8.27702644e+11,
       -9.79112487e+11, -7.85275471e+11, -1.40789566e+12, -6.92632166e+11,
       -1.16983556e+12, -9.06592426e+11, -1.04658778e+12, -7.85275471e+11,
       -1.43187663e+12, -1.91824865e+12, -9.43553859e+11, -1.01341599e+12,
       -7.85275471e+11, -8.27702644e+11, -8.68049012e+11, -4.53544542e+11,
       -7.40409874e+11, -2.93993161e+12, -1.01341599e+12, -6.92632166e+11,
       -1.01341599e+12, -9.06592426e+11, -1.91824865e+12, -2.05442898e+12,
       -7.40409874e+11, -6.92632166e+11, -1.38349306e+12, -7.85275471e+11,
       -9.43553859e+11, -1.47865533e+12, -1.07873227e+12, -1.33332992e+12,
       -9.79112487e+11, -

In [66]:
intercept

91.41737651992368