## Machine Learning Model

In [1]:
import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression,Lasso,Ridge

In [2]:
# Load the cleaned dataset from the working directory and preview its first five rows.
data = pd.read_csv("Cleaned_data.csv")
data.head(5)

Unnamed: 0,location,total_sqft,bath,price,bhk
0,1st Block Jayanagar,2850.0,4.0,428.0,4
1,1st Block Jayanagar,1630.0,3.0,194.0,3
2,1st Block Jayanagar,1875.0,2.0,235.0,3
3,1st Block Jayanagar,1200.0,2.0,130.0,3
4,1st Block Jayanagar,1235.0,2.0,148.0,2


In [3]:
# Predictors are every column except the target; the target is `price`.
X = data.drop(columns='price')
Y = data['price']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, x_test, Y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

In [4]:
# Sanity check: (rows, columns) of the training and test feature frames.
tuple(split.shape for split in (X_train, x_test))

((5888, 4), (1473, 4))

In [5]:
# One-hot encode the categorical `location` column; all remaining columns are
# passed through unchanged (remainder="passthrough").
#
# handle_unknown="ignore" makes the encoder emit an all-zero row for a location
# that was not present when it was fitted, instead of raising at transform time.
# Without it, a pipeline built on this transformer fails on any location that
# is absent from the training split (e.g. a rare location that only landed in
# the test split, or a new location at prediction time).
col_trans = make_column_transformer(
    (OneHotEncoder(sparse_output=False, handle_unknown="ignore"), ['location']),
    remainder="passthrough",
)

In [6]:
# Standardizer used as the middle step of the "scaled" pipelines built in the
# cells below; the same instance is passed to each of those pipelines.
scaler = StandardScaler()

## Applying Linear Regression

In [7]:
# Linear regression on the encoded features, with standardization in between.
pipe = make_pipeline(col_trans, scaler, LinearRegression())
pipe.fit(X_train, Y_train)

# R^2 on the train and test splits, expressed as percentages.
tuple(
    pipe.score(features, target) * 100
    for features, target in ((X_train, Y_train), (x_test, y_test))
)

(86.17007374637254, 82.51977016381458)

In [8]:
# Linear regression on the encoded features, without the standardization step.
pipe = make_pipeline(col_trans, LinearRegression())
pipe.fit(X_train, Y_train)

# R^2 on the train and test splits, expressed as percentages.
tuple(
    pipe.score(features, target) * 100
    for features, target in ((X_train, Y_train), (x_test, y_test))
)

(86.17007374637254, 82.51977016381508)

## Applying Lasso

In [9]:
# Lasso (default hyperparameters) on the encoded features, with standardization in between.
pipe = make_pipeline(col_trans, scaler, Lasso())
pipe.fit(X_train, Y_train)

# R^2 on the train and test splits, expressed as percentages.
tuple(
    pipe.score(features, target) * 100
    for features, target in ((X_train, Y_train), (x_test, y_test))
)

(84.93537906204466, 81.4689475169039)

In [10]:
# Lasso (default hyperparameters) on the encoded features, without the standardization step.
pipe = make_pipeline(col_trans, Lasso())
pipe.fit(X_train, Y_train)

# R^2 on the train and test splits, expressed as percentages.
tuple(
    pipe.score(features, target) * 100
    for features, target in ((X_train, Y_train), (x_test, y_test))
)

(73.71606368647623, 67.1688263276343)

## Applying Ridge

In [11]:
# Ridge (default hyperparameters) on the encoded features, with standardization in between.
pipe = make_pipeline(col_trans, scaler, Ridge())
pipe.fit(X_train, Y_train)

# R^2 on the train and test splits, expressed as percentages.
tuple(
    pipe.score(features, target) * 100
    for features, target in ((X_train, Y_train), (x_test, y_test))
)

(86.17006997170607, 82.52348502290106)

In [12]:
# Ridge (default hyperparameters) on the encoded features, without the standardization step.
# `pipe` keeps this fitted model after the cell finishes.
pipe = make_pipeline(col_trans, Ridge())
pipe.fit(X_train, Y_train)

# R^2 on the train and test splits, expressed as percentages.
tuple(
    pipe.score(features, target) * 100
    for features, target in ((X_train, Y_train), (x_test, y_test))
)

(86.03449670805759, 82.23431153904205)

In [13]:
# Export an explicitly named model rather than whatever `pipe` currently holds.
# `pipe` is rebound by every experiment cell above, so dumping it would make the
# saved artifact depend on which cell happened to run last.
#
# The configuration below (encoded features + Ridge, no scaling) is the one a
# top-to-bottom run leaves in `pipe`, so the exported model is unchanged.
# NOTE(review): in the recorded outputs the scaled Ridge pipeline scored
# marginally higher on the test split (82.52 vs 82.23) - confirm that the
# unscaled variant is the intended export.
final_model = make_pipeline(col_trans, Ridge())
final_model.fit(X_train, Y_train)

# joblib is imported in the notebook's import cell.
joblib.dump(final_model, "Model.joblib")

['Model.joblib']