In [61]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import make_column_transformer
from sklearn .pipeline import make_pipeline
from sklearn .metrics import r2_score
from sklearn.linear_model import LinearRegression,Lasso,Ridge

In [62]:
import pandas as pd
import numpy as np

df = pd.read_csv('cleaned_train.csv')

In [63]:
df.shape

(23238, 12)

In [64]:
df.head()

Unnamed: 0.1,Unnamed: 0,POSTED_BY,UNDER_CONSTRUCTION,RERA,BHK_NO.,SQUARE_FT,READY_TO_MOVE,RESALE,ADDRESS,LONGITUDE,LATITUDE,TARGET(PRICE_IN_LACS)
0,0,Dealer,0,0,3,1282.568807,1,1,"6th Phase JP Nagar,Bangalore",35.0,136.0,69.9
1,1,Owner,0,0,2,1200.087279,1,1,"6th Phase JP Nagar,Bangalore",35.0,136.0,55.0
2,2,Dealer,0,0,2,1099.796334,1,1,"6th Phase JP Nagar,Bangalore",35.0,136.0,54.0
3,3,Owner,0,0,3,1439.884809,1,1,"6th Phase JP Nagar,Bangalore",35.0,136.0,120.0
4,4,Dealer,0,0,2,1100.16044,1,1,"6th Phase JP Nagar,Bangalore",35.0,136.0,48.0


In [65]:
df = df.iloc[:,1:]

In [66]:
df.head()

Unnamed: 0,POSTED_BY,UNDER_CONSTRUCTION,RERA,BHK_NO.,SQUARE_FT,READY_TO_MOVE,RESALE,ADDRESS,LONGITUDE,LATITUDE,TARGET(PRICE_IN_LACS)
0,Dealer,0,0,3,1282.568807,1,1,"6th Phase JP Nagar,Bangalore",35.0,136.0,69.9
1,Owner,0,0,2,1200.087279,1,1,"6th Phase JP Nagar,Bangalore",35.0,136.0,55.0
2,Dealer,0,0,2,1099.796334,1,1,"6th Phase JP Nagar,Bangalore",35.0,136.0,54.0
3,Owner,0,0,3,1439.884809,1,1,"6th Phase JP Nagar,Bangalore",35.0,136.0,120.0
4,Dealer,0,0,2,1100.16044,1,1,"6th Phase JP Nagar,Bangalore",35.0,136.0,48.0


In [67]:
X = df.drop(columns = ['TARGET(PRICE_IN_LACS)','POSTED_BY'])

In [68]:
y = df['TARGET(PRICE_IN_LACS)']

In [69]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3,random_state = 10)

In [70]:
colum_trans = make_column_transformer((OneHotEncoder(sparse = False,handle_unknown = "ignore"),['ADDRESS']),
                                     remainder = 'passthrough')

In [71]:
scaler = StandardScaler()

Applying Linear_Regression model

In [72]:
lr = LinearRegression(normalize = True)

In [73]:
pipeline = make_pipeline(colum_trans,scaler,lr)
pipeline.fit(X_train,y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(handle_unknown='ignore',
                                                                sparse=False),
                                                  ['ADDRESS'])])),
                ('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression(normalize=True))])

In [74]:
y_pred = pipeline.predict(X_test)

In [75]:
r2_linear = r2_score(y_pred,y_test)

Applying Lasso regression in this

In [76]:
lasso = Lasso()

In [77]:
pipeline = make_pipeline(colum_trans,scaler,lasso)

In [78]:
pipeline.fit(X_train,y_train)
y_pred = pipeline.predict(X_test)

In [79]:
r2_lasso = (r2_score(y_pred,y_test))

Applying Ridge model 

In [80]:
ridge = Ridge()

In [81]:
pipeline = make_pipeline(colum_trans,scaler,ridge)
pipeline.fit(X_train,y_train)
y_pred = pipeline.predict(X_test)

In [82]:
r2_ridge = (r2_score(y_pred,y_test))

In [83]:
print('Linear model score',r2_linear)
print('Lasso model score',r2_lasso)
print('Ridge model score',r2_ridge)

Linear model score 0.8733171704411304
Lasso model score 0.8732912989996411
Ridge model score 0.8732936032166538


# Creating the final model

Here we Have all the model having the nearly same Score so we can choose anyone now i am choosing the linear model

In [84]:
pipeline = make_pipeline(colum_trans,scaler,lr)
pipeline.fit(X,y)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(handle_unknown='ignore',
                                                                sparse=False),
                                                  ['ADDRESS'])])),
                ('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression(normalize=True))])

Now importing the test data to predict the output

In [85]:
test = pd.read_csv('cleaned_test.csv')

In [87]:
test = test.iloc[:,1:]

In [88]:
test_data = test.drop(columns = ['POSTED_BY'])

Now predicting the cleaned_test data value as y_pred

In [90]:
y_pred = pipeline.predict(test_data)