#                                  Model Building

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('House_data_EDA.csv')

# Model Building

In [3]:
x = df.drop('price',axis='columns')
y = df.price

In [4]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2)

In [9]:
from sklearn.linear_model import LinearRegression
lr_model = LinearRegression()
lr_model.fit(x_train,y_train)
lr_model.score(x_test,y_test)

0.8623901916889615

In [10]:
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score

cv_model = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
cross_val_score(LinearRegression(),x,y, cv=cv_model)

array([0.84265888, 0.78859871, 0.86644793, 0.87130427, 0.88159352])

In [11]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeRegressor

In [12]:
def model_selector(x,y):
    algorithms={
        'linear_regression':{
            'model':LinearRegression(),
            'params':{
                'normalize':[True,False]
            }
        },
        'lasso':{
            'model':Lasso(),
            'params':{
                'alpha':[1,2],
                'selection':['random','cyclic']
            }
        },
        'decision_tree':{
            'model':DecisionTreeRegressor(),
            'params':{
                'criterion':['mse','friedman_mse'],
                'splitter':['best','random']
            }
        }
    }
    scores =[]
    cv_model = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
    for algo_name, config in algorithms.items():
        grid_search = GridSearchCV(config['model'], config['params'], cv=cv_model, return_train_score=False)
        grid_search.fit(x,y)
        scores.append({
            'model':algo_name,
            'best_score':grid_search.best_score_,
            'best_params':grid_search.best_params_
        })
    return pd.DataFrame(scores,columns=['model','best_score','best_params'])

model_selector(x,y)
        

Unnamed: 0,model,best_score,best_params
0,linear_regression,0.850121,{'normalize': False}
1,lasso,0.711262,"{'alpha': 1, 'selection': 'random'}"
2,decision_tree,0.754881,"{'criterion': 'friedman_mse', 'splitter': 'best'}"


# Testing the model

In [20]:
def price_predictor(location,sqft,bath,bhk):
    location_index = np.where(x.columns==location)[0]
    z = np.zeros(len(x.columns))
    z[0] = sqft
    z[1] = bath
    z[2] = bhk
    if location_index >=0:
        z[location_index]=1
    return lr_model.predict([z])[0]

In [25]:
price_predictor('1st Pahse JP Nagar', 1000, 3,3)

  if location_index >=0:


83.56498469034396

In [23]:
price_predictor('Electronic City Phase II', 1056, 2,2)

35.76998090075631