# Business Problem - Predict House Prices in Bangalore
Using Linear Regression, a Supervised Machine Learning Algorithm

### Load Libraries

In [1]:
import pandas as pd

### Load Data

In [3]:
# Read the housing table from its location relative to this notebook.
data_path = '../DATASET/house_price_data.csv'
df = pd.read_csv(data_path)

In [4]:
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0,bath,balcony,price,total_sqft_int,bhk,price_per_sqft
0,3,2.0,150.0,1672.0,3,8971.291866
1,3,3.0,149.0,1750.0,3,8514.285714
2,3,2.0,150.0,1750.0,3,8571.428571
3,2,2.0,40.0,1250.0,2,3200.0
4,2,2.0,83.0,1200.0,2,6916.666667


### Split Data

In [45]:
# Separate the predictors (X) from the target column 'price' (y).
#
# 'Unnamed: 0' holds 0, 1, 2, ... in the preview and looks like a row index that
# was saved into the CSV, not a property of a house. It is dropped here explicitly
# so that a fresh "Restart & Run All" yields the 5-feature matrix instead of
# depending on leftover kernel state. errors='ignore' keeps the cell working when
# that column has already been removed from df.
#
# NOTE(review): in the previewed rows price_per_sqft equals
# price * 100000 / total_sqft_int, i.e. it is computed from the target. Keeping it
# as a feature leaks the target into the model; consider dropping it and
# re-running the notebook.
X = df.drop(columns=['Unnamed: 0', 'price'], errors='ignore')
y = df['price']

print('Shape of X = ', X.shape)
print('Shape of y = ', y.shape)

Shape of X =  (7120, 5)
Shape of y =  (7120,)


In [26]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the partition
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=51,
)

# Report the dimensions of every partition, training pair first.
print('Shape of X_train = ', X_train.shape)
print('Shape of y_train = ', y_train.shape)
print('Shape of X_test = ', X_test.shape)
print('Shape of y_test = ', y_test.shape)

Shape of X_train =  (5696, 5)
Shape of y_train =  (5696,)
Shape of X_test =  (1424, 5)
Shape of y_test =  (1424,)


### Feature Scaling

In [27]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split only.
sc = StandardScaler().fit(X_train)

# Apply the same training-derived transform to both splits.
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

## Linear Regression - ML Model Training

In [28]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares model, fitted on the scaled training split.
lr = LinearRegression()

lr.fit(X=X_train, y=y_train)

In [29]:
# Fitted weights of the linear model, one per (scaled) feature column.
lr.coef_

array([ -7.77049848,  -1.1126361 ,  81.76972071, -13.18490812,
        51.52311584])

In [30]:
# Fitted bias term of the linear model.
lr.intercept_

95.0802729985955

## Predict Home Prices and Evaluate on the Test Set

In [31]:
# First row of the scaled test matrix (all feature columns).
X_test[0]

array([ 0.71301986,  0.0112734 ,  0.30202307,  0.65677518, -0.48064341])

In [32]:
# Predict for the first test row only; the slice keeps the 2-D shape predict() expects.
lr.predict(X_test[:1])

array([80.79978544])

In [33]:
# Predicted prices for every row of the scaled test split.
lr.predict(X=X_test)

array([ 80.79978544,  17.23665021, 127.21199414, ...,  23.02870026,
        64.50535966, 232.74481144])

In [34]:
# Actual prices of the test rows, for comparison with the predictions.
y_test

2435     80.00
3113     40.00
426     120.00
1124     79.00
1161     45.00
         ...  
2078     28.34
6855     84.00
4381     32.00
3862     63.00
43      180.00
Name: price, Length: 1424, dtype: float64

In [35]:
# Coefficient of determination (R^2) of the linear model on the test split.
lr.score(X=X_test, y=y_test)

0.7837532911177937

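## Implementing Ridge and Lasso Regression

Each cell below fits a Ridge or Lasso model on the scaled training data with a different regularization strength `alpha` and reports its R² on the test split. Because the same test split is used to compare the `alpha` values, the best score is an optimistic estimate; a separate validation split or cross-validation would give a fairer comparison.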

In [36]:
from sklearn.linear_model import Ridge, Lasso

In [37]:
# Ridge (L2-penalised) regression at scikit-learn's default strength, alpha=1.0.
rd = Ridge(alpha=1.0).fit(X_train, y_train)

# R^2 on the test split.
rd.score(X_test, y_test)

0.7839330765868542

In [38]:
# Lasso (L1-penalised) regression at scikit-learn's default strength, alpha=1.0.
ls = Lasso(alpha=1.0).fit(X_train, y_train)

# R^2 on the test split.
ls.score(X_test, y_test)

0.801577541513995

In [39]:
# Ridge regression with the penalty strength raised to alpha=2.
rd2 = Ridge(alpha=2).fit(X_train, y_train)

# R^2 on the test split.
rd2.score(X_test, y_test)

0.7841123733246895

In [40]:
# Lasso regression with the penalty strength raised to alpha=2.
ls2 = Lasso(alpha=2).fit(X_train, y_train)

# R^2 on the test split.
ls2.score(X_test, y_test)

0.8157943701141783

In [41]:
# Ridge regression with the penalty strength raised to alpha=3.
rd3 = Ridge(alpha=3).fit(X_train, y_train)

# R^2 on the test split.
rd3.score(X_test, y_test)

0.7842911826411716

In [42]:
# Lasso regression with the penalty strength raised to alpha=3.
ls3 = Lasso(alpha=3).fit(X_train, y_train)

# R^2 on the test split.
ls3.score(X_test, y_test)

0.8263447005747205