## Import needed modules

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [2]:
# reading data
data = pd.read_csv("homeprices.csv")
data

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


### Data Preprocessing: Fill NA values with median value of a column

In [3]:
# replace numeric value missing with median
data.bedrooms.median()

4.0

In [4]:
data.bedrooms.fillna(data.bedrooms.median(), inplace=True)
data

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [5]:
# making area, bedrooms and age as a feature (x)
x = data.drop('price', axis='columns')
x

Unnamed: 0,area,bedrooms,age
0,2600,3.0,20
1,3000,4.0,15
2,3200,4.0,18
3,3600,3.0,30
4,4000,5.0,8
5,4100,6.0,8


In [6]:
# making price as a feature (y)
y = data.price
y

0    550000
1    565000
2    610000
3    595000
4    760000
5    810000
Name: price, dtype: int64

In [7]:
# creating a model
model = LinearRegression()
# fitting data to train the model
model.fit(x, y)

LinearRegression()

### y = mx + cy = mx1 + m2x2 + m3x3 + c

In [8]:
# to get m
model.coef_

array([  112.06244194, 23388.88007794, -3231.71790863])

In [9]:
# to get c
model.intercept_

221323.00186540408

## Predict price for homes 

In [11]:
# predict price of home with 3000 sqr ft area, 3 bedrooms and 40 year old
model.predict([[3000, 3, 40]])



array([498408.25158031])

In [12]:
# predict price of home with 3000 sqr ft area, 4 bedrooms and 40 year old
model.predict([[3000, 4, 40]])



array([521797.13165825])

In [14]:
# making prediction with m and c
112.06244194*3000 + 23388.88007794*4 + -3231.71790863*40 + 221323.00186540408    

521797.13165196404

In [15]:
# predict price of home with 2500 sqr ft area, 4 bedrooms and 5 year old
model.predict([[2500, 4, 5]])



array([578876.03748933])

In [17]:
# predict price of home with 2500 sqr ft area, 4 bedrooms and 0 year old
model.predict([[2500, 4, 0]])



array([595034.6270325])

## To claculate accuracy

In [18]:
model.score(x, y)

0.9550196399325818