In [1]:
#importing packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
%matplotlib inline

In [2]:
# Load the home-price dataset and preview its first five rows.
csv_path = '../Datasets/home_price.csv'
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


In [3]:
# Data preprocessing
# The bedrooms column has missing values; fill them with the column median,
# rounded down to a whole number.
import math

median = math.floor(df['bedrooms'].median())
print('Median is', median)

df['bedrooms'] = df['bedrooms'].fillna(value=median)
df.head()

Median is 3


Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,3.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


In [4]:
# Multivariate linear regression models the target as
#     y = m1*x1 + m2*x2 + m3*x3 + c
# where y is the dependent variable (price), x1..x3 are the features
# (area, bedrooms, age), m1..m3 are the coefficients and c is the intercept.

# Build the estimator and fit it in one step; fit() returns the estimator
# itself, so `reg` is the trained model.
reg = linear_model.LinearRegression().fit(
    df[['area', 'bedrooms', 'age']],
    df['price'],
)
reg

LinearRegression()

In [5]:
# Predict prices for two new homes.
# The model was fitted on a DataFrame with the columns area, bedrooms and age,
# so the new samples are passed as a DataFrame with the same column names.
# This keeps each value tied to its feature by name rather than by position,
# and avoids scikit-learn's "X does not have valid feature names" warning
# that a bare nested list triggers on releases that track feature names.
new_homes = pd.DataFrame(
    [[3000, 3, 40], [2500, 4, 5]],
    columns=['area', 'bedrooms', 'age'],
)
reg.predict(new_homes)

array([444400., 588625.])

In [6]:
# Working process: reproduce reg.predict by hand from the fitted parameters.
c = reg.intercept_            # intercept (value c)
m1, m2, m3 = reg.coef_        # coefficients for area, bedrooms, age

print('Intercept is', c)
print('Coefficients are', list(reg.coef_))


def _price_by_hand(area, bedrooms, age):
    """Evaluate m1*area + m2*bedrooms + m3*age + c with the fitted values."""
    return m1*area + m2*bedrooms + m3*age + c


pred1 = _price_by_hand(3000, 3, 40)
pred2 = _price_by_hand(2500, 4, 5)

print('Predicted Prices are', pred1, pred2)

Intercept is 383724.9999999998
Coefficients are [137.25000000000003, -26024.999999999996, -6824.999999999997]
Predicted Prices are 444400.0 588624.9999999999


In [7]:
# Model evaluation
# Predictions are made on the same rows the model was fitted on, so these
# scores describe how well the model fits this data, not how it performs on
# unseen homes.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

Y_true = df['price']
Y_pred = reg.predict(df[['area', 'bedrooms', 'age']])

MAE = mean_absolute_error(Y_true, Y_pred)   # mean of |actual - predicted|
MSE = mean_squared_error(Y_true, Y_pred)    # mean of (actual - predicted)^2
r2 = r2_score(Y_true, Y_pred)               # coefficient of determination

# Report the three metrics in a fixed order.
for label, value in (
    ('Mean Absolute Error is', MAE),
    ('Mean Squared Error is', MSE),
    ('R2 Score is', r2),
):
    print(label, value)

Mean Absolute Error is 14399.999999999976
Mean Squared Error is 287999999.99999887
R2 Score is 0.9488817891373804
