# Multiple Linear Regression

In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model

In [2]:
df = pd.read_csv('homeprice_2.csv')
df

Unnamed: 0,Area,Bedrooms,Age,Price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


### Data Preprocessing: Fill NA values with median value of a column

In [3]:
df.Bedrooms.median()

4.0

In [4]:
df.Bedrooms = df.Bedrooms.fillna(df.Bedrooms.median())
df

Unnamed: 0,Area,Bedrooms,Age,Price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


## Find X and Y

In [5]:
X = df.drop('Price', axis = 'columns')
X

Unnamed: 0,Area,Bedrooms,Age
0,2600,3.0,20
1,3000,4.0,15
2,3200,4.0,18
3,3600,3.0,30
4,4000,5.0,8
5,4100,6.0,8


In [7]:
Y = df.Price
Y

0    550000
1    565000
2    610000
3    595000
4    760000
5    810000
Name: Price, dtype: int64

## Fit Linear Regression Model

In [8]:
reg = linear_model.LinearRegression()
reg.fit(X, Y)

LinearRegression()

In [9]:
reg.coef_

array([  112.06244194, 23388.88007794, -3231.71790863])

In [10]:
reg.intercept_

221323.00186540408

## Find Price of home with 3000sqr ft area, 3 bedrooms, 40 years old

In [13]:
reg.predict([[3000, 3, 40]])

array([498408.25158031])

In [14]:
221323.00186540408 + 112.06244194 * 3000 + 23388.88007794 * 3 + -3231.71790863* 40

498408.2515740241

## Find Price of home with 2500sqr ft area, 4 bedrooms, 5 years old

In [15]:
reg.predict([[2500, 4, 5]])

array([578876.03748933])

## Generate CSV file with list of homeprice prediction

In [17]:
x_df = df.drop('Price', axis = 'columns')
x_df

Unnamed: 0,Area,Bedrooms,Age
0,2600,3.0,20
1,3000,4.0,15
2,3200,4.0,18
3,3600,3.0,30
4,4000,5.0,8
5,4100,6.0,8


In [18]:
p = reg.predict(x_df)
p

array([518217.63297611, 602590.07937407, 615307.4140366 , 597962.89583192,
       760663.42675457, 795258.55102673])

In [19]:
x_df['Price'] =p

In [20]:
x_df

Unnamed: 0,Area,Bedrooms,Age,Price
0,2600,3.0,20,518217.632976
1,3000,4.0,15,602590.079374
2,3200,4.0,18,615307.414037
3,3600,3.0,30,597962.895832
4,4000,5.0,8,760663.426755
5,4100,6.0,8,795258.551027


In [22]:
x_df.to_csv('Prediction_2.csv')