# Linear Regression with Multiple Variables, a.k.a. Multivariate Regression.

# Given data set for the Monroe township, 
# Find the home price of the houses having --- 
# 1. 3000 sqft area , 3 bedrooms , 40 years old.
# 2. 2500 sqft area , 4 bedrooms , 5 years old.

In [43]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model

In [44]:
# Read the Monroe township housing data (area, bedrooms, age, price) from the local CSV file.
data_monroe = pd.read_csv(
    filepath_or_buffer=r"C:\Users\DELL\Desktop\ML Youtube\Monroe TW.csv",
)

In [45]:
# Preview the data set right after loading it.
data_monroe

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


In [46]:
# linear equation formula
# price = (m1 * area + m2 * bedrooms + m3 * age) + b
# Price depends on three factors - area, bedrooms and age.
# price is dependent variable.
# area , bedrooms , age are independent variables. (These are called features).
# m1, m2, m3 are called coefficients.
# b is the intercept.

In [47]:
# Generalised formula for multiple linear regression / Multivariate linear regression.
# y = (m1*x1 + m2*x2 + m3*x3) + b

In [48]:
# Topics covered
# 1. Data processing - Handling NA values.
# 2. Linear regression using multiple variables.

# Step - 1 - Data preprocessing.

In [49]:
# Inspect the raw data before cleaning; row 2 has no bedrooms value.
data_monroe

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


In [50]:
# There is a NaN value in the bedrooms column, so we fill the missing entry with the median number of bedrooms (rounded down to a whole number).

In [51]:
# Median of the known bedroom counts; pandas skips the NaN entry by default.
data_monroe["bedrooms"].median()

3.5

In [52]:
import math
# Round the 3.5 median down to 3 before using it as the fill value
# (presumably because bedroom counts are whole numbers).
bedrooms_median = math.floor(data_monroe["bedrooms"].median())

In [53]:
# Show the floored median that will replace the missing bedrooms value.
bedrooms_median

3

In [54]:
# Overwrite only the rows whose bedrooms value is missing with the floored median.
data_monroe.loc[data_monroe["bedrooms"].isna(), "bedrooms"] = bedrooms_median

In [55]:
# Confirm that the previously missing bedrooms value in row 2 is now filled.
data_monroe

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,3.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


# Creating Linear Regression object.

In [56]:
# Linear regression model with default settings; it is trained later with reg.fit.
reg = linear_model.LinearRegression()

In [57]:
# Fitting the data ( Training the linear regression model for the available data set).

In [58]:
# Train on the three feature columns (X) against the price column (y).
reg.fit(
    X=data_monroe[["area", "bedrooms", "age"]],
    y=data_monroe["price"],
)

LinearRegression()

In [59]:
# Learned coefficients m1, m2, m3, in the same order as the fitted columns: area, bedrooms, age.
reg.coef_

array([   137.25, -26025.  ,  -6825.  ])

In [60]:
# Learned intercept, i.e. the b term of the regression equation.
reg.intercept_

383725.0

# Predicting the prices for the following house properties.

# 1. 3000 sqft area , 3 bedrooms , 40 years old.

In [65]:
# Predict the price of a 3000 sqft, 3 bedroom, 40 year old house.
# The query is passed as a DataFrame with the same column names used for
# fitting: a bare nested list carries no feature names, which makes
# scikit-learn 1.0 and later warn that X does not have valid feature names.
reg.predict(pd.DataFrame([[3000, 3, 40]], columns=["area", "bedrooms", "age"]))

array([444400.])

In [66]:
# Manual check of the prediction for the 3000 sqft, 3 bedroom, 40 year old house
# using y = (m1*x1 + m2*x2 + m3*x3) + b, with the values reported by the fitted model:
# m1 = 137.25 (area), m2 = -26025 (bedrooms), m3 = -6825 (age), b = 383725 (intercept).
((137.25 * 3000) +  (-26025 * 3) + (-6825 * 40)) + 383725

444400.0

# 2. 2500 sqft area , 4 bedrooms , 5 years old.

In [63]:
# Predict the price of a 2500 sqft, 4 bedroom, 5 year old house.
# The query is passed as a DataFrame with the same column names used for
# fitting: a bare nested list carries no feature names, which makes
# scikit-learn 1.0 and later warn that X does not have valid feature names.
reg.predict(pd.DataFrame([[2500, 4, 5]], columns=["area", "bedrooms", "age"]))

array([588625.])

In [64]:
# Manual check of the prediction for the 2500 sqft, 4 bedroom, 5 year old house
# using y = (m1*x1 + m2*x2 + m3*x3) + b, with the values reported by the fitted model:
# m1 = 137.25 (area), m2 = -26025 (bedrooms), m3 = -6825 (age), b = 383725 (intercept).
((137.25 * 2500) +  (-26025 * 4) + (-6825 * 5)) + 383725

588625.0