# Linear Regression with multiple variables

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model

In [2]:
df = pd.read_csv('houses_multiple.csv')
df.head()

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


<b> Data Preprocessing </b>

In [8]:
# Here we shall first handle null values
# We may calculate a median and fill it there (since it's in middle)
import math
median_bedrooms = math.floor(df.bedrooms.median())
median_bedrooms

3

In [10]:
df.bedrooms = df.bedrooms.fillna(median_bedrooms)
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,3.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


<b> Linear Regression </b>

In [17]:
reg = linear_model.LinearRegression()
reg.fit(df[['area','bedrooms','age']].values,df.price)
# Fitting independant variables to the target variable
# y = m1x1 + m2x2 + m3x3 + b

In [18]:
# Checking coefficients m1, m2, m3
reg.coef_

array([   137.25, -26025.  ,  -6825.  ])

In [19]:
# Checking constant
reg.intercept_

383724.9999999998

In [21]:
reg.predict([[3000,3,40]])

array([444400.])

<b> Small Exercise </b>

In [33]:
df1 = pd.read_csv('hiring.csv')
df1

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [41]:
#Data Preprocessing
from text_to_num import text2num
df1['experience'] = df1.experience.fillna('zero')
df1.rename(columns={'test_score(out of 10)':'test_score', 'interview_score(out of 10)':'interview_score'}, inplace=True)
medscore = (int)(df1.test_score.median())
df1['test_score'] = df1.test_score.fillna(medscore)
df1.rename(columns={'salary($)':'salary_usd'}, inplace=True)
df1['experience'] = text2num(df1['experience'], 'en')
df1

ModuleNotFoundError: No module named 'text_to_num'

Unnamed: 0,experience,test_score,interview_score,salary_usd
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,8.0,7,72000
7,eleven,7.0,8,80000
