In [48]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

In [49]:
# Import the data-set
df = pd.read_csv('homeprices.csv')

In [50]:
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [51]:
# df['bedrooms'] has the missing data-points. Hence data-preprocessing is essential
# Take the mean values of the 'bedrooms' and replace it with NaN

df = df.fillna(df['bedrooms'].median())

In [55]:
# Linear-Regression model

from sklearn.linear_model import LinearRegression

lm = LinearRegression()
lm.fit(df[['area','bedrooms','age']],df.price)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [56]:
print("Coefficient : ",lm.coef_)
print("Intercept   : ",lm.intercept_)

Coefficient :  [  112.06244194 23388.88007794 -3231.71790863]
Intercept   :  221323.00186540408


In [57]:
## Calculate 3000 sqrt ft,3 bed-room,40 year old
lm.predict([[3000,3,40]])

array([498408.25158031])

In [59]:
lm.predict([[2600,3,20]])

array([518217.63297611])

## Splitting into training and testing 

In [93]:
x = df[['area','bedrooms','age']]   # features
y = df[['price']]                   # target variables

In [94]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.4)

In [95]:
from sklearn.linear_model import LinearRegression
lm = LinearRegression()

lm.fit(x_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [96]:
print("Coefficient : ",lm.coef_)
print("Intercept   : ",lm.intercept_)

Coefficient :  [[  382.52466277 11747.53372257 16776.7263942 ]]
Intercept   :  [-963050.1308637]


In [97]:
lm.predict([[4100,6,8]])  # But the actual price is 810000

array([[810000.]])

TypeError: unsupported operand type(s) for ** or pow(): 'str' and 'int'

In [100]:
x

Unnamed: 0,area,bedrooms,age
0,2600,3.0,20
1,3000,4.0,15
2,3200,4.0,18
3,3600,3.0,30
4,4000,5.0,8
5,4100,6.0,8


In [122]:
# Equation: y(predicted) = (m1*x1 + m2*x2 + m3*x3) + c

def Gradient_Descent(x1,x2,x3,y,iterations,learning_rate):
    m=m1=m2=m3=c=0
    n=len(x)   # Number of rows in the data-set
    
## J = 1/n { summation(from i=1 to n) [y(observed) - y(predict)]^2 }
    for i in range(iterations):
        y_predict = (m*x) + c
        J  = (1/n)*sum([val**2 for val in (y-y_predict)])  # Cost function
        
        Jm1 = -(2/n)*sum( x1*(y-y_predict))  # Partial derivative of J with respect to m1
        Jm2 = -(2/n)*sum( x2*(y-y_predict))  # Partial derivative of J with respect to m2
        Jm3 = -(2/n)*sum( x3*(y-y_predict))  # Partial derivative of J with respect to m3
        
        Jm  = (Jm1 + Jm2 + Jm3)  #(Overall addition of all cost function)
        
        Jc = -(2/n)*sum(y-y_predict)         # Partial derivative of J with respect to c
         
        m1 = m1-(learning_rate * Jm1)
        m2 = m2-(learning_rate * Jm2)
        m3 = m3-(learning_rate * Jm3)
        
        m = m1+m2+m3
        c = c-(learning_rate * Jc)
        
        print("m: {}, c: {}, J: {}, iterations: {}".format(m,c,J,i))

In [123]:
x1 = np.array([1,2,3,4,5])
x2 = np.array([22,33,44,55,66])
x3 = np.array([30,40,50,60,70])

y=np.array([10,20,30,40,50])
Gradient_Descent(x1,x2,x3,y,1000,0.001)

m: 6.7, c: 0.06, J: 1100.0, iterations: 0
m: 8.89936, c: 0.07968, J: 118.60560000000001, iterations: 1
m: 9.621330879999999, c: 0.08612448, J: 12.805647436799994, iterations: 2
m: 9.85833104128, c: 0.08822424576000001, J: 1.3890346666002495, iterations: 3
m: 9.93613373994496, c: 0.08889781102080002, J: 0.15356272786876196, iterations: 4
m: 9.9616779588438, c: 0.08910321295908866, J: 0.01870528741001796, iterations: 5
m: 9.970067703104391, c: 0.08915493878010768, J: 0.0036060478167777702, iterations: 6
m: 9.97282628390111, c: 0.08915622268392112, J: 0.0017922968971444443, iterations: 7
m: 9.973736366486683, c: 0.08914095253514662, J: 0.001535116054145037, iterations: 8
m: 9.974039656148788, c: 0.08912025243115622, J: 0.001486680466946886, iterations: 9
m: 9.974143757557455, c: 0.08909777398940118, J: 0.001474198991679896, iterations: 10
m: 9.974182471840017, c: 0.08907471589607764, J: 0.0014700094634244858, iterations: 11
m: 9.974199720823368, c: 0.0890514716332454, J: 0.001468163459236

m: 9.976631310292081, c: 0.08067473889681706, J: 0.0012045192001366855, iterations: 389
m: 9.976637433050403, c: 0.08065360155727094, J: 0.001203888098102051, iterations: 390
m: 9.976643554204522, c: 0.08063246975585399, J: 0.0012032573267302608, iterations: 391
m: 9.976649673754856, c: 0.08061134349111515, J: 0.0012026268858480898, iterations: 392
m: 9.976655791701827, c: 0.08059022276160378, J: 0.0012019967752823987, iterations: 393
m: 9.976661908045852, c: 0.08056910756586962, J: 0.0012013669948599361, iterations: 394
m: 9.976668022787353, c: 0.08054799790246277, J: 0.0012007375444080615, iterations: 395
m: 9.97667413592675, c: 0.08052689376993373, J: 0.0012001084237538468, iterations: 396
m: 9.976680247464458, c: 0.08050579516683336, J: 0.001199479632724174, iterations: 397
m: 9.976686357400906, c: 0.08048470209171295, J: 0.001198851171146657, iterations: 398
m: 9.976692465736505, c: 0.08046361454312409, J: 0.0011982230388485166, iterations: 399
m: 9.97669857247168, c: 0.0804425325

m: 9.978857244362466, c: 0.07299024087085802, J: 0.0009859801774414884, iterations: 771
m: 9.978862783910666, c: 0.07297111692294152, J: 0.000985463577875325, iterations: 772
m: 9.97886832200747, c: 0.07295199798563164, J: 0.0009849472489792078, iterations: 773
m: 9.978873858653252, c: 0.07293288405761555, J: 0.000984431190610871, iterations: 774
m: 9.978879393848395, c: 0.07291377513758081, J: 0.0009839154026291257, iterations: 775
m: 9.978884927593281, c: 0.07289467122421528, J: 0.0009833998848919243, iterations: 776
m: 9.978890459888284, c: 0.07287557231620716, J: 0.0009828846372576144, iterations: 777
m: 9.97889599073379, c: 0.07285647841224505, J: 0.0009823696595849414, iterations: 778
m: 9.978901520130176, c: 0.07283738951101781, J: 0.00098185495173223, iterations: 779
m: 9.97890704807782, c: 0.07281830561121472, J: 0.0009813405135583504, iterations: 780
m: 9.978912574577105, c: 0.07279922671152536, J: 0.0009808263449218122, iterations: 781
m: 9.978918099628409, c: 0.072780152810

In [92]:
x 

array([1, 2, 3, 4, 5])