In [294]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize
%matplotlib inline

In [295]:
#Computing the cost function

def computeCost(theta,X,y):
    """Return the squared-error cost J(theta) = 1/(2m) * sum((X.theta - y)^2).

    Parameters
    ----------
    theta : array-like with one entry per column of X (any shape, flattened here)
    X     : array-like of shape (m, n), one row per sample
    y     : array-like with m target values, shaped (m,) or (m, 1)

    Returns
    -------
    The scalar cost.
    """
    # Work with flat ndarrays instead of np.matrix: with a matrix, a 1-D y would
    # broadcast the (m,1) prediction against (m,) into an (m,m) residual and
    # silently produce a wrong cost.
    theta=np.asarray(theta,dtype=float).ravel()
    X=np.asarray(X,dtype=float)
    y=np.asarray(y,dtype=float).ravel()
    m=len(y) #len of an array is the size of its first axis, i.e. the number of samples
    residual=X.dot(theta)-y #.dot performs the matrix-vector product; * would be element-wise
    cost=(1/(2*m))*np.sum(np.power(residual,2)) #.sum adds up every squared residual
    return(cost)

In [296]:
#Vectorized approach for gradient descent

def gradient(theta,X,y):
    """Return the gradient of the squared-error cost: (1/m) * X^T (X.theta - y).

    Parameters
    ----------
    theta : array-like with one entry per column of X (any shape, flattened here)
    X     : array-like of shape (m, n), one row per sample
    y     : array-like with m target values, shaped (m,) or (m, 1)

    Returns
    -------
    numpy.ndarray of shape (n,). scipy.optimize.minimize documents its `jac`
    callable as returning an array of shape (n,), so a flat vector is returned
    rather than an (n, 1) np.matrix.
    """
    # Flatten theta and y so the residual stays 1-D; a 1-D y subtracted from an
    # (m,1) matrix prediction would broadcast to (m,m) and corrupt the gradient.
    theta=np.asarray(theta,dtype=float).ravel()
    X=np.asarray(X,dtype=float)
    y=np.asarray(y,dtype=float).ravel()
    m=len(y) #number of samples
    residual=X.dot(theta)-y
    grad=(1/m)*(X.transpose().dot(residual)) #.transpose swaps rows and columns of X
    return(grad)

Insert the path to your text or CSV file, in quotes, in the cell below. Make sure the file has a header row that names every column, and that the output (target) variable is the last column.

In [297]:
#Reading the data

raw_data=pd.read_csv('HousePricing.txt') #Insert location of file here

#Mean Normalization and inserting ones

#.mean() and .std() work column by column, so every column is centred and scaled in a single step
column_means=raw_data.mean()
column_stds=raw_data.std()
data=raw_data.sub(column_means).div(column_stds)

#Put a constant column named 'Ones' in front of the others
data.insert(loc=0,column='Ones',value=1)

data.head() #a bare expression is only displayed when it is the last line of the cell

Unnamed: 0,Ones,Square feet,#bedrooms,cost
0,1,0.13001,-0.223675,0.475747
1,1,-0.50419,-0.223675,-0.084074
2,1,0.502476,-0.223675,0.228626
3,1,-0.735723,-1.537767,-0.867025
4,1,1.257476,1.090417,1.595389


In [298]:
#Separating X and Y data

cols=data.shape[1] #.shape is a (rows, columns) tuple, so index 1 is the column count
feature_frame=data.iloc[:,:cols-1] #every column except the last one
target_frame=data.iloc[:,cols-1:] #only the last column, sliced so it stays two-dimensional
X=np.array(feature_frame)
y=np.array(target_frame)

Several minimization methods are available; which one suits best depends on your problem, the number of features, and so on. The method used here is TNC, the Truncated Newton algorithm.

In [299]:
#Learning

#Start the search from an all-zero parameter vector, one entry per column of X
theta=np.zeros(X.shape[1])
costfirst=computeCost(theta,X,y)

#Minimize the cost with the truncated Newton (TNC) method, supplying the analytic gradient
fmin=minimize(computeCost,theta,args=(X,y),jac=gradient,method='TNC')

#Keep the optimized parameters and the cost value reported for them
theta,costend=fmin.x,fmin.fun

print('The values of the parameters at the end of optimization are',theta,'\n')
print('The initial value of the cost function =',costfirst)
print('The final value of the cost function =',costend)

The values of the parameters at the end of optimization are [ -3.26293460e-16   8.84765991e-01  -5.31788171e-02] 

The initial value of the cost function = 0.489361702128
The final value of the cost function = 0.130686480539


In [300]:
#Checking error and accuracy

n_samples=len(y)
#costend*2*n_samples undoes the 1/(2m) factor of the cost, leaving the sum of squared residuals
residual_norm=np.sqrt(costend*2*n_samples)
#Error formula: residual norm divided by the number of samples, expressed as a percentage
er0=(residual_norm/n_samples)*100
#NOTE(review): the residuals are in normalized target units and this ratio shrinks as the
#sample count grows - confirm this is the intended "error %" before relying on it
print('error % = {}'.format(er0))
print('accuracy % = {}'.format(100-er0))

error % = 7.4572963139790795
accuracy % = 92.54270368602091


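Insert your new input values in the inputs array [1,_,_...]. Keep the leading 1 and enter the raw (un-normalized) feature values in the same order as the columns of your dataset; the cell below normalizes them for you.<br>
For example, if your data has two input features/variables, then inputs=[1,234,22] (the values depend on your data)<br>
If your data has five input features/variables, then inputs=[1,2323,65,74,5474,35] (again, the values depend on your data)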

In [301]:
#Predicting new outputs

#Enter your input values here (Leave the first '1')
inputs=[1,1650,3]

#Per-column mean and standard deviation of the raw data, used to normalize and de-normalize
mean=raw_data.mean()
stdi=raw_data.std()

#Converting input to mean normalized input.
#.iloc selects by position: these Series are indexed by column name, and integer keys
#passed to plain [] are only a deprecated positional fallback in recent pandas.
for i in range (len(inputs)-1):
    inputs[i+1]=(inputs[i+1]-mean.iloc[i])/(stdi.iloc[i])

new_out=(np.array([inputs])).dot(theta)
#Converting mean normalized output to actual output, using the last column's statistics
corrected_out=(new_out*stdi.iloc[-1])+mean.iloc[-1]

#.item() pulls the single value out of the one-element array; calling float() directly
#on an array with ndim > 0 is deprecated in recent NumPy
print('Predicted output = ',float(corrected_out.item()))

Predicted output =  293081.4641010747
