In [7]:
import numpy as np
import pandas as pd

In [8]:
# Load the data set. header=None tells pandas not to treat the first row as
# column names, so columns are addressed by position only.
data = pd.read_csv('Multi_linear.txt', header=None)
m, n = data.shape  # m = number of rows, n = number of columns in the file

# Every column except the last goes into X; the last column becomes y.
# y is sliced as n-1:n (not indexed with n-1) so that it stays a 2-D
# (m, 1) column instead of collapsing to a 1-D array.
X = data.iloc[:, 0:n-1].values
y = data.iloc[:, n-1:n].values

# X has n-1 columns, so n rows gives one parameter per column plus one extra
# for the column of ones that is prepended to X later in the notebook.
# Float dtype: the parameters take fractional values during gradient descent,
# which an int8 array could not store.
theta = np.zeros((n, 1), dtype=float)


In [9]:
def feature_normalization(X):
    """Standardize every column of X to zero mean and unit standard deviation.

    Statistics are taken along axis 0, i.e. one mean and one standard
    deviation per column.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row and one feature per column.

    Returns
    -------
    X_normalized : numpy.ndarray
        ``(X - mean) / standard_deviation``, computed column-wise.
    mean : numpy.ndarray
        Per-column mean of X.
    standard_deviation : numpy.ndarray
        Per-column standard deviation of X (``np.std`` default, ddof=0).
        Columns whose standard deviation is exactly 0 report 1.0 instead, so
        the returned value can be reused as a divisor on new data.
    """
    mean = np.mean(X, axis=0)
    standard_deviation = np.std(X, axis=0)

    # A constant column has zero spread; dividing by it would turn the whole
    # column into NaN (0/0). Using 1.0 as the divisor leaves such a column at
    # exactly 0 after centering and does not affect any other column.
    standard_deviation = np.where(standard_deviation == 0, 1.0, standard_deviation)

    X_normalized = (X - mean) / standard_deviation

    return X_normalized, mean, standard_deviation

In [10]:
# Reset theta to an (n, 1) column of zeros before defining the optimisation
# routines. Float dtype: the parameters take fractional values, which an
# int8 array could not store.
theta = np.zeros((n, 1), dtype=float)

def cost_function(X,y,theta):
    """Return the halved mean squared error of the linear hypothesis X @ theta.

    Computes ``J = 1 / (2 * m) * sum((X @ theta - y) ** 2)`` where
    ``m = np.size(y)``.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix; its columns must match the number of entries in theta.
    y : numpy.ndarray
        Target values, either as a column ``(m, 1)`` or as a 1-D array ``(m,)``.
    theta : numpy.ndarray
        Parameter vector, either as a column or as a 1-D array.

    Returns
    -------
    float
        The cost as a plain Python float.
    """
    m = np.size(y)

    # Flatten prediction and target before subtracting so that a column
    # vector and a 1-D array are treated alike; subtracting an (m,) array
    # from an (m, 1) array would otherwise broadcast to an (m, m) matrix.
    residual = np.ravel(np.dot(X, theta)) - np.ravel(y)

    # The dot product of two 1-D arrays is a true scalar, so float() does not
    # rely on the deprecated conversion of a 1x1 array to a Python scalar.
    return (1. / (2 * m)) * float(np.dot(residual, residual))

def gradient_descent(X,y,theta,alpha = 0.0005,num_iters=1000):
    """Run batch gradient descent for linear regression.

    Each iteration applies the vectorized update
    ``theta <- theta - alpha * (1/m) * X.T @ (X @ theta - y)`` with
    ``m = np.size(y)`` and then records the cost of the updated parameters.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix.
    y : numpy.ndarray
        Target values; must have the same shape as ``X @ theta``.
    theta : numpy.ndarray
        Starting parameters as a 2-D column (the history code reads
        ``theta[:, 0]``). The array passed in is not modified, because every
        step builds a new array.
    alpha : float, optional
        Step size of each update.
    num_iters : int, optional
        Number of update steps to perform.

    Returns
    -------
    theta : numpy.ndarray
        Parameters after the last iteration.
    J_history : numpy.ndarray
        Cost after each iteration, length ``num_iters``.
    J_vec : list of float
        The same costs as a Python list (kept for plotting convergence).
    thetahistory : list of list
        First column of theta after each iteration (kept for 3-D plots).
    """
    m = np.size(y)
    J_history = np.zeros(num_iters)
    J_vec = []
    thetahistory = []

    for i in range(num_iters):
        # Predictions for the current parameters.
        h = np.dot(X, theta)

        # Simultaneous update of all parameters along the negative gradient.
        theta = theta - alpha * (1/m) * (X.T.dot(h - y))

        # Evaluate the cost once per iteration and store it in both records
        # instead of computing the identical value twice.
        cost = cost_function(X, y, theta)
        J_history[i] = cost
        J_vec.append(cost)
        thetahistory.append(list(theta[:, 0]))

    return theta, J_history, J_vec, thetahistory
def grad_descent_loop(X,y,theta,alpha = 0.015,num_iters=1000):
    """Fit an intercept and one slope by gradient descent with explicit loops.

    Only column 1 of X is read as the feature. The ``theta`` argument is
    accepted but never read: both parameters always start from 0.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array whose column 1 holds the feature values.
    y : numpy.ndarray
        Target values, indexed per sample as ``y[k]``.
    theta : any
        Unused.
    alpha : float, optional
        Step size of each update.
    num_iters : int, optional
        Number of passes over the samples.

    Returns
    -------
    numpy.ndarray
        ``np.array([intercept, slope])`` after the last pass.
    """
    n_samples = np.size(y)
    intercept = 0
    slope = 0

    for _epoch in range(num_iters):
        # Accumulators for the two partial derivatives over one full pass.
        sum_err = 0
        sum_err_x = 0

        for k in range(n_samples):
            x_k = X[:, 1][k]
            err = (intercept + slope * x_k) - y[k]
            sum_err += err
            sum_err_x += err * x_k

        # Both parameters are updated only after the pass has finished, so
        # every sample in a pass sees the same intercept and slope.
        intercept = intercept - alpha * (1. / n_samples) * sum_err
        slope = slope - alpha * (1. / n_samples) * sum_err_x

    return np.array([intercept, slope])

In [11]:
# Standardize the feature matrix and keep the per-column mean and standard
# deviation that feature_normalization returns alongside the scaled values.
(normalized_X,
 mean_X,
 standard_deviation_X) = feature_normalization(X)

In [15]:
# Prepend a column of ones to the normalized features, so the first entry of
# theta is multiplied by 1 for every sample (the intercept term).
X = np.hstack((np.ones((m, 1)), normalized_X))

In [18]:
# Run 500 gradient-descent steps with step size 0.02. The returned tuple
# (theta, J_history, J_vec, thetahistory) is the cell's displayed value.
gradient_descent(X, y, theta, alpha=.02, num_iters=500)

(array([[340398.69449058],
        [108749.51089445],
        [ -5880.08275303]]),
 array([6.30176584e+10, 6.05506545e+10, 5.81859739e+10, 5.59192555e+10,
        5.37463303e+10, 5.16632127e+10, 4.96660918e+10, 4.77513239e+10,
        4.59154245e+10, 4.41550612e+10, 4.24670469e+10, 4.08483329e+10,
        3.92960032e+10, 3.78072680e+10, 3.63794581e+10, 3.50100200e+10,
        3.36965099e+10, 3.24365896e+10, 3.12280211e+10, 3.00686628e+10,
        2.89564646e+10, 2.78894643e+10, 2.68657833e+10, 2.58836235e+10,
        2.49412632e+10, 2.40370538e+10, 2.31694170e+10, 2.23368413e+10,
        2.15378792e+10, 2.07711444e+10, 2.00353093e+10, 1.93291023e+10,
        1.86513052e+10, 1.80007511e+10, 1.73763223e+10, 1.67769478e+10,
        1.62016016e+10, 1.56493006e+10, 1.51191029e+10, 1.46101059e+10,
        1.41214445e+10, 1.36522899e+10, 1.32018476e+10, 1.27693562e+10,
        1.23540858e+10, 1.19553371e+10, 1.15724394e+10, 1.12047500e+10,
        1.08516527e+10, 1.05125568e+10, 1.01868958e+1