In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the comma-separated samples. Passing names= labels the three columns
# explicitly, so the first line of the file is parsed as data, not as a header.
data = pd.read_csv(
    'linear-regression.txt',
    names=["X", "Y", "Z"],
)
print(data.shape)  # (number of rows, number of columns)
data.head()        # preview of the first five rows

(3000, 3)


Unnamed: 0,X,Y,Z
0,0.693781,0.697544,3.25229
1,0.693737,0.575576,2.898651
2,0.000576,0.458192,1.986979
3,0.194953,0.470199,2.272075
4,0.031775,0.026546,0.231178


In [3]:
# Pull each column out of the DataFrame as a NumPy array.
# X and Y are the independent variables and Z is the dependent variable;
# the model being fitted is Z = a0 + a1*X + a2*Y.
X, Y, Z = (data[column].values for column in ("X", "Y", "Z"))

In [4]:
# Gradient descent setup.
# Every iteration evaluates the gradient of the cost function J(a0, a1, a2) and
# moves each coefficient against its partial derivative:
#     a_k := a_k - learning_rate * dJ/da_k
# The learning rate is fixed at 0.001 and the number of iterations at 7000.

l = len(X)  # number of samples

# Design matrix: a leading column of ones multiplies the intercept a0 (which has
# no corresponding feature), followed by the X column and the Y column.
X0 = np.vstack((np.ones(l), X, Y)).T

# Starting coefficients [a0, a1, a2]: the intercept, X's coefficient and Y's coefficient.
Coefficient = np.zeros(3, dtype=int)

# Actual (observed) values of Z, copied into a fresh array.
Y0 = np.array(Z)
X0

array([[1.00000000e+00, 6.93780796e-01, 6.97543511e-01],
       [1.00000000e+00, 6.93737070e-01, 5.75575902e-01],
       [1.00000000e+00, 5.75595955e-04, 4.58192235e-01],
       ...,
       [1.00000000e+00, 1.53260958e-01, 4.28193331e-01],
       [1.00000000e+00, 6.04550350e-01, 8.62078270e-01],
       [1.00000000e+00, 2.12577119e-01, 1.15651970e-01]])

In [5]:
## Cost function (m = number of samples, h(x_i) = x_i . C is the prediction):
#  J(C) = (1 / (2m)) * sum_i (h(x_i) - y_i)^2
## Its gradient is the vector of partial derivatives of J:
#  grad J(C) = (1 / m) * sum_i (h(x_i) - y_i) * x_i
## Each iteration moves the coefficients one step against this gradient.
def gradient_descent(X, Y, C, learning_rate, iterations):
    """Fit linear-regression coefficients with batch gradient descent.

    Parameters
    ----------
    X : array whose rows are samples and whose columns are features; it must
        support ``X.dot(C)`` and ``X.T.dot(residuals)`` (the notebook passes a
        design matrix whose first column is all ones for the intercept).
    Y : array of observed target values, one per row of ``X``.
    C : array of starting coefficients, one per column of ``X``. It is not
        modified in place; every update builds a new array.
    learning_rate : step size applied to the gradient on each update.
    iterations : number of update steps to perform.

    Returns
    -------
    (C, iteration) : the coefficients after the final update, and the
        zero-based index of the last iteration executed (``iterations - 1``).
        When ``iterations`` is zero or negative no update is made: ``C`` is
        returned unchanged and ``iteration`` is ``-1``.
    """
    l = len(Y)
    # Pre-bind the loop variable: with iterations <= 0 the loop body never runs,
    # and the return statement would otherwise raise UnboundLocalError.
    iteration = -1
    for iteration in range(iterations):
        H = X.dot(C)  # predictions of the current model for every sample
        delta_x = H - Y  # residuals: predicted value minus actual value
        gradient = X.T.dot(delta_x) / l  # mean gradient of J over all samples
        C = C - learning_rate * gradient  # step against the gradient
    return C, iteration

In [6]:


# Run 7000 gradient-descent iterations with a learning rate of 0.001.
# NOTE: Coefficient is both the starting point and the result of this call, so
# re-running this cell resumes from the previously fitted values rather than
# from the initial coefficients.
Coefficient, iteration = gradient_descent(X0, Y0, Coefficient, 0.001, 7000)

# Coefficient holds [a0 (intercept), a1 (coefficient of X), a2 (coefficient of Y)];
# iteration is the zero-based index of the last iteration that ran.
# A single pre-formatted string is printed so this line is valid, and produces
# the same output, under both Python 2 and Python 3.
print('Epoch # {} {}'.format(iteration, Coefficient))



Epoch # 6999 [1.20456537 0.76637821 2.04944651]
