In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the comma-separated samples; column names are supplied explicitly
# through `names` rather than taken from the file.
data = pd.read_csv(
    'linear-regression.txt',
    names=["X", "Y", "Z"],
)
print(data.shape)
# Preview the first rows as the cell's displayed result.
data.head()

(3000, 3)


Unnamed: 0,X,Y,Z
0,0.693781,0.697544,3.25229
1,0.693737,0.575576,2.898651
2,0.000576,0.458192,1.986979
3,0.194953,0.470199,2.272075
4,0.031775,0.026546,0.231178


In [3]:
# Model: Z = a0 + a1*X + a2*Y
# X and Y are the independent variables (features); Z is the dependent variable (target).
X, Y, Z = data['X'].values, data['Y'].values, data['Z'].values

In [4]:
# This implementation uses the gradient descent algorithm:
# the cost function J(a0, a1, a2) is evaluated and, on every iteration, each coefficient
# is updated using the partial derivative of J with respect to that coefficient.
# The update rule is: a_j = a_j - learning_rate * dJ/da_j.
# The run below uses a learning rate of 0.1 and 2000 iterations.

# Design matrix: the leading column of ones pairs with the intercept a0 (which has
# no feature of its own); the remaining columns are the X and Y features.
l = len(X)
X0 = np.vstack((np.ones(l), X, Y)).T
# Starting coefficients, all zero: [a0 (intercept), a1 (coefficient of X), a2 (coefficient of Y)].
Coefficient = np.array([0, 0, 0])
# Observed values of Z, i.e. the regression target (copied into a fresh array).
Y0 = np.array(Z)
X0

array([[1.00000000e+00, 6.93780796e-01, 6.97543511e-01],
       [1.00000000e+00, 6.93737070e-01, 5.75575902e-01],
       [1.00000000e+00, 5.75595955e-04, 4.58192235e-01],
       ...,
       [1.00000000e+00, 1.53260958e-01, 4.28193331e-01],
       [1.00000000e+00, 6.04550350e-01, 8.62078270e-01],
       [1.00000000e+00, 2.12577119e-01, 1.15651970e-01]])

In [5]:
## Here is the cost function:
#  J = sigma((h(xi) - yi)^2) / (2m)
## The gradient is the partial derivative of J: gradient = sigma((h(xi) - yi) * xi) / m
## Then we update the coefficients on every iteration.
def gradient_descent(X, Y, C, learning_rate, iterations):
    """Fit linear-regression coefficients with batch gradient descent.

    Each iteration computes the hypothesis ``H = X.dot(C)``, the residual
    ``H - Y`` and the gradient ``X.T.dot(H - Y) / len(Y)``, then moves the
    coefficients against the gradient: ``C = C - learning_rate * gradient``.

    Parameters
    ----------
    X : array-like
        Design matrix with one row per sample; it must be compatible with
        ``X.dot(C)`` and ``X.T.dot(residual)``.
    Y : array-like
        Observed target values, one per row of ``X``.
    C : array-like
        Initial coefficients. The input is never modified in place.
    learning_rate : float
        Step size applied to the gradient.
    iterations : int
        Number of update steps to perform.

    Returns
    -------
    tuple
        ``(C, iteration, squared_error)`` where ``C`` holds the final
        coefficients, ``iteration`` is the zero-based index of the last
        update performed (``iterations - 1``, or ``-1`` if no update ran)
        and ``squared_error`` is the sum of squared residuals evaluated at
        the returned coefficients.
    """
    # Accept plain lists as well as ndarrays; ndarrays pass through unchanged.
    X = np.asarray(X)
    Y = np.asarray(Y)
    C = np.asarray(C)
    l = len(Y)

    # Pre-bind the loop variable so a zero-iteration call has a defined result
    # instead of failing on an unbound name at the return statement.
    iteration = -1
    for iteration in range(iterations):
        H = X.dot(C)                      # hypothesis (predicted) values
        delta_x = H - Y                   # residual: prediction minus observation
        gradient = X.T.dot(delta_x) / l   # partial derivatives of the cost
        C = C - learning_rate * gradient  # step against the gradient (new array)

    # Re-evaluate the residual with the final coefficients so the reported
    # error describes the coefficients actually returned, not the ones from
    # before the last update.
    delta_x = X.dot(C) - Y
    return C, iteration, np.sum(delta_x**2)

In [6]:


# Run 2000 iterations of gradient descent with a learning rate of 0.1.
Coefficients, iteration, square_error = gradient_descent(X0, Y0, Coefficient, 0.1, 2000)

# Coefficients holds [intercept a0, coefficient of X a1, coefficient of Y a2].
# Printing one pre-formatted string produces the same text under Python 2 and
# Python 3, whereas the old print statement is a SyntaxError on Python 3.
print('Epoch # {} {} {}'.format(iteration, Coefficients, square_error))



0 [0.25454677 0.134647   0.15929597] 23689.97308835832
1 [0.4690072  0.24829533 0.29731865] 17684.628593839654
2 [0.64949364 0.34415149 0.41728713] 13361.060319231892
3 [0.80118745 0.42493348 0.52193034] 10246.094073651542
4 [0.92848076 0.49294524 0.61356174] 7999.71565315346
5 [1.03509672 0.55013973 0.69414256] 6377.596705554794
6 [1.12419136 0.59817233 0.76533549] 5204.166644016832
7 [1.19843998 0.63844622 0.8285501 ] 4353.262718387107
8 [1.26011037 0.67215077 0.88498144] 3734.2255833693034
9 [1.31112489 0.70029412 0.93564268] 3281.907880469439
10 [1.35311306 0.72373077 0.98139283] 2949.4946252385757
11 [1.38745618 0.74318498 1.02296019] 2703.3440957158737
12 [1.41532513 0.75927062 1.0609623 ] 2519.280608422408
13 [1.43771242 0.77250798 1.09592279] 2379.930588693527
14 [1.45545935 0.78333802 1.12828565] 2272.8083292900988
15 [1.46927907 0.79213446 1.15842742] 2188.940458457695
16 [1.47977605 0.79921402 1.18666741] 2121.8775127046283
17 [1.48746268 0.8048451  1.21327644] 2066.98367448

189 [0.64726259 0.78456729 3.09003294] 341.5530434211431
190 [0.64393951 0.78527075 3.09564404] 338.9479420256995
191 [0.6406338  0.7859781  3.10121826] 336.3744712394471
192 [0.63734538 0.78668924 3.10675587] 333.83222733593107
193 [0.63407416 0.7874041  3.11225712] 331.32081203295576
194 [0.63082004 0.78812258 3.11772226] 328.8398324150696
195 [0.62758294 0.7888446  3.12315154] 326.3889008572079
196 [0.62436277 0.7895701  3.12854521] 323.96763494947606
197 [0.62115944 0.79029897 3.13390352] 321.5756574230552
198 [0.61797286 0.79103115 3.13922672] 319.21259607721265
199 [0.61480295 0.79176655 3.14451504] 316.87808370740197
200 [0.61164962 0.7925051  3.14976872] 314.5717580344319
201 [0.60851279 0.79324672 3.15498802] 312.2932616346915
202 [0.60539237 0.79399133 3.16017316] 310.04224187141233
203 [0.60228827 0.79473886 3.16532438] 307.8183508269543
204 [0.59920041 0.79548924 3.17044192] 305.621245236096
205 [0.59612871 0.79624239 3.17552601] 303.4505864203171
206 [0.59307308 0.79699824

476 [0.15377974 0.98360853 3.82903439] 127.04366430273171
477 [0.15304732 0.98407243 3.82996328] 126.94599706568694
478 [0.15231877 0.98453451 3.83088664] 126.84939309793023
479 [0.15159406 0.9849948  3.83180449] 126.75384064241703
480 [0.15087318 0.98545328 3.83271688] 126.65932807510335
481 [0.1501561  0.98590996 3.83362384] 126.56584390339233
482 [0.1494428  0.98636485 3.83452539] 126.47337676460018
483 [0.14873327 0.98681795 3.83542158] 126.38191542444004
484 [0.14802747 0.98726927 3.83631244] 126.29144877552521
485 [0.14732541 0.98771881 3.837198  ] 126.20196583588981
486 [0.14662704 0.98816657 3.83807829] 126.11345574752767
487 [0.14593236 0.98861256 3.83895335] 126.02590777494919
488 [0.14524135 0.98905678 3.83982321] 125.93931130375526
489 [0.14455398 0.98949924 3.84068791] 125.85365583922885
490 [0.14387024 0.98993994 3.84154746] 125.76893100494345
491 [0.1431901  0.99037889 3.84240191] 125.68512654138837
492 [0.14251356 0.99081608 3.84325129] 125.60223230461087
493 [0.1418405

823 [0.03719802 1.06684837 3.96752275] 118.19947419517136
824 [0.03708164 1.06694321 3.9676493 ] 118.19714249105081
825 [0.03696588 1.06703758 3.96777514] 118.19483553827797
826 [0.03685073 1.06713148 3.96790028] 118.19255307361647
827 [0.03673619 1.06722491 3.96802473] 118.19029483663792
828 [0.03662225 1.06731788 3.96814849] 118.18806056969193
829 [0.03650892 1.06741039 3.96827157] 118.18585001787615
830 [0.03639619 1.06750245 3.96839396] 118.18366292900697
831 [0.03628406 1.06759404 3.96851567] 118.1814990535903
832 [0.03617252 1.06768518 3.9686367 ] 118.17935814479279
833 [0.03606157 1.06777587 3.96875707] 118.17723995841337
834 [0.03595121 1.06786611 3.96887676] 118.17514425285492
835 [0.03584144 1.0679559  3.9689958 ] 118.17307078909656
836 [0.03573224 1.06804524 3.96911417] 118.17101933066584
837 [0.03562362 1.06813414 3.96923189] 118.16898964361167
838 [0.03551558 1.0682226  3.96934896] 118.16698149647715
839 [0.03540811 1.06831062 3.96946537] 118.16499466027288
840 [0.03530121

1177 [0.01858105 1.08250516 3.98728181] 117.98463055704605
1178 [0.0185633  1.08252066 3.98730006] 117.9845768372964
1179 [0.01854566 1.08253609 3.98731821] 117.98452368617055
1180 [0.0185281  1.08255143 3.98733626] 117.98447109764848
1181 [0.01851064 1.0825667  3.98735421] 117.98441906577386
1182 [0.01849327 1.08258188 3.98737207] 117.98436758465348
1183 [0.01847599 1.08259699 3.98738983] 117.98431664845653
1184 [0.01845881 1.08261202 3.9874075 ] 117.98426625141397
1185 [0.01844171 1.08262697 3.98742507] 117.98421638781781
1186 [0.01842471 1.08264184 3.98744255] 117.98416705202058
1187 [0.0184078  1.08265663 3.98745993] 117.98411823843458
1188 [0.01839097 1.08267135 3.98747722] 117.98406994153122
1189 [0.01837424 1.08268599 3.98749442] 117.98402215584053
1190 [0.01835759 1.08270055 3.98751152] 117.98397487595048
1191 [0.01834104 1.08271504 3.98752853] 117.98392809650632
1192 [0.01832457 1.08272946 3.98754545] 117.98388181220997
1193 [0.01830818 1.08274379 3.98756228] 117.9838360178195

1522 [0.01576955 1.08498509 3.9901507 ] 117.97968412349843
1523 [0.01576671 1.08498762 3.99015356] 117.9796827557077
1524 [0.01576389 1.08499014 3.99015641] 117.97968140239092
1525 [0.01576109 1.08499264 3.99015924] 117.97968006339491
1526 [0.0157583  1.08499513 3.99016205] 117.97967873856811
1527 [0.01575553 1.0849976  3.99016486] 117.9796774277606
1528 [0.01575277 1.08500007 3.99016764] 117.97967613082403
1529 [0.01575003 1.08500252 3.99017041] 117.97967484761159
1530 [0.0157473  1.08500495 3.99017317] 117.97967357797805
1531 [0.01574458 1.08500738 3.99017591] 117.97967232177976
1532 [0.01574188 1.08500979 3.99017864] 117.9796710788745
1533 [0.01573919 1.08501218 3.99018136] 117.97966984912162
1534 [0.01573652 1.08501457 3.99018405] 117.97966863238193
1535 [0.01573386 1.08501694 3.99018674] 117.9796674285177
1536 [0.01573122 1.0850193  3.99018941] 117.97966623739273
1537 [0.01572859 1.08502165 3.99019207] 117.9796650588722
1538 [0.01572597 1.08502399 3.99019471] 117.9796638928227
153

1782 [0.01536936 1.08534311 3.99055404] 117.97956299803582
1783 [0.01536865 1.08534375 3.99055476] 117.9795629119775
1784 [0.01536794 1.08534438 3.99055547] 117.97956282682983
1785 [0.01536724 1.08534501 3.99055617] 117.97956274258317
1786 [0.01536654 1.08534564 3.99055688] 117.97956265922798
1787 [0.01536584 1.08534627 3.99055758] 117.97956257675483
1788 [0.01536515 1.08534689 3.99055827] 117.97956249515437
1789 [0.01536446 1.08534751 3.99055896] 117.9795624144174
1790 [0.01536378 1.08534812 3.99055965] 117.97956233453476
1791 [0.0153631  1.08534873 3.99056034] 117.9795622554974
1792 [0.01536242 1.08534934 3.99056102] 117.9795621772964
1793 [0.01536175 1.08534995 3.9905617 ] 117.9795620999229
1794 [0.01536108 1.08535055 3.99056237] 117.97956202336813
1795 [0.01536041 1.08535115 3.99056304] 117.97956194762344
1796 [0.01535974 1.08535174 3.99056371] 117.97956187268026
1797 [0.01535908 1.08535233 3.99056437] 117.97956179853011
1798 [0.01535843 1.08535292 3.99056503] 117.97956172516459
17

In [7]:
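# Result recorded from an earlier run of 7000 iterations (presumably with learning rate 0.001 -- confirm):
# Epoch # 6999 [1.20456537 0.76637821 2.04944651] 1068.1817282174793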