In [3]:
import numpy as np
import pandas as pd

Linear regression generally has the form $Y_{i} = \theta_{0} + \theta_{1} x_{1} + \theta_{2} x_{2} + \dots$ <br>
There are several ways to find the coefficients of the regression: <br>
1. Linear Algebra: $\hat{\theta} = (X^{T}X)^{-1}X^{T}Y$ (when $X^{T}X$ is invertible) <br>
2. Gradient Descent: In this case, we need to write out the loss function and try to minimize the loss. <br>
$\hspace{30mm}$ $F(\theta)$ = Loss Function = SE = $ \sum^{n}_{i=1} (Y_{i} - \hat{Y}_{i})^{2}$ <br>

In [4]:
class Linear_Regression():
    """
        Linear regression trained by batch gradient descent with a
        self-adaptive learning rate.

        Attributes set by fit():
                    X: 2-D float array of training features (a leading column
                       of 1's is prepended when intercept is True).
                    y: 1-D float array of training targets.
                    coef: 1-D array of coefficients (intercept first when enabled).
                    grad_coef: 1-D array, gradient computed by gradient().
                    loss: list of the candidate-step loss of every completed iteration.
    """
    def __init__(self, alpha = 1e-10 , num_iter = 10000, early_stop = 1e-50, intercept = True, init_weight = None):
        """
            Store the hyper-parameters of the model.

            attributes:
                        alpha: Learning Rate, default 1e-10. It is adapted in place
                               during training, so its value changes after fit().
                        num_iter: Maximum number of gradient descent iterations.
                        early_stop: Threshold on the absolute / relative change of
                                    the loss below which training stops.
                        intercept: Bool, If we are going to fit a intercept, default True.
                        init_weight: Matrix (n x 1), kept for testing. It is stored but
                                     NOT used by fit(), which always draws random
                                     starting coefficients.
        """

        self.model_name = 'Linear Regression'

        self.alpha = alpha
        self.num_iter = num_iter
        self.early_stop = early_stop
        self.intercept = intercept
        self.init_weight = init_weight  ### For testing correctness.

    def _as_feature_matrix(self, X):
        """
            Convert X to a 2-D float array and prepend the column of 1's
            when the model fits an intercept.
        """
        X = np.atleast_2d(np.asarray(X, dtype=float))
        if X.ndim != 2:
            raise ValueError('X must be at most 2-dimensional, got %d dimensions' % X.ndim)
        if self.intercept:
            X = np.hstack([np.ones((X.shape[0], 1)), X])
        return X

    def _half_mse(self, coef):
        """
            Mean of half the squared residuals of the training data for coef.
        """
        return np.mean(0.5 * np.square(self.y - self.X.dot(coef)))

    def fit(self, X_train, y_train):
        """
            Save the datasets in our model, and perform gradient descent.

            Parameter:
                X_train: Matrix, 2-D array or DataFrame. Input feature matrix,
                         one row per sample.
                y_train: 1-D sequence, row vector or column vector with one
                         target value per sample (it is flattened).

            Raises:
                ValueError: if the number of targets differs from the number of rows.
        """

        self.X = self._as_feature_matrix(X_train)
        # Flatten the target so that (n,), (1, n) and (n, 1) inputs all behave the
        # same; a column vector would otherwise broadcast against the predictions.
        self.y = np.asarray(y_train, dtype=float).ravel()

        if self.X.shape[0] != self.y.shape[0]:
            raise ValueError('X_train has %d rows but y_train has %d values'
                             % (self.X.shape[0], self.y.shape[0]))

        # Random start, uniform in [-1, 1], one coefficient per column.
        self.coef = np.random.uniform(-1, 1, self.X.shape[1])
        self.gradient_descent()

    def gradient(self):
        """
            Helper function to calculate the gradient respect to coefficient.

            Stores -(y - X.coef)^T X, the gradient of half the summed squared
            error, in self.grad_coef.
        """
        residual = self.y - self.X.dot(self.coef)
        self.grad_coef = -residual.dot(self.X)

    def gradient_descent(self):

        """
            Training function

            Every iteration proposes coef - alpha * gradient and compares the
            loss before and after the step:
                a. If the loss did not increase, the step is accepted and the
                   learning rate grows by a factor 1.3.
                b. Otherwise the step is rejected and the learning rate shrinks
                   by a factor 0.9.
            The loss of the proposed step is appended to self.loss either way.

            Return:
                self
        """

        self.loss = []

        for i in range(self.num_iter):

            self.gradient()

            temp_coef = self.coef - self.alpha * self.grad_coef

            pre_error = self._half_mse(self.coef)
            current_error = self._half_mse(temp_coef)

            # Exact fit: nothing left to improve, and the relative test below
            # would evaluate 0 / 0.
            if pre_error == 0:
                return self

            ### This is the early stop, don't modify fllowing three lines.
            if (abs(pre_error - current_error) < self.early_stop) | (abs(abs(pre_error - current_error) / pre_error) < self.early_stop):
                self.coef = temp_coef
                return self

            if current_error <= pre_error:
                self.alpha *= 1.3
                self.coef = temp_coef
            else:
                self.alpha *= 0.9

            self.loss.append(current_error)

            if i % 10000 == 0:
                print('Iteration: ' +  str(i))
                print('Coef: '+ str(self.coef))
                print('Loss: ' + str(current_error))
        return self

    def ind_predict(self, x: list):
        """
            Predict the value based on its feature vector x.

            Parameter:
            x: Matrix, array or list. Input feature point. It may be the raw
               feature vector (the leading 1 is added when the model has an
               intercept) or a vector that already has one entry per coefficient.

            Return:
                result: prediction of given data point

            Raises:
                ValueError: if the length of x matches neither form.
        """
        coef = np.asarray(self.coef, dtype=float).ravel()
        x = np.asarray(x, dtype=float).ravel()

        # Raw feature vector: add the constant term that multiplies the intercept.
        if self.intercept and x.size == coef.size - 1:
            x = np.concatenate(([1.0], x))

        if x.size != coef.size:
            raise ValueError('feature vector has %d values but the model has %d coefficients'
                             % (x.size, coef.size))

        result = x.dot(coef)

        return result

    def predict(self, X):
        """
            X is a matrix or 2-D numpy array, represnting testing instances.
            Each testing instance is a feature vector.

            Parameter:
            X: Matrix, array, DataFrame or list. Input feature points, one row each.

            Return:
                ret: list with the prediction of every row of X

            Raises:
                ValueError: if the number of columns does not match the coefficients.
        """

        X = self._as_feature_matrix(X)
        n_coef = np.asarray(self.coef).size
        # Checked here so that ind_predict cannot mistake a too-short row for a
        # raw feature vector and silently add a second constant term.
        if X.shape[1] != n_coef:
            raise ValueError('X provides %d columns (including the intercept column, if any) '
                             'but the model has %d coefficients' % (X.shape[1], n_coef))
        ret = [self.ind_predict(row) for row in X]
        return ret
        
        

In [5]:
# Synthetic, noise-free data on the line y = 30 * x + 20.
# X is a single-feature column (one row per sample); y is the flat target vector.
X = np.arange(1, 1000, 5).reshape(-1, 1)
y = 30 * X.ravel() + 20

In [6]:
# Same hyper-parameters and init_weight value as before; np.asmatrix is the
# supported spelling of the np.mat alias, which NumPy 2.0 removed.
clf = Linear_Regression(alpha = 1, num_iter = 10000000, init_weight= np.asmatrix([15,25]).T)
clf.fit(X,y)

Iteration: 0
Coef: [-0.78310295 -0.50009803]
Loss: 6.812143356038825e+23
Iteration: 10000
Coef: [-0.64905035 30.03108386]
Loss: 53.538141211590826
Iteration: 20000
Coef: [-0.55961158 30.0308198 ]
Loss: 53.07629495438401
Iteration: 30000
Coef: [-0.47055681 30.03083743]
Loss: 52.61806585107836
Iteration: 40000
Coef: [-0.38189466 30.03054797]
Loss: 52.16218568435227
Iteration: 50000
Coef: [-0.29358143 30.03040865]
Loss: 51.71106178356396
Iteration: 60000
Coef: [-0.20568492 30.03042373]
Loss: 51.263897939780485
Iteration: 70000
Coef: [-0.11816604 30.0301483 ]
Loss: 50.82206832356986
Iteration: 80000
Coef: [-0.03102559 30.0301694 ]
Loss: 50.38244603988198
Iteration: 90000
Coef: [ 0.05573364 30.02989612]
Loss: 49.94610183488889
Iteration: 100000
Coef: [ 0.14215128 30.02975871]
Loss: 49.51420480818213
Iteration: 110000
Coef: [ 0.22816112 30.02976933]
Loss: 49.086082042474644
Iteration: 120000
Coef: [ 0.31380168 30.02950164]
Loss: 48.662999568623036
Iteration: 130000
Coef: [ 0.3990712  30.0295

Iteration: 1110000
Coef: [ 7.1909436  30.01928708]
Loss: 20.602087195168163
Iteration: 1120000
Coef: [ 7.24642505 30.01910928]
Loss: 20.424005823392807
Iteration: 1130000
Coef: [ 7.30166448 30.01912081]
Loss: 20.247105499893504
Iteration: 1140000
Coef: [ 7.35666654 30.01894578]
Loss: 20.072025323265972
Iteration: 1150000
Coef: [ 7.41144754 30.01886663]
Loss: 19.898323052800876
Iteration: 1160000
Coef: [ 7.46597269 30.01887076]
Loss: 19.726776016822647
Iteration: 1170000
Coef: [ 7.52026327 30.01870035]
Loss: 19.556300043341817
Iteration: 1180000
Coef: [ 7.57431469 30.01870301]
Loss: 19.38679424476352
Iteration: 1190000
Coef: [ 7.62813586 30.01854439]
Loss: 19.219217898191197
Iteration: 1200000
Coef: [ 7.68174298 30.01845658]
Loss: 19.053077947882922
Iteration: 1210000
Coef: [ 7.73509554 30.01846362]
Loss: 18.888248241191036
Iteration: 1220000
Coef: [ 7.78822065 30.01829923]
Loss: 18.72549739852908
Iteration: 1230000
Coef: [ 7.84111212 30.01830101]
Loss: 18.563197049490476
Iteration: 124

Iteration: 2200000
Coef: [12.01969568 30.01196254]
Loss: 7.9965498174646426
Iteration: 2210000
Coef: [12.05426246 30.01196773]
Loss: 7.92775219649323
Iteration: 2220000
Coef: [12.08867801 30.01185461]
Loss: 7.8591108621273
Iteration: 2230000
Coef: [12.12294337 30.01185705]
Loss: 7.791002233866338
Iteration: 2240000
Coef: [12.15706392 30.01174785]
Loss: 7.723757407300112
Iteration: 2250000
Coef: [12.19104336 30.01170827]
Loss: 7.656822689877082
Iteration: 2260000
Coef: [12.22486814 30.01170849]
Loss: 7.590902535019036
Iteration: 2270000
Coef: [12.25854494 30.0116014 ]
Loss: 7.525212490921802
Iteration: 2280000
Coef: [12.29207477 30.01160207]
Loss: 7.460020992562451
Iteration: 2290000
Coef: [12.32546279 30.01149448]
Loss: 7.395655218457241
Iteration: 2300000
Coef: [12.35871298 30.01145605]
Loss: 7.331552780932515
Iteration: 2310000
Coef: [12.39181178 30.01145854]
Loss: 7.268465318899305
Iteration: 2320000
Coef: [12.42476438 30.01135525]
Loss: 7.2054697216920625
Iteration: 2330000
Coef: [

Iteration: 3300000
Coef: [15.04964408 30.00742065]
Loss: 3.0770642929680965
Iteration: 3310000
Coef: [15.07108685 30.0074254 ]
Loss: 3.050612895412962
Iteration: 3320000
Coef: [15.09243423 30.00735799]
Loss: 3.0241215125051593
Iteration: 3330000
Coef: [15.11369166 30.00735874]
Loss: 2.997999474012364
Iteration: 3340000
Coef: [15.13485605 30.00728992]
Loss: 2.972068661436087
Iteration: 3350000
Coef: [15.15593536 30.00726169]
Loss: 2.9463429541459165
Iteration: 3360000
Coef: [15.17691769 30.007267  ]
Loss: 2.9210369617417045
Iteration: 3370000
Coef: [15.19780682 30.0071988 ]
Loss: 2.895663996793338
Iteration: 3380000
Coef: [15.21860717 30.00719913]
Loss: 2.8706238094704077
Iteration: 3390000
Coef: [15.23931722 30.00713434]
Loss: 2.8458027367191825
Iteration: 3400000
Coef: [15.259944   30.00710596]
Loss: 2.8211763609150915
Iteration: 3410000
Coef: [15.28047609 30.00711039]
Loss: 2.7969345249057476
Iteration: 3420000
Coef: [15.30091679 30.00704406]
Loss: 2.772652478513958
Iteration: 343000

Iteration: 4390000
Coef: [16.91582739 30.00464347]
Loss: 1.1943768057334587
Iteration: 4400000
Coef: [16.9291863  30.00460155]
Loss: 1.1840575355514054
Iteration: 4410000
Coef: [16.94248696 30.00460356]
Loss: 1.173843885478301
Iteration: 4420000
Coef: [16.95573039 30.00456175]
Loss: 1.1636963151377757
Iteration: 4430000
Coef: [16.96891646 30.00456651]
Loss: 1.153637386489028
Iteration: 4440000
Coef: [16.9820493 30.0045441]
Loss: 1.1436383636504213
Iteration: 4450000
Coef: [16.99512095 30.00450418]
Loss: 1.1337516422211091
Iteration: 4460000
Coef: [17.00813605 30.00450394]
Loss: 1.1239700775252537
Iteration: 4470000
Coef: [17.02109523 30.00446452]
Loss: 1.1142547782562147
Iteration: 4480000
Coef: [17.03399828 30.00446789]
Loss: 1.1046246701161462
Iteration: 4490000
Coef: [17.04684914 30.00444667]
Loss: 1.0950548844588193
Iteration: 4500000
Coef: [17.05964028 30.00440739]
Loss: 1.0855878522675995
Iteration: 4510000
Coef: [17.07237596 30.00440775]
Loss: 1.076225804764797
Iteration: 452000

Iteration: 5480000
Coef: [18.07849468 30.00289315]
Loss: 0.4636067976988376
Iteration: 5490000
Coef: [18.08682035 30.00288067]
Loss: 0.459595703492724
Iteration: 5500000
Coef: [18.09510697 30.00285511]
Loss: 0.4556229682634268
Iteration: 5510000
Coef: [18.10335772 30.00285572]
Loss: 0.4516937117445467
Iteration: 5520000
Coef: [18.11157295 30.00282887]
Loss: 0.44779316305397787
Iteration: 5530000
Coef: [18.11975208 30.00283091]
Loss: 0.4439115450872519
Iteration: 5540000
Coef: [18.12789903 30.00281892]
Loss: 0.44007146398340347
Iteration: 5550000
Coef: [18.13600773 30.00279332]
Loss: 0.43626835502436284
Iteration: 5560000
Coef: [18.14408108 30.00279374]
Loss: 0.4325016074075246
Iteration: 5570000
Coef: [18.15212002 30.00276901]
Loss: 0.42876579047418495
Iteration: 5580000
Coef: [18.16012356 30.00276971]
Loss: 0.4250521159182018
Iteration: 5590000
Coef: [18.16809563 30.00275812]
Loss: 0.42137589599841246
Iteration: 5600000
Coef: [18.1760303  30.00273356]
Loss: 0.41773438690051307
Iterati

Iteration: 6560000
Coef: [18.79765511 30.00181026]
Loss: 0.18152232025345888
Iteration: 6570000
Coef: [18.80286309 30.00179337]
Loss: 0.17995478409822988
Iteration: 6580000
Coef: [18.80804814 30.00179477]
Loss: 0.17839554187603365
Iteration: 6590000
Coef: [18.81321277 30.00178728]
Loss: 0.17685256887923406
Iteration: 6600000
Coef: [18.818353   30.00177128]
Loss: 0.175327680696032
Iteration: 6610000
Coef: [18.8234712 30.0017715]
Loss: 0.17381125960705884
Iteration: 6620000
Coef: [18.82856719 30.00175554]
Loss: 0.17230802325858194
Iteration: 6630000
Coef: [18.83364097 30.00175567]
Loss: 0.17081594373592204
Iteration: 6640000
Coef: [18.8386949  30.00174941]
Loss: 0.16934026977896818
Iteration: 6650000
Coef: [18.84372462 30.00173354]
Loss: 0.16787476898480283
Iteration: 6660000
Coef: [18.84873294 30.00173332]
Loss: 0.16642698134404227
Iteration: 6670000
Coef: [18.85371952 30.00171774]
Loss: 0.16498832554971224
Iteration: 6680000
Coef: [18.85868436 30.00171828]
Loss: 0.1635598979627377
Iter

Iteration: 7630000
Coef: [19.24438202 30.00113809]
Loss: 0.07169243277188218
Iteration: 7640000
Coef: [19.24765606 30.00113342]
Loss: 0.0710723918837545
Iteration: 7650000
Coef: [19.25091454 30.001123  ]
Loss: 0.07045728631446607
Iteration: 7660000
Coef: [19.25415927 30.00112361]
Loss: 0.06985101230952585
Iteration: 7670000
Coef: [19.25738957 30.00111331]
Loss: 0.0692449755359827
Iteration: 7680000
Coef: [19.26060611 30.00111333]
Loss: 0.06864639922602854
Iteration: 7690000
Coef: [19.26380985 30.00110889]
Loss: 0.06805293030191761
Iteration: 7700000
Coef: [19.26699851 30.00109848]
Loss: 0.06746658094487716
Iteration: 7710000
Coef: [19.27017347 30.00109946]
Loss: 0.06688355365227162
Iteration: 7720000
Coef: [19.27333443 30.00108935]
Loss: 0.06630340822982293
Iteration: 7730000
Coef: [19.27648196 30.00108944]
Loss: 0.06573019050252882
Iteration: 7740000
Coef: [19.27961694 30.00108513]
Loss: 0.06516195953075081
Iteration: 7750000
Coef: [19.28273703 30.00107527]
Loss: 0.06459815383524649
I

Iteration: 8700000
Coef: [19.52512974 30.00071178]
Loss: 0.028314840967803123
Iteration: 8710000
Coef: [19.52718663 30.00071212]
Loss: 0.028070817158993844
Iteration: 8720000
Coef: [19.52923451 30.00070553]
Loss: 0.02782776796274784
Iteration: 8730000
Coef: [19.53127362 30.000706  ]
Loss: 0.027587172962010745
Iteration: 8740000
Coef: [19.53330444 30.00070248]
Loss: 0.027348282858139013
Iteration: 8750000
Coef: [19.53532593 30.00069643]
Loss: 0.027112727105924207
Iteration: 8760000
Coef: [19.53733857 30.00069665]
Loss: 0.026878095394471363
Iteration: 8770000
Coef: [19.5393425  30.00069049]
Loss: 0.026645488319232618
Iteration: 8780000
Coef: [19.54133783 30.00069081]
Loss: 0.02641519863537807
Iteration: 8790000
Coef: [19.54332507 30.00068739]
Loss: 0.02618647484004495
Iteration: 8800000
Coef: [19.54530315 30.00068142]
Loss: 0.025961014568726747
Iteration: 8810000
Coef: [19.54727261 30.00068174]
Loss: 0.025736312626232678
Iteration: 8820000
Coef: [19.54923352 30.00067557]
Loss: 0.02551362

Iteration: 9760000
Coef: [19.70026786 30.00045123]
Loss: 0.011280681293946786
Iteration: 9770000
Coef: [19.70156614 30.00044721]
Loss: 0.011183194864081217
Iteration: 9780000
Coef: [19.70285875 30.00044748]
Loss: 0.011086443396906629
Iteration: 9790000
Coef: [19.70414621 30.00044547]
Loss: 0.01099053259478486
Iteration: 9800000
Coef: [19.7054277  30.00044126]
Loss: 0.010896054561845822
Iteration: 9810000
Coef: [19.7067035  30.00044145]
Loss: 0.010801402527678148
Iteration: 9820000
Coef: [19.70797391 30.00043767]
Loss: 0.010708080327339213
Iteration: 9830000
Coef: [19.70923878 30.00043781]
Loss: 0.010615446365376029
Iteration: 9840000
Coef: [19.71049859 30.0004359 ]
Loss: 0.010523636494929411
Iteration: 9850000
Coef: [19.71175258 30.00043174]
Loss: 0.010433219907372816
Iteration: 9860000
Coef: [19.713001   30.00043201]
Loss: 0.010342562443904548
Iteration: 9870000
Coef: [19.71424414 30.00042821]
Loss: 0.010253212090917134
Iteration: 9880000
Coef: [19.71548186 30.00042852]
Loss: 0.010164

In [8]:
def min_max_normaliz(lst):
    """
    Helper function for normalize for faster training.

    Rescales the values linearly so that the minimum maps to 0 and the
    maximum maps to 1.

    Parameter:
        lst: array-like of numbers.

    Return:
        The rescaled values. When every value is identical (zero range) the
        result is all zeros instead of NaN from a 0 / 0 division.
    """
    minimum = np.min(lst)
    value_range = np.max(lst) - minimum
    shifted = lst - minimum

    # Constant input: there is no spread to rescale, so avoid dividing by zero.
    if np.ndim(value_range) == 0 and value_range == 0:
        return shifted * 0.0

    return shifted / value_range

### We generate some easy data for testing. The fitted line should be $Y = 30 X + 20$.

In [9]:
# Synthetic, noise-free data on the line y = 30 * x + 20.
# X is a single-feature column (one row per sample); y is the flat target vector.
X = np.arange(1, 1000, 5).reshape(-1, 1)
y = 30 * X.ravel() + 20

#### Do NOT modify the following line, just run it when you are done.  You can also try different initialization, you will notice different coef at the end.

In [10]:
# Hyper-parameters and init_weight value are unchanged; only the removed np.mat
# alias (gone since NumPy 2.0) is spelled np.asmatrix so the cell still runs.
clf = Linear_Regression(alpha = 1, num_iter = 10000000, init_weight= np.asmatrix([15,25]).T)
clf.fit(X,y)

Iteration: 0
Coef: [ 0.42098047 -0.74923399]
Loss: 6.922957560642464e+23
Iteration: 10000
Coef: [ 0.5502826  30.02928257]
Loss: 47.4996584138212
Iteration: 20000
Coef: [ 0.63452696 30.0290271 ]
Loss: 47.089956401100125
Iteration: 30000
Coef: [ 0.71840969 30.02904733]
Loss: 46.68332213380334
Iteration: 40000
Coef: [ 0.80191907 30.02878545]
Loss: 46.278465676570356
Iteration: 50000
Coef: [ 0.88510615 30.02864223]
Loss: 45.87857211055696
Iteration: 60000
Coef: [ 0.96789761 30.02865621]
Loss: 45.481827704173845
Iteration: 70000
Coef: [ 1.05033338 30.02839836]
Loss: 45.08977629266094
Iteration: 80000
Coef: [ 1.1324124  30.02841669]
Loss: 44.69978528920356
Iteration: 90000
Coef: [ 1.21413249 30.02816001]
Loss: 44.31266652231411
Iteration: 100000
Coef: [ 1.29553081 30.02803047]
Loss: 43.929488315075616
Iteration: 110000
Coef: [ 1.37654505 30.02804007]
Loss: 43.549653580926204
Iteration: 120000
Coef: [ 1.45721145 30.02778834]
Loss: 43.17428376974248
Iteration: 130000
Coef: [ 1.53752833 30.0278

KeyboardInterrupt: 

####  As the number of iterations increases, you should notice the coefficients converge to [20, 30]. 
#### The updates may be very slow. Feel free to stop.

In [11]:
# Coefficients after the (interrupted) fit, intercept first: fit() prepends the
# column of 1's because intercept defaults to True.
clf.coef

array([ 3.46593641, 30.02489476])

In [12]:
# Predictions on the training inputs; predict() returns a plain list, so it is
# wrapped in an array for display.
np.array(clf.predict(X))

array([   33.49083117,   183.61530499,   333.73977881,   483.86425263,
         633.98872645,   784.11320027,   934.23767409,  1084.36214791,
        1234.48662173,  1384.61109555,  1534.73556936,  1684.86004318,
        1834.984517  ,  1985.10899082,  2135.23346464,  2285.35793846,
        2435.48241228,  2585.6068861 ,  2735.73135992,  2885.85583374,
        3035.98030756,  3186.10478137,  3336.22925519,  3486.35372901,
        3636.47820283,  3786.60267665,  3936.72715047,  4086.85162429,
        4236.97609811,  4387.10057193,  4537.22504575,  4687.34951957,
        4837.47399338,  4987.5984672 ,  5137.72294102,  5287.84741484,
        5437.97188866,  5588.09636248,  5738.2208363 ,  5888.34531012,
        6038.46978394,  6188.59425776,  6338.71873158,  6488.84320539,
        6638.96767921,  6789.09215303,  6939.21662685,  7089.34110067,
        7239.46557449,  7389.59004831,  7539.71452213,  7689.83899595,
        7839.96346977,  7990.08794359,  8140.2124174 ,  8290.33689122,
      

#### Please try to normalize the X and fit again with normalized X. You should find something interesting. Also think about what you should do for predicting.

##### You can also try this with the wine dataset we used in HW1. Try fitting this model to that dataset with the same features. If you look closely at the updates of the coefficients, what do you find? This could be mentioned in your report. 

In [15]:
from sklearn.linear_model import LinearRegression

In [16]:
import ssl
import urllib
import urllib.request  # `import urllib` alone does not guarantee the request submodule is loaded

url_Wine = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'

# SECURITY NOTE: certificate verification is disabled for this download, as the
# notebook did before. It is now limited to this single request instead of
# replacing ssl's default HTTPS context for the whole process. Prefer a verified
# context (drop the `context` argument) wherever the local certificates work.
unverified_context = ssl._create_unverified_context()

# The `with` block closes the HTTP response once the CSV has been parsed.
with urllib.request.urlopen(url_Wine, context=unverified_context) as response:
    wine = pd.read_csv(response, delimiter=';')

# Two features and the quality score used as the regression target.
X = wine[['density','alcohol']]
y = wine.quality

In [17]:
# Reference model: scikit-learn's LinearRegression fitted on the same features.
lr = LinearRegression()
lr.fit(X, y)

# Sum of squared residuals of the sklearn model on the training data.
sklearn_residuals = lr.predict(X) - y
sum(sklearn_residuals ** 2)

800.667698877433

#### You will notice different coefficients, but the two losses are very close to each other (around 805). In your report, briefly discuss this problem.

In [18]:
# Fit the gradient-descent model on the wine features. intercept is left at its
# default (True), so fit() prepends a column of 1's and learns three coefficients.
clf = Linear_Regression(alpha = 1, num_iter = 5000000)
clf.fit(X,y)

Iteration: 0
Coef: [ 0.28648257  0.29078582 -0.73242488]
Loss: 2593711756702.465
Iteration: 10000
Coef: [0.74186652 0.75252285 0.39747606]
Loss: 0.25266317955751927
Iteration: 20000
Coef: [0.86238171 0.87845194 0.3737943 ]
Loss: 0.2519973588859396
Iteration: 30000
Coef: [0.9031094  0.92409811 0.36569768]
Loss: 0.2519161201603733
Iteration: 40000
Coef: [0.91585542 0.94158892 0.36279308]
Loss: 0.2519059710624162
Iteration: 50000
Coef: [0.91880748 0.94922594 0.36179877]
Loss: 0.25190453565964555
Iteration: 60000
Coef: [0.91832607 0.95340697 0.36145585]
Loss: 0.251904176444995
Iteration: 70000
Coef: [0.91664141 0.95637696 0.3613293 ]
Loss: 0.2519039467244294
Iteration: 80000
Coef: [0.91453661 0.95892422 0.3612932 ]
Loss: 0.2519037338428599
Iteration: 90000
Coef: [0.91228342 0.96132186 0.36127434]
Loss: 0.2519035229335041
Iteration: 100000
Coef: [0.9099787  0.96366862 0.36127191]
Loss: 0.25190331187009035
Iteration: 110000
Coef: [0.90765706 0.96599679 0.36127715]
Loss: 0.2519031020685916
It

KeyboardInterrupt: 

In [19]:
# Sum of squared errors of the gradient-descent model on the training data,
# the same metric computed for the sklearn model above.
sum((clf.predict(X) - y)**2)

805.5669096664781