In [51]:
import numpy as np
import pandas as pd
import warnings

# own implementation (source dir)
from source.coordinate_descent import CoordinateDescent
from source.utils.visualization import visualize_cost

# sklearn
from sklearn.preprocessing import Normalizer
from sklearn.linear_model import Lasso
from sklearn.datasets import load_diabetes, load_boston

## Loading Data

In [62]:
X_diab, y_diab = load_diabetes(return_X_y=True)

y_diab = y_diab.reshape((-1, 1))
X_diab = np.hstack([np.ones(X_diab.shape[0]).reshape((X_diab.shape[0], 1)), X_diab])


# normalization
X_diab = X_diab / (np.linalg.norm(X_diab, axis=0))
y_diab = y_diab / (np.linalg.norm(y_diab, axis=0))

In [63]:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    X_bost, y_bost = load_boston(return_X_y=True)

y_bost = y_bost.reshape((-1, 1))
X_bost = np.hstack([np.ones(X_bost.shape[0]).reshape((X_bost.shape[0], 1)), X_bost])

# normalization
X_bost = X_bost / (np.linalg.norm(X_bost, axis=0))
y_bost = y_bost / (np.linalg.norm(y_bost, axis=0))

***

## Lasso Regression with our implementation of Coordinate Descent algorithm

## 1. Diabetes dataset

### 1a. Greedy index selection method

In [4]:
lambda_ = 0.01
cd_diab = CoordinateDescent(lambda_ = lambda_)
cd_diab.fit(X_diab, y_diab, tol = 10e-6, method='greedy')

  9%|▊         | 87/1000 [00:00<00:00, 1245.75it/s]


In [5]:
visualize_cost(cd_diab.costs)

Lasso Feature Selection

In [6]:
diab_greedy_theta = cd_diab.theta
diab_greedy_theta

array([ 0.89221356,  0.        , -0.04742073,  0.14476481,  0.0788673 ,
       -0.01958458,  0.        , -0.0599571 ,  0.        ,  0.13793761,
        0.01141546])

### 1b. Cyclic index selection method

In [7]:
lambda_ = 0.01
cd_diab = CoordinateDescent(lambda_ = lambda_)
cd_diab.fit(X_diab, y_diab, tol = 10e-6, method='cyclic')

 23%|██▎       | 234/1000 [00:00<00:00, 8689.32it/s]


In [8]:
visualize_cost(cd_diab.costs)

 Lasso Feature Selection

In [9]:
diab_cyclic_theta = cd_diab.theta
diab_cyclic_theta

array([ 0.89221356,  0.        , -0.04742201,  0.14476189,  0.07885744,
       -0.0195965 ,  0.        , -0.05995561,  0.        ,  0.13795199,
        0.01142408])

### 1c. Randomized index selection method

In [13]:
lambda_ = 0.01
cd_diab = CoordinateDescent(lambda_ = lambda_)
cd_diab.fit(X_diab, y_diab, tol = 10e-6, method='randomized')

 29%|██▉       | 291/1000 [00:00<00:00, 6078.73it/s]


In [14]:
visualize_cost(cd_diab.costs)

Lasso Feature Selection

In [10]:
diab_rand_theta = cd_diab.theta
diab_rand_theta

array([ 0.89221356,  0.        , -0.04739701,  0.14477692,  0.07884914,
       -0.01963394,  0.        , -0.05991778,  0.        ,  0.1379822 ,
        0.01141614])

### 1d. Sklearn Lasso Regression


In [80]:
l = Lasso(alpha=0.00005) # strange alpha value due to our unusual normalization after data loading
l.fit(X_diab, y_diab)
diab_sklearn_theta = l.coef_
diab_sklearn_theta[0] = l.intercept_

diab_sklearn_theta

array([ 0.04243827,  0.        , -0.0258781 ,  0.14275015,  0.06779125,
       -0.        , -0.        , -0.05088302,  0.        ,  0.12594076,
        0.00162157])

### 1e. Comparison of thetas' estimations for each method

'Cyclic' index selection

In [81]:
diab_cyclic_theta

array([ 0.89221356,  0.        , -0.04742201,  0.14476189,  0.07885744,
       -0.0195965 ,  0.        , -0.05995561,  0.        ,  0.13795199,
        0.01142408])

'Randomized' index selection

In [82]:
diab_rand_theta

array([ 0.89221356,  0.        , -0.04739701,  0.14477692,  0.07884914,
       -0.01963394,  0.        , -0.05991778,  0.        ,  0.1379822 ,
        0.01141614])

'Greedy' index selection

In [83]:
diab_greedy_theta

array([ 0.89221356,  0.        , -0.04742073,  0.14476481,  0.0788673 ,
       -0.01958458,  0.        , -0.0599571 ,  0.        ,  0.13793761,
        0.01141546])

Standard deviation for coordinate-descent results

In [85]:
diab_std = np.std(np.array([diab_cyclic_theta, diab_rand_theta, diab_greedy_theta]), axis=0)
diab_std

array([0.00000000e+00, 0.00000000e+00, 1.14952976e-05, 6.50708503e-06,
       7.42290150e-06, 2.10297398e-05, 0.00000000e+00, 1.81946005e-05,
       0.00000000e+00, 1.85822394e-05, 3.91308915e-06])

Mean for coordinate-descent results

In [86]:
diab_mean = np.mean(np.array([diab_cyclic_theta, diab_rand_theta, diab_greedy_theta]), axis=0)
diab_mean

array([ 0.89221356,  0.        , -0.04741325,  0.14476787,  0.07885796,
       -0.01960501,  0.        , -0.0599435 ,  0.        ,  0.13795727,
        0.01141856])

The difference between mean coordinate descent results and sklearn Lasso results

In [88]:
diab_diff = np.abs(diab_mean - diab_sklearn_theta)
diab_diff

array([0.84977529, 0.        , 0.02153515, 0.00201772, 0.01106671,
       0.01960501, 0.        , 0.00906048, 0.        , 0.01201651,
       0.00979699])

*******************

## 2. Boston dataset

### 2a. Greedy index selection method

In [17]:
lambda_ = 0.05
cd_bost = CoordinateDescent(lambda_ = lambda_)
cd_bost.fit(X_bost, y_bost, tol = 10e-4, method='greedy')

  6%|▌         | 62/1000 [00:00<00:00, 959.31it/s]


In [18]:
visualize_cost(cd_bost.costs)

Lasso Feature Selection

In [7]:
bost_greedy_theta = cd_bost.theta
bost_greedy_theta

array([ 1.21124026, -0.01699858,  0.01325271,  0.        ,  0.00638783,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.32929425])

### 2b. Cyclic index selection method

In [21]:
lambda_ = 0.05
cd_bost = CoordinateDescent(lambda_ = lambda_)
cd_bost.fit(X_bost, y_bost, tol = 10e-4, method='cyclic')

 34%|███▎      | 336/1000 [00:00<00:00, 5615.09it/s]


In [22]:
visualize_cost(cd_bost.costs)

Lasso Feature Selection

In [5]:
bost_cyclic_theta = cd_bost.theta
bost_cyclic_theta

array([ 1.21439916, -0.01774763,  0.01210772,  0.        ,  0.00578174,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.33104679])

### 2c. Randomized index selection method

In [65]:
lambda_ = 0.05
cd_bost = CoordinateDescent(lambda_ = lambda_)
cd_bost.fit(X_bost, y_bost, tol = 10e-4, method='randomized')

100%|██████████| 1000/1000 [00:00<00:00, 4846.47it/s]


In [25]:
visualize_cost(cd_bost.costs)

Lasso Feature Selection

In [3]:
bost_rand_theta = cd_bost.theta
bost_rand_theta

array([ 1.20107082, -0.01889477,  0.01495472,  0.        ,  0.00604809,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.31674859])

### 2d. Sklearn Lasso Regression

In [92]:
l = Lasso(alpha=0.00005) # strange alpha value due to our unusual normalization after data loading
l.fit(X_bost, y_bost)
bost_sklearn_theta = l.coef_
bost_sklearn_theta[0] = l.intercept_

bost_sklearn_theta


array([ 0.05724642, -0.02000438,  0.02120922, -0.        ,  0.02949011,
       -0.        ,  0.        , -0.        , -0.        , -0.        ,
       -0.        , -0.        ,  0.        , -0.42596015])

### 2e. Comparison of thetas' estimations for each method

'Cyclic' index selection

In [97]:
bost_cyclic_theta

array([ 1.21439916, -0.01774763,  0.01210772,  0.        ,  0.00578174,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.33104679])

'Randomized' index selection

In [98]:
bost_rand_theta

array([ 1.20107082, -0.01889477,  0.01495472,  0.        ,  0.00604809,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.31674859])

'Greedy' index selection

In [17]:
bost_greedy_theta

array([ 1.21124026, -0.01699858,  0.01325271,  0.        ,  0.00638783,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.32929425])

Standard deviation for coordinate-descent results

In [93]:
boston_std = np.std(np.array([bost_cyclic_theta, bost_rand_theta, bost_greedy_theta]), axis=0)
boston_std

array([0.00568664, 0.00077978, 0.00116967, 0.        , 0.00024804,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.00636748])

Mean for coordinate-descent results

In [94]:
boston_mean = np.mean(np.array([bost_cyclic_theta, bost_rand_theta, bost_greedy_theta]), axis=0)
boston_mean

array([ 1.20890341, -0.01788033,  0.01343838,  0.        ,  0.00607255,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.32569654])

The difference between mean coordinate descent results and sklearn Lasso results

In [96]:
boston_diff = np.abs(boston_mean - bost_sklearn_theta)
boston_diff

array([1.15165699, 0.00212405, 0.00777084, 0.        , 0.02341756,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.10026361])

***

## 3. BlogFeedback Dataset
#samples: 52 387 

#features: 280

[Source](https://archive.ics.uci.edu/ml/datasets/BlogFeedback)

## Loading data

In [99]:
df = pd.read_csv('./data/blogData_train.csv', header=None)
y = df[280].to_numpy()
X = df.drop(280, axis=1).to_numpy()

In [100]:
X.shape

(52397, 280)

In [101]:
y = y.reshape((-1, 1))
X = np.hstack([np.ones(X.shape[0]).reshape((X.shape[0], 1)), X])


# normalization
n = Normalizer()

X = n.fit_transform(X)
y = n.fit_transform(y)

### 3a. Greedy index selection method

In [114]:
lambda_ = 0.001
cd = CoordinateDescent(lambda_ = lambda_)
cd.fit(X, y, iters=100, tol = 10e-12, method='greedy')
blog_greedy_theta = cd.theta

100%|██████████| 100/100 [00:07<00:00, 12.90it/s]


In [115]:
blog_greedy_theta

array([-2.34573259e+00,  0.00000000e+00, -5.54468490e-01, -7.64237639e+00,
        7.35817950e-02, -1.98789605e-01,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  2.02484923e-01,  0.00000000e+00,  0.00000000e+00,
       -2.44491350e+00,  0.00000000e+00,  0.00000000e+00,  1.22677932e+01,
       -5.90559993e-01,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  9.31925351e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -2.20023061e-01,
        0.00000000e+00,  0.00000000e+00,  7.63167758e+00, -2.38830772e+01,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -1.30073148e+01,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        3.40147206e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

Number of irrelevant features:

In [116]:
np.count_nonzero(np.abs(blog_greedy_theta) < 10**-12)

211

In [148]:
visualize_cost(cd.costs)

### 3b. Cyclic index selection method

In [120]:
lambda_ = 0.001
cd = CoordinateDescent(lambda_=lambda_)
cd.fit(X, y, iters=100, tol=10e-12, method='cyclic')
blog_cyclic_theta = cd.theta


100%|██████████| 100/100 [00:08<00:00, 11.95it/s]


In [122]:
blog_cyclic_theta

array([ 0.00000000e+00,  2.57445332e+00,  1.90492387e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  2.19927290e-01, -1.32342753e+01,  0.00000000e+00,
        5.87275173e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -1.24202533e+01,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -4.52561300e+00,
        0.00000000e+00,  0.00000000e+00,  1.75438306e+00, -1.48655345e+01,
        7.89755665e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -4.16753603e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
       -1.06169835e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        2.13272803e+00,  

Number of irrelevant features:

In [123]:
np.count_nonzero(np.abs(blog_cyclic_theta) < 10**-12)

206

In [121]:
visualize_cost(cd.costs)

### 3c. Randomized index selection method

In [129]:
lambda_ = 0.001
cd = CoordinateDescent(lambda_ = lambda_)
cd.fit(X, y, iters=100, tol = 10e-12, method='randomized')
blog_randomized_theta = cd.theta

100%|██████████| 100/100 [00:07<00:00, 12.74it/s]


In [130]:
blog_randomized_theta

array([-8.92307636e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  7.37295098e-01,  0.00000000e+00,  1.28658539e+01,
        0.00000000e+00,  0.00000000e+00,  4.50289294e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  6.35357458e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -9.13374223e+00, -1.02909651e+00,
        9.47703980e-01,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  1.73571474e+01,  0.00000000e+00,  7.89458764e-01,
        0.00000000e+00,  0.00000000e+00, -6.27276917e+00, -1.10032567e+01,
        1.12551435e+00,  0.00000000e+00,  0.00000000e+00, -2.26675406e+00,
        0.00000000e+00, -2.09372763e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

Number of irrelevant features:

In [131]:
np.count_nonzero(np.abs(blog_randomized_theta) < 10**-12)

216

In [107]:
visualize_cost(cd.costs)

### 3d. Sklearn Lasso Regression

In [139]:
l = Lasso(alpha=0.001, max_iter=100) # strange alpha value due to our unusual normalization after data loading
l.fit(X, y)
blog_sklearn_theta = l.coef_
blog_sklearn_theta[0] = l.intercept_

blog_sklearn_theta

array([-0.12946457,  0.        , -0.        , -0.        ,  0.80449642,
        0.        ,  0.        , -0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.        , -0.        ,  0.36414409,
        0.        ,  0.        ,  0.        , -0.        ,  0.        ,
       -0.        ,  0.        , -0.        ,  0.        , -0.        ,
        0.        ,  0.        , -0.        ,  0.        , -0.        ,
        0.        ,  0.        , -0.        ,  0.        , -0.        ,
        0.        ,  0.        , -0.        ,  0.        , -0.        ,
        0.        ,  0.        , -0.        ,  0.        , -0.        ,
       -0.        ,  1.03939191,  2.17165443,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        , -0.27610316,  0.36957734, -0.        ,  0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.  

In [140]:
np.count_nonzero(np.abs(blog_sklearn_theta) < 10**-12)

274

### 3e. Comparison of thetas' estimations for each method

'Cyclic' index selection

In [134]:
blog_cyclic_theta

array([ 0.00000000e+00,  2.57445332e+00,  1.90492387e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  2.19927290e-01, -1.32342753e+01,  0.00000000e+00,
        5.87275173e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -1.24202533e+01,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -4.52561300e+00,
        0.00000000e+00,  0.00000000e+00,  1.75438306e+00, -1.48655345e+01,
        7.89755665e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -4.16753603e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
       -1.06169835e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        2.13272803e+00,  

'Randomized' index selection

In [136]:
blog_randomized_theta

array([-8.92307636e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  7.37295098e-01,  0.00000000e+00,  1.28658539e+01,
        0.00000000e+00,  0.00000000e+00,  4.50289294e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  6.35357458e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -9.13374223e+00, -1.02909651e+00,
        9.47703980e-01,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  1.73571474e+01,  0.00000000e+00,  7.89458764e-01,
        0.00000000e+00,  0.00000000e+00, -6.27276917e+00, -1.10032567e+01,
        1.12551435e+00,  0.00000000e+00,  0.00000000e+00, -2.26675406e+00,
        0.00000000e+00, -2.09372763e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

'Greedy' index selection

In [137]:
blog_greedy_theta

array([-2.34573259e+00,  0.00000000e+00, -5.54468490e-01, -7.64237639e+00,
        7.35817950e-02, -1.98789605e-01,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  2.02484923e-01,  0.00000000e+00,  0.00000000e+00,
       -2.44491350e+00,  0.00000000e+00,  0.00000000e+00,  1.22677932e+01,
       -5.90559993e-01,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  9.31925351e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -2.20023061e-01,
        0.00000000e+00,  0.00000000e+00,  7.63167758e+00, -2.38830772e+01,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -1.30073148e+01,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        3.40147206e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

Standard deviation for coordinate-descent results

In [141]:
blog_std = np.std(np.array([blog_cyclic_theta, blog_randomized_theta, blog_greedy_theta]), axis=0)
blog_std

array([3.77690650e+00, 1.21360893e+00, 1.05329014e+00, 3.60265078e+00,
       3.46867908e-02, 4.02682111e-01, 0.00000000e+00, 6.06502167e+00,
       0.00000000e+00, 9.54523080e-02, 2.12268409e+00, 0.00000000e+00,
       1.15254328e+00, 0.00000000e+00, 0.00000000e+00, 5.78309319e+00,
       2.78392650e-01, 1.03674719e-01, 6.23869720e+00, 0.00000000e+00,
       2.76844171e+00, 2.99510378e+00, 4.39313824e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 5.80379877e+00,
       0.00000000e+00, 0.00000000e+00, 6.85360449e+00, 1.00512105e+01,
       4.46751941e-01, 0.00000000e+00, 8.27024105e-01, 6.61334793e+00,
       3.72294391e+00, 8.18223775e+00, 0.00000000e+00, 3.72154430e-01,
       0.00000000e+00, 0.00000000e+00, 2.60657281e+00, 5.18698498e+00,
       5.30572553e-01, 0.00000000e+00, 0.00000000e+00, 1.06855811e+00,
       1.90371291e+00, 9.86992672e-01, 0.00000000e+00, 0.00000000e+00,
       1.00537763e+00, 1.65166236e+00, 4.23027476e-01, 1.14547122e+00,
      

Mean for coordinate-descent results

In [143]:
blog_mean = np.mean(np.array([blog_cyclic_theta, blog_randomized_theta, blog_greedy_theta]), axis=0)
blog_mean

array([-3.75626965e+00,  8.58151107e-01,  4.50151793e-01, -2.54745880e+00,
        2.45272650e-02,  1.79501831e-01,  0.00000000e+00,  4.28861795e+00,
        0.00000000e+00,  6.74949743e-02,  1.50096431e+00,  0.00000000e+00,
       -8.14971166e-01,  0.00000000e+00,  0.00000000e+00,  4.08926441e+00,
       -1.96853331e-01,  7.33090967e-02, -4.41142510e+00,  0.00000000e+00,
        1.95758391e+00,  2.11785819e+00,  3.10641784e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -4.21342546e+00,
        0.00000000e+00,  0.00000000e+00, -5.00688219e-01, -9.81259558e+00,
        3.15901327e-01,  0.00000000e+00,  5.84794353e-01, -9.29094980e+00,
        2.63251888e+00,  5.78571580e+00,  0.00000000e+00,  2.63152921e-01,
        0.00000000e+00,  0.00000000e+00, -3.48010173e+00, -3.66775225e+00,
        3.75171450e-01,  0.00000000e+00,  0.00000000e+00, -7.55584687e-01,
        7.79924572e-01, -6.97909211e-01,  0.00000000e+00,  0.00000000e+00,
        7.10909342e-01,  

The difference between mean coordinate descent results and sklearn Lasso results

In [144]:
blog_diff = np.abs(blog_mean - blog_sklearn_theta)
blog_diff

array([3.62680508e+00, 8.58151107e-01, 4.50151793e-01, 2.54745880e+00,
       7.79969152e-01, 1.79501831e-01, 0.00000000e+00, 4.28861795e+00,
       0.00000000e+00, 6.74949743e-02, 1.50096431e+00, 0.00000000e+00,
       8.14971166e-01, 0.00000000e+00, 0.00000000e+00, 4.08926441e+00,
       1.96853331e-01, 7.33090967e-02, 4.41142510e+00, 3.64144091e-01,
       1.95758391e+00, 2.11785819e+00, 3.10641784e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.21342546e+00,
       0.00000000e+00, 0.00000000e+00, 5.00688219e-01, 9.81259558e+00,
       3.15901327e-01, 0.00000000e+00, 5.84794353e-01, 9.29094980e+00,
       2.63251888e+00, 5.78571580e+00, 0.00000000e+00, 2.63152921e-01,
       0.00000000e+00, 0.00000000e+00, 3.48010173e+00, 3.66775225e+00,
       3.75171450e-01, 0.00000000e+00, 0.00000000e+00, 7.55584687e-01,
       7.79924572e-01, 6.97909211e-01, 0.00000000e+00, 1.03939191e+00,
       1.46074509e+00, 1.16790165e+00, 2.65376360e-01, 8.09970466e-01,
      