In [1]:
from time import time
from scipy import sparse
from scipy import linalg

from sklearn.datasets.samples_generator import make_regression
from sklearn.linear_model import Lasso

#### Lasso implementation on dense data

In [2]:
X,y = make_regression(n_samples=200,n_features=5000,random_state=0)

In [3]:
X_sp = sparse.coo_matrix(X)

In [5]:
X_sp

<200x5000 sparse matrix of type '<class 'numpy.float64'>'
	with 1000000 stored elements in COOrdinate format>

In [9]:
alpha=1
sparse_lasso = Lasso(alpha=alpha,fit_intercept=False,max_iter=1000)
dense_lasso = Lasso(alpha=alpha,fit_intercept=False,max_iter=1000)

In [17]:
t0 = time()
sparse_lasso.fit(X_sp,y)
print("Sparse lasso done in %fs" %(time()-t0))

Sparse lasso done in 0.143328s


In [18]:
t0 = time()
dense_lasso.fit(X,y)
print("Dense lasso done in %fs" %(time()-t0))

Dense lasso done in 0.071854s


In [19]:
print("Distance between coeffecients : %s" 
      % linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))

Distance between coeffecients : 8.015287401035549e-14


#### Lasso implementations on Sparse data

In [20]:
Xs = X.copy()

In [22]:
Xs[Xs<2.5]=0.0

In [23]:
Xs = sparse.coo_matrix(Xs)
Xs = Xs.tocsc()

In [26]:
print("Matrix density : %s %%" %(Xs.nnz/float(X.size)*100))

Matrix density : 0.6263000000000001 %


In [27]:
alpha = 0.1
sparse_lasso = Lasso(alpha=alpha,fit_intercept=False, max_iter=10000)
dense_lasso = Lasso(alpha=alpha,fit_intercept=False, max_iter=10000)

In [28]:
to=time()
sparse_lasso.fit(Xs,y)
print("Sparse Lasso done in %fs" % (time()-t0))

Sparse Lasso done in 409.278520s


In [29]:
to=time()
dense_lasso.fit(Xs.toarray(),y)
print("Dense Lasso done in %fs" % (time()-t0))

Dense Lasso done in 457.229538s


In [30]:
print("Distance between coeefecients : %s"
     % linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))

Distance between coeefecients : 8.034211286713951e-12
