In [123]:
import numpy as np
import pandas as pd
from source.coordinate_descent import CoordinateDescent
from sklearn.datasets import load_diabetes, load_boston
from source.utils.visualization import visualize_cost

from sklearn.preprocessing import Normalizer

## Loading Data

In [2]:
X_diab, y_diab = load_diabetes(return_X_y=True)

y_diab = y_diab.reshape((-1, 1))
X_diab = np.hstack([np.ones(X_diab.shape[0]).reshape((X_diab.shape[0], 1)), X_diab])


# normalization
X_diab = X_diab / (np.linalg.norm(X_diab, axis=0))
y_diab = y_diab / (np.linalg.norm(y_diab, axis=0))

In [None]:
X_bost, y_bost = load_boston(return_X_y=True)

y_bost = y_bost.reshape((-1, 1))
X_bost = np.hstack([np.ones(X_bost.shape[0]).reshape((X_bost.shape[0], 1)), X_bost])

# normalization
X_bost = X_bost / (np.linalg.norm(X_bost, axis=0))
y_bost = y_bost / (np.linalg.norm(y_bost, axis=0))

## Lasso Regression with our implementation of Coordinate Descent algorithm

## 1. Diabetes dataset

### 1a. Greedy index selection method

In [4]:
lambda_ = 0.01
cd_diab = CoordinateDescent(lambda_ = lambda_)
cd_diab.fit(X_diab, y_diab, tol = 10e-6, method='greedy')

  9%|▊         | 87/1000 [00:00<00:00, 1245.75it/s]


In [5]:
visualize_cost(cd_diab.costs)

Lasso Feature Selection

In [6]:
cd_diab.theta

array([ 0.89221356,  0.        , -0.04742073,  0.14476481,  0.0788673 ,
       -0.01958458,  0.        , -0.0599571 ,  0.        ,  0.13793761,
        0.01141546])

### 1b. Cyclic index selection method

In [7]:
lambda_ = 0.01
cd_diab = CoordinateDescent(lambda_ = lambda_)
cd_diab.fit(X_diab, y_diab, tol = 10e-6, method='cyclic')

 23%|██▎       | 234/1000 [00:00<00:00, 8689.32it/s]


In [8]:
visualize_cost(cd_diab.costs)

 Lasso Feature Selection

In [9]:
cd_diab.theta

array([ 0.89221356,  0.        , -0.04742201,  0.14476189,  0.07885744,
       -0.0195965 ,  0.        , -0.05995561,  0.        ,  0.13795199,
        0.01142408])

### 1c. Randomized index selection method

In [13]:
lambda_ = 0.01
cd_diab = CoordinateDescent(lambda_ = lambda_)
cd_diab.fit(X_diab, y_diab, tol = 10e-6, method='randomized')

 29%|██▉       | 291/1000 [00:00<00:00, 6078.73it/s]


In [14]:
visualize_cost(cd_diab.costs)

Lasso Feature Selection

In [15]:
cd_diab.theta

array([ 0.89221356,  0.        , -0.04739701,  0.14477692,  0.07884914,
       -0.01963394,  0.        , -0.05991778,  0.        ,  0.1379822 ,
        0.01141614])

*******************

## 2. Boston dataset

### 2a. Greedy index selection method

In [17]:
lambda_ = 0.05
cd_bost = CoordinateDescent(lambda_ = lambda_)
cd_bost.fit(X_bost, y_bost, tol = 10e-4, method='greedy')

  6%|▌         | 62/1000 [00:00<00:00, 959.31it/s]


In [18]:
visualize_cost(cd_bost.costs)

Lasso Feature Selection

In [20]:
cd_bost.theta

array([ 1.21124026, -0.01699858,  0.01325271,  0.        ,  0.00638783,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.32929425])

### 2b. Cyclic index selection method

In [21]:
lambda_ = 0.05
cd_bost = CoordinateDescent(lambda_ = lambda_)
cd_bost.fit(X_bost, y_bost, tol = 10e-4, method='cyclic')

 34%|███▎      | 336/1000 [00:00<00:00, 5615.09it/s]


In [22]:
visualize_cost(cd_bost.costs)

Lasso Feature Selection

In [23]:
cd_bost.theta

array([ 1.21439916, -0.01774763,  0.01210772,  0.        ,  0.00578174,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.33104679])

### 2c. Randomized index selection method

In [65]:
lambda_ = 0.05
cd_bost = CoordinateDescent(lambda_ = lambda_)
cd_bost.fit(X_bost, y_bost, tol = 10e-4, method='randomized')

100%|██████████| 1000/1000 [00:00<00:00, 4846.47it/s]


In [25]:
visualize_cost(cd_bost.costs)

Lasso Feature Selection

In [26]:
cd_bost.theta

array([ 1.20107082, -0.01889477,  0.01495472,  0.        ,  0.00604809,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.31674859])

***

## 3. BlogFeedback Dataset
#samples: 52 387 

#features: 280

[Source](https://archive.ics.uci.edu/ml/datasets/BlogFeedback)

## Loading data

In [142]:
df = pd.read_csv('./data/blogData_train.csv', header=None)
y = df[280].to_numpy()
X = df.drop(280, axis=1).to_numpy()

In [143]:
X.shape

(52397, 280)

In [144]:
y = y.reshape((-1, 1))
X = np.hstack([np.ones(X.shape[0]).reshape((X.shape[0], 1)), X])


# normalization
n = Normalizer()

X = n.fit_transform(X)
y = n.fit_transform(y)

### 3a. Randomized index selection method

In [147]:
lambda_ = 0.001
cd = CoordinateDescent(lambda_ = lambda_)
cd.fit(X, y, iters=100, tol = 10e-12, method='randomized')

100%|██████████| 100/100 [00:07<00:00, 13.74it/s]


In [148]:
visualize_cost(cd.costs)

In [149]:
cd.theta

array([-3.30413645e+01, -3.71363074e-01,  4.97190938e+00,  8.09018268e+00,
        0.00000000e+00,  0.00000000e+00, -2.01154868e-01,  0.00000000e+00,
        0.00000000e+00,  9.50731135e-02,  0.00000000e+00,  3.38381163e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00, -4.91524388e-01,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  3.78719767e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  5.34740488e+00,  0.00000000e+00,  1.10003178e+01,
        5.27522618e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  4.86092654e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -1.80889163e+00,
        4.42353232e+00,  0.00000000e+00, -7.02301417e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  2.99011025e+00,
        0.00000000e+00, -

In [164]:
# number of irrelevant features
np.count_nonzero(np.abs(cd.theta) < 10**-12)

211