### Testing the implementation of the Convergence Tolerance in Lasso Regression

In [1]:
from si.io.csv_file import read_csv
from si.data.dataset import Dataset

# Load CPU dataset
data = read_csv("../datasets/cpu/cpu.csv", features=True, label=True)

print(f"Shape: {data.shape()}")
print(f"Has Label?: {data.has_label()}\n")
print(f"Label/Classes: \n{data.get_classes()}\n")
print(f"Features: {data.features}\n")
print(f"Dataset Summary:\n {data.summary()}")

data = Dataset(X=data.X, y=data.y, features=data.features, label=data.label)

Shape: (209, 6)
Has Label?: True

Label/Classes: 
[   6    7    8   10   11   12   13   14   16   17   18   19   20   21
   22   23   24   25   26   27   28   29   30   31   32   33   34   35
   36   37   38   40   41   42   44   45   46   49   50   51   52   53
   54   56   58   60   61   62   63   64   65   66   67   69   70   71
   72   74   75   76   77   80   84   86   92   93  100  105  106  109
  110  113  114  116  120  130  132  133  134  136  138  140  141  143
  144  172  173  185  188  189  198  208  212  214  220  237  248  259
  269  274  277  307  318  326  367  368  370  397  405  465  489  510
  636  915 1144 1150]

Features: Index(['syct', 'mmin', 'mmax', 'cach', 'chmin', 'chmax'], dtype='object')

Dataset Summary:
                 syct          mmin          mmax         cach      chmin  \
mean      203.822967  2.867981e+03  1.179615e+04    25.205742   4.698565   
median    110.000000  2.000000e+03  8.000000e+03     8.000000   2.000000   
min        17.000000  6.4000

In [2]:
from si.models.lasso_regression import LassoRegression
import time

# Test with tolerance
lasso_with_tol = LassoRegression(tolerance=1e-4)
start_with_tol = time.time()
lasso_with_tol.fit(data)
end_with_tol = time.time()

# Test without tolerance
lasso_no_tol = LassoRegression(tolerance=0)  # Set tolerance to 0 to disable it
start_no_tol = time.time()
lasso_no_tol.fit(data)
end_no_tol = time.time()

# Compare runtime
print(f"Runtime with tolerance: {end_with_tol - start_with_tol:.4f} seconds")
print(f"Runtime without tolerance: {end_no_tol - start_no_tol:.4f} seconds")

# Compare accuracy
predictions_with_tol = lasso_with_tol.predict(data)
predictions_no_tol = lasso_no_tol.predict(data)

result_with_tol = lasso_with_tol.score(data)
result_no_tol = lasso_with_tol.score(data)
print(f"MSE with tolerance: {round(result_with_tol, 4)}")
print(f"MSE without tolerance: {round(result_no_tol, 4)}")
print("Lasso with tolerance MSE == Lasso no tolerance MSE?:", result_with_tol == result_no_tol)

Runtime with tolerance: 0.0120 seconds
Runtime without tolerance: 0.2130 seconds
MSE with tolerance: 3478.4781
MSE without tolerance: 3478.4781
Lasso with tolerance MSE == Lasso no tolerance MSE?: True
