**This is also called L1 regularization**

In [102]:
from sklearn.datasets import fetch_california_housing

ca_housing = fetch_california_housing()
X = ca_housing.data
y = ca_housing.target

In [103]:
ca_housing.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [104]:
ca_housing.target_names

['MedHouseVal']

In [105]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [106]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [107]:
from sklearn.linear_model import Lasso

lasso = Lasso()
lasso.fit(X_train, y_train)

0,1,2
,alpha,1.0
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [108]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

y_pred = lasso.predict(X_test)
print(mean_absolute_error(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))
print(r2_score(y_test, y_pred))

0.9090896299308748
1.3136235330402446
-0.0014734336890012134


In [109]:
param_grid = {
    'alpha' : [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
}

In [110]:
from sklearn.model_selection import GridSearchCV

lasso_cv = GridSearchCV(lasso, param_grid, cv=3, n_jobs=-1)
lasso_cv.fit(X_train, y_train)

0,1,2
,estimator,Lasso()
,param_grid,"{'alpha': [0.0001, 0.001, ...]}"
,scoring,
,n_jobs,-1
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,alpha,0.001
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [111]:
y_pred2 = lasso_cv.predict(X_test)
print(mean_absolute_error(y_test, y_pred2))
print(mean_squared_error(y_test, y_pred2))
print(r2_score(y_test, y_pred2))

0.5342626936582984
0.5230711968360002
0.6012237186808103


In [112]:
print(lasso_cv.best_estimator_)

Lasso(alpha=0.001)


In [113]:
lasso3 = Lasso(alpha=0.001)
lasso3.fit(X_train, y_train)
print(lasso3.intercept_)
print(lasso3.coef_)

2.0773506419571537
[ 0.82427787  0.12286936 -0.25903017  0.30596875 -0.00275208 -0.03906354
 -0.89943919 -0.87299682]


In [114]:
import pandas as pd
feature_names = [
    'MedInc',
    'HouseAge',
    'AveRooms',
    'AveBedrms',
    'Population',
    'AveOccup',
    'Latitude',
    'Longitude'
]
df = pd.DataFrame({'Feature_Names': feature_names, 'Coef': lasso3.coef_})
df

Unnamed: 0,Feature_Names,Coef
0,MedInc,0.824278
1,HouseAge,0.122869
2,AveRooms,-0.25903
3,AveBedrms,0.305969
4,Population,-0.002752
5,AveOccup,-0.039064
6,Latitude,-0.899439
7,Longitude,-0.872997
