In [22]:
import pandas as pd
import numpy as np

In [28]:
from sklearn.datasets import load_diabetes

In [81]:
# Load the diabetes dataset and wrap its arrays in labelled pandas containers:
# X holds the features (one column per feature name), y holds the target.
data = load_diabetes()
X = pd.DataFrame(data=data['data'], columns=data['feature_names'])
y = pd.Series(data=data['target'], name='target')

In [86]:
# Sanity-check the feature matrix dimensions as (rows, columns).
X.shape

(442, 10)

In [87]:
# Preview the first five rows of the feature frame.
X.head(5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641


In [88]:
# Preview the first rows of the target series.
y.head()

0    151.0
1     75.0
2    141.0
3    206.0
4    135.0
Name: target, dtype: float64

---

In [89]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

In [121]:
# Fit ordinary least squares on the full dataset and score it on that same
# data, i.e. this is a training-set score rather than a held-out estimate.
lr = LinearRegression().fit(X,y)
lr_train_score = lr.score(X,y)
# Print the value assigned just above. The previous `lr_score` is never
# assigned in the visible cells, so the cell raised NameError on a fresh kernel
# (it only worked thanks to leftover kernel state).
print(f'lr training set score: {lr_train_score:0.2f}')

lr training set score: 0.52


In [122]:
# 5-fold cross-validation of an unfitted LinearRegression; keep the mean fold
# score so later cells can compare models against it.
scores = cross_val_score(LinearRegression(), X, y, cv=5)
lr_cv_score = scores.mean()
print(f'lr cv score: {lr_cv_score:0.2f}')

lr cv score: 0.48


In [123]:
# List the fitted coefficients in ascending order to see their spread.
# sorted() returns a plain list, so the link between value and feature is lost.
sorted(lr.coef_)

[-792.1841616283061,
 -239.81908936565472,
 -10.012197817470847,
 67.62538639104386,
 101.04457032134488,
 177.0641762322512,
 324.3904276893763,
 476.74583782366255,
 519.8397867901343,
 751.2793210873945]

---

In [124]:
from sklearn.linear_model import Ridge

In [125]:
# Fit Ridge with its default settings on the full dataset, then report its
# training-set score next to the plain linear regression score.
ridge = Ridge()
ridge.fit(X, y)
ridge_train_score = ridge.score(X, y)
print(
    f'lr training set score: {lr_train_score:0.2f}',
    f'ridge training set score: {ridge_train_score:0.2f}',
    sep='\n',
)

lr training set score: 0.52
ridge training set score: 0.45


In [126]:
# 5-fold cross-validation of a default Ridge; print the running comparison of
# mean fold scores.
scores = cross_val_score(Ridge(), X, y, cv=5)
ridge_cv_score = scores.mean()
print(
    f'lr cv score: {lr_cv_score:0.2f}',
    f'ridge cv score: {ridge_cv_score:0.2f}',
    sep='\n',
)

lr cv score: 0.48
ridge cv score: 0.41


In [127]:
# Show the estimator repr to inspect the hyperparameters in effect
# (Ridge() was built without arguments, so these are its defaults).
ridge

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [128]:
# Ridge coefficients in ascending order, for comparison with the unregularized
# fit; sorting discards which feature each weight belongs to.
sorted(ridge.coef_)

[-152.04046539318358,
 -83.15488546325011,
 -29.515926646857444,
 5.909368961868377,
 29.465745644229376,
 111.87871800112734,
 117.31171538202865,
 201.6294338387023,
 262.944995326854,
 306.3516270563722]

---

In [100]:
from sklearn.linear_model import RidgeCV

In [130]:
# Fit RidgeCV with default settings on the full dataset and append its
# training-set score to the running comparison.
ridgecv = RidgeCV()
ridgecv.fit(X, y)
ridgecv_train_score = ridgecv.score(X, y)
print(
    f'lr training set score: {lr_train_score:0.2f}',
    f'ridge training set score: {ridge_train_score:0.2f}',
    f'ridgecv training set score: {ridgecv_train_score:0.2f}',
    sep='\n',
)

lr training set score: 0.52
ridge training set score: 0.45
ridgecv training set score: 0.51


In [131]:
# 5-fold cross-validation of a fresh RidgeCV; append its mean fold score to the
# running comparison.
scores = cross_val_score(RidgeCV(), X, y, cv=5)
ridgecv_cv_score = scores.mean()
print(
    f'lr cv score: {lr_cv_score:0.2f}',
    f'ridge cv score: {ridge_cv_score:0.2f}',
    f'ridgecv cv score: {ridgecv_cv_score:0.2f}',
    sep='\n',
)

lr cv score: 0.48
ridge cv score: 0.41
ridgecv cv score: 0.48


In [132]:
# Show the estimator repr; it includes the candidate `alphas` grid RidgeCV
# chooses from.
ridgecv

RidgeCV(alphas=array([ 0.1,  1. , 10. ]), cv=None, fit_intercept=True,
        gcv_mode=None, normalize=False, scoring=None, store_cv_values=False)

In [133]:
# Regularization strength that RidgeCV selected during fit.
ridgecv.alpha_

0.1

In [134]:
# Coefficients of the RidgeCV fit in ascending order (feature order is lost).
sorted(ridgecv.coef_)

[-207.19481363268872,
 -188.68016350975154,
 -83.46607377280492,
 -70.82809550533162,
 1.3073489468086006,
 86.74853944271896,
 115.71270250414933,
 301.76943731602574,
 443.8140541197255,
 489.69108008500336]

---

In [135]:
from sklearn.linear_model import Lasso

In [137]:
# Fit Lasso with default settings on the full dataset and append its
# training-set score to the running comparison.
lasso = Lasso()
lasso.fit(X, y)
lasso_train_score = lasso.score(X, y)
print(
    f'lr training set score: {lr_train_score:0.2f}',
    f'ridge training set score: {ridge_train_score:0.2f}',
    f'ridgecv training set score: {ridgecv_train_score:0.2f}',
    f'lasso training set score: {lasso_train_score:0.2f}',
    sep='\n',
)

lr training set score: 0.52
ridge training set score: 0.45
ridgecv training set score: 0.51
lasso training set score: 0.36


In [138]:
# 5-fold cross-validation of a default Lasso; append its mean fold score to the
# running comparison.
scores = cross_val_score(Lasso(), X, y, cv=5)
lasso_cv_score = scores.mean()
print(
    f'lr cv score: {lr_cv_score:0.2f}',
    f'ridge cv score: {ridge_cv_score:0.2f}',
    f'ridgecv cv score: {ridgecv_cv_score:0.2f}',
    f'lasso cv score: {lasso_cv_score:0.2f}',
    sep='\n',
)

lr cv score: 0.48
ridge cv score: 0.41
ridgecv cv score: 0.48
lasso cv score: 0.34


In [140]:
# Show the estimator repr to inspect the hyperparameters in effect
# (Lasso() was built without arguments, so these are its defaults).
lasso

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [141]:
# Lasso coefficients in ascending order; exact zeros mark features the fit
# dropped. Sorting discards which feature each weight belongs to.
sorted(lasso.coef_)

[0.0,
 -0.0,
 0.0,
 0.0,
 -0.0,
 0.0,
 0.0,
 6.301904186054684,
 307.60570000448166,
 367.7018520698162]

---

In [142]:
from sklearn.linear_model import LassoCV

In [144]:
# Fit LassoCV with default settings on the full dataset and append its
# training-set score to the running comparison.
lassocv = LassoCV()
lassocv.fit(X, y)
lassocv_train_score = lassocv.score(X, y)
print(
    f'lr training set score: {lr_train_score:0.2f}',
    f'ridge training set score: {ridge_train_score:0.2f}',
    f'ridgecv training set score: {ridgecv_train_score:0.2f}',
    f'lasso training set score: {lasso_train_score:0.2f}',
    f'lassocv training set score: {lassocv_train_score:0.2f}',
    sep='\n',
)

lr training set score: 0.52
ridge training set score: 0.45
ridgecv training set score: 0.51
lasso training set score: 0.36
lassocv training set score: 0.52


In [145]:
# 5-fold cross-validation of a fresh LassoCV; append its mean fold score to the
# running comparison.
scores = cross_val_score(LassoCV(), X, y, cv=5)
lassocv_cv_score = scores.mean()
print(
    f'lr cv score: {lr_cv_score:0.2f}',
    f'ridge cv score: {ridge_cv_score:0.2f}',
    f'ridgecv cv score: {ridgecv_cv_score:0.2f}',
    f'lasso cv score: {lasso_cv_score:0.2f}',
    f'lassocv cv score: {lassocv_cv_score:0.2f}',
    sep='\n',
)

lr cv score: 0.48
ridge cv score: 0.41
ridgecv cv score: 0.48
lasso cv score: 0.34
lassocv cv score: 0.48


In [147]:
# Show the estimator repr to inspect the hyperparameters in effect
# (LassoCV() was built without arguments, so these are its defaults).
lassocv

LassoCV(alphas=None, copy_X=True, cv=None, eps=0.001, fit_intercept=True,
        max_iter=1000, n_alphas=100, n_jobs=None, normalize=False,
        positive=False, precompute='auto', random_state=None,
        selection='cyclic', tol=0.0001, verbose=False)

In [148]:
# Regularization strength that LassoCV selected during fit.
lassocv.alpha_

0.003753767152692203

In [149]:
# Coefficients of the LassoCV fit in ascending order (feature order is lost).
sorted(lassocv.coef_)

[-569.4426837980648,
 -235.99640533733526,
 -6.496844548604692,
 -0.0,
 66.83430445374282,
 143.6995665012068,
 302.45627915380334,
 321.06689245006095,
 521.738542606749,
 669.9263311180067]

---

In [151]:
from sklearn.linear_model import ElasticNet

In [152]:
# Fit ElasticNet with default settings on the full dataset and append its
# training-set score to the running comparison.
en = ElasticNet()
en.fit(X, y)
en_train_score = en.score(X, y)
print(
    f'lr training set score: {lr_train_score:0.2f}',
    f'ridge training set score: {ridge_train_score:0.2f}',
    f'ridgecv training set score: {ridgecv_train_score:0.2f}',
    f'lasso training set score: {lasso_train_score:0.2f}',
    f'lassocv training set score: {lassocv_train_score:0.2f}',
    f'en training set score: {en_train_score:0.2f}',
    sep='\n',
)

lr training set score: 0.52
ridge training set score: 0.45
ridgecv training set score: 0.51
lasso training set score: 0.36
lassocv training set score: 0.52
en training set score: 0.01


In [153]:
# 5-fold cross-validation of a default ElasticNet; append its mean fold score
# to the running comparison.
scores = cross_val_score(ElasticNet(), X, y, cv=5)
en_cv_score = scores.mean()
print(
    f'lr cv score: {lr_cv_score:0.2f}',
    f'ridge cv score: {ridge_cv_score:0.2f}',
    f'ridgecv cv score: {ridgecv_cv_score:0.2f}',
    f'lasso cv score: {lasso_cv_score:0.2f}',
    f'lassocv cv score: {lassocv_cv_score:0.2f}',
    f'en cv score: {en_cv_score:0.2f}',
    sep='\n',
)

lr cv score: 0.48
ridge cv score: 0.41
ridgecv cv score: 0.48
lasso cv score: 0.34
lassocv cv score: 0.48
en cv score: -0.02


In [154]:
# Show the estimator repr to inspect the hyperparameters in effect
# (ElasticNet() was built without arguments, so these are its defaults).
en

ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [156]:
# ElasticNet coefficients in ascending order; sorting discards which feature
# each weight belongs to.
sorted(en.coef_)

[-1.86136320233039,
 0.0,
 0.25093508318279734,
 0.3590178701173919,
 0.528645560002729,
 1.7698509616010372,
 2.114454026654008,
 2.204355574953443,
 3.105840727076851,
 3.25976724407584]

In [162]:
# IPython-only introspection (`obj?` is not valid plain Python) left over from
# exploring ElasticNet's parameters; safe to delete in a finalized notebook.
ElasticNet?

---

In [157]:
from sklearn.linear_model import ElasticNetCV

In [163]:
# Fit ElasticNetCV over three candidate l1_ratio values on the full dataset and
# append its training-set score to the running comparison.
encv = ElasticNetCV(l1_ratio=[0.001, 0.5, 1])
encv.fit(X, y)
encv_train_score = encv.score(X, y)
print(
    f'lr training set score: {lr_train_score:0.2f}',
    f'ridge training set score: {ridge_train_score:0.2f}',
    f'ridgecv training set score: {ridgecv_train_score:0.2f}',
    f'lasso training set score: {lasso_train_score:0.2f}',
    f'lassocv training set score: {lassocv_train_score:0.2f}',
    f'en training set score: {en_train_score:0.2f}',
    f'encv training set score: {encv_train_score:0.2f}',
    sep='\n',
)

lr training set score: 0.52
ridge training set score: 0.45
ridgecv training set score: 0.51
lasso training set score: 0.36
lassocv training set score: 0.52
en training set score: 0.01
encv training set score: 0.52


In [164]:
# 5-fold cross-validation of a fresh ElasticNetCV (same l1_ratio candidates as
# the fitted `encv`); append its mean fold score to the running comparison.
scores = cross_val_score(ElasticNetCV(l1_ratio=[0.001, 0.5, 1]), X, y, cv=5)
encv_cv_score = scores.mean()
print(
    f'lr cv score: {lr_cv_score:0.2f}',
    f'ridge cv score: {ridge_cv_score:0.2f}',
    f'ridgecv cv score: {ridgecv_cv_score:0.2f}',
    f'lasso cv score: {lasso_cv_score:0.2f}',
    f'lassocv cv score: {lassocv_cv_score:0.2f}',
    f'en cv score: {en_cv_score:0.2f}',
    f'encv cv score: {encv_cv_score:0.2f}',
    sep='\n',
)

lr cv score: 0.48
ridge cv score: 0.41
ridgecv cv score: 0.48
lasso cv score: 0.34
lassocv cv score: 0.48
en cv score: -0.02
encv cv score: 0.48


In [165]:
# Show the estimator repr; only l1_ratio was passed explicitly, the remaining
# values are whatever ElasticNetCV defaults to.
encv

ElasticNetCV(alphas=None, copy_X=True, cv=None, eps=0.001, fit_intercept=True,
             l1_ratio=[0.001, 0.5, 1], max_iter=1000, n_alphas=100, n_jobs=None,
             normalize=False, positive=False, precompute='auto',
             random_state=None, selection='cyclic', tol=0.0001, verbose=0)

In [166]:
# Regularization strength that ElasticNetCV selected during fit.
encv.alpha_

0.003753767152692203

In [167]:
# Which of the candidate l1_ratio values ([.001, .5, 1]) was selected during fit.
encv.l1_ratio_

1.0

In [168]:
# Coefficients of the ElasticNetCV fit in ascending order (feature order is lost).
sorted(encv.coef_)

[-569.4426837980648,
 -235.99640533733526,
 -6.496844548604692,
 -0.0,
 66.83430445374282,
 143.6995665012068,
 302.45627915380334,
 321.06689245006095,
 521.738542606749,
 669.9263311180067]

---

In [169]:
from sklearn.linear_model import LogisticRegression

In [171]:
# Build a throwaway LogisticRegression just to read its default `penalty`.
LogisticRegression().penalty

'l2'

In [175]:
# Construct (but do not fit) a LogisticRegression with an elastic-net penalty to
# view its repr.
# NOTE(review): the repr shows solver='lbfgs' alongside penalty='elasticnet';
# scikit-learn documents elastic-net as supported only by solver='saga', so
# calling .fit() on this configuration is expected to fail — confirm and pass
# solver='saga' before using it for training.
LogisticRegression(penalty='elasticnet',l1_ratio=.5)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=0.5, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='elasticnet',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [177]:
# Build a throwaway LogisticRegression just to read its default `C`.
LogisticRegression().C

1.0

In [181]:
# Print the full LogisticRegression documentation. help() is given an instance
# here, so the header reads "... object"; the output is long and worth clearing
# before the notebook is shared.
help(LogisticRegression())

Help on LogisticRegression in module sklearn.linear_model._logistic object:

class LogisticRegression(sklearn.base.BaseEstimator, sklearn.linear_model._base.LinearClassifierMixin, sklearn.linear_model._base.SparseCoefMixin)
 |  LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)
 |  
 |  Logistic Regression (aka logit, MaxEnt) classifier.
 |  
 |  In the multiclass case, the training algorithm uses the one-vs-rest (OvR)
 |  scheme if the 'multi_class' option is set to 'ovr', and uses the
 |  cross-entropy loss if the 'multi_class' option is set to 'multinomial'.
 |  (Currently the 'multinomial' option is supported only by the 'lbfgs',
 |  'sag', 'saga' and 'newton-cg' solvers.)
 |  
 |  This class implements regularized logistic regression using the
 |  'liblinear' library, 'newton-cg', 'sag