In [19]:
from copy import deepcopy

import numpy as np
from sklearn.base import clone
from sklearn.linear_model import Ridge, SGDRegressor, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

In [2]:
#Creating linear data for running the Regression Model
#Seeding NumPy's global generator so the same data is produced on every Restart & Run All
np.random.seed(42)
X = 2 * np.random.rand(100, 1) #X-values: 100 samples drawn uniformly from [0, 2)
y = 4 + 3 * X + np.random.randn(100, 1) #Y-values: the line 4 + 3x plus standard normal noise

# Ridge Regression
Uses half the squared l2 norm of the weight vector for regularization

### Using Scikit by Cholesky Method

In [5]:
#Ridge model solved with the "cholesky" solver; fit() returns the estimator, so build and train in one step
ridge_reg = Ridge(solver="cholesky", alpha=1).fit(X, y)
#Prediction for a single new sample x = 1.5
ridge_reg.predict([[1.5]])

array([[8.38395043]])

### Using SGD Regressor Model

In [7]:
#Stochastic Gradient Descent with penalty="l2", i.e. Ridge-style regularization (squared l2 norm of the weights)
#The target is flattened with ravel() before being passed to fit()
sgd_reg = SGDRegressor(penalty="l2").fit(X, y.ravel())
#Prediction for a single new sample x = 1.5
sgd_reg.predict([[1.5]])

array([8.51414351])

# Lasso Regression
Uses l1 norm for regularization

### Using Scikit Lasso class

In [9]:
#Scikit-Learn's dedicated Lasso estimator with regularization strength alpha=0.1
lasso_reg = Lasso(alpha=0.1).fit(X, y)
#Prediction for a single new sample x = 1.5
lasso_reg.predict([[1.5]])

array([8.28456303])

### Using SGD Regressor

In [12]:
#Stochastic Gradient Descent with penalty="l1", i.e. Lasso-style regularization
#The target is flattened with ravel() before being passed to fit()
sgd_reg = SGDRegressor(penalty="l1").fit(X, y.ravel())
#Prediction for a single new sample x = 1.5
sgd_reg.predict([[1.5]])

array([8.51160654])

# Elastic Net 
Uses a combination of both l1 and l2 norm for regularization

In [14]:
#Elastic Net estimator: alpha=0.1 sets the overall strength, l1_ratio=0.5 mixes the l1 and l2 penalties equally
elastic_reg = ElasticNet(l1_ratio=0.5, alpha=0.1).fit(X, y)
#Prediction for a single new sample x = 1.5
elastic_reg.predict([[1.5]])

array([8.19926824])

# Early Stopping

In [27]:
#Using early stopping on SGD Regressor
sgd_reg = SGDRegressor(n_iter_no_change=1, warm_start=True, penalty=None, learning_rate="constant", eta0=0.0005)

#Transforming given features to scaled polynomial
pipeline = Pipeline((
    ("poly_features:", PolynomialFeatures(degree=2, include_bias=False)),
    ("std. scalar:", StandardScaler())
))

X_poly_scaled = pipeline.fit_transform(X, y)

#Defining variables to be used for early stopping of SGD Regressor
X_train_poly_scaled, X_val_poly_scaled, y_train, y_val = train_test_split(X_poly_scaled, y, test_size=0.2)
min_val_error = float("inf")
best_epoch = None
best_model = None

#Running the early stopping method
for epoch in range(1000):
  sgd_reg.fit(X_train_poly_scaled, y_train.ravel())
  y_val_predict = sgd_reg.predict(X_val_poly_scaled)
  val_error = mean_squared_error(y_val_predict, y_val)
  if val_error < min_val_error:
    min_val_error = val_error
    best_epoch = epoch
    best_model = clone(sgd_reg)

In [31]:
#Viewing the results of early stopping
best_model.fit(X,y.ravel())
best_model.predict([[1.5]])

array([8.57984548])