# Chapter 21 - Regularization for Linear Models
## Building Machine Learning and Deep Learning Models on Google Cloud Platform
### Ekaba Bisong

## Linear Regression with Regularization

In [0]:
# import packages
from sklearn.linear_model import Ridge
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.preprocessing import PolynomialFeatures

In [0]:
# load dataset
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2.
# Keep using it where it still exists (so the recorded outputs stay comparable),
# otherwise fall back to the California housing regression dataset, which
# exposes the same .data / .target attributes that the following cells read.
try:
    data = datasets.load_boston()
except (AttributeError, ImportError):
    # releases without the loader raise when it is accessed; the fallback is
    # downloaded on first use and gives different error values than Boston
    data = datasets.fetch_california_housing()

In [0]:
# pull the feature matrix and the target vector out of the loaded dataset
X, y = data.data, data.target

In [0]:
# expand the inputs with polynomial terms up to the second degree
polynomial_features = PolynomialFeatures(degree=2)
X_higher_order = polynomial_features.fit_transform(X)

In [0]:
# split in train and test sets
# a fixed random_state makes the shuffle, and therefore the reported RMSE,
# repeatable across Restart & Run All
X_train, X_test, y_train, y_test = train_test_split(
    X_higher_order, y, shuffle=True, random_state=42
)

In [0]:
# create the model. The parameter alpha represents the regularization magnitude
# (the strength of the ridge penalty applied to the coefficients)
linear_reg = Ridge(alpha=1.0)

In [7]:
# fit the model on the training set
# fit() is the last expression in the cell, so the fitted estimator's repr
# is what the notebook displays as the cell output
linear_reg.fit(X_train, y_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [0]:
# make predictions on the test set (the held-out rows that were not used for fitting)
predictions = linear_reg.predict(X_test)

In [9]:
# report the test-set error as RMSE: the square root of the mean squared error
rmse = sqrt(mean_squared_error(y_test, predictions))
print("Root mean squared error (RMSE): %.2f" % rmse)

Root mean squared error (RMSE): 3.55


## Logistic Regression with Regularization

In [0]:
# import packages
from sklearn.linear_model import RidgeClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [0]:
# load dataset
# NOTE: this rebinds `data`, which held the regression dataset in the previous section
data = datasets.load_iris()

In [0]:
# pull the feature matrix and the class labels out of the loaded dataset
X, y = data.data, data.target

In [0]:
# split in train and test sets
# a fixed random_state makes the shuffle, and therefore the reported accuracy,
# repeatable across Restart & Run All
X_train, X_test, y_train, y_test = train_test_split(
    X, y, shuffle=True, random_state=42
)

In [0]:
# create the classifier. Note: despite the section title and the variable name,
# RidgeClassifier is not logistic regression; it fits an L2-regularized (ridge)
# least-squares model on the encoded class labels.
# alpha is the regularization magnitude (1.0 is the default, made explicit here
# to match the Ridge model in the regression section)
logistic_reg = RidgeClassifier(alpha=1.0)

In [15]:
# fit the model on the training set
# fit() is the last expression in the cell, so the fitted estimator's repr
# is what the notebook displays as the cell output
logistic_reg.fit(X_train, y_train)

RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True, fit_intercept=True,
                max_iter=None, normalize=False, random_state=None,
                solver='auto', tol=0.001)

In [0]:
# make predictions on the test set
# NOTE: this rebinds `predictions`, which held the regression outputs in the previous section
predictions = logistic_reg.predict(X_test)

In [17]:
# score the test-set predictions: the fraction of labels predicted correctly
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f" % accuracy)

Accuracy: 0.82
