Chapter 13. Linear Regression

13.1 Fitting a Line

In [1]:
# Load libraries
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
# Simulate a regression dataset: 100 observations, 3 features (2 of them
# informative), a single target, and a fixed seed for reproducibility
features, target = make_regression(
    n_samples=100,
    n_features=3,
    n_informative=2,
    n_targets=1,
    noise=0.2,
    coef=False,
    random_state=1,
)
# Instantiate the ordinary least squares estimator
regression = LinearRegression()
# Estimate the intercept and coefficients from the simulated data
model = regression.fit(features, target)

In [3]:
# View the intercept (constant term) estimated by the fitted model
model.intercept_


np.float64(-0.00965011817881578)

# View the coefficient estimated for each feature
# NOTE(review): no output is recorded for this expression and it has no cell
# marker of its own -- re-run the notebook to display the coefficients
model.coef_


In [4]:
# View the actual target value of the first observation, for comparison
# with the model's prediction for that observation
target[0]

np.float64(-20.870747595269407)

In [5]:
# Predict the target for every observation, then keep only the prediction
# for the first one
model.predict(features)[0]

np.float64(-20.86192770929682)

13.2 Handling Interaction Effects

In [6]:
# Load libraries
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.datasets import make_regression
# Simulate a regression dataset with two features, both informative
features, target = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_targets=1,
    noise=0.2,
    coef=False,
    random_state=1,
)
# Build interaction-only terms: products of distinct features, with no
# powers of a single feature and no bias column. With only two input
# features this adds a single x0 * x1 column, even though degree=3 is
# requested
interaction = PolynomialFeatures(
    degree=3,
    include_bias=False,
    interaction_only=True,
)
features_interaction = interaction.fit_transform(features)
# Instantiate the ordinary least squares estimator
regression = LinearRegression()
# Fit on the original features plus their interaction term
model = regression.fit(features_interaction, target)


In [7]:
# View the two original feature values of the first observation
features[0]


array([0.0465673 , 0.80186103])

In [8]:
# Import library
import numpy as np
# Compute the interaction term by hand: the element-wise product of the
# first and second feature columns, giving one value per observation
interaction_term = np.multiply(features[:, 0], features[:, 1])


In [9]:
# View the interaction term for the first observation, i.e. the product of
# the two values displayed for features[0]
interaction_term[0]

np.float64(0.037340501965846186)

13.3 Fitting a Nonlinear Relationship

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.datasets import make_regression
# Simulate a regression dataset: 100 observations, 3 features (2 informative)
features, target = make_regression(
    n_samples=100,
    n_features=3,
    n_informative=2,
    n_targets=1,
    noise=0.2,
    coef=False,
    random_state=1,
)
# Expand the features into every polynomial term up to degree 3: the
# original values, their squares and cubes, and all cross-products such as
# x0 * x1 (no constant bias column)
polynomial = PolynomialFeatures(
    degree=3,
    include_bias=False,
)
features_polynomial = polynomial.fit_transform(features)
# Instantiate the ordinary least squares estimator
regression = LinearRegression()
# Fit a model that is linear in the expanded polynomial terms
model = regression.fit(features_polynomial, target)

In [11]:
# View the three original feature values of the first observation
features[0]

array([ 0.58591043,  0.78477065, -0.95542526])

In [12]:
# Square each value of the first observation (x^2); these values appear
# among the degree-2 terms of the polynomial expansion
features[0]**2


array([0.34329103, 0.61586497, 0.91283743])

In [13]:
# Cube each value of the first observation (x^3); these values appear
# among the degree-3 terms of the polynomial expansion
features[0]**3

array([ 0.2011378 ,  0.48331276, -0.87214794])

In [14]:
# View the first observation's full polynomial expansion: the three
# original values, then every degree-2 term (squares and pairwise
# products), then every degree-3 term -- 19 values in total, not just
# x, x^2, and x^3
features_polynomial[0]


array([ 0.58591043,  0.78477065, -0.95542526,  0.34329103,  0.45980531,
       -0.55979363,  0.61586497, -0.74978971,  0.91283743,  0.2011378 ,
        0.26940473, -0.32798893,  0.36084171, -0.43930961,  0.53484097,
        0.48331276, -0.58841296,  0.71636803, -0.87214794])

13.4 Reducing Variance with Regularization

In [15]:
# Load libraries
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression
# Simulate a regression dataset: 100 observations, 3 features (2 informative)
features, target = make_regression(
    n_samples=100,
    n_features=3,
    n_informative=2,
    n_targets=1,
    noise=0.2,
    coef=False,
    random_state=1,
)
# Put every feature on a common scale so the penalty treats all
# coefficients alike
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
# Ridge regression; alpha sets the strength of the penalty on the
# coefficients
regression = Ridge(alpha=0.5)
# Fit the ridge regression on the standardized features
model = regression.fit(features_standardized, target)

In [16]:
# Load library
from sklearn.linear_model import RidgeCV
# Create a cross-validated ridge regression that chooses among three
# candidate alpha values
regr_cv = RidgeCV(alphas=[0.1, 1.0, 10.0])
# Fit the cross-validated ridge regression on the standardized features
model_cv = regr_cv.fit(features_standardized, target)
# View the coefficients of the fitted model

model_cv.coef_

array([1.29223201e-02, 4.40972291e+01, 5.38979372e+01])

In [17]:
# View the alpha value that cross-validation selected from the candidates

model_cv.alpha_

np.float64(0.1)

13.5 Reducing Features with Lasso Regression

In [18]:
# Load library
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression
# Simulate a regression dataset: 100 observations, 3 features (2 informative)
features, target = make_regression(
    n_samples=100,
    n_features=3,
    n_informative=2,
    n_targets=1,
    noise=0.2,
    coef=False,
    random_state=1,
)
# Put every feature on a common scale so the penalty treats all
# coefficients alike
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
# Lasso regression; alpha sets the strength of the penalty on the
# coefficients
regression = Lasso(alpha=0.5)
# Fit the lasso regression on the standardized features
model = regression.fit(features_standardized, target)

In [19]:
# View the lasso coefficients; the first feature's coefficient is zero,
# so that feature is effectively dropped from the model
model.coef_


array([-0.        , 43.58618393, 53.39523724])

In [20]:
# Create lasso regression with a much higher alpha (10 instead of 0.5)
regression_a10 = Lasso(alpha=10)
# Fit on the same standardized features
model_a10 = regression_a10.fit(features_standardized, target)
# View the coefficients; the nonzero ones are smaller than with alpha=0.5
model_a10.coef_


array([-0.        , 32.92181899, 42.73086731])