# Ridge Regression

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

In [2]:
# Load the dataset straight from the workshop's GitHub repository (network access required).
_df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/PacktWorkshops/The-Data-Science-Workshop/master/Chapter07/Dataset/ccpp.csv'
)

In [3]:
# Quick sanity check of the loaded frame: column names, dtypes and non-null counts.
_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9568 entries, 0 to 9567
Data columns (total 5 columns):
AT    9568 non-null float64
V     9568 non-null float64
AP    9568 non-null float64
RH    9568 non-null float64
PE    9568 non-null float64
dtypes: float64(5)
memory usage: 373.9 KB


In [4]:
# 'PE' is the regression target; every remaining column is a predictor.
# Both are converted to plain NumPy arrays for scikit-learn.
y = _df['PE'].values
X = _df.drop(columns=['PE']).values

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_X, eval_X, train_y, eval_y = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=0,
)

# Implement a LinearRegression model

In [6]:
# Baseline: ordinary least squares on the raw (unscaled, unexpanded) features.
lr_model_1 = LinearRegression()

In [7]:
# Fit the baseline on the training split only.
lr_model_1.fit(X=train_X, y=train_y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [8]:
# Predictions on the held-out rows, reused below for the MSE.
lr_model_1_preds = lr_model_1.predict(X=eval_X)

In [9]:
# R2 of the baseline on the evaluation split.
print(
    'lr_model_1 R2 Score: {}'.format(
        lr_model_1.score(X=eval_X, y=eval_y)
    )
)

lr_model_1 R2 Score: 0.9325315554761303


In [10]:
# Mean squared error of the baseline on the evaluation split.
print(
    'lr_model_1 MSE: {}'.format(
        mean_squared_error(y_true=eval_y, y_pred=lr_model_1_preds)
    )
)

lr_model_1 MSE: 19.733699303497637


# Engineer cubic features

In [11]:
# Pipeline stages for the cubic model, applied in order.
steps = [
    # Rescale every feature with MinMaxScaler before raising it to powers.
    ('scaler', MinMaxScaler()),
    # Expand the scaled features into all polynomial terms up to degree 3.
    ('poly', PolynomialFeatures(degree=3)),
    # Ordinary least squares on the expanded feature set.
    ('lr', LinearRegression())
]

In [12]:
# Chain scaling, cubic expansion and regression into one estimator.
lr_model_2 = Pipeline(steps=steps)

In [13]:
# Fit every pipeline stage on the training split only.
lr_model_2.fit(X=train_X, y=train_y)

Pipeline(memory=None,
         steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('poly',
                 PolynomialFeatures(degree=3, include_bias=True,
                                    interaction_only=False, order='C')),
                ('lr',
                 LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
                                  normalize=False))],
         verbose=False)

In [14]:
# R2 of the cubic pipeline on the evaluation split.
print(
    'lr_model_2 R2 Score: {}'.format(
        lr_model_2.score(X=eval_X, y=eval_y)
    )
)

lr_model_2 R2 Score: 0.9443678654045208


In [15]:
# Held-out predictions from the cubic pipeline, reused below for the MSE.
lr_model_2_preds = lr_model_2.predict(X=eval_X)

In [16]:
# Mean squared error of the cubic pipeline on the evaluation split.
print(
    'lr_model_2 MSE: {}'.format(
        mean_squared_error(y_true=eval_y, y_pred=lr_model_2_preds)
    )
)

lr_model_2 MSE: 16.27172263220766


In [17]:
# Inspect the fitted weights of the final ('lr') stage of the cubic pipeline.
print(lr_model_2.named_steps['lr'].coef_)

[ 7.72661789e-14 -1.77278028e+02 -4.60337188e+01 -1.60520675e+02
 -1.23076123e+02  6.23358210e+00  8.19655844e+00  1.45478576e+02
  1.88658651e+02  2.43740192e+01  1.80553150e+02 -1.08058561e+02
  1.09713294e+02  1.79121906e+02  1.06460596e+02  2.67290613e+01
  7.79833654e+01  3.69241324e+01 -1.13863997e+02 -1.42673215e+02
 -9.69606773e+01  1.90706809e+02 -5.56429546e+01 -1.32595225e+02
 -9.41682917e+01  9.40112729e+01 -1.18732510e+02 -7.64871610e+01
 -4.18714081e+01  6.36772260e+01  4.42340977e+01 -3.81114691e+01
 -4.71547759e+01 -9.16797074e+01 -2.52346805e+01]


In [18]:
# Number of weights learned by the 'lr' stage, i.e. the size of the expanded feature set.
print(len(lr_model_2.named_steps['lr'].coef_))

35


# Engineer degree-10 polynomial features

In [19]:
# Same pipeline layout as the cubic model, but with a much larger expansion.
steps = [
    # Rescale every feature with MinMaxScaler before raising it to powers.
    ('scaler', MinMaxScaler()),
    # Expand the scaled features into all polynomial terms up to degree 10.
    ('poly', PolynomialFeatures(degree=10)),
    # Unregularised least squares on the expanded feature set.
    ('lr', LinearRegression())
]

In [20]:
# Chain scaling, degree-10 expansion and regression into one estimator.
lr_model_3 = Pipeline(steps=steps)

In [21]:
# Fit every pipeline stage on the training split only.
lr_model_3.fit(X=train_X, y=train_y)

Pipeline(memory=None,
         steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('poly',
                 PolynomialFeatures(degree=10, include_bias=True,
                                    interaction_only=False, order='C')),
                ('lr',
                 LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
                                  normalize=False))],
         verbose=False)

In [22]:
# R2 of the degree-10 pipeline on the evaluation split.
print(
    'lr_model_3 R2 Score: {}'.format(
        lr_model_3.score(X=eval_X, y=eval_y)
    )
)

lr_model_3 R2 Score: 0.5683441691963667


In [23]:
# Held-out predictions from the degree-10 pipeline, reused below for the MSE.
lr_model_3_preds = lr_model_3.predict(X=eval_X)

In [24]:
# Mean squared error of the degree-10 pipeline on the evaluation split.
print(
    'lr_model_3 MSE: {}'.format(
        mean_squared_error(y_true=eval_y, y_pred=lr_model_3_preds)
    )
)

lr_model_3 MSE: 126.25407963372724


In [25]:
# Number of weights learned by the 'lr' stage after the degree-10 expansion.
print(len(lr_model_3.named_steps['lr'].coef_))

1001


In [26]:
# Show only the first 35 weights so they line up with the cubic model's full coefficient vector.
print(lr_model_3.named_steps['lr'].coef_[:35])

[ 3.92493073e+05 -6.90880756e+07 -4.12730736e+07  2.27928296e+07
 -4.76786535e+07  2.96660772e+08  2.73269211e+08  1.07842470e+08
  3.73716530e+08  8.79698224e+07 -2.35343276e+07  2.46253110e+08
 -2.61104544e+08  1.86081203e+07  1.41130624e+08 -6.53879247e+08
 -8.90635637e+08 -1.06073301e+09 -1.29263356e+09 -4.28438465e+08
  5.31481876e+07 -1.30409467e+09  4.41030510e+08 -8.86215800e+08
 -8.78154429e+08 -1.97221576e+06 -5.39374275e+08 -3.68353450e+08
  9.82103996e+08 -2.76727773e+08 -6.28826853e+08  8.14255868e+08
  5.43206968e+08 -2.03042423e+08 -2.42927974e+08]


# Implement Ridge on the same pipeline

In [27]:
# Identical preprocessing to the degree-10 model; only the final estimator changes.
steps = [
    # Rescale every feature with MinMaxScaler before raising it to powers.
    ('scaler', MinMaxScaler()),
    # Expand the scaled features into all polynomial terms up to degree 10.
    ('poly', PolynomialFeatures(degree=10)),
    # Ridge regression with regularisation strength alpha=0.9; the step keeps
    # the name 'lr' used by the earlier pipelines.
    ('lr', Ridge(alpha=0.9))
]

In [28]:
# Chain scaling, degree-10 expansion and ridge regression into one estimator.
ridge_model = Pipeline(steps=steps)

In [29]:
# Fit every pipeline stage on the training split only.
ridge_model.fit(X=train_X, y=train_y)

Pipeline(memory=None,
         steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('poly',
                 PolynomialFeatures(degree=10, include_bias=True,
                                    interaction_only=False, order='C')),
                ('lr',
                 Ridge(alpha=0.9, copy_X=True, fit_intercept=True,
                       max_iter=None, normalize=False, random_state=None,
                       solver='auto', tol=0.001))],
         verbose=False)

In [30]:
# R2 of the ridge pipeline on the evaluation split.
print(
    'ridge_model R2 Score: {}'.format(
        ridge_model.score(X=eval_X, y=eval_y)
    )
)

ridge_model R2 Score: 0.9451949082623442


In [31]:
# Held-out predictions from the ridge pipeline, reused below for the MSE.
ridge_model_preds = ridge_model.predict(X=eval_X)

In [32]:
# Mean squared error of the ridge pipeline on the evaluation split.
print(
    'ridge_model MSE: {}'.format(
        mean_squared_error(y_true=eval_y, y_pred=ridge_model_preds)
    )
)

ridge_model MSE: 16.029822656855167


In [33]:
# Number of weights learned by the ridge ('lr') stage after the degree-10 expansion.
print(len(ridge_model.named_steps['lr'].coef_))

1001


In [34]:
# First 35 ridge weights, for side-by-side comparison with the unregularised degree-10 model.
print(ridge_model.named_steps['lr'].coef_[:35])

[  0.         -39.79803902  -7.77413135   6.07694837   3.10326786
 -18.17945028  -9.45440071  -7.4037462  -16.97192766  -9.10799691
   6.96959155  -1.55574911   4.49242992   0.31127893   5.27565009
  -4.07568831  -0.95958324   2.38995687  -6.1583696   -2.05510604
   2.3741985   -1.30281151  -1.7837005   -4.53024264  -8.30749466
  -3.42801698   0.65288784  -2.74767783   5.47711767   4.68241474
  -2.1214614   -0.47331885   0.43221968  -0.28909998   4.64549348]
