## Regularization Techniques : Ridge Regression

In [1]:
import warnings
warnings.filterwarnings('ignore')

### Load the crime dataset

In [2]:
import pandas as pd
url = './Datasets/communities.data.txt'
crime = pd.read_csv(url, header=None, na_values=['?'])
crime.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,8,,,Lakewoodcity,1,0.19,0.33,0.02,0.9,0.12,...,0.12,0.26,0.2,0.06,0.04,0.9,0.5,0.32,0.14,0.2
1,53,,,Tukwilacity,1,0.0,0.16,0.12,0.74,0.45,...,0.02,0.12,0.45,,,,,0.0,,0.67
2,24,,,Aberdeentown,1,0.0,0.42,0.49,0.56,0.17,...,0.01,0.21,0.02,,,,,0.0,,0.43
3,34,5.0,81440.0,Willingborotownship,1,0.04,0.77,1.0,0.08,0.12,...,0.02,0.39,0.28,,,,,0.0,,0.12
4,42,95.0,6096.0,Bethlehemtownship,1,0.01,0.55,0.02,0.95,0.09,...,0.04,0.09,0.02,,,,,0.0,,0.03


### Prepare the crime dataset

In [3]:
# remove categorical features
crime.drop([0, 1, 2, 3, 4], axis=1, inplace=True)

# remove rows with any missing values
crime.dropna(inplace=True)

# check the shape
crime.shape

(319, 123)

In [4]:
crime.head()

Unnamed: 0,5,6,7,8,9,10,11,12,13,14,...,118,119,120,121,122,123,124,125,126,127
0,0.19,0.33,0.02,0.9,0.12,0.17,0.34,0.47,0.29,0.32,...,0.12,0.26,0.2,0.06,0.04,0.9,0.5,0.32,0.14,0.2
16,0.15,0.31,0.4,0.63,0.14,0.06,0.58,0.72,0.65,0.47,...,0.06,0.39,0.84,0.06,0.06,0.91,0.5,0.88,0.26,0.49
20,0.25,0.54,0.05,0.71,0.48,0.3,0.42,0.48,0.28,0.32,...,0.09,0.46,0.05,0.09,0.05,0.88,0.5,0.76,0.13,0.34
21,1.0,0.42,0.47,0.59,0.12,0.05,0.41,0.53,0.34,0.33,...,1.0,0.07,0.15,1.0,0.35,0.73,0.0,0.31,0.21,0.69
23,0.11,0.43,0.04,0.89,0.09,0.06,0.45,0.48,0.31,0.46,...,0.16,0.12,0.07,0.04,0.01,0.81,1.0,0.56,0.09,0.63


In [5]:
# define X and y
X = crime.drop(127, axis=1)
y = crime[127]

### Split the dataset into training and testing  

In [6]:
# split into training and testing sets
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

### Lets set up a baseline for discussion : Linear regression

In [7]:
# build a linear regression model
from sklearn.linear_model import LinearRegression
linreg = LinearRegression(normalize=True)
linreg.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=True)

In [8]:
# examine the coefficients
print (linreg.coef_)

[ -3.66188167e+00   6.98124465e-01  -2.61955467e-01  -2.85270027e-01
  -1.64740837e-01   2.46972333e-01  -1.09290051e+00  -5.96857796e-01
   1.11200239e+00  -7.21968931e-01   4.27346598e+00  -2.28040268e-01
   8.04875769e-01  -2.57934732e-01  -2.63458023e-01  -1.04616958e+00
   6.07784197e-01   7.73552561e-01   5.96468029e-02   6.90215922e-01
   2.16759430e-02  -4.87802949e-01  -5.18858404e-01   1.39478815e-01
  -1.24417942e-01   3.15003821e-01  -1.52633736e-01  -9.65003927e-01
   1.17142163e+00  -3.08546690e-02  -9.29085548e-01   1.24654586e-01
   1.98104506e-01   7.30804821e-01  -1.77337294e-01   8.32927588e-02
   3.46045601e-01   5.01837338e-01   1.57062958e+00  -4.13478807e-01
   1.39350802e+00  -3.49428114e+00   7.09577818e-01  -8.32141352e-01
  -1.39984927e+00   1.02482840e+00   2.13855006e-01  -6.18937325e-01
   5.28954490e-01   7.98294890e-02   5.93688560e-02  -1.68582667e-01
   7.31264051e-01  -1.39635208e+00   2.38507704e-01   5.50621439e-01
  -5.61447867e-01   6.18989764e-01

In [9]:
# make predictions
y_pred = linreg.predict(X_test)

In [10]:
# calculate RMSE
from sklearn import metrics
import numpy as np
print (np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.233813676495


# Ridge regression



### try alpha = 0  i.e.  equivalent to linear regression

In [11]:
from sklearn.linear_model import Ridge

In [12]:
ridgereg = Ridge(alpha=0,normalize=True)
ridgereg.fit(X_train, y_train)

Ridge(alpha=0, copy_X=True, fit_intercept=True, max_iter=None, normalize=True,
   random_state=None, solver='auto', tol=0.001)

In [13]:
y_pred = ridgereg.predict(X_test)
print (np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.233813676495


## try alpha=0.1

In [14]:
ridgereg = Ridge(alpha=0.1,normalize=True)
ridgereg.fit(X_train, y_train)

Ridge(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=True, random_state=None, solver='auto', tol=0.001)

In [15]:
y_pred = ridgereg.predict(X_test)
print (np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.164279068049


In [16]:
# examine the coefficients
print (ridgereg.coef_)

[ -4.00298418e-03   3.51647445e-02   6.03535935e-02  -7.68532502e-02
  -1.76099849e-02   4.53791433e-02   8.81586468e-03  -2.88885814e-02
  -1.92143587e-02   3.36122201e-02   5.71590736e-04  -4.85438136e-02
   5.55725157e-02  -1.15934270e-01  -1.11880845e-01  -3.32742094e-01
  -1.12302031e-02   9.63833243e-02  -8.92057732e-02   8.42691702e-02
  -1.67246717e-02   7.42520308e-03  -1.21294025e-01  -6.70155789e-02
  -1.74250249e-03   1.69446833e-01   3.18217654e-02  -1.00209834e-01
   3.97535644e-02  -1.19173054e-01  -1.04445267e-01  -5.14946676e-03
   1.10071013e-01  -3.22958955e-02  -1.40601627e-01   7.72658029e-02
   9.07962536e-02  -3.78878862e-03   4.61941793e-02   6.30299731e-02
  -3.09236932e-02   1.02883578e-02   9.70425568e-02  -1.28936944e-01
  -1.38268907e-01  -6.37169778e-02  -8.80160419e-02  -4.01991014e-02
   8.11064596e-02  -6.30663975e-02   1.29756859e-01  -6.25210624e-02
   1.60531213e-02  -1.39061824e-01   6.39822353e-02   4.87118744e-02
  -7.68217532e-03  -1.53523412e-03

### Play with higher values of alpha 

In [17]:
ridgereg = Ridge(alpha=10,normalize=True)
ridgereg.fit(X_train, y_train)

Ridge(alpha=10, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=True, random_state=None, solver='auto', tol=0.001)

In [18]:
y_pred = ridgereg.predict(X_test)
print (np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.169392116519


# How to find the best alpha 

- [RidgeCV](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RidgeCV.html): ridge regression with built-in cross-validation of the alpha parameter
- **alphas:** array of alpha values to try

In [19]:
# create an array of alpha values
alpha_range = 10.**np.arange(-2, 3)
alpha_range

array([  1.00000000e-02,   1.00000000e-01,   1.00000000e+00,
         1.00000000e+01,   1.00000000e+02])

In [20]:
# select the best alpha with RidgeCV
from sklearn.linear_model import RidgeCV
ridgeregcv = RidgeCV(alphas=alpha_range,normalize=True, scoring='mean_squared_error')
ridgeregcv.fit(X_train, y_train)
ridgeregcv.alpha_

1.0

In [21]:
# predict method uses the best alpha value
y_pred = ridgeregcv.predict(X_test)
print (np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.163129782343


In [22]:
# examine the coefficients
print (ridgereg.coef_)

[ 0.0078057   0.00068067  0.02597071 -0.03081005  0.00151389  0.00321031
  0.01129573  0.00877348  0.00811787 -0.00197469  0.00800121  0.00529215
 -0.02499396 -0.01840776 -0.02774795 -0.03763862  0.00030172  0.02225054
 -0.00847119 -0.02348203 -0.01487199 -0.00191017 -0.02131734 -0.00775974
 -0.0067585   0.00542414 -0.00248941  0.01001449  0.02712893  0.00754837
  0.01406362 -0.00893479  0.02374387 -0.02181405 -0.01666394  0.01267545
  0.00685145 -0.01037955  0.03641986  0.02064673  0.04093183  0.03916969
  0.01229286 -0.03579926 -0.03616494 -0.02910301 -0.03647109  0.00097991
 -0.00569324  0.01558717  0.025181    0.00722778  0.00660626  0.00698107
  0.01288285  0.0150315   0.00156265  0.0021472   0.0029208   0.00326826
 -0.00120609  0.00357636  0.0181875   0.01440221 -0.00235926 -0.00183057
  0.00693026 -0.02280544  0.01368418  0.03099657 -0.00774271  0.00999295
 -0.01572185 -0.0219373   0.01704265  0.00745804 -0.00271318  0.02208461
  0.01633345 -0.00798824 -0.00605147 -0.00397849 -0