# Generalized Linear Model
---

## Import Data

In [9]:
import numpy as np
import pandas as pd
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on an older scikit-learn — confirm the pinned version.
from sklearn.datasets import load_boston
# Later cells read boston.data, boston.feature_names and boston.target.
boston = load_boston()

### Features in pandas data frame format

In [25]:
# Wrap the raw feature matrix in a DataFrame, labelling the columns with the
# dataset's own feature names in the same step.
X = pd.DataFrame(boston.data, columns=boston.feature_names)
X.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


### Target in pandas data frame format

In [26]:
# Single-column DataFrame holding the regression target.
y = pd.DataFrame(boston.target, columns=['MEDV'])
y.head()

Unnamed: 0,MEDV
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


## Preprocessing data
### Check missing values in features

In [33]:
# Count missing values per feature column; a non-zero count would mean the
# column needs cleaning before modelling.
X.isnull().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
dtype: int64

### Check missing values in target

In [35]:
# Same missing-value check for the target column.
y.isnull().sum()

MEDV    0
dtype: int64

## Split data into train and test datasets

In [40]:
# train_test_split lives in sklearn.model_selection; the former
# sklearn.cross_validation module was deprecated in 0.18 and removed in 0.20.
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. The fixed random_state makes the split,
# and therefore every score and coefficient below, reproducible on
# Restart & Run All. The outputs recorded in this notebook came from an
# unseeded run, so re-running gives different (but now stable) numbers.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [46]:
# Report (rows, columns) for each feature split.
for label, frame in (('X train: ', X_train), ('X test: ', X_test)):
    print(label, frame.shape)

X train:  (354, 13)
X test:  (152, 13)


In [47]:
# Report (rows, columns) for each target split.
for label, frame in (('y train: ', y_train), ('y test: ', y_test)):
    print(label, frame.shape)

y train:  (354, 1)
y test:  (152, 1)


---
## Ordinary least squares
---

### Estimator: linear regression

In [67]:
from sklearn.linear_model import LinearRegression
# Baseline estimator for this section, constructed with its default settings.
lr = LinearRegression()

### Fit model

In [68]:
# Fit on the training split only; X_test / y_test are reserved for evaluation.
lr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

### Prediction

#### Prediction and evaluation on train data

In [71]:
# Training-set predictions, kept for inspection: no later cell reads pred_train
# before the Ridge section overwrites it.
pred_train = lr.predict(X_train)

In [72]:
# Score on the data the model was fitted to (presumably R^2, the default
# regressor metric in scikit-learn — confirm against the docs).
lr.score(X_train, y_train)

0.76811739938128887

#### Prediction and evaluation on test data

In [73]:
# Held-out predictions, kept for inspection: no later cell reads pred_test
# before the Ridge section overwrites it.
pred_test = lr.predict(X_test)

In [74]:
# Same metric on the held-out split; compare with the training score above to
# gauge overfitting.
lr.score(X_test, y_test)

0.62702475598448937

### Intercept and coefficients

In [75]:
# Fitted intercept; displayed as a one-element array (y_train is a
# single-column DataFrame rather than a 1-D vector).
lr.intercept_

array([ 23.85745439])

In [79]:
# One-row table of the OLS coefficients, labelled with the feature names.
coef_df = pd.DataFrame(lr.coef_, columns=X_train.columns)
coef_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.069525,0.048099,0.062378,3.66253,-17.611557,5.120632,0.001922,-1.326897,0.296098,-0.011203,-0.914047,0.012374,-0.464937


---
## Ridge Regression
---

### Estimator: ridge regression

In [83]:
from sklearn.linear_model import Ridge
# alpha is set explicitly (presumably the L2 penalty strength — see the
# scikit-learn Ridge docs); it is not tuned anywhere in this notebook.
rr = Ridge(alpha=1.0)

### Fit model

In [84]:
# Fit on the same training split used for the OLS model so scores are comparable.
rr.fit(X_train, y_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

### Prediction

#### Prediction and evaluation on train data

In [85]:
# Overwrites the OLS pred_train; the predictions are kept for inspection and
# are not passed to score() below.
pred_train = rr.predict(X_train)
# Cell output: training-split score (presumably R^2 — confirm).
rr.score(X_train, y_train)

0.76563429772514691

#### Prediction and evaluation on test data

In [86]:
# Overwrites the OLS pred_test; the predictions are kept for inspection and
# are not passed to score() below.
pred_test = rr.predict(X_test)
# Cell output: held-out score, to compare with the training score above.
rr.score(X_test, y_test)

0.62545483938198398

### Intercept and coefficients

In [87]:
# Fitted Ridge intercept; displayed as a one-element array, as in the OLS section.
rr.intercept_

array([ 18.24700074])

In [89]:
# One-row table of the Ridge coefficients, labelled with the feature names.
# Rebinds coef_df, replacing the OLS table built earlier.
coef_df = pd.DataFrame(rr.coef_, columns=X_train.columns)
coef_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.064132,0.047755,0.032516,3.330625,-8.699016,5.14786,-0.006258,-1.18736,0.274698,-0.011829,-0.843291,0.013031,-0.480204


---
## Lasso Regression
---

### Estimator: lasso regression

In [91]:
from sklearn.linear_model import Lasso
# NOTE: `lar` is the Lasso model, not least-angle regression — that estimator is
# named `lars` further down. alpha is fixed at 1.0 and not tuned in this notebook.
lar = Lasso(alpha=1.0)

### Fit model

In [92]:
# Fit the Lasso model on the shared training split.
lar.fit(X_train, y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

### Prediction

#### Prediction and evaluation on train data

In [93]:
# Overwrites the Ridge pred_train; the predictions are kept for inspection and
# are not passed to score() below.
pred_train = lar.predict(X_train)
# Cell output: training-split score (presumably R^2 — confirm).
lar.score(X_train, y_train)

0.70938202305180686

#### Prediction and evaluation on test data

In [94]:
# Overwrites the Ridge pred_test; the predictions are kept for inspection and
# are not passed to score() below.
pred_test = lar.predict(X_test)
# Cell output: held-out score, to compare with the training score above.
lar.score(X_test, y_test)

0.65976712807891169

### Intercept and coefficients

In [111]:
# Fitted Lasso intercept; displayed as a one-element array.
lar.intercept_

array([ 35.04089598])

In [112]:
# One-row table of the Lasso coefficients. coef_ is wrapped in a list so it
# becomes a single row (the OLS and Ridge cells pass coef_ unwrapped).
coef_df = pd.DataFrame([lar.coef_], columns=X_train.columns)
coef_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.019067,0.0455,-0.0,0.0,-0.0,1.813864,0.025582,-0.647293,0.244923,-0.012676,-0.818371,0.010497,-0.741411


---
## Elastic Net Regression
---

### Estimator: elastic net regression

In [114]:
from sklearn.linear_model import ElasticNet
# alpha and l1_ratio are fixed by hand here, not tuned; l1_ratio=0.5 presumably
# weights the L1 and L2 penalties equally — confirm against the ElasticNet docs.
enr = ElasticNet(alpha=1.0, l1_ratio=0.5)

### Fit model

In [115]:
# Fit the Elastic Net model on the shared training split.
enr.fit(X_train, y_train)

ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

### Prediction
#### Prediction and evaluation on train data

In [117]:
# Overwrites the Lasso pred_train; the predictions are kept for inspection and
# are not passed to score() below.
pred_train = enr.predict(X_train)
# Cell output: training-split score (presumably R^2 — confirm).
enr.score(X_train, y_train)

0.70103480302101495

#### Prediction and evaluation on test data

In [118]:
# Overwrites the Lasso pred_test; the predictions are kept for inspection and
# are not passed to score() below.
pred_test = enr.predict(X_test)
# Cell output: held-out score, to compare with the training score above.
enr.score(X_test, y_test)

0.66293177060042896

### Intercept and coefficients

In [119]:
# Fitted Elastic Net intercept; displayed as a one-element array.
enr.intercept_

array([ 40.67281305])

In [122]:
# One-row table of the Elastic Net coefficients; coef_ is wrapped in a list so
# it becomes a single row.
coef_df = pd.DataFrame([enr.coef_], columns=X_train.columns)
coef_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.05043,0.054588,-0.0,0.0,-0.0,1.213873,0.028948,-0.759284,0.303975,-0.01503,-0.8558,0.010148,-0.776525


---
## Least Angle Regression
---

Least-angle regression (LARS) is a regression algorithm designed for high-dimensional data (p >> n). Because the algorithm is based on iteratively refitting the residuals, it is sensitive to the effects of noise.

Here we use the same dataset to see how the LARS algorithm performs on data that is not high-dimensional.

### Estimator: least angle regression

In [124]:
from sklearn.linear_model import Lars
# Default settings. The repr printed after fit() shows n_nonzero_coefs=500 and
# normalize=True; with only 13 features the 500 cap presumably never binds, so
# the fit is not restricted to a sparse subset — confirm against the Lars docs.
lars = Lars()

### Fit model

In [125]:
# Fit the LARS model on the shared training split.
lars.fit(X_train, y_train)

Lars(copy_X=True, eps=2.2204460492503131e-16, fit_intercept=True,
   fit_path=True, n_nonzero_coefs=500, normalize=True, positive=False,
   precompute='auto', verbose=False)

### Prediction
#### Prediction and evaluation on train data

In [129]:
# Overwrites the Elastic Net pred_train; the predictions are kept for
# inspection and are not passed to score() below.
pred_train = lars.predict(X_train)
# Cell output: training-split score (presumably R^2 — confirm).
lars.score(X_train, y_train)

0.76811739938128887

#### Prediction and evaluation on test data

In [130]:
# Overwrites the Elastic Net pred_test; the predictions are kept for
# inspection and are not passed to score() below.
pred_test = lars.predict(X_test)
# Cell output: held-out score, to compare with the training score above.
lars.score(X_test, y_test)

0.62702475598448881

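From the evaluation results, LARS reproduces the ordinary least squares fit on this dataset: its train and test scores (about 0.768 and 0.627) and the coefficients below match the OLS results. With only 13 features and the default `n_nonzero_coefs=500`, the LARS path presumably runs to completion and ends at the least-squares solution. LARS therefore shows the same train–test gap as OLS, and a larger gap than Lasso and Elastic Net on the same split.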

### Intercept and coefficients

In [131]:
# Fitted LARS intercept; displayed as a one-element array.
lars.intercept_

array([ 23.85745439])

In [133]:
# One-row table of the LARS coefficients; coef_ is wrapped in a list so it
# becomes a single row.
coef_df = pd.DataFrame([lars.coef_], columns=X_train.columns)
coef_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.069525,0.048099,0.062378,3.66253,-17.611557,5.120632,0.001922,-1.326897,0.296098,-0.011203,-0.914047,0.012374,-0.464937
