# Generalized Linear Model
---

## Import Data

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
boston = load_boston()

### Features in pandas data frame format

In [2]:
X = pd.DataFrame(boston.data)
X.columns = boston.feature_names
X.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


### Target in pandas data frame format

In [3]:
y = pd.DataFrame(boston.target)
y.columns = ['MEDV']
y.head()

Unnamed: 0,MEDV
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


## Preprocessing data
### Check missing values in features

In [4]:
X.isnull().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
dtype: int64

### Check missing values in target

In [5]:
y.isnull().sum()

MEDV    0
dtype: int64

## Split data into train and test datasets

In [46]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [7]:
print('X train: ', X_train.shape)
print('X test: ', X_test.shape)

X train:  (354, 13)
X test:  (152, 13)


In [8]:
print('y train: ', y_train.shape)
print('y test: ', y_test.shape)

y train:  (354, 1)
y test:  (152, 1)


---
## Ordinary least squares
---

### Estimator: linear regression

In [9]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()

### Fit model

In [10]:
lr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

### Prediction

#### prediction and evaluation on train data

In [11]:
pred_train = lr.predict(X_train)

In [12]:
lr.score(X_train, y_train)

0.76197804044666506

#### prediction and evaluation on test data

In [13]:
pred_test = lr.predict(X_test)

In [14]:
lr.score(X_test, y_test)

0.678273741437321

### Intercept and coefficients

In [15]:
lr.intercept_

array([ 35.21162512])

In [16]:
coef_df = pd.DataFrame(lr.coef_)
coef_df.columns = X_train.columns
coef_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.138468,0.03889,0.011154,1.85819,-16.28625,4.056239,0.008048,-1.353007,0.336154,-0.014506,-1.022434,0.009879,-0.533994


---
## Ridge Regression
---

### Estimator: ridge regression

In [17]:
from sklearn.linear_model import Ridge
rr = Ridge(alpha=1.0)

### Fit model

In [18]:
rr.fit(X_train, y_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

### Prediction

#### Prediction and evaluation on train data

In [19]:
pred_train = rr.predict(X_train)
rr.score(X_train, y_train)

0.75976778494604369

#### Prediction and evaluation on test data

In [20]:
pred_test = rr.predict(X_test)
rr.score(X_test, y_test)

0.67255811695763934

#### Intercept and coefficients

In [21]:
rr.intercept_

array([ 29.78569877])

In [22]:
coef_df = pd.DataFrame(rr.coef_)
coef_df.columns = X_train.columns
coef_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.133954,0.039671,-0.023611,1.71651,-8.688249,4.121326,0.00173,-1.239707,0.327812,-0.01565,-0.9323,0.010283,-0.542773


---
## Lasso Regression
---

### Estimator: lasso regression

In [23]:
from sklearn.linear_model import Lasso
lar = Lasso(alpha=1.0)

### Fit model

In [24]:
lar.fit(X_train, y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

### Prediction

#### Prediction and evaluation on train data

In [25]:
pred_train = lar.predict(X_train)
lar.score(X_train, y_train)

0.707795335174217

#### Prediction and evaluation on test data

In [26]:
pred_test = lar.predict(X_test)
lar.score(X_test, y_test)

0.64256957343317889

### Intercept and coefficients

In [27]:
lar.intercept_

array([ 39.79763004])

In [28]:
coef_df = pd.DataFrame([lar.coef_])
coef_df.columns = X_train.columns
coef_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.080157,0.035737,-0.0,0.0,-0.0,1.33755,0.026113,-0.541623,0.306162,-0.018071,-0.786297,0.008335,-0.758239


---
## Elastic Net Regression
---

### Estimator: elastic net regression

In [29]:
from sklearn.linear_model import ElasticNet
enr = ElasticNet(alpha=1.0, l1_ratio=0.5)

### Fit model

In [30]:
enr.fit(X_train, y_train)

ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

### Prediction
#### Prediction and evaluation on train data

In [31]:
pred_train = enr.predict(X_train)
enr.score(X_train, y_train)

0.70708417386644151

#### Prediction and evaluation on test data

In [32]:
pred_test = enr.predict(X_test)
enr.score(X_test, y_test)

0.64159111007151248

### Intercept and coefficients

In [33]:
enr.intercept_

array([ 42.97580169])

In [34]:
coef_df = pd.DataFrame([enr.coef_])
coef_df.columns = X_train.columns
coef_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.099571,0.042531,-0.028024,0.0,-0.0,1.07017,0.027552,-0.684826,0.347969,-0.019365,-0.809658,0.008305,-0.771544


---
## Least Angle Regression
---

Least-angle regression (LARS) is a regression algorithm for high-dimensional data (p >> n). The algorithm is based upon an iterative refitting of the residuals, therefore, it is sensitive to the effects of noise.

Here we use the same dataset and see how the LARS algorithm performs on none high-dimensional data.

### Estimator: least angle regression

In [35]:
from sklearn.linear_model import Lars
lars = Lars()

### Fit model

In [36]:
lars.fit(X_train, y_train)

Lars(copy_X=True, eps=2.2204460492503131e-16, fit_intercept=True,
   fit_path=True, n_nonzero_coefs=500, normalize=True, positive=False,
   precompute='auto', verbose=False)

### Prediction
#### Prediction and evaluation on train data

In [37]:
pred_train = lars.predict(X_train)
lars.score(X_train, y_train)

0.73153809742507492

#### Prediction and evaluation on test data

In [38]:
pred_test = lars.predict(X_test)
lars.score(X_test, y_test)

0.65218297843490258

From the evaluation results, it seems the LARS algorithm is more highly overfitted than the previous methods on the same dataset.

### Intercept and coefficients

In [39]:
lars.intercept_

array([ 32.29924919])

In [40]:
coef_df = pd.DataFrame([lars.coef_])
coef_df.columns = X_train.columns
coef_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.142535,0.019911,-0.175498,2.656075,-20.366855,4.233743,0.004939,-1.476279,-0.093686,0.015742,-1.099128,0.010383,-0.528913


---
## LARS Lasso
---

**LARS Lasso** is a lasso model implemented in the LARS algorithm.

### Estimator: LARS Lasso

In [41]:
from sklearn.linear_model import LassoLars
ll = LassoLars(alpha=1.0)

### Fit model

In [42]:
ll.fit(X_train, y_train)

LassoLars(alpha=1.0, copy_X=True, eps=2.2204460492503131e-16,
     fit_intercept=True, fit_path=True, max_iter=500, normalize=True,
     positive=False, precompute='auto', verbose=False)

### Prediction
#### Prediction and evaluation on train data

In [43]:
pred_train = ll.predict(X_train)
ll.score(X_train, y_train)

0.0

In [44]:
pred_test = ll.predict(X_test)
ll.score(X_test, y_test)

-1.4316551885995564e-05

The evaluation score is 0!!! After checking the predicted values, we found that the model has the same predicted values for all samples.

In [45]:
obs_pred = pd.DataFrame({
    'y': y_train.MEDV,
    'prediction': pred_train
})
obs_pred.head(10)

Unnamed: 0,prediction,y
377,22.54322,13.3
64,22.54322,33.0
144,22.54322,11.8
116,22.54322,21.2
310,22.54322,16.1
295,22.54322,28.6
150,22.54322,21.5
15,22.54322,19.9
175,22.54322,29.4
475,22.54322,13.3


---
## Orthogonal Matching Pursuit
---

**`Orthogonal Matching Pursuit`** is a forward feature selection method which includes the atom most highly correlated with the current residual at each step.

### Estimator: orthogonal matching pursuit

In [63]:
from sklearn.linear_model import OrthogonalMatchingPursuit
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=12)

### Fit the model

In [64]:
omp.fit(X_train, y_train)

OrthogonalMatchingPursuit(fit_intercept=True, n_nonzero_coefs=12,
             normalize=True, precompute='auto', tol=None)

### Prediction
#### Prediction and evaluation on train data

In [65]:
pred_train = omp.predict(X_train)
omp.score(X_train, y_train)

0.74818851611479065

#### Prediction and evaluation on test data

In [66]:
pred_test = omp.predict(X_test)
omp.score(X_test, y_test)

0.7064394787346705

### Intercept and coefficients

In [67]:
omp.intercept_

array([ 33.32895215])

In [69]:
pd.DataFrame([omp.coef_])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,-0.118527,0.061092,-0.014467,4.565906,-18.29068,3.905691,0.0,-1.49358,0.347498,-0.014225,-0.855225,0.011644,-0.467656
