In [1]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression

In [2]:
# Load the Boston housing dataset object that ships with scikit-learn and
# print its DESCR text.
bean = datasets.load_boston()
# Parenthesised form: prints the same text under Python 2 and is also valid
# syntax under Python 3 (the bare `print x` statement is not).
print(bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [3]:
def load_boston(random_state=None):
    """Load the Boston housing data, split it, and standardise the features.

    The data is split first and the ``StandardScaler`` is fitted on the
    training rows only; the test rows are then transformed with those
    training statistics. Fitting the scaler on the full matrix before the
    split (as this function used to do) lets the test rows influence the
    mean/variance used for scaling.

    Parameters
    ----------
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        previous behaviour of an unseeded split; pass an integer to make the
        split repeatable.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` in the same order that
        ``train_test_split`` returns them, with both feature arrays scaled.
    """
    boston = datasets.load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=random_state)

    # Fit on the training split only, then reuse those statistics for test.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test

In [4]:
# Unpack the four values returned by the notebook's own load_boston() helper
# into the train/test features and targets used by the cells below.
X_train, X_test, y_train, y_test = load_boston()

In [5]:
# Display the shape of the training feature array.
X_train.shape

(379L, 13L)

### Linear Regression

In [10]:
# Create a LinearRegression estimator with no arguments and fit it on the
# training split. The fit() call is the last expression, so its return value
# is what the cell displays.
lireg= LinearRegression()
lireg.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [11]:
# Pair each true test target with the linear model's prediction for a quick
# side-by-side look. list(...) materialises the pairs so the cell also shows
# them on Python 3, where zip() is lazy; on Python 2 the displayed list is
# the same as before.
list(zip(y_test, lireg.predict(X_test)))

[(18.0, 18.329981276907915),
 (19.600000000000001, 20.936854968900182),
 (13.300000000000001, 16.135454796628785),
 (39.799999999999997, 34.731102017054319),
 (50.0, 23.926489074966391),
 (22.399999999999999, 23.26389703822359),
 (22.0, 27.315945487804711),
 (37.299999999999997, 33.594062633344407),
 (43.100000000000001, 36.583109949574762),
 (18.899999999999999, 14.802541562815026),
 (35.200000000000003, 36.086433741271435),
 (34.700000000000003, 30.404464583092373),
 (23.699999999999999, 28.46285023748495),
 (21.699999999999999, 22.369542786256979),
 (23.699999999999999, 27.439357526187333),
 (20.0, 22.988345914257621),
 (26.600000000000001, 27.891089859687035),
 (36.399999999999999, 32.433013266604192),
 (22.600000000000001, 24.588327542762805),
 (24.5, 20.732901775130966),
 (24.699999999999999, 24.008290758616599),
 (37.0, 30.463068929024825),
 (35.399999999999999, 30.383411874607358),
 (37.200000000000003, 32.97477271491379),
 (42.799999999999997, 29.775185723290935),
 (50.0, 42.3

In [12]:
# Store the linear model's predictions for the test features; the metric
# cells below read this variable.
y_LiPrd=lireg.predict(X_test)

In [16]:
# Mean squared error between the test targets and the linear-model
# predictions; the bare name on the last line displays the value.
mse = mean_squared_error(y_test,y_LiPrd)
mse

20.143431322124787

In [17]:
# r2_score of the linear-model predictions against the test targets; the
# bare name on the last line displays the value.
regscr = r2_score(y_test,y_LiPrd)
regscr

0.75130358621343718

### MSE and $R^2$ of the linear regression predictions on the test set (values from the recorded run above):
##### MSE = 20.143431322124787
##### $R^2$ = 0.75130358621343718

In [19]:
from sklearn.linear_model import Lasso

In [49]:
# Value passed to Lasso as its `alpha` argument.
# NOTE(review): no tuning or cross-validation for 0.04 is visible in this
# notebook -- confirm how the value was chosen.
alpha = 0.04
lasso = Lasso(alpha=alpha)

In [50]:
# Fit the Lasso estimator on the same training split used for the linear
# model; the return value of fit() is what the cell displays.
lasso.fit(X_train,y_train)

Lasso(alpha=0.04, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [51]:
# Pair each true test target with the Lasso prediction for a quick
# side-by-side look. list(...) materialises the pairs so the cell also shows
# them on Python 3, where zip() is lazy; on Python 2 the displayed list is
# the same as before.
list(zip(y_test, lasso.predict(X_test)))

[(18.0, 18.742120988473445),
 (19.600000000000001, 20.824155709126142),
 (13.300000000000001, 15.901062089986929),
 (39.799999999999997, 34.536770585634102),
 (50.0, 23.679984029203133),
 (22.399999999999999, 23.42140605490804),
 (22.0, 27.250969712754475),
 (37.299999999999997, 33.040851571485057),
 (43.100000000000001, 36.327771028676409),
 (18.899999999999999, 15.30784645645922),
 (35.200000000000003, 35.904326091753418),
 (34.700000000000003, 30.578866540835961),
 (23.699999999999999, 28.267861598139199),
 (21.699999999999999, 22.357414382673664),
 (23.699999999999999, 27.330790186684411),
 (20.0, 23.186760888593824),
 (26.600000000000001, 27.883457118562859),
 (36.399999999999999, 32.617230049417643),
 (22.600000000000001, 25.032680890128496),
 (24.5, 20.737563972345843),
 (24.699999999999999, 24.246669769453447),
 (37.0, 30.613645050940956),
 (35.399999999999999, 30.097837913131563),
 (37.200000000000003, 32.668463752685533),
 (42.799999999999997, 30.232488018747276),
 (50.0, 41.

In [52]:
# Store the Lasso model's predictions for the test features; the metric
# cells below read this variable.
y_lasoPrd=lasso.predict(X_test)

In [53]:
# Mean squared error between the test targets and the Lasso predictions;
# the bare name on the last line displays the value.
mselaso = mean_squared_error(y_test,y_lasoPrd)
mselaso

20.13833258666488

In [54]:
# r2_score of the Lasso predictions against the test targets; the bare name
# on the last line displays the value.
r2lasoscr=r2_score(y_test,y_lasoPrd)
r2lasoscr

0.75136653662160979

### MSE and $R^2$ of the Lasso regression predictions on the test set (values from the recorded run above):
##### MSE = 20.13833258666488
##### $R^2$ = 0.75136653662160979