In [10]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso

In [2]:
# Load the bundled Boston housing dataset and show its built-in description.
bean = datasets.load_boston()
# The parenthesized single-argument form prints the same text whether `print`
# is the Python 2 statement or the Python 3 function.
print(bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [3]:
def load_boston(random_state=None):
    """Load the Boston housing data, split it, and standardize the features.

    The split is made first and the ``StandardScaler`` is fitted on the
    training rows only; the held-out rows are then transformed with those
    training statistics. Fitting the scaler on the full matrix before
    splitting would let the test rows influence the mean/variance applied
    to the training data.

    Parameters
    ----------
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``. Pass an integer to obtain the
        same partition on every run; the default ``None`` leaves the split
        unseeded, as it was before this parameter existed.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` where the two feature
        matrices are standardized and the two target vectors are untouched.
    """
    boston = datasets.load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=random_state)

    # Learn the scaling from the training partition only, then reuse it
    # unchanged on the held-out partition.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return [X_train, X_test, y_train, y_test]

In [4]:
# Unpack the four arrays returned by load_boston(): train/test features,
# then train/test targets. No random_state is supplied, so the partition
# (and every score below) can differ from run to run.
X_train, X_test, y_train, y_test = load_boston()

In [5]:
# Dimensions of the training feature matrix.
X_train.shape

(379L, 13L)

In [6]:
# Ordinary least-squares baseline. fit() hands back the fitted estimator, so
# it can be bound in one step; the bare name keeps the repr as the cell output.
clf = LinearRegression().fit(X_train, y_train)
clf

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [7]:
# Pair each held-out target with the linear model's prediction for it.
# list(...) keeps the pairs visible where zip returns a lazy iterator
# (Python 3); where zip already returns a list this is just a copy.
list(zip(y_test, clf.predict(X_test)))

[(16.199999999999999, 20.816279031543992),
 (19.5, 18.436016873653969),
 (44.799999999999997, 37.909562567758385),
 (15.699999999999999, 17.098740555902932),
 (20.399999999999999, 20.4153330466164),
 (24.800000000000001, 25.790603528319568),
 (13.800000000000001, 12.952765661616015),
 (20.399999999999999, 18.991892305100212),
 (21.800000000000001, 20.638158112877207),
 (12.699999999999999, 12.596812935219381),
 (8.6999999999999993, 9.1803582265218111),
 (30.699999999999999, 32.041796521882858),
 (10.199999999999999, 7.3772835255839961),
 (12.0, 12.2790806942171),
 (24.0, 25.707196021629187),
 (23.0, 23.184180332305811),
 (24.699999999999999, 24.613658565769942),
 (18.199999999999999, 13.062101562060029),
 (6.2999999999999998, 11.321366423832698),
 (35.399999999999999, 30.816143640578655),
 (22.399999999999999, 21.912374140472863),
 (19.600000000000001, 17.50337595458533),
 (21.100000000000001, 22.959652083241991),
 (33.0, 22.704308308962229),
 (20.100000000000001, 14.362812145287451),


In [8]:
# R^2 of the linear model on the rows it was trained on (in-sample).
# NOTE(review): the Lasso cells later in the notebook score on X_test/y_test,
# so this figure is not directly comparable with theirs -- confirm intent.
r2_score(y_true=y_train, y_pred=clf.predict(X_train))

0.73485950564836

In [9]:
# Mean squared error of the linear model on its own training rows (in-sample).
# NOTE(review): the Lasso MSE at the end of the notebook is computed on the
# test split, so the two errors measure different things -- confirm intent.
mean_squared_error(y_true=y_train, y_pred=clf.predict(X_train))

21.714523316328375

In [12]:
# Lasso with alpha=0.01, the smallest penalty tried in this notebook.
# The bare name on the last line keeps the estimator repr as the cell output.
la = Lasso(alpha=0.01).fit(X_train, y_train)
la

Lasso(alpha=0.01, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [13]:
# R^2 on the held-out split for the current `la` (alpha=0.01 in the preceding cell).
r2_score(y_true=y_test, y_pred=la.predict(X_test))

0.73630107154008928

In [14]:
# Same model with the penalty raised to alpha=0.05; rebinding `la` discards the previous fit.
la = Lasso(alpha=0.05).fit(X_train, y_train)
la

Lasso(alpha=0.05, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [15]:
# R^2 on the held-out split for the current `la` (alpha=0.05 in the preceding cell).
r2_score(y_true=y_test, y_pred=la.predict(X_test))

0.74218871169449341

In [16]:
# Next step of the manual sweep: alpha=0.1.
la = Lasso(alpha=0.1).fit(X_train, y_train)
la

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [17]:
# R^2 on the held-out split for the current `la` (alpha=0.1 in the preceding cell).
r2_score(y_true=y_test, y_pred=la.predict(X_test))

0.74071210265524701

In [23]:
# Next step of the manual sweep: alpha=0.5.
la = Lasso(alpha=0.5).fit(X_train, y_train)
la

Lasso(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [24]:
# R^2 on the held-out split for the current `la` (alpha=0.5 in the preceding cell).
r2_score(y_true=y_test, y_pred=la.predict(X_test))

0.67965227010747453

In [20]:
# Largest penalty tried in this notebook: alpha=1.0.
la = Lasso(alpha=1.0).fit(X_train, y_train)
la

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [21]:
# R^2 on the held-out split for the current `la` (alpha=1.0 in the preceding cell).
r2_score(y_true=y_test, y_pred=la.predict(X_test))

0.65686237540021064

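Of the alphas tried (.01, .05, .1, .5, 1.0), alpha=.05 gives the highest test-set r2_score (0.742), so alpha=.05 is selected. Note that this choice is made on the test split itself, so the final test scores reported below are optimistic; a separate validation split or cross-validation would give an unbiased estimate.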

In [25]:
# Refit with the chosen alpha=0.05 so that `la` refers to that model again
# (the cells in between rebound it to fits with other alphas).
la = Lasso(alpha=0.05).fit(X_train, y_train)
la

Lasso(alpha=0.05, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [26]:
# R^2 on the held-out split for the selected model (alpha=0.05 refit in the preceding cell).
r2_score(y_true=y_test, y_pred=la.predict(X_test))

0.74218871169449341

In [28]:
# Mean squared error on the held-out split for the same selected Lasso model.
mean_squared_error(y_true=y_test, y_pred=la.predict(X_test))

23.700627378985235