# Boston Housing Assignment

In [9]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge

In [10]:
# Load the bundled Boston housing dataset and print its description text.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
bean = datasets.load_boston()
print(bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [11]:
def load_boston(random_state=None):
    """Load the Boston housing data and return a standardized train/test split.

    The data is split first and the ``StandardScaler`` is fitted on the
    training rows only; the test rows are then transformed with those
    training statistics. Fitting the scaler on the full dataset before
    splitting would let test-set means and variances leak into the training
    features and make the test scores optimistic.

    Parameters
    ----------
    random_state : int or None, optional
        Forwarded to ``train_test_split``. Pass an int to get the same split
        on every run; the default ``None`` keeps the split unseeded.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` where the feature arrays are
        standardized and the target arrays are left unchanged.
    """
    boston = datasets.load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=random_state)

    # Learn the scaling parameters from the training rows only, then reuse
    # them unchanged for the held-out rows.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return [X_train, X_test, y_train, y_test]

In [13]:
# Unpack the four arrays returned by load_boston(): train/test features and
# train/test targets, in that order.
X_train, X_test, y_train, y_test = load_boston()

In [14]:
# Show the dimensions of the training feature array.
X_train.shape

(379L, 13L)

In [6]:
# Create a LinearRegression estimator and fit it on the training split.
# The fit call is the cell's last expression, so the fitted estimator's repr
# is displayed as the cell output.
Lreg= LinearRegression()
Lreg.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [7]:
# Pair each actual test target with the linear model's prediction for it.
# list() keeps the pairs displayable when zip returns an iterator (Python 3);
# on Python 2 the result is the same list as before.
list(zip(y_test, Lreg.predict(X_test)))

[(23.0, 24.093521954875296),
 (18.899999999999999, 18.05892606745341),
 (19.399999999999999, 23.97786045555506),
 (13.9, 16.861774630379486),
 (14.9, 17.532644528784097),
 (17.0, 22.562318661081065),
 (12.800000000000001, 13.052066515270697),
 (50.0, 35.276405331841787),
 (23.899999999999999, 26.854202754683662),
 (30.100000000000001, 28.866379825551704),
 (38.700000000000003, 33.994214569968499),
 (25.0, 24.931878677981622),
 (26.600000000000001, 22.464454522709197),
 (28.5, 33.776047020286171),
 (19.199999999999999, 23.355052337632113),
 (48.5, 41.180598783835521),
 (24.300000000000001, 20.459598534662508),
 (13.6, 12.120448236206673),
 (29.800000000000001, 25.123552867978333),
 (31.600000000000001, 31.229417577585007),
 (18.100000000000001, 16.502179625985207),
 (34.899999999999999, 33.985323612191678),
 (24.199999999999999, 25.5494562650814),
 (22.600000000000001, 24.697629367671293),
 (50.0, 24.775129519524722),
 (16.5, 10.120726750890343),
 (27.100000000000001, 26.618833229465576

In [16]:
# Store the linear model's predictions for the test features so the metric
# cells below can reuse them.
y_LinearPrediction=Lreg.predict(X_test)

In [17]:
# Mean squared error of the linear model's predictions against the test targets.
Mean_square_Value = mean_squared_error(
    y_true=y_test,
    y_pred=y_LinearPrediction,
)

In [18]:
# Display the linear model's mean squared error computed above.
Mean_square_Value

23.909644224772311

In [19]:
# R-squared of the linear model's predictions against the test targets;
# the bare name on the last line displays the value.
r_Score = r2_score(
    y_true=y_test,
    y_pred=y_LinearPrediction,
)
r_Score

0.66541782530911764

# Linear regression results: Mean Squared Error and R-squared on the test set

Mean Squared Error: 23.909644224772311, R-squared: 0.66541782530911764

In [22]:
# Bind the estimator to the lowercase name ``ridge``. Assigning it to
# ``Ridge`` would shadow the imported class, so this cell could not be re-run,
# and the prediction cells below call ``ridge.predict``, which would raise
# NameError on a fresh kernel.
ridge = Ridge(alpha=0.01)
# Fit on the training split; as the last expression, the fitted estimator's
# repr is displayed as the cell output.
ridge.fit(X_train, y_train)

Ridge(alpha=0.01, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [24]:
# Pair each actual test target with the ridge model's prediction for it.
# list() keeps the pairs displayable when zip returns an iterator (Python 3);
# on Python 2 the result is the same list as before.
list(zip(y_test, ridge.predict(X_test)))

[(32.0, 34.520334513850315),
 (15.0, 26.102383554282383),
 (18.100000000000001, 17.560814042557027),
 (24.300000000000001, 24.374554284526464),
 (17.800000000000001, 8.8268012084437117),
 (12.5, 18.901886397399846),
 (12.300000000000001, 12.478670226188358),
 (17.300000000000001, 16.28780926310543),
 (23.699999999999999, 28.666924856630175),
 (17.100000000000001, 20.040060117903316),
 (24.800000000000001, 26.378227437605702),
 (27.899999999999999, 32.525605392902271),
 (20.199999999999999, 16.236418171720725),
 (20.0, 20.249159507537328),
 (24.300000000000001, 30.030739216228124),
 (34.899999999999999, 34.855323534498197),
 (14.9, 17.040911564259762),
 (11.699999999999999, 16.266322294136252),
 (10.4, 18.816930306169034),
 (10.4, 7.1445299387755643),
 (28.100000000000001, 25.484117567355199),
 (21.0, 23.238574654514085),
 (22.199999999999999, 19.187195195763255),
 (48.799999999999997, 41.000331584061364),
 (22.899999999999999, 29.687001258403782),
 (21.399999999999999, 23.1850173151506

In [27]:
# Store the ridge model's predictions for the test features so the metric
# cells below can reuse them.
y_RegressionPredict=ridge.predict(X_test)

In [28]:
# R-squared of the ridge model's predictions against the test targets.
r2RegressionScore = r2_score(
    y_true=y_test,
    y_pred=y_RegressionPredict,
)

In [29]:
# Display the ridge model's R-squared computed above.
r2RegressionScore

0.64759847971039775

In [31]:
# Mean squared error of the ridge model's predictions against the test targets.
mseRegressionScore = mean_squared_error(
    y_true=y_test,
    y_pred=y_RegressionPredict,
)

In [32]:
# Display the ridge model's mean squared error computed above.
mseRegressionScore

25.183036072312557

# Ridge regression results: Mean Squared Error and R-squared on the test set

Mean Squared Error: 25.183036072312557, R-squared: 0.64759847971039775