In [1]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression

In [2]:
bean = datasets.load_boston()
print(bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [3]:
def load_boston():
    scaler = StandardScaler()
    boston = datasets.load_boston()
    X=boston.data
    y=boston.target
    X = scaler.fit_transform(X)
    return train_test_split(X,y)

In [4]:
X_train, X_test, y_train, y_test = load_boston()

In [5]:
X_train.shape

(379, 13)

Fitting Linear Regression

In [6]:
LinReg= LinearRegression()
LinReg.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [8]:
list(zip (y_test, LinReg.predict(X_test)))

[(17.0, 22.411783393671538),
 (13.4, 13.137304539053881),
 (22.600000000000001, 26.861729135882541),
 (20.300000000000001, 23.272907537172234),
 (15.6, 11.995310069128902),
 (23.0, 30.383112933973013),
 (34.899999999999999, 34.541268546547194),
 (19.0, 14.227150630622489),
 (23.100000000000001, 24.492782745621817),
 (20.100000000000001, 23.780889463549705),
 (35.100000000000001, 35.743521415265022),
 (12.5, 19.494825113921092),
 (24.800000000000001, 26.828440534087104),
 (29.100000000000001, 31.527473452319519),
 (22.600000000000001, 23.06651189930172),
 (24.0, 29.967324925816353),
 (29.800000000000001, 32.492211684405675),
 (18.0, 19.160213968921965),
 (22.600000000000001, 19.223636586512654),
 (16.300000000000001, 12.484719589223372),
 (22.0, 26.330466097941958),
 (19.0, 21.325535944511032),
 (23.100000000000001, 24.740839324845528),
 (19.199999999999999, 20.26468935295453),
 (9.6999999999999993, 10.156952910255502),
 (13.800000000000001, 20.531155238261146),
 (24.399999999999999, 24

To get the Mean Squared Error and R Squared values before applying Ridge linear model is as follows

In [9]:
from sklearn.linear_model import Lasso

In [10]:
from sklearn.linear_model import Ridge

In [11]:
y_LinPrediction=LinReg.predict(X_test)

In [12]:
Mn_sqr_val=mean_squared_error(y_test,y_LinPrediction)

In [13]:
Mn_sqr_val

16.144071345323891

In [14]:
R_sqr_val=r2_score(y_test,y_LinPrediction)

In [15]:
R_sqr_val

0.75886686055456987

So, the Mean squared error is 16.144071345323891 and R squared value is 0.75886686055456987

To get the Mean Squared Error and R Squared values after applying Ridge linear model is as follows

Let us consider alpha=0.01

In [17]:
alpha=0.01

In [18]:
Rid_LM=Ridge(alpha=alpha)

Using Fit function

In [19]:
Rid_LM.fit(X_train,y_train)

Ridge(alpha=0.01, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

Making Prediction

In [21]:
list(zip(y_test,Rid_LM.predict(X_test)))

[(17.0, 22.412607663722632),
 (13.4, 13.137005095735233),
 (22.600000000000001, 26.861431996697096),
 (20.300000000000001, 23.271362531444396),
 (15.6, 11.996027203376107),
 (23.0, 30.382598372683088),
 (34.899999999999999, 34.541448659406463),
 (19.0, 14.226950373625691),
 (23.100000000000001, 24.492820878894467),
 (20.100000000000001, 23.780640765947734),
 (35.100000000000001, 35.742631843122567),
 (12.5, 19.494706273045225),
 (24.800000000000001, 26.82812866751598),
 (29.100000000000001, 31.527306625852944),
 (22.600000000000001, 23.06621756968627),
 (24.0, 29.967800422643272),
 (29.800000000000001, 32.492267917340435),
 (18.0, 19.160552835008055),
 (22.600000000000001, 19.223767449618506),
 (16.300000000000001, 12.484301006598814),
 (22.0, 26.330566003829126),
 (19.0, 21.325440550442298),
 (23.100000000000001, 24.741237498208029),
 (19.199999999999999, 20.265026427528205),
 (9.6999999999999993, 10.156831603909982),
 (13.800000000000001, 20.5309668251696),
 (24.399999999999999, 24.1

In [22]:
y_RegPrediction=Rid_LM.predict(X_test)

In [23]:
Rid_Mn_sqr_val=mean_squared_error(y_test,y_RegPrediction)

In [24]:
Rid_Mn_sqr_val

16.143398675252879

In [25]:
Rid_R_sqr_val=r2_score(y_test,y_RegPrediction)

In [26]:
Rid_R_sqr_val

0.75887690777516026

So, after applying ridge model the Mean squared error is 16.143398675252879 and R squared value is 0.75887690777516026