In [1]:
import pandas as pd

In [3]:
from sklearn.datasets import load_boston

In [4]:
boston = load_boston()

In [6]:
print(boston.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [7]:
x = boston.data
y = boston.target

In [15]:
x.shape

(506, 13)

In [16]:
y.shape

(506,)

In [9]:
from sklearn.cross_validation import train_test_split

In [28]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=.25,random_state=2018)

#### 标准化

In [11]:
from sklearn.preprocessing import StandardScaler

In [29]:
ss_x = StandardScaler()

In [30]:
x_train = ss_x.fit_transform(x_train)
x_test = ss_x.transform(x_test)

 ### 模型 1 LinearRegression

In [33]:
from sklearn.linear_model import LinearRegression

In [34]:
lr = LinearRegression()

In [36]:
lr.fit(x_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [37]:
y_test_pred_lr = lr.predict(x_test)

### 模型2 SGDRegressor

In [38]:
from sklearn.linear_model import SGDRegressor

In [39]:
sgdr = SGDRegressor()



In [40]:
sgdr.fit(x_train,y_train)

SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
       loss='squared_loss', max_iter=5, n_iter=None, penalty='l2',
       power_t=0.25, random_state=None, shuffle=True, tol=None, verbose=0,
       warm_start=False)

In [41]:
y_test_pred_sgdr = sgdr.predict(x_test)

### 模型3 SVR  (kernel = 'linear')

In [42]:
from sklearn.svm import SVR

In [43]:
svr_linear = SVR(kernel='linear')

In [44]:
svr_linear.fit(x_train,y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [45]:
y_test_pred_svr_linear = svr_linear.predict(x_test)

### 模型4 SVR (kernel='poly')

In [46]:
svr_poly = SVR(kernel='poly')

In [47]:
svr_poly.fit(x_train,y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [48]:
y_test_pred_svr_poly = svr_poly.predict(x_test)

### 模型5 SVR (kernel='rbf')

In [49]:
svr_rbf = SVR(kernel='rbf')

In [50]:
svr_rbf.fit(x_train,y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [51]:
y_test_pred_svr_rbf = svr_rbf.predict(x_test)

### 模型6 KNN (weights='uniform')

In [52]:
from sklearn.neighbors import KNeighborsRegressor

In [53]:
knr_uniform = KNeighborsRegressor(weights='uniform')

In [54]:
knr_uniform.fit(x_train,y_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='uniform')

In [55]:
y_test_pred_knr_uniform = knr_uniform.predict(x_test)

### 模型7 KNN (weights='distance')

In [56]:
knr_distance = KNeighborsRegressor(weights='distance')

In [57]:
knr_distance.fit(x_train,y_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='distance')

In [58]:
y_test_pred_knr_distance = knr_distance.predict(x_test)

### 模型8 DecisionTreeRegressor

In [59]:
from sklearn.tree import DecisionTreeRegressor

In [60]:
dtr = DecisionTreeRegressor()

In [61]:
dtr.fit(x_train,y_train)

DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [62]:
y_test_pred_dtr = dtr.predict(x_test)

### 模型9 RandomForestRegressor

In [63]:
from sklearn.ensemble import RandomForestRegressor

In [64]:
rfr = RandomForestRegressor()

In [65]:
rfr.fit(x_train,y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

In [66]:
y_test_pred_rfr = rfr.predict(x_test)

### 模型10 ExtraTreesRegressor

In [67]:
from sklearn.ensemble import ExtraTreesRegressor

In [68]:
etr = ExtraTreesRegressor()

In [70]:
etr.fit(x_train,y_train)

ExtraTreesRegressor(bootstrap=False, criterion='mse', max_depth=None,
          max_features='auto', max_leaf_nodes=None,
          min_impurity_decrease=0.0, min_impurity_split=None,
          min_samples_leaf=1, min_samples_split=2,
          min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
          oob_score=False, random_state=None, verbose=0, warm_start=False)

In [71]:
y_test_pred_etr = etr.predict(x_test)

### 模型11 GradientBoostingRegressor

In [72]:
from sklearn.ensemble import GradientBoostingRegressor

In [73]:
gbr = GradientBoostingRegressor()

In [74]:
gbr.fit(x_train,y_train)

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False)

In [75]:
y_test_pred_gbr = gbr.predict(x_test)

### 评价指标 R-squared, MSE,  MAE

In [82]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [97]:
ranks = []

In [98]:
method_dict = dict()
method_dict['Regressors'] = 'LinearRegressor'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_lr)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_lr)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_lr)

ranks.append(method_dict)

In [99]:
method_dict = dict()
method_dict['Regressors'] = 'SGDRegressor'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_sgdr)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_sgdr)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_sgdr)

ranks.append(method_dict)

In [100]:
method_dict = dict()
method_dict['Regressors'] = 'SVM Regressor (Linear Kernel)'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_svr_linear)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_svr_linear)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_svr_linear)

ranks.append(method_dict)

In [101]:
method_dict = dict()
method_dict['Regressors'] = 'SVM Regressor (Poly Kernel)'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_svr_poly)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_svr_poly)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_svr_poly)

ranks.append(method_dict)

In [102]:
method_dict = dict()
method_dict['Regressors'] = 'SVM Regressor (RBF Kernel)'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_svr_rbf)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_svr_rbf)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_svr_rbf)

ranks.append(method_dict)

In [103]:
method_dict = dict()
method_dict['Regressors'] = 'KNN Regressor (Uniform-weighted)'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_knr_uniform)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_knr_uniform)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_knr_uniform)

ranks.append(method_dict)

In [104]:
method_dict = dict()
method_dict['Regressors'] = 'KNN Regressor (Distance-weighted)'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_knr_distance)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_knr_distance)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_knr_distance)

ranks.append(method_dict)

In [105]:
method_dict = dict()
method_dict['Regressors'] = 'RandomForestRegressor'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_rfr)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_rfr)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_rfr)

ranks.append(method_dict)

In [106]:
method_dict = dict()
method_dict['Regressors'] = 'ExtraTreesRegressor'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_etr)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_etr)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_etr)

ranks.append(method_dict)

In [107]:
method_dict = dict()
method_dict['Regressors'] = 'GradientBoostingRegressorr'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_gbr)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_gbr)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_gbr)

ranks.append(method_dict)

In [110]:
method_dict = dict()
method_dict['Regressors'] = 'DecisionTreeRegressor'
method_dict['R-squared'] = r2_score(y_test,y_test_pred_dtr)
method_dict['MSE'] = mean_squared_error(y_test,y_test_pred_dtr)
method_dict['MAE'] = mean_absolute_error(y_test,y_test_pred_dtr)

ranks.append(method_dict)

In [114]:
ranks_df = pd.DataFrame(ranks).sort_values(by='R-squared',ascending=False)

In [115]:
ranks_df

Unnamed: 0,MAE,MSE,R-squared,Regressors
9,1.981583,6.699335,0.910906,GradientBoostingRegressorr
8,2.051024,7.941902,0.894381,ExtraTreesRegressor
7,2.214961,10.097493,0.865713,RandomForestRegressor
6,2.21872,11.489549,0.847201,KNN Regressor (Distance-weighted)
10,2.869291,16.039685,0.786688,DecisionTreeRegressor
2,2.849033,16.939402,0.774723,SVM Regressor (Linear Kernel)
0,3.212816,17.182899,0.771485,LinearRegressor
1,3.042936,17.489407,0.767408,SGDRegressor
5,2.530079,18.756346,0.750559,KNN Regressor (Uniform-weighted)
4,2.928358,26.228056,0.651193,SVM Regressor (RBF Kernel)


In [116]:
y_test

array([13.1, 19.7, 33.3, 20.3, 22.7, 28.2, 22.8, 14.1, 19.4, 20.4, 21.9,
       26.6, 28.4, 42.3, 22.3, 30.7, 27.5, 21. , 25. , 22.4, 20.6, 13.8,
       16.7, 22. , 50. , 20.2, 18.5, 18.1, 20.4, 14.5, 15. , 22.7, 24.1,
       30.1, 23. , 11.3, 22.5, 50. , 32.7, 29.6, 16. , 19.1, 28.7, 36.1,
       25. , 21.4, 16.4, 21.5, 14.9,  8.3, 24.6, 20.4, 13.4, 18.8, 50. ,
       18.9, 48.3, 31. , 16.7, 23.2, 24.8, 22.2, 50. , 16.5, 19.8, 21.2,
       22. , 20.1, 10.5, 12.5, 36. , 23.1, 32.2, 30.1, 19. , 19.5, 14.4,
       23.5, 17.8, 23.2, 23.4, 43.1, 18.3, 23.1, 28.7, 18.7, 32.4, 10.5,
       33.1, 10.2, 46.7, 19.9, 18.9, 24.4, 20.4, 13.6, 24.1, 44.8, 20.3,
       22.6, 35.1, 22.8, 23.3, 31.7, 23.8, 25.1, 21.7, 16.8, 29.6, 16.2,
       21.4, 22.3, 23.2, 22.9, 19.7, 22. , 17.8, 23. , 13.5, 29. , 21.5,
       21.2, 15.3, 22.9, 10.4, 25. , 24.6])

In [117]:
y_test_pred_gbr

array([15.4182649 , 16.72691592, 38.00828898, 22.85156734, 21.21464625,
       28.36580974, 28.60471415, 14.89769617, 20.13648799, 22.89294675,
       17.14624526, 30.56774538, 26.62128786, 43.93614788, 21.77630199,
       27.52732366, 23.02533999, 20.18476406, 23.44450549, 21.01312472,
       15.86369063, 16.37252223, 14.48148141, 21.84579585, 48.42404887,
       20.36935549, 16.59971796, 16.54577189, 21.05807529, 14.36133987,
       24.15067408, 21.20078576, 22.23765068, 31.80938305, 20.39368828,
       10.81071234, 20.42860041, 46.77797046, 30.61458464, 34.40350067,
       18.31180323, 16.60675583, 24.50215992, 34.08737538, 24.61174755,
       19.88208813, 15.21832274, 19.8315004 , 16.49526956,  6.98862124,
       23.14617962, 19.77842564, 14.16370287, 18.2465484 , 50.14259173,
       18.1996107 , 42.9170679 , 28.51585568, 16.75262562, 15.44208874,
       22.84864677, 20.34335989, 47.2045178 , 17.1803975 , 20.35906298,
       21.05756507, 23.46074974, 20.45192035,  6.93253671, 15.32