In [26]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [3]:
# Data prepared
boston = load_boston()
data = pd.DataFrame(boston.data, columns=boston.feature_names)
x = data.copy(deep=True)
y = data["y"] = boston.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)
ss_x = StandardScaler()
ss_y = StandardScaler()
x_train = ss_x.fit_transform(x_train)
x_test = ss_x.transform(x_test)
# reshape make y to a column-like array
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))
ori_y_test = ss_y.inverse_transform(y_test)

In [4]:
# Linear regression
lr = LinearRegression()
lr.fit(x_train, y_train)
lr_y_predict = lr.predict(x_test)

# SGD regression
sgdr = SGDRegressor()
sgdr.fit(x_train, y_train)
sgdr_y_predict = sgdr.predict(x_test)

for model, result in [(lr, lr_y_predict), (sgdr, sgdr_y_predict)]:
    modelName = model.__class__.__name__
    ori_result = ss_y.inverse_transform(result)
    # default score is precisely the r2_score
    modelScore = model.score(x_test, y_test)
    mse = mean_squared_error(y_true=ori_y_test, y_pred=ori_result)
    mae = mean_absolute_error(y_true=ori_y_test, y_pred=ori_result)
    r2 = r2_score(y_true=ori_y_test, y_pred=ori_result)
    print("the default measure score of %s is %f" % (modelName, modelScore))
    print("the mae of %s is %f" % (modelName, mae))
    print("the mse of %s is %f" % (modelName, mse))
    print("the r2_score of %s is %f" % (modelName, r2))
    print("\n")

  y = column_or_1d(y, warn=True)


In [7]:
# SVR regression
# ****实践了一些反射技巧
for kernel in ['linear', 'poly', 'rbf']:
    var = kernel + "_svr"
    locals()[var] = SVR(kernel=kernel)
    obj = eval(var)
    obj.fit(x_train, y_train)
    result = obj.predict(x_test)
    ori_result = ss_y.inverse_transform(result)
    r2 = r2_score(y_true=ori_y_test, y_pred=ori_result)
    #r2 score doesn't shift wherever result is standarized back.
    #r2_s = r2_score(y_true=y_test, y_pred=result)
    mae = mean_absolute_error(y_true=ori_y_test, y_pred=ori_result)
    mse = mean_squared_error(y_true=ori_y_test, y_pred=ori_result)
    print("R2 value of kernel %s is %f" % (kernel, r2))
    #print("R2-s value of kernel %s is %f" % (kernel, r2_s))
    print("mae value of kernel %s is %f" % (kernel, mae))
    print("mse value of kernel %s is %f" % (kernel, mse))
    print("\n")

R2 value of kernel linear is 0.651717
mae value of kernel linear is 3.426673
mse value of kernel linear is 27.006307


R2 value of kernel poly is 0.404454
mae value of kernel poly is 3.752059
mse value of kernel poly is 46.179403


R2 value of kernel rbf is 0.756407
mae value of kernel rbf is 2.607563
mse value of kernel rbf is 18.888525




  y = column_or_1d(y, warn=True)


In [8]:
# KNN regression 
## 平均回归 和 按距离加权回归
for weights in ["uniform", "distance"]:
    knr = KNeighborsRegressor(weights=weights)
    knr.fit(x_train, y_train)
    result = knr.predict(x_test)
    ori_result = ss_y.inverse_transform(result)
    r2 = r2_score(y_true=ori_y_test, y_pred=ori_result) 
    mae = mean_absolute_error(y_true=ori_y_test, y_pred=ori_result)
    mse = mean_squared_error(y_true=ori_y_test, y_pred=ori_result)
    print("R2 value of weights %s is %f" % (weights, r2))
    print("mae value of weights %s is %f" % (weights, mae))
    print("mse value of weights %s is %f" % (weights, mse))
    print("\n")

R2 value of weights uniform is 0.690345
mae value of weights uniform is 2.968031
mse value of weights uniform is 24.011014


R2 value of weights distance is 0.719759
mae value of weights distance is 2.805057
mse value of weights distance is 21.730250




In [10]:
# Tree regression
dtr = DecisionTreeRegressor()
dtr.fit(x_train, y_train)
result = dtr.predict(x_test)
ori_result = ss_y.inverse_transform(result)
r2 = r2_score(y_true=ori_y_test, y_pred=ori_result) 
mae = mean_absolute_error(y_true=ori_y_test, y_pred=ori_result)
mse = mean_squared_error(y_true=ori_y_test, y_pred=ori_result)
print("R2 value of DecisionTreeRegressor is %f" % r2)
print("mae value of DecisionTreeRegressor is %f" % mae)
print("mse value of DecisionTreeregressor is %f" % mse)

R2 value of DecisionTreeRegressor is 0.687423
mae value of DecisionTreeRegressor is 3.166929
mse value of DecisionTreeregressor is 24.237638


In [59]:
# Ensemble regression
ensembleList = [RandomForestRegressor(), ExtraTreesRegressor(), GradientBoostingRegressor()]
for model in ensembleList:
    modelName = model.__class__.__name__
    model.fit(x_train, y_train)
    result = model.predict(x_test)
    ori_result = ss_y.inverse_transform(result)
    r2 = r2_score(y_true=ori_y_test, y_pred=ori_result) 
    mae = mean_absolute_error(y_true=ori_y_test, y_pred=ori_result)
    mse = mean_squared_error(y_true=ori_y_test, y_pred=ori_result)
    for metric in ["r2", "mae", "mse"]:
        print("%s value of %s is %f" % (metric.upper(), modelName, eval(metric)))
    feature_importance = list(zip(model.feature_importances_, x.columns))
    print(np.sort(feature_importance, axis=0))
    print("\n")

R2 value of RandomForestRegressor is 0.876303
MAE value of RandomForestRegressor is 2.006378
MSE value of RandomForestRegressor is 9.591661
[['0.00109016252986' 'AGE']
 ['0.00136835861722' 'B']
 ['0.00243582853636' 'CHAS']
 ['0.00990893338272' 'CRIM']
 ['0.0114319784677' 'DIS']
 ['0.0116363054894' 'INDUS']
 ['0.0144954544645' 'LSTAT']
 ['0.0146451700992' 'NOX']
 ['0.0207787785061' 'PTRATIO']
 ['0.0289121968839' 'RAD']
 ['0.0704169568404' 'RM']
 ['0.374746984909' 'TAX']
 ['0.438132891274' 'ZN']]


R2 value of ExtraTreesRegressor is 0.752318
MAE value of ExtraTreesRegressor is 2.552362
MSE value of ExtraTreesRegressor is 19.205609
[['0.0030758558381' 'AGE']
 ['0.01231499201' 'B']
 ['0.0145772379232' 'CHAS']
 ['0.016167974625' 'CRIM']
 ['0.020712304012' 'DIS']
 ['0.0259257135341' 'INDUS']
 ['0.0269976396145' 'LSTAT']
 ['0.0278202744685' 'NOX']
 ['0.029000402234' 'PTRATIO']
 ['0.0440690602255' 'RAD']
 ['0.0602057888861' 'RM']
 ['0.358774348297' 'TAX']
 ['0.360358408332' 'ZN']]


R2 value o

  """
  """
  y = column_or_1d(y, warn=True)
