# bj-housing.py -- forked from nd009/boston_housing
# 36 lines (30 loc), 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# coding=utf-8
# Import the required libraries.
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.model_selection import ShuffleSplit, train_test_split
from sklearn.metrics import make_scorer
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
# Load the historical Beijing housing prices.
data = pd.read_csv('bj_housing.csv')
features = data.drop('Value', axis=1)  # House features: every column except 'Value'.
prices = data['Value']  # Target: the price column.
# Split the data; the test set takes 25%.
# Features and prices are split in a SINGLE call so that every row of
# X_train stays paired with its own price in y_train. Two separate unseeded
# calls would shuffle each array independently and misalign rows and targets.
X_train, X_test, y_train, y_test = train_test_split(
    features, prices, test_size=0.25)
# Model training function.
def fitModel(X, y):
    """Grid-search a decision tree's ``max_depth`` and return the best model.

    Candidate depths 1 through 10 are scored with R^2 over 10 shuffle-split
    folds (25% of the rows held out per fold, seeded with
    ``random_state=0``).

    Args:
        X: Training features, passed straight to ``GridSearchCV.fit``.
        y: Training targets, passed straight to ``GridSearchCV.fit``.

    Returns:
        The ``best_estimator_`` of the fitted grid search.
    """
    depth_grid = {'max_depth': list(range(1, 11))}
    splitter = ShuffleSplit(n_splits=10, test_size=0.25, random_state=0)
    search = GridSearchCV(
        DecisionTreeRegressor(),
        depth_grid,
        scoring=make_scorer(r2_score),  # Build the scorer from the R^2 metric.
        cv=splitter,
    )
    # fit() returns the fitted search object itself, so the call can be chained.
    return search.fit(X, y).best_estimator_
# Build the model.
myModel = fitModel(X_train, y_train)
# Make a prediction.
# A 100-square-meter, 3-bedroom, 1-living-room, 2017, 21-floor shabby
# school-district house.
# NOTE(review): the value order is assumed to match the feature columns of
# bj_housing.csv -- confirm against the file's header.
# predict() expects a 2-D array of shape (n_samples, n_features), so the
# single sample is wrapped in an outer list. print(...) with one argument
# behaves the same on Python 2 and Python 3.
print(myModel.predict([[100, 3, 1, 1, 2017, 21]]))