## Sklearn 模型预测性能的评估方法

### Estimator 对象的 score 方法

### 交叉验证中使用 scoring 参数

#### 交叉验证中使用预定义参数


In [23]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.cluster import KMeans
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

# Load the digits dataset and standardize its features before clustering.
digits = load_digits()
data, target = digits.data, digits.target
scaler = StandardScaler().fit(data)
data = scaler.transform(data)

# Try every cluster count from 1 to 20. range() yields plain Python ints,
# which is what n_clusters expects, so no float-to-int conversion is needed.
for k in range(1, 21):
    # NOTE: no random_state is passed, so the scores can differ between runs.
    kmeans = KMeans(n_clusters=k)
    # Cross-validation is not really meaningful for clustering; it is used
    # here only to demonstrate passing a predefined scoring string.
    scores = cross_val_score(kmeans, data, target, n_jobs=-1, scoring='adjusted_rand_score')
    print(scores.mean())

<class 'numpy.int32'>


0.0


0.12468043656


0.236773870912


0.287496296433


0.33091287308


0.378333917719


0.364796338985


0.411355438258


0.45589648797


0.470657404921


0.481350033594


0.498253970154


0.545996730526


0.550833412492


0.55853611869


0.569085060084


0.548579909339


0.572941542773


0.604602192358


0.570767223669


#### 交叉验证中使用自定义参数 
把 sklearn.metrics 中已有的度量指标封装成符合 "scoring" 参数要求的形式

##### 交叉验证中使用自定义参数 
完全自定义自己的度量指标然后用 "make_scorer" 函数转换成符合 "scoring" 参数要求的形式

In [7]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score


# An arbitrary toy metric for demonstration only; designing a meaningful
# custom loss function still needs further study.
def my_custom_lossfunc(ground_truth, predictions):
    """Return 1 if the accuracy of the predictions is above zero, else 0.

    Args:
        ground_truth: The true labels.
        predictions: The labels predicted by the estimator.

    Returns:
        The integer 1 when ``accuracy_score(ground_truth, predictions)`` is
        strictly positive, otherwise the integer 0.
    """
    has_any_hit = accuracy_score(ground_truth, predictions) > 0
    return int(has_any_hit)

# Wrap the custom metric as a scorer object. With greater_is_better=False the
# scorer sign-flips the metric, which is why the reported scores are negative.
loss = make_scorer(my_custom_lossfunc, greater_is_better=False)
iris = load_iris()

# Grid-search the SVC regularization parameter C using the custom scorer.
search_space = {'C': [1, 10]}
grid = GridSearchCV(SVC(), param_grid=search_space, scoring=loss)
grid.fit(iris.data, iris.target)
# Bare expression so the notebook displays the per-candidate results.
grid.cv_results_

{'mean_fit_time': array([ 0.00066749,  0.00066845]),
 'mean_score_time': array([ 0.00033625,  0.00066845]),
 'mean_test_score': array([-1., -1.]),
 'mean_train_score': array([-1., -1.]),
 'param_C': masked_array(data = [1 10],
              mask = [False False],
        fill_value = ?),
 'params': ({'C': 1}, {'C': 10}),
 'rank_test_score': array([1, 1]),
 'split0_test_score': array([-1., -1.]),
 'split0_train_score': array([-1., -1.]),
 'split1_test_score': array([-1., -1.]),
 'split1_train_score': array([-1., -1.]),
 'split2_test_score': array([-1., -1.]),
 'split2_train_score': array([-1., -1.]),
 'std_fit_time': array([ 0.00047199,  0.00047266]),
 'std_score_time': array([ 0.00047553,  0.00047266]),
 'std_test_score': array([ 0.,  0.]),
 'std_train_score': array([ 0.,  0.])}