In [118]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score

In [119]:
class LasVegasWrapper:
    def __init__(self,estimator,n_iter=100,max_features=None):
        self.estimator = estimator
        self.n_iter = n_iter
        self.max_features = max_features
        
    def fit(self,X,y):
        n_samples, n_features = X.shape
        if self.max_features is None:
            self.max_features = n_features
        best_score = 0
        best_features = None
        for _ in range(self.n_iter):
            subset = np.random.choice(n_features,self.max_features,replace=False)
            X_subset = X[:,subset]
            scores = cross_val_score(self.estimator,X_subset,y,cv=5)
            score = np.mean(scores)
            if score > best_score:
                best_score = score
                best_features = subset
        self.best_score = best_score
        self.best_subset = best_features
        
    def transform(self,X):
        return X[:,self.best_subset]

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


In [3]:
data = load_iris()
X,y = data.data,data.target
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [122]:
estimator = DecisionTreeClassifier()
lv_wrapper = LasVegasWrapper(estimator,n_iter=50,max_features=4)
lv_wrapper.fit(X_train,y_train)


In [123]:
X_train_best = lv_wrapper.transform(X_train)
X_test_best = lv_wrapper.transform(X_test)
estimator.fit(X_train_best,y_train)
print("测试集评分：", estimator.score(X_test_best, y_test))
print("最优特征子集索引：", lv_wrapper.best_subset)

测试集评分： 1.0
最优特征子集索引： [0 3 2 1]


In [4]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# 初始化逻辑回归模型
model = LogisticRegression()

# 使用递归特征消除法选择前5个最佳特征
rfe = RFE(estimator=model, n_features_to_select=5)
X_rfe = rfe.fit_transform(X, y)

# 打印所选择的特征
print(rfe.support_)


[ True  True  True  True]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
