# In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from eli5.sklearn import PermutationImportance
import itertools
from lightgbm import LGBMClassifier, LGBMRegressor
import warnings
warnings.simplefilter('ignore')
warnings.filterwarnings("ignore", category=UserWarning, module="lightgbm")
import tqdm
# Load the training table: 'sample_id' is dropped and 'label' is the target.
data = pd.read_csv("C:/Users/nigel/Desktop/中兴/Data/train.csv").drop(columns=['sample_id'])
X = data.drop(columns=['label'])
y = data.label

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Define the initial features (in practice you may want to choose these
# according to the specific problem)
initial_features = X_train.columns[:10]

# Enumerate every pair of the remaining features
other_features = X_train.columns[10:]
subsets = list(itertools.combinations(other_features, 2))
# Shuffle the order in which the pairs are visited.  A dedicated, seeded
# generator is used so the visiting order is reproducible (matching the
# random_state=42 used for the split) without touching NumPy's global RNG
# state.
np.random.RandomState(42).shuffle(subsets)

def lgb_model(x_train, y_train, x_test, y_test):
    """Fit a LightGBM multiclass classifier with early stopping.

    Args:
        x_train: Training feature matrix.
        y_train: Training labels.
        x_test: Feature matrix of the evaluation split passed as ``eval_set``.
        y_test: Labels of the evaluation split passed as ``eval_set``.

    Returns:
        The fitted ``LGBMClassifier`` instance.
    """
    classifier = LGBMClassifier(
        n_estimators=10000,
        early_stopping_round=100,
        bagging_freq=5,
        bagging_fraction=0.9,
        boost_from_average='false',
        boosting_type='gbdt',
        feature_fraction=1.0,
        learning_rate=0.05,
        max_depth=-1,
        min_data_in_leaf=10,
        min_sum_hessian_in_leaf=10.0,
        num_leaves=4,
        n_jobs=12,
        tree_learner='serial',
        # Multi-class objective; for a two-class target use 'binary' instead.
        objective='multiclass',
        num_classes=6,
        verbose=-1,
    )

    # The evaluation split drives early stopping, scored with the 'auc_mu'
    # metric.
    # NOTE(review): the `verbose` argument of fit() was removed in newer
    # LightGBM releases -- confirm against the installed version.
    classifier.fit(
        x_train,
        y_train,
        eval_set=(x_test, y_test),
        verbose=-100,
        eval_metric='auc_mu',
    )
    return classifier

# Greedy pass over the candidate feature pairs: add a pair to the currently
# kept features, refit the model, then drop every feature whose permutation
# importance on the test split is negative.
for subset in tqdm.tqdm(subsets):
    # Merge the candidate pair into the kept features.  dict.fromkeys
    # de-duplicates while preserving order; a set union would order string
    # column names differently from one process to the next (hash
    # randomisation), changing the column order seen by the model and by
    # PermutationImportance and so defeating random_state=42.
    current_features = list(dict.fromkeys(itertools.chain(initial_features, subset)))

    print(f"current_features:{current_features}")
    X_train_subset = X_train[current_features]
    X_test_subset = X_test[current_features]

    # Train the model
    model = lgb_model(X_train_subset, y_train, X_test_subset, y_test)

    # Compute permutation importance on the test split
    perm = PermutationImportance(model, random_state=42).fit(X_test_subset, y_test)

    # Drop the features whose mean importance is negative
    mean_importances = perm.feature_importances_
    negative_mask = mean_importances < 0
    negative_features = np.array(current_features)[negative_mask]
    print(f"negative_features{negative_features}")
    # Filter with the boolean mask itself rather than testing membership in
    # a NumPy array; the survivors seed the next iteration.
    initial_features = [
        feature
        for feature, is_negative in zip(current_features, negative_mask)
        if not is_negative
    ]
    print(f"initial_features{initial_features}")

# # Train the final model using the selected features
# X_train_final = X_train[initial_features]
# X_test_final = X_test[initial_features]
# model.fit(X_train_final, y_train)
